# WordNet

# ![WordNet](./dataset/wordnet.png)

In [1]:
# NLTK's WordNet corpus reader, aliased as `wn` for the rest of the notebook.
from nltk.corpus import wordnet as wn
# List every synset (set of synonymous lemmas) that contains the word 'motorcar'.
wn.synsets('motorcar')

[Synset('car.n.01')]

In [2]:
# An ambiguous word such as 'trunk' belongs to several synsets, one per sense.
wn.synsets('trunk')

[Synset('trunk.n.01'),
 Synset('trunk.n.02'),
 Synset('torso.n.01'),
 Synset('luggage_compartment.n.01'),
 Synset('proboscis.n.02')]

In [3]:
# The words (lemma names) that make up the synset car.n.01.
wn.synset('car.n.01').lemma_names()

['car', 'auto', 'automobile', 'machine', 'motorcar']

In [4]:
# Print the lemma names of each synset containing 'trunk', one synset per line.
for trunk_sense in wn.synsets('trunk'):
    sense_words = trunk_sense.lemma_names()
    print(sense_words)

['trunk', 'tree_trunk', 'bole']
['trunk']
['torso', 'trunk', 'body']
['luggage_compartment', 'automobile_trunk', 'trunk']
['proboscis', 'trunk']


In [5]:
# Look up the definition of the synset that 'motorcar' belongs to.
wn.synset('car.n.01').definition()

'a motor vehicle with four wheels; usually propelled by an internal combustion engine'

In [6]:
# Look up the definition of one synset that 'trunk' belongs to (trunk.n.01).
wn.synset('trunk.n.01').definition()

'the main stem of a tree; usually covered with bark; the bole is usually the part that is commercially useful for lumber'

In [7]:
# Hypernyms are the more general synsets directly above car.n.01, i.e. what a
# motorcar is a kind of. They are stored under their own name so they are not
# mistaken for hyponyms (the actual "types of motorcar").
motorcar = wn.synset('car.n.01')
motorcar_hypernyms = motorcar.hypernyms()
motorcar_hypernyms

[Synset('motor_vehicle.n.01')]

In [8]:
# Hyponyms are the more specific synsets directly below car.n.01,
# i.e. the different kinds of motorcar.
motorcar = wn.synset('car.n.01')
types_of_motorcar = motorcar.hyponyms()
types_of_motorcar

[Synset('ambulance.n.01'),
 Synset('beach_wagon.n.01'),
 Synset('bus.n.04'),
 Synset('cab.n.03'),
 Synset('compact.n.03'),
 Synset('convertible.n.01'),
 Synset('coupe.n.01'),
 Synset('cruiser.n.01'),
 Synset('electric.n.01'),
 Synset('gas_guzzler.n.01'),
 Synset('hardtop.n.01'),
 Synset('hatchback.n.01'),
 Synset('horseless_carriage.n.01'),
 Synset('hot_rod.n.01'),
 Synset('jeep.n.01'),
 Synset('limousine.n.01'),
 Synset('loaner.n.02'),
 Synset('minicar.n.01'),
 Synset('minivan.n.01'),
 Synset('model_t.n.01'),
 Synset('pace_car.n.01'),
 Synset('racer.n.02'),
 Synset('roadster.n.01'),
 Synset('sedan.n.01'),
 Synset('sport_utility.n.01'),
 Synset('sports_car.n.01'),
 Synset('stanley_steamer.n.01'),
 Synset('stock_car.n.01'),
 Synset('subcompact.n.01'),
 Synset('touring_car.n.01'),
 Synset('used-car.n.01')]

In [9]:
# After finding the hyponym synsets, pull the individual words out of each
# synset (word-centred view) and list them in sorted order.
sorted(
    word.name()
    for hyponym in types_of_motorcar
    for word in hyponym.lemmas()
)

['Model_T',
 'S.U.V.',
 'SUV',
 'Stanley_Steamer',
 'ambulance',
 'beach_waggon',
 'beach_wagon',
 'bus',
 'cab',
 'compact',
 'compact_car',
 'convertible',
 'coupe',
 'cruiser',
 'electric',
 'electric_automobile',
 'electric_car',
 'estate_car',
 'gas_guzzler',
 'hack',
 'hardtop',
 'hatchback',
 'heap',
 'horseless_carriage',
 'hot-rod',
 'hot_rod',
 'jalopy',
 'jeep',
 'landrover',
 'limo',
 'limousine',
 'loaner',
 'minicar',
 'minivan',
 'pace_car',
 'patrol_car',
 'phaeton',
 'police_car',
 'police_cruiser',
 'prowl_car',
 'race_car',
 'racer',
 'racing_car',
 'roadster',
 'runabout',
 'saloon',
 'secondhand_car',
 'sedan',
 'sport_car',
 'sport_utility',
 'sport_utility_vehicle',
 'sports_car',
 'squad_car',
 'station_waggon',
 'station_wagon',
 'stock_car',
 'subcompact',
 'subcompact_car',
 'taxi',
 'taxicab',
 'tourer',
 'touring_car',
 'two-seater',
 'used-car',
 'waggon',
 'wagon']

In [10]:
# 完整路徑（上位詞組再往上走）
motorcar = wn.synset('car.n.01')
motorcar.hypernym_paths()

[[Synset('entity.n.01'),
  Synset('physical_entity.n.01'),
  Synset('object.n.01'),
  Synset('whole.n.02'),
  Synset('artifact.n.01'),
  Synset('instrumentality.n.03'),
  Synset('container.n.01'),
  Synset('wheeled_vehicle.n.01'),
  Synset('self-propelled_vehicle.n.01'),
  Synset('motor_vehicle.n.01'),
  Synset('car.n.01')],
 [Synset('entity.n.01'),
  Synset('physical_entity.n.01'),
  Synset('object.n.01'),
  Synset('whole.n.02'),
  Synset('artifact.n.01'),
  Synset('instrumentality.n.03'),
  Synset('conveyance.n.03'),
  Synset('vehicle.n.01'),
  Synset('wheeled_vehicle.n.01'),
  Synset('self-propelled_vehicle.n.01'),
  Synset('motor_vehicle.n.01'),
  Synset('car.n.01')]]

In [11]:
# 直指頂端上位詞組
motorcar = wn.synset('car.n.01')
motorcar.root_hypernyms()

[Synset('entity.n.01')]

In [12]:
# Example using whales.
right = wn.synset("right_whale.n.01") 
minke = wn.synset("minke_whale.n.01")
# The lowest (most specific) hypernym synset shared by the right whale and the minke whale.
right.lowest_common_hypernyms(minke)

[Synset('baleen_whale.n.01')]

In [13]:
# Right whale vs. orca (killer whale): lowest shared hypernym synset.
orca = wn.synset("orca.n.01")
right.lowest_common_hypernyms(orca)

[Synset('whale.n.02')]

In [14]:
# Right whale vs. tortoise: lowest shared hypernym synset.
tortoise = wn.synset("tortoise.n.01")
right.lowest_common_hypernyms(tortoise)

[Synset('vertebrate.n.01')]

In [15]:
# Right whale vs. novel: lowest shared hypernym synset.
novel = wn.synset("novel.n.01")
right.lowest_common_hypernyms(novel)

[Synset('entity.n.01')]

In [16]:
# Count the hierarchy levels above each synset: min_depth() is the length of
# the shortest hypernym path up to the root, so the root itself reports 0.
for synset_name in (
    'baleen_whale.n.01',
    'whale.n.02',
    'vertebrate.n.01',
    'entity.n.01',
):
    print(wn.synset(synset_name).min_depth())

14
13
8
0


In [17]:
# Similarity based on the hypernym/hyponym structure: the closer the score is
# to 1, the more alike the two synsets' paths are.
# Compared against the right whale, in order: itself, minke whale, orca,
# tortoise, novel.
for other in (right, minke, orca, tortoise, novel):
    print(right.path_similarity(other))

1.0
0.25
0.16666666666666666
0.07692307692307693
0.043478260869565216
