### We need Gensim to load our models

In [4]:
import os

import numpy as np
from gensim.models.wrappers import FastText

### Load the FastText model

- The path leads to a model that I downloaded from: https://fasttext.cc/docs/en/pretrained-vectors.html

In [2]:
# Location of the pretrained FastText binary (.bin). Set the FASTTEXT_MODEL_PATH
# environment variable to point at your own copy; when it is unset, the original
# author's local path is used so existing behaviour is unchanged.
FASTTEXT_MODEL_PATH = os.environ.get(
    "FASTTEXT_MODEL_PATH",
    r"C:\Users\Jacob Thompson\OneDrive\EnhanceIT\Vector Semantic Embedding\FastText_CommonCrawl_English.bin",
)

# NOTE(review): `load_fasttext_format` appears to be deprecated in newer gensim
# releases in favour of `gensim.models.fasttext.load_facebook_model` -- confirm
# against the installed gensim version before upgrading.
model = FastText.load_fasttext_format(FASTTEXT_MODEL_PATH)

#### This is the vector for water

We use `model.wv` to access the model's word-to-vector dictionary.

In [3]:
# Look up the embedding stored for the word "water"; as the last expression in
# the cell, the array is displayed as the cell output.
model.wv["water"]

array([-0.02764769,  0.10620716, -0.01843078,  0.07740073,  0.01232646,
        0.01454496,  0.1573341 ,  0.07692765, -0.02799442, -0.00236035,
        0.00649283,  0.00439246,  0.08124923,  0.07792549,  0.05345523,
       -0.01470464,  0.00044429,  0.08845089,  0.07179119, -0.03711618,
        0.13562647,  0.03201184, -0.08579989,  0.08018851,  0.00276714,
       -0.09432636, -0.01265868, -0.05588473,  0.04186226,  0.11035786,
        0.01416378,  0.02874554,  0.02892343,  0.02113707,  0.11138544,
        0.02110403,  0.03224842,  0.07664809,  0.01861649, -0.13109696,
       -0.04479764, -0.03965852,  0.03575751, -0.02198079, -0.12455519,
       -0.00994011, -0.00213207,  0.03578394,  0.03736476,  0.09824882,
        0.06353479, -0.06149272,  0.06794042, -0.01980706,  0.05419092,
        0.02825904,  0.08767466, -0.06995759, -0.10396952, -0.07568816,
       -0.05268468, -0.08930409, -0.03211305, -0.07188852,  0.02063234,
       -0.00593469,  0.04729244, -0.15446323, -0.09130683, -0.02

You'll see that each of these vectors has 300 dimensions, which is much more manageable than a one-hot encoding (whose length would equal the size of the vocabulary).

In [5]:
# Dimensionality of a single word embedding.
model.wv["water"].shape

(300,)

Let's try some math

In [13]:
# Word-analogy arithmetic: start from "king", subtract "man", add "woman",
# then list the vocabulary entries closest to the resulting vector.
# The query words are not filtered out of the results, so "king" itself can
# appear in the list.
out = (
    model.wv["king"]
    - model.wv["man"]
    + model.wv["woman"]
)
model.wv.similar_by_vector(out)

[('king', 0.7286674380302429),
 ('queen', 0.6542679071426392),
 ('kings', 0.5410281419754028),
 ('queen-mother', 0.5250692367553711),
 ('Queen', 0.5074419975280762),
 ('royal', 0.500452995300293),
 ('king-', 0.4945007264614105),
 ('queens', 0.49149513244628906),
 ('monarch', 0.4913707375526428),
 ('queenship', 0.48369985818862915)]

In [14]:
# Capital-city analogy: "London" minus "England" plus "Germany", followed by a
# nearest-neighbour lookup on the resulting vector.
out = (
    model.wv["London"]
    - model.wv["England"]
    + model.wv["Germany"]
)
model.wv.similar_by_vector(out)

[('Berlin', 0.7931755781173706),
 ('Munich', 0.7399019002914429),
 ('Germany', 0.7242324352264404),
 ('London', 0.6978325247764587),
 ('Frankfurt', 0.6862188577651978),
 ('Hamburg', 0.670927882194519),
 ('Duesseldorf', 0.6635984182357788),
 ('Dusseldorf', 0.6538292169570923),
 ('Leipzig', 0.6535604000091553),
 ('Franfurt', 0.6458690166473389)]

In [34]:
# Restaurant analogy: "Chipotle" minus "burrito" plus "burger", followed by a
# nearest-neighbour lookup on the resulting vector.
out = (
    model.wv["Chipotle"]
    - model.wv["burrito"]
    + model.wv["burger"]
)
model.wv.similar_by_vector(out)

[('burger', 0.7229743003845215),
 ('Burger', 0.674201250076294),
 ('Chipotle', 0.6666013598442078),
 ('burgers', 0.6554458141326904),
 ('Smashburger', 0.6461240649223328),
 ('BurgerFi', 0.6402727961540222),
 ('Burgers', 0.578823447227478),
 ('Fuddruckers', 0.5720731019973755),
 ('hamburger', 0.5714389085769653),
 ('Bareburger', 0.5682088732719421)]

In [60]:
# Same man -> woman offset as the king/queen example, applied to "Trump",
# followed by a nearest-neighbour lookup on the resulting vector.
out = (
    model.wv["Trump"]
    - model.wv["man"]
    + model.wv["woman"]
)
model.wv.similar_by_vector(out)

[('Trump', 0.7101290822029114),
 ('Trump.Trump', 0.5579900145530701),
 ('PEOTUS', 0.5473158359527588),
 ('Trumps', 0.5447920560836792),
 ('.Trump', 0.5193414688110352),
 ('it.Trump', 0.5128458142280579),
 ('Trump-related', 0.5020406246185303),
 ('Trump.The', 0.5000512599945068),
 ('Trump-', 0.4815754294395447),
 ('Ivanka', 0.4794865846633911)]

In [70]:
# Weighted blend: the full "programming" vector plus half of the "snake"
# vector, followed by a nearest-neighbour lookup on the resulting vector.
out = (
    model.wv["programming"]
    + 0.5 * model.wv["snake"]
)
model.wv.similar_by_vector(out)

[('snake', 0.806853711605072),
 ('snakes', 0.6734123826026917),
 ('programming', 0.6470806002616882),
 ('python', 0.6413437128067017),
 ('programing', 0.597991943359375),
 ('reptile', 0.5921888947486877),
 ('snake-', 0.5827924013137817),
 ('rattlesnake', 0.5799773931503296),
 ('cobra', 0.5570800304412842),
 ('scorpion', 0.5564751625061035)]

In [71]:
# Plain sum of the "Taylor" and "Kanye" vectors, followed by a
# nearest-neighbour lookup on the resulting vector.
out = (
    model.wv["Taylor"]
    + model.wv["Kanye"]
)
model.wv.similar_by_vector(out)

[('Kanye', 0.9208813905715942),
 ('KanYe', 0.7060033082962036),
 ('Kanyes', 0.6950237154960632),
 ('Jay-Z', 0.6784842014312744),
 ('Teyana', 0.6738912463188171),
 ('kanye', 0.6657601594924927),
 ('Kayne', 0.6561713814735413),
 ('Rihanna', 0.6509965062141418),
 ('Yeezus', 0.6507629156112671),
 ('Taylor', 0.6480417251586914)]

In [86]:
# Subtract the "military" vector from "America", followed by a
# nearest-neighbour lookup on the resulting vector.
out = (
    model.wv["America"]
    - model.wv["military"]
)
model.wv.similar_by_vector(out)

[('America', 0.6680967211723328),
 ('Amercia', 0.5242589712142944),
 ('America.The', 0.504277229309082),
 ('Americas', 0.4907873272895813),
 ('America.This', 0.4752237796783447),
 ('Amerca', 0.47177693247795105),
 ('America-', 0.46747326850891113),
 ('America.', 0.4628288745880127),
 ('America.So', 0.4596697986125946),
 ('America.But', 0.4589976370334625)]