# Word2vec in gensim

Many pretrained word vector models are listed in the [word2vec-api repository](https://github.com/3Top/word2vec-api).

In [None]:
from gensim.models import KeyedVectors

In [None]:
# Load pretrained vectors from a binary word2vec-format file.
# ``limit=100000`` is forwarded to gensim; per its docs this caps how many
# vectors are read from the file.
model = KeyedVectors.load_word2vec_format(
    "../GoogleNews-vectors-negative300.bin",
    binary=True,
    limit=100000,
)

### Get a word vector

In [None]:
# Index the model by word to fetch its vector; being the last expression in
# the cell, the result is displayed.
model["boy"]

### Combine word vectors

In [None]:
# Add the vectors of two words (presumably an element-wise sum of NumPy
# arrays -- confirm against the loaded model).
model["boy"] + model["girl"]

### Find the most similar words

In [None]:
# Ask the model for the words most similar to "boy".
model.most_similar("boy")

In [None]:
# Same query for "good".
model.most_similar("good")

In [None]:
# Same query for "bad", for comparison with its antonym "good".
model.most_similar("bad")

In [None]:
# Similarity query that starts from a raw vector (here the one stored for
# "boy") instead of a word.
model.similar_by_vector(model["boy"])

In [None]:
# Similarity query keyed directly by the word "boy".
model.similar_by_word("boy")

### Combine word vectors and find similar

In [None]:
# Analogy-style query: "king" and "woman" are passed as positive terms and
# "man" as a negative term (roughly king - man + woman).
model.most_similar(positive=["king", "woman"], negative=["man"])

In [None]:
# Analogy-style query: "France" and "London" positive, "Paris" negative
# (roughly France - Paris + London).
model.most_similar(positive=["France", "London"], negative=["Paris"])

In [None]:
# Analogy-style query: "winter" and "sun" positive, "snow" negative
# (roughly winter - snow + sun).
model.most_similar(positive=["winter", "sun"], negative=["snow"])

### Gensim API

In [None]:
# gensim 4.0 replaced ``index2word`` with ``index_to_key`` (the old attribute
# now raises AttributeError); pick whichever exists so the cell runs on both
# 3.x and 4.x.
vocabulary = (
    model.index_to_key if hasattr(model, "index_to_key") else model.index2word
)
# Word stored at index 0 of the vocabulary list.
vocabulary[0]

In [None]:
# gensim 4.0 replaced ``index2word`` with ``index_to_key`` (the old attribute
# now raises AttributeError); pick whichever exists so the cell runs on both
# 3.x and 4.x.
vocabulary = (
    model.index_to_key if hasattr(model, "index_to_key") else model.index2word
)
# Word stored at index 1 of the vocabulary list.
vocabulary[1]

In [None]:
# gensim 4.0 replaced ``index2word`` with ``index_to_key`` (the old attribute
# now raises AttributeError); pick whichever exists so the cell runs on both
# 3.x and 4.x.
vocabulary = (
    model.index_to_key if hasattr(model, "index_to_key") else model.index2word
)
# Word stored at index 2 of the vocabulary list.
vocabulary[2]

In [None]:
# gensim 4.0 replaced ``index2word`` with ``index_to_key`` (the old attribute
# now raises AttributeError); pick whichever exists so the cell runs on both
# 3.x and 4.x.
vocabulary = (
    model.index_to_key if hasattr(model, "index_to_key") else model.index2word
)
# Container type of the vocabulary and the number of words it holds.
type(vocabulary), len(vocabulary)

In [None]:
# Display the model's ``vector_size`` attribute (presumably the number of
# dimensions of each word vector).
model.vector_size

### Visualization

In [None]:
from MulticoreTSNE import MulticoreTSNE as TSNE

In [None]:
# Shape of the array holding all loaded word vectors.
model.vectors.shape

In [None]:
# Fit t-SNE on the first 10,000 rows of the vector array only. The number of
# output dimensions is left at the library default; the plotting cell reads
# columns 0 and 1 of the result.
tsne = TSNE(
    perplexity=10,
    n_jobs=10,
)
transformed_vectors = tsne.fit_transform(
    model.vectors[:10000]
)

In [None]:
import plotly.graph_objs as go
import plotly.offline as py

# Enable offline plotly rendering inside the notebook. ``connected=True`` is
# passed through to plotly (per its docs, plotly.js is then loaded from a CDN
# instead of being embedded in the notebook).
py.init_notebook_mode(connected=True)

In [None]:
# Scatter-plot the first ``n_points`` rows of the t-SNE output, using the
# corresponding vocabulary entries as per-point text labels.
n_points = 500

# gensim 4.0 replaced ``index2word`` with ``index_to_key`` (the old attribute
# now raises AttributeError); pick whichever exists so the cell runs on both
# 3.x and 4.x.
vocabulary = (
    model.index_to_key if hasattr(model, "index_to_key") else model.index2word
)

# Assumes row i of ``transformed_vectors`` corresponds to ``vocabulary[i]``,
# i.e. the t-SNE input kept the model's vector order.
trace = go.Scatter(
    x=transformed_vectors[:n_points, 0],
    y=transformed_vectors[:n_points, 1],
    text=vocabulary[:n_points],
    mode='markers',
    showlegend=False,
    marker=dict(
        size=8,
        colorscale='Jet',
        showscale=False,
        # White outline around each marker.
        line=dict(
            width=2,
            color='rgb(255, 255, 255)',
        ),
        opacity=0.8,
    ),
)
data = [trace]

layout = dict(
    title="Word2vec",
    hovermode='closest',
    yaxis=dict(zeroline=False),
    xaxis=dict(zeroline=False),
    showlegend=True,
)

fig = dict(data=data, layout=layout)
py.iplot(fig, filename='styled-scatter')