In [None]:
# Install necessary libraries if not already installed
!pip install gensim matplotlib scikit-learn

import gensim.downloader as api
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import numpy as np
import gensim

In [None]:
# Load pre-trained GloVe word embeddings through gensim's downloader API.
# The loaded object is indexed by word in the cells below (e.g. model['man']).
model = api.load('glove-wiki-gigaword-100')  # 100-dimensional embeddings
# Alternative embedding set (swap with the line above to use it):
#model = api.load('word2vec-google-news-300')  # 300-dimensional embeddings

In [None]:
# Words to visualize, listed theme by theme so related words can be compared
# in the plots below
words = [
    # people and royalty
    'king', 'queen', 'man', 'woman', 'prince', 'princess',
    # fruit and produce
    'apple', 'orange', 'banana', 'grape', 'fruit', 'vegetable',
    # animals
    'cat', 'dog', 'pet', 'animal',
    # vehicles
    'car', 'truck', 'vehicle',
    # places
    'city', 'town', 'village', 'country', 'continent', 'planet',
]

In [None]:
# Example of a word embedding: display the vector stored for the word 'man'
model['man']

In [None]:
# Look up the embedding vector of each selected word
word_vectors = [model[token] for token in words]

# Project the embeddings onto their first two principal components
pca = PCA(n_components=2)
result = pca.fit_transform(word_vectors)

# Draw the 2-D projection as a scatter plot
fig, ax = plt.subplots(figsize=(12, 9))
ax.scatter(result[:, 0], result[:, 1])

# Label every point with its word, nudged slightly up and to the right
for token, (x, y) in zip(words, result):
    ax.annotate(token, xy=(x + 0.1, y + 0.1))

ax.set_title('PCA Visualization of Word Embeddings')
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
ax.grid(True)
plt.show()

In [None]:
# Stack the selected words' embeddings into one NumPy array (one row per word)
word_vectors = np.array([model[token] for token in words])

# Embed the vectors into 2-D with t-SNE; the fixed random_state keeps the
# layout repeatable between runs
tsne = TSNE(n_components=2, random_state=0, perplexity=5)
result = tsne.fit_transform(word_vectors)

# Draw the 2-D embedding as a scatter plot
fig, ax = plt.subplots(figsize=(12, 9))
ax.scatter(result[:, 0], result[:, 1])

# Label every point with its word, nudged up and to the right of the marker
for token, (x, y) in zip(words, result):
    ax.annotate(token, xy=(x + 1, y + 1), fontsize=12)

ax.set_title('t-SNE Visualization of Word Embeddings')
ax.set_xlabel('Dimension 1')
ax.set_ylabel('Dimension 2')
ax.grid(True)
plt.show()



In [None]:
# Find the n words whose embeddings are most similar to a target word.
# The call returns a list of n (word, similarity score) entries.
model.most_similar(positive=['mit'], topn=10)

In [None]:
# Word analogy example: king is to man as what is to woman?
# Fetch the three embeddings involved in the analogy in one step.
king, man, woman = (model[w] for w in ('king', 'man', 'woman'))

# Analogy vector: start from 'king', subtract 'man', then add 'woman'
result = (king - man) + woman

In [None]:
# function to compute cosine similarity
def cosine_similarity(v1, v2):
    """Return the cosine of the angle between two vectors.

    Computed as dot(v1, v2) / (||v1|| * ||v2||). The value lies in [-1, 1]:
    1 for vectors pointing the same way, 0 for orthogonal vectors and -1 for
    opposite vectors.

    Args:
        v1: 1-D array-like of numbers.
        v2: 1-D array-like of numbers with the same length as ``v1``.

    Returns:
        The cosine similarity as a Python float. If either vector has zero
        length the angle is undefined and 0.0 is returned instead of NaN.

    Raises:
        ValueError: if the two vectors do not have the same length.
    """
    v1 = np.asarray(v1, dtype=float)
    v2 = np.asarray(v2, dtype=float)

    denominator = np.linalg.norm(v1) * np.linalg.norm(v2)
    if denominator == 0:
        # A zero vector has no direction; avoid dividing by zero.
        return 0.0
    return float(np.dot(v1, v2) / denominator)


In [None]:
# NOTE: `result` must hold the analogy vector (king - man + woman). The plotting
# cells reuse the name `result` for 2-D coordinates, so run the analogy cell
# right before this one.

# Get all words in the model's vocabulary, leaving out the three words that
# were used to build the analogy vector
query_words = {'king', 'man', 'woman'}
allwords = [w for w in model.index_to_key if w not in query_words]

# Calculate cosine similarity between the result vector and each word in the vocabulary
# (one vectorized pass: dot products divided by the product of the vector norms)
candidate_vectors = np.array([model[w] for w in allwords])
norms = np.linalg.norm(candidate_vectors, axis=1) * np.linalg.norm(result)
norms[norms == 0] = np.inf  # a zero vector has no direction: score it 0 rather than NaN
similarities = (candidate_vectors @ result) / norms

# Ranking (Sort) the result based on the similarity score, best match first
ranking = np.argsort(similarities)[::-1]

# Print the top 5 most similar words
for idx in ranking[:5]:
    print(f'{allwords[idx]}: {similarities[idx]:.4f}')