# Lab 6: Word Embeddings (Enhanced)

## Visualizing Word Vectors (PCA)
Since word vectors are high-dimensional (e.g., 50 dimensions), we can't visualize them directly. We use **PCA** (Principal Component Analysis) to project them down to 2D, keeping the two directions along which the vectors vary the most, so we can plot them.

In [None]:
%pip install gensim scikit-learn matplotlib

In [None]:
from gensim.models import Word2Vec
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Tiny toy corpus: each inner list is one already-tokenized "sentence".
sentences = [
    ['king', 'queen', 'prince', 'princess', 'man', 'woman'],
    ['king', 'power', 'throne', 'majesty'],
    ['queen', 'beauty', 'grace', 'majesty'],
    ['man', 'strong', 'work'],
    ['woman', 'smart', 'work']
] * 10 # Repeat to give model something to chew on

# Train with a fixed seed and a single worker thread so re-running this cell
# gives the same vectors (and hence the same plot). With several workers,
# thread scheduling makes the result vary from run to run.
# NOTE(review): gensim's docs say reproducibility across interpreter launches
# may also require setting PYTHONHASHSEED -- confirm if that matters here.
model = Word2Vec(
    sentences,
    min_count=1,     # keep every word, even ones that appear rarely
    vector_size=50,  # dimensionality of each word vector
    window=3,        # context window size
    sg=1,            # 1 = skip-gram training
    seed=42,
    workers=1,
)

def plot_words(model, words):
    """Plot the embeddings of ``words`` in 2D after reducing them with PCA.

    Args:
        model: Trained model exposing its word vectors as ``model.wv``
            (supports ``word in model.wv`` and ``model.wv[word]``).
        words: Iterable of words to plot. At least two are required because
            the vectors are projected onto two principal components.

    Raises:
        ValueError: If fewer than two words are given.
        KeyError: If any word is not in the model's vocabulary.
    """
    # Accept any iterable and keep label order aligned with the vector rows.
    words = list(words)

    # PCA(n_components=2) cannot be fitted on fewer than two samples; fail
    # here with a clear message instead of an error from inside scikit-learn.
    if len(words) < 2:
        raise ValueError(
            f"plot_words needs at least 2 words to project to 2D, "
            f"got {len(words)}"
        )

    # Report every out-of-vocabulary word at once, not just the first one.
    missing = [w for w in words if w not in model.wv]
    if missing:
        raise KeyError(f"words not in model vocabulary: {missing}")

    # Extract vectors
    vectors = [model.wv[w] for w in words]

    # Reduce to 2D
    pca = PCA(n_components=2)
    result = pca.fit_transform(vectors)

    # Plot
    plt.figure(figsize=(8, 6))
    plt.scatter(result[:, 0], result[:, 1])

    # Label each point with its word (row i of result belongs to words[i]).
    for i, word in enumerate(words):
        plt.annotate(word, xy=(result[i, 0], result[i, 1]))

    plt.grid(True)
    plt.show()

# Visualize a handful of vocabulary words in the PCA-reduced 2D space.
words_to_plot = "king queen man woman".split()
plot_words(model=model, words=words_to_plot)