In [None]:
from gensim.models import Word2Vec
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Visualize Word2Vec embeddings of a single sentence in 2-D.
#
# Pipeline: tokenize -> train a tiny Word2Vec model -> look up one vector per
# distinct word -> PCA (denoise / compress) -> t-SNE (2-D layout) -> scatter
# plot with each point labelled by its word.

# Sample sentence
sentence = "The transformer model processes words in parallel and uses self-attention to find relationships"
tokens = sentence.lower().split()

# Train a Word2Vec model.
# A fixed seed together with a single worker thread keeps training
# repeatable; with several workers the thread scheduling order perturbs the
# result, so seeding t-SNE alone would not make the figure reproducible.
model = Word2Vec(
    [tokens],
    vector_size=50,
    window=5,
    min_count=1,
    workers=1,
    seed=42,
)

# Get word embeddings.
# Plot each distinct word once (first-occurrence order) so that a repeated
# word does not yield duplicate points with overlapping labels.
words = list(dict.fromkeys(tokens))  # Keeping words for labeling
word_vectors = np.array([model.wv[word] for word in words])

# Reduce dimensions (PCA -> t-SNE for better visualization).
# PCA cannot return more components than min(n_samples, n_features), and
# t-SNE requires perplexity < n_samples, so both settings are capped by the
# data size instead of being hard-coded. For sentences with at least 10
# distinct words the values are the same as before (10 and 5).
# NOTE: t-SNE still needs several distinct words to produce a useful layout.
n_samples, n_features = word_vectors.shape
pca = PCA(n_components=min(10, n_samples, n_features))
word_vectors_pca = pca.fit_transform(word_vectors)

tsne = TSNE(n_components=2, perplexity=min(5, n_samples - 1), random_state=42)
word_vectors_2d = tsne.fit_transform(word_vectors_pca)

# Plot the embeddings
plt.figure(figsize=(8, 6))
plt.scatter(word_vectors_2d[:, 0], word_vectors_2d[:, 1], marker='o', color='blue')

# Annotate words.
# The label offset is given in screen points rather than data units: the
# scale of t-SNE coordinates is arbitrary, so a fixed data-space shift may be
# invisible and leave the text sitting on top of the marker.
for word, (x, y) in zip(words, word_vectors_2d):
    plt.annotate(
        word,
        (x, y),
        xytext=(5, 5),
        textcoords="offset points",
        fontsize=12,
    )

plt.title("Word Embeddings Visualization")
plt.xlabel("Dimension 1")
plt.ylabel("Dimension 2")
plt.grid(True)
plt.show()
