# Skip-Gram
This is a word embedding technique under the Word2Vec method\
Here we use a shallow neural netword to create our word embeddings\
We feed in a single word to the NN to predict the surrouding words.

In [33]:
import torch
import torch.nn as nn
import torch.optim as optim

In [34]:
class SkipGram(nn.Module):
    def __init__(self, vocab_size, window_size, embedding_size):
        super(SkipGram, self).__init__()

        self.embeddings = nn.Embedding(vocab_size, embedding_size)
        self.linear = nn.Linear(embedding_size, vocab_size)

    def forward(self, target):
        target_embedding = self.embeddings(target)
        res = self.linear(target_embedding)
        return res

In [35]:
window_size = 2
doc = [
    "i am henry",
    "i like college",
    "do henry like college",
    "i am do i like college",
    "i do like henry",
    "do i like henry",
]
raw_text = " ".join(doc)
tokens = raw_text.split()
vocab = set(tokens)
vocab_size = len(vocab)

In [36]:
data = []
word_index = {word: i for i, word in enumerate(vocab)}
# onehot = {word: [0 for _ in vocab] for word in vocab}
# for i, word in enumerate(vocab):
#     onehot[word][i] = 1

for i in range(2, len(tokens) - 2):
    context = word_index[tokens[i]]
    for j in range(-window_size, window_size + 1):
        if j == 0:
            continue
        data.append((context, word_index[tokens[i + j]]))
print(data)

[(1, 5), (1, 4), (1, 5), (1, 2), (5, 4), (5, 1), (5, 2), (5, 3), (2, 1), (2, 5), (2, 3), (2, 0), (3, 5), (3, 2), (3, 0), (3, 1), (0, 2), (0, 3), (0, 1), (0, 2), (1, 3), (1, 0), (1, 2), (1, 3), (2, 0), (2, 1), (2, 3), (2, 5), (3, 1), (3, 2), (3, 5), (3, 4), (5, 2), (5, 3), (5, 4), (5, 0), (4, 3), (4, 5), (4, 0), (4, 5), (0, 5), (0, 4), (0, 5), (0, 2), (5, 4), (5, 0), (5, 2), (5, 3), (2, 0), (2, 5), (2, 3), (2, 5), (3, 5), (3, 2), (3, 5), (3, 0), (5, 2), (5, 3), (5, 0), (5, 2), (0, 3), (0, 5), (0, 2), (0, 1), (2, 5), (2, 0), (2, 1), (2, 0), (1, 0), (1, 2), (1, 0), (1, 5), (0, 2), (0, 1), (0, 5), (0, 2), (5, 1), (5, 0), (5, 2), (5, 1)]


In [37]:
embed_size = 10
learning_rate = 0.01
epochs = 1000

model = SkipGram(vocab_size, window_size, embed_size)
lossfn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

In [38]:
for epoch in range(epochs):
    total_loss = 0
    for context, target in data:
        optimizer.zero_grad()
        output = model(torch.tensor([context]))
        loss = lossfn(output, torch.tensor([target]))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    if epoch % 50 == 0:
        print(epoch, total_loss / len(data))

0 1.8511932469904422
50 1.545641028881073
100 1.5361952051520347
150 1.5330601558089256
200 1.5311076186597348
250 1.529517511278391
300 1.5280845649540424
350 1.5267456322908401
400 1.5254790008068084
450 1.524276676028967
500 1.5231351152062416
550 1.5220524609088897
600 1.5210270047187806
650 1.5200570434331895
700 1.5191406615078449
750 1.518275821954012
800 1.5174602322280406
850 1.5166916601359843
900 1.515967819094658
950 1.5152863003313541


In [39]:
word_to_lookup = "henry"
wi = word_index[word_to_lookup]
embedding = model.embeddings(torch.tensor([wi]))
print(f"Embedding for '{word_to_lookup}': {embedding.detach().numpy()}")

Embedding for 'henry': [[-1.7774011   1.436296   -0.60040134  1.001669    0.47328883  0.277007
  -1.6477649   1.3790239   0.685      -0.6399059 ]]
