In [66]:
import torch
import torch.nn as nn
import numpy as np

torch.manual_seed(1)

def make_context_vector(context, word_to_ix):
    idxs = [word_to_ix[w] for w in context]
    return torch.tensor(idxs, dtype=torch.long)

CONTEXT_SIZE = 2  
EMDEDDING_DIM = 100

raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells."""

# 预处理
raw_text = raw_text.lower().split()
# 去重
vocab = set(raw_text)
vocab_size = len(vocab)
print("词典大小：", vocab_size)

word_to_ix = {word:ix for ix, word in enumerate(vocab)}
ix_to_word = {ix:word for ix, word in enumerate(vocab)}

data = []
for i in range(2, len(raw_text) - 2):
    target = [raw_text[i - 2], raw_text[i - 1],
               raw_text[i + 1], raw_text[i + 2]]
    current = raw_text[i]
    data.append((current, target))

class SG(torch.nn.Module):
    def __init__(self, vocab_size, embedding_dim):
        super(SG, self).__init__()

        #out: 1 x emdedding_dim
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(embedding_dim, 128)
        
        #out: 1 x vocab_size
        self.linear2 = nn.Linear(128, 2 * CONTEXT_SIZE * vocab_size)
        self.activation_function = nn.LogSoftmax(dim = -1)
        

    def forward(self, x):
        embeds = self.embeddings(x).view(1,-1)
        out = self.linear1(embeds)
        out = self.linear2(out)
        out = self.activation_function(out).view(2 * CONTEXT_SIZE, -1)
        return out

    def get_word_emdedding(self, word):
        word = torch.tensor([word_to_ix[word]])
        return self.embeddings(word).view(1,-1)


model = SG(vocab_size, EMDEDDING_DIM)

loss_function = nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

#TRAINING
for epoch in range(100):
    total_loss = 0

    for context, target in data:
        target_vector = make_context_vector(target, word_to_ix)  

        log_probs = model(torch.tensor(word_to_ix[context], dtype=torch.long))

        total_loss += loss_function(log_probs, target_vector)

    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

#TESTING
context = 'programs'.lower()
a = model(torch.tensor([word_to_ix[context]]))
print(a.shape)
print("current word:", context)
with torch.no_grad():
  print(a)
  top_sorted_inds = np.argsort(a, axis=1)[:,-1]
  print(top_sorted_inds)
  print(f'prediction word: { [ix_to_word[each.item()] for each in top_sorted_inds]}')

词典大小： 46
torch.Size([4, 46])
current word: programs
tensor([[-5.1347, -5.3645, -5.5516, -5.4489, -5.4821, -4.8878, -4.8304, -5.6430,
         -5.4174, -5.6412, -4.9771, -5.5069, -6.4267, -5.1708, -4.9736, -5.4522,
         -5.5594, -4.8263, -5.9877, -5.3219, -6.0296, -5.8647, -5.5328, -4.8078,
         -4.8278, -5.4324, -4.5045, -5.3767, -5.6754, -6.4788, -5.2866, -5.9858,
         -5.6003, -6.4129, -5.4551, -5.5919, -5.1701, -6.3561, -5.2716, -5.1689,
         -4.7864, -3.5410, -5.5445, -5.6117, -5.0596, -5.5424],
        [-4.9652, -5.2716, -5.8952, -5.5290, -5.5970, -3.5152, -4.8213, -5.3860,
         -5.6386, -5.2781, -5.8112, -4.9606, -5.9130, -5.8385, -5.6059, -4.5330,
         -5.3929, -5.8497, -5.4240, -5.1187, -5.3182, -5.3124, -4.7617, -4.8988,
         -5.5571, -5.2416, -5.4553, -5.6438, -4.8204, -5.6555, -5.5503, -5.8562,
         -4.8738, -5.3060, -6.2866, -3.5540, -6.1209, -5.6004, -5.4876, -5.3045,
         -5.4194, -5.3682, -5.8631, -5.4061, -5.2984, -5.9024],
        [-