In [1]:
pip install torch

Note: you may need to restart the kernel to use updated packages.


DEPRECATION: jupyter-server 2.0.0 has a non-standard dependency specifier jupyter-core!=~5.0,>=4.12. pip 24.0 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of jupyter-server or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063


In [1]:
import tensorflow as tf
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

In [2]:
torch.manual_seed(1)

<torch._C.Generator at 0x276334f83d0>

In [3]:
# Implementing CBOW model for the exercise given by a tutorial in pytorch.org/tutorials/beginner/nlp/word_embeddings_tutorial.html
context_size = 2  # {w_i-2 ... w_i ... w_i+2}
embedding_dim = 10

In [4]:
raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()


In [5]:
def make_context_vector(context, word_to_idx):
    idxs = [word_to_idx[w] for w in context]
    return torch.tensor(idxs, dtype=torch.long)

vocab = set(raw_text)
vocab_size = len(vocab)
word_to_idx = {word: i for i, word in enumerate(vocab)}
idx_to_word = {i: word for i, word in enumerate(vocab)}

data = []


In [6]:
for i in range(2, len(raw_text) - 2):
    context = [raw_text[i-2], raw_text[i-1],
               raw_text[i+1], raw_text[i+2]]
    target = raw_text[i]
    data.append((context, target))


In [7]:
class CBOW(nn.Module):
    
    def __init__(self, vocab_size, embedding_dim):
        super(CBOW, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.proj = nn.Linear(embedding_dim, 128)
        self.output = nn.Linear(128, vocab_size)
        
    def forward(self, inputs):
        embeds = sum(self.embeddings(inputs)).view(1, -1)
        out = F.relu(self.proj(embeds))
        out = self.output(out)
        nll_prob = F.log_softmax(out, dim=-1)
        return nll_prob


In [8]:
model = CBOW(vocab_size, embedding_dim)
optimizer = optim.SGD(model.parameters(), lr=0.001)
losses = []
loss_function = nn.NLLLoss()

In [9]:
for epoch in range(100):
    total_loss = 0
    for context, target in data:
        context_vector = make_context_vector(context, word_to_idx)
        
        # Remember PyTorch accumulates gradients; zero them out
        model.zero_grad()
        
        nll_prob = model(context_vector)
        loss = loss_function(nll_prob, Variable(torch.tensor([word_to_idx[target]])))

        # backpropagation
        loss.backward()
        # update the parameters
        optimizer.step()
        total_loss += loss.item()
    losses.append(total_loss)
    
print(losses)

[239.6652946472168, 234.54632878303528, 229.6411373615265, 224.9347264766693, 220.41772770881653, 216.08088946342468, 211.91495537757874, 207.91273832321167, 204.0630340576172, 200.35830783843994, 196.78840339183807, 193.34669864177704, 190.02271473407745, 186.80784857273102, 183.69445252418518, 180.67438316345215, 177.73886942863464, 174.88250494003296, 172.09957695007324, 169.3826471567154, 166.72495329380035, 164.12362611293793, 161.57426297664642, 159.07367157936096, 156.61933022737503, 154.2070912718773, 151.83610290288925, 149.50277471542358, 147.20486271381378, 144.94336569309235, 142.71611845493317, 140.5222275853157, 138.35894763469696, 136.22854536771774, 134.12878745794296, 132.05810642242432, 130.01956993341446, 128.00715965032578, 126.02572816610336, 124.0705561041832, 122.14357197284698, 120.24240964651108, 118.37052285671234, 116.52590346336365, 114.70748099684715, 112.91517791152, 111.14819213747978, 109.40512651205063, 107.6880858540535, 105.99504125118256, 104.3243944

In [10]:
# Let's see if our CBOW model works or not
print("*************************************************************************")
context = ['process.','Computational','are', 'abstract']
context_vector = make_context_vector(context, word_to_idx)
a = model(context_vector).data.numpy()
print('Raw text: {}\n'.format(' '.join(raw_text)))
print('Test Context: {}\n'.format(context))
max_idx = np.argmax(a)
print('Prediction: {}'.format(idx_to_word[max_idx]))

*************************************************************************
Raw text: We are about to study the idea of a computational process. Computational processes are abstract beings that inhabit computers. As they evolve, processes manipulate other abstract things called data. The evolution of a process is directed by a pattern of rules called a program. People create programs to direct processes. In effect, we conjure the spirits of the computer with our spells.

Test Context: ['process.', 'Computational', 'are', 'abstract']

Prediction: processes


In [11]:
context = ['processes','manipulate','abstract', 'things']
context_vector = make_context_vector(context, word_to_idx)
a = model(context_vector).data.numpy()
print('Raw text: {}\n'.format(' '.join(raw_text)))
print('Test Context: {}\n'.format(context))
max_idx = np.argmax(a)
print('Prediction: {}'.format(idx_to_word[max_idx]))

Raw text: We are about to study the idea of a computational process. Computational processes are abstract beings that inhabit computers. As they evolve, processes manipulate other abstract things called data. The evolution of a process is directed by a pattern of rules called a program. People create programs to direct processes. In effect, we conjure the spirits of the computer with our spells.

Test Context: ['processes', 'manipulate', 'abstract', 'things']

Prediction: other
