In [1]:
# Author: Robert Guthrie

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Seed PyTorch's global random number generator so that the randomly
# initialised layers and torch.randn() samples below are repeatable.
# As the last expression of the cell, the returned Generator is displayed.
torch.manual_seed(1)

<torch._C.Generator at 0x7f291409ef70>

In [4]:
# Affine map from 5 input features to 3 output features (A x + b).
# The layer is created before sampling so the RNG is consumed in the same order.
lin = nn.Linear(5, 3)

# A batch of two random 5-dimensional rows.
data = torch.randn(2, 5)

# Show, one per line: the raw input, its affine image, and its element-wise
# ReLU (negative entries clamped to zero).
print(data, lin(data), F.relu(data), sep="\n")

tensor([[-0.4145,  0.8561, -0.4429,  1.4079, -0.0336],
        [ 1.0339,  0.8779, -1.3920, -0.4033,  1.7280]])
tensor([[ 0.2980,  0.6653,  0.9296],
        [-0.0303, -0.6826,  0.2214]])
tensor([[ 0.0000,  0.8561,  0.0000,  1.4079,  0.0000],
        [ 1.0339,  0.8779,  0.0000,  0.0000,  1.7280]])


In [5]:
# A single random 5-dimensional vector.
data = torch.randn(5)

# Show, one per line: the vector, its softmax over dim 0 (a probability
# distribution), the sum of that distribution, and the log-softmax.
print(
    data,
    F.softmax(data, dim=0),
    F.softmax(data, dim=0).sum(),
    F.log_softmax(data, dim=0),
    sep="\n",
)

tensor([-2.1355, -0.0016,  1.0415,  0.8091,  0.5292])
tensor([ 0.0150,  0.1265,  0.3589,  0.2845,  0.2151])
tensor(1.)
tensor([-4.2016, -2.0676, -1.0246, -1.2569, -1.5368])


## Sample Network

In [28]:
# Training set: each entry is (list of whitespace-separated tokens, language label).
data = [
    (text.split(), language)
    for text, language in [
        ("me gusta comer en la cafeteria", "SPANISH"),
        ("Give it to me", "ENGLISH"),
        ("No creo que sea una buena idea", "SPANISH"),
        ("No it is not a good idea to get lost at sea", "ENGLISH"),
    ]
]

# Held-out sentences, in the same (tokens, label) format as the training set.
test_data = [
    (text.split(), language)
    for text, language in [
        ("Yo creo que si", "SPANISH"),
        ("it is lost on me", "ENGLISH"),
    ]
]

print(data)

[(['me', 'gusta', 'comer', 'en', 'la', 'cafeteria'], 'SPANISH'), (['Give', 'it', 'to', 'me'], 'ENGLISH'), (['No', 'creo', 'que', 'sea', 'una', 'buena', 'idea'], 'SPANISH'), (['No', 'it', 'is', 'not', 'a', 'good', 'idea', 'to', 'get', 'lost', 'at', 'sea'], 'ENGLISH')]


In [7]:
# Build the vocabulary: word_to_ix gives every distinct word (from both the
# training and the test sentences) a unique integer, assigned in order of first
# appearance. That integer is the word's position in the bag-of-words vector.
word_to_ix = {}
for sent, _ in data + test_data:
    for word in sent:
        # setdefault leaves words that already have an index untouched; a new
        # word receives the next free index, i.e. the current vocabulary size.
        word_to_ix.setdefault(word, len(word_to_ix))
print(word_to_ix)

# Dimensions of the classifier: two target languages, one input per vocab word.
NUM_LABELS = 2
VOCAB_SIZE = len(word_to_ix)

{'good': 19, 'a': 18, 'Give': 6, 'sea': 12, 'lost': 21, 'que': 11, 'comer': 2, 'get': 20, 'una': 13, 'buena': 14, 'No': 9, 'not': 17, 'Yo': 23, 'si': 24, 'en': 3, 'at': 22, 'gusta': 1, 'creo': 10, 'idea': 15, 'is': 16, 'it': 7, 'cafeteria': 5, 'la': 4, 'on': 25, 'me': 0, 'to': 8}


In [26]:
class BoWClassifier(nn.Module):
    """Bag-of-words classifier: one affine layer followed by a log-softmax."""

    def __init__(self, num_labels, vocab_size):
        """Create the affine layer.

        Args:
            num_labels: number of output classes (size of the output vector).
            vocab_size: length of the bag-of-words input vector.
        """
        super().__init__()
        self.linear = nn.Linear(vocab_size, num_labels)

    def forward(self, bow_vec):
        """Return log-probabilities over the labels for ``bow_vec``.

        Args:
            bow_vec: tensor whose last dimension has ``vocab_size`` entries.
                The softmax is taken over dim 1, so a 2-D input such as the
                ``(1, vocab_size)`` row built by ``make_bow_vector`` is expected.

        Returns:
            Tensor of log-probabilities, normalised along dim 1.
        """
        scores = self.linear(bow_vec)
        return F.log_softmax(scores, dim=1)
    
def make_bow_vector(sentence, word_to_ix):
    """Turn a tokenised sentence into a bag-of-words count vector.

    Args:
        sentence: iterable of words; every word must be a key of ``word_to_ix``
            (an unknown word raises ``KeyError``).
        word_to_ix: mapping from word to its index in the vector.

    Returns:
        Float tensor of shape ``(1, len(word_to_ix))`` in which entry ``i`` is
        the number of occurrences of the word with index ``i``.
    """
    counts = torch.zeros(len(word_to_ix))
    for token in sentence:
        slot = word_to_ix[token]
        counts[slot] += 1
    # Reshape to a single-row batch, the layout the classifier consumes.
    return counts.view(1, -1)

def make_target(label, label_to_ix):
    """Wrap the class index of ``label`` in a one-element integer tensor.

    Args:
        label: label name; must be a key of ``label_to_ix``.
        label_to_ix: mapping from label name to integer class index.

    Returns:
        ``torch.LongTensor`` of shape ``(1,)`` holding the class index.
    """
    class_index = label_to_ix[label]
    return torch.LongTensor([class_index])

# Instantiate the classifier with freshly (randomly) initialised parameters.
model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)

# Inspect every learnable parameter of the model: the linear layer's weight
# matrix and its bias vector.
for param in model.parameters():
    print(param)

# Run the untrained model on the first training example.
sample = data[0]
print(sample)
with torch.no_grad():
    # No gradients are needed for a pure forward pass.
    bow_vector = make_bow_vector(sample[0], word_to_ix)
    log_probs = model(bow_vector)
print(log_probs)

Parameter containing:
tensor([[ 0.1905, -0.0700,  0.0290, -0.1010, -0.1255, -0.0993, -0.0934,
          0.0372, -0.0225,  0.0138, -0.1503, -0.1232, -0.1698, -0.1410,
          0.1642,  0.0729, -0.1870, -0.0327,  0.0157,  0.1048,  0.1819,
          0.0474,  0.1382, -0.0698, -0.0064, -0.1184],
        [ 0.0452,  0.1810,  0.1232,  0.1952, -0.0884, -0.0245,  0.0405,
          0.1577, -0.1803, -0.0963,  0.0284, -0.1134,  0.0733, -0.0134,
         -0.0508,  0.1052,  0.1402,  0.1908,  0.0888, -0.0054,  0.1603,
          0.1561, -0.0983,  0.0893,  0.0230, -0.0848]])
Parameter containing:
tensor(1.00000e-02 *
       [-4.6704, -4.6309])
(['me', 'gusta', 'comer', 'en', 'la', 'cafeteria'], 'SPANISH')
tensor([[-1.0429, -0.4345]])


In [31]:
# Baseline: log-probabilities the model assigns to each test sentence
# BEFORE any training has happened.
with torch.no_grad():
    for instance, label in test_data:
        bow_vec = make_bow_vector(instance, word_to_ix)
        log_probs = model(bow_vec)
        print(log_probs)

# Print the matrix column corresponding to "creo"
# The message is wrapped in single quotes so that the inner double quotes are
# part of the text; nesting unescaped double quotes inside a double-quoted
# literal is a SyntaxError that prevents the whole cell from running.
print('Print the matrix column corresponding to "creo"')
# The first parameter yielded by the model is the linear layer's weight matrix
# (one row per label, one column per vocabulary word).
print(next(model.parameters())[:, word_to_ix["creo"]])

# Class indices: SPANISH -> 0, ENGLISH -> 1.
label_to_ix = {name: index for index, name in enumerate(("SPANISH", "ENGLISH"))}

# Negative log-likelihood pairs with the log-softmax output of the model;
# plain SGD updates every model parameter with a learning rate of 0.1.
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Training normally makes several passes over the data. 100 passes is far more
# than a real data set would need, but this toy corpus is tiny; somewhere
# between 5 and 30 epochs is usually reasonable.
for epoch in range(100):
    for instance, label in data:
        # Encode the example: a bag-of-words input vector, and the target
        # wrapped in a tensor as an integer class index. For SPANISH that index
        # is 0, which tells the loss that element 0 of the log-probabilities is
        # the one belonging to the correct class.
        bow_vec = make_bow_vector(instance, word_to_ix)
        target = make_target(label, label_to_ix)

        # PyTorch accumulates gradients across backward() calls, so they must
        # be cleared before each example is processed.
        model.zero_grad()

        # Forward pass, then the loss for this single example.
        log_probs = model(bow_vec)
        loss = loss_function(log_probs, target)

        # Backpropagate and apply one optimisation step.
        loss.backward()
        optimizer.step()

# Re-score the test sentences now that the model has been trained.
for instance, label in test_data:
    with torch.no_grad():
        bow_vec = make_bow_vector(instance, word_to_ix)
        log_probs = model(bow_vec)
    print(log_probs)

# Index corresponding to Spanish goes up, English goes down!
# model.linear.weight is the first parameter the model yields, so this shows
# the same "creo" column that next(model.parameters()) would select.
print(model.linear.weight[:, word_to_ix["creo"]])
        

tensor([[-0.1798, -1.8044]])
tensor([[-2.9552, -0.0535]])
tensor([ 0.2866, -0.4084])
tensor([[-0.1408, -2.0298]])
tensor([[-3.3230, -0.0367]])
Inference
tensor([ 0.3475, -0.4694])
