In [2]:
## Deep Learning Building Blocks: Affine maps, non-linearities and objectives

# Affine Maps

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Fix the global RNG seed so the random weights and inputs below are repeatable.
torch.manual_seed(1)

# Affine map f(x) = Ax + b from 5 input features to 3 output features.
lin = nn.Linear(5, 3)
# Two input rows of 5 values each; the printed result has one 3-value row per input row.
data = torch.randn(2, 5)
print(lin(data))

tensor([[ 0.1755, -0.3268, -0.5069],
        [-0.6602,  0.2260,  0.1089]])


In [3]:
# Non-Linearities

# ReLU acts element-wise: negative entries become 0 and the remaining entries
# pass through unchanged, as the two printed tensors show.
data = torch.randn(2, 3)
print(data)
print(F.relu(data))

tensor([[-0.5404, -2.2102,  2.1130],
        [-0.0040,  1.3800, -1.3505]])
tensor([[ 0.0000,  0.0000,  2.1130],
        [ 0.0000,  1.3800,  0.0000]])


In [4]:
# Softmax and Probabilities

# Five random scores, turned into a probability distribution over axis 0.
data = torch.randn(5)
probs = F.softmax(data, dim=0)

print(data)
print(probs)
# The softmax entries add up to 1.
print(probs.sum())
# log_softmax returns the logarithm of those probabilities directly.
print(F.log_softmax(data, dim=0))

tensor([ 0.3455,  0.5046,  1.8213, -0.1814, -0.9515])
tensor([ 0.1349,  0.1582,  0.5903,  0.0797,  0.0369])
tensor(1.)
tensor([-2.0030, -1.8439, -0.5271, -2.5299, -3.2999])


In [5]:
## Creating Network Components in PyTorch

# Example: Logistic Regression Bag-of-Words classifier

# Training pairs: (list of tokens, language label).
data = [
    ("me gusta comer en la cafeteria".split(), "SPANISH"),
    ("Give it to me".split(), "ENGLISH"),
    ("No creo que sea una buena idea".split(), "SPANISH"),
    ("No it is not a good idea to get lost at sea".split(), "ENGLISH"),
]

# Held-out pairs in the same format.
test_data = [
    ("Yo creo que si".split(), "SPANISH"),
    ("it is lost on me".split(), "ENGLISH"),
]

# Give every distinct token the next free index, in order of first appearance
# across the training sentences followed by the test sentences.
word_to_ix = {}
for tokens, _ in data + test_data:
    for token in tokens:
        # setdefault only inserts when the token has not been seen yet.
        word_to_ix.setdefault(token, len(word_to_ix))
print(word_to_ix)

VOCAB_SIZE = len(word_to_ix)
NUM_LABELS = 2


class BoWClassifier(nn.Module):
    """Logistic-regression classifier over bag-of-words count vectors.

    A single affine layer maps a vocabulary-sized count vector to one score
    per label; ``forward`` returns the log-softmax of those scores.
    """

    def __init__(self, num_labels, vocab_size):
        """Create the affine layer.

        Args:
            num_labels: number of output classes.
            vocab_size: length of the bag-of-words input vector.
        """
        super(BoWClassifier, self).__init__()
        # One weight per (label, vocabulary word) pair plus one bias per label.
        self.linear = nn.Linear(vocab_size, num_labels)

    def forward(self, bow_vec):
        """Return log-probabilities over the labels for ``bow_vec``.

        Args:
            bow_vec: tensor whose last axis has length ``vocab_size``; any
                leading axes (e.g. a batch axis) are preserved.

        Returns:
            Tensor shaped like ``bow_vec`` with the last axis replaced by
            ``num_labels`` log-probabilities.
        """
        # Normalise over the last (label) axis rather than a hard-coded axis 1,
        # so an un-batched 1-D vector works as well as the usual (1, vocab) input.
        return F.log_softmax(self.linear(bow_vec), dim=-1)
    
    
def make_bow_vector(sentence, word_to_ix):
    """Count how often each vocabulary word occurs in ``sentence``.

    Args:
        sentence: iterable of words; each must be a key of ``word_to_ix``
            (a missing word raises ``KeyError``).
        word_to_ix: mapping from word to its column index.

    Returns:
        Tensor of shape ``(1, len(word_to_ix))`` holding the per-word counts.
    """
    counts = torch.zeros(len(word_to_ix))
    for token in sentence:
        column = word_to_ix[token]
        counts[column] += 1
    # Reshape the flat count vector into a single row.
    return counts.view(1, -1)


def make_target(label, label_to_ix):
    """Return the class index of ``label`` as a one-element LongTensor.

    ``label`` must be a key of ``label_to_ix``; otherwise ``KeyError`` is raised.
    """
    class_index = label_to_ix[label]
    return torch.LongTensor([class_index])


# Build the classifier with one output per label and one input per vocabulary word.
model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)

# The module exposes two parameters: the (num_labels x vocab_size) weight
# matrix of the linear layer, then its bias vector.
for param in model.parameters():
    print(param)
    
# Forward pass on the first training sentence without recording gradients.
# The model is still untrained, so the two log-probabilities are nearly equal.
with torch.no_grad():
    sample = data[0]
    bow_vector = make_bow_vector(sample[0], word_to_ix)
    log_probs = model(bow_vector)
    print(log_probs)

{'a': 18, 'idea': 15, 'at': 22, 'lost': 21, 'Give': 6, 'good': 19, 'cafeteria': 5, 'Yo': 23, 'si': 24, 'get': 20, 'to': 8, 'buena': 14, 'una': 13, 'not': 17, 'que': 11, 'la': 4, 'en': 3, 'is': 16, 'sea': 12, 'it': 7, 'comer': 2, 'gusta': 1, 'me': 0, 'creo': 10, 'No': 9, 'on': 25}
Parameter containing:
tensor([[ 0.1191,  0.1739, -0.1099, -0.0323, -0.0038,  0.0286, -0.1488,
         -0.1392,  0.1067, -0.0460,  0.0958,  0.0112,  0.0644,  0.0431,
          0.0713,  0.0972, -0.1816,  0.0987, -0.1379, -0.1480,  0.0119,
         -0.0334,  0.1152, -0.1136, -0.1743,  0.1427],
        [-0.0291,  0.1103,  0.0630, -0.1471,  0.0394,  0.0471, -0.1313,
         -0.0931,  0.0669,  0.0351, -0.0834, -0.0594,  0.1796, -0.0363,
          0.1106,  0.0849, -0.1268, -0.1668,  0.1882,  0.0102,  0.1344,
          0.0406,  0.0631,  0.1465,  0.1860, -0.1301]])
Parameter containing:
tensor([ 0.0245,  0.1464])
tensor([[-0.7082, -0.6783]])


In [6]:
# Class index for each language label; make_target looks labels up here.
label_to_ix = dict(SPANISH=0, ENGLISH=1)

In [7]:
def report_test_predictions(model, test_data, word_to_ix):
    """Print the model's log-probabilities for every test sentence.

    Afterwards print the weight column for the word "creo" (one weight per
    label), so the effect of training on a single word can be compared
    between calls.

    Args:
        model: classifier called on each bag-of-words vector.
        test_data: iterable of (sentence, label) pairs; labels are not used.
        word_to_ix: mapping from word to column index; must contain "creo".
    """
    with torch.no_grad():  # inference only, so no gradients are recorded
        for instance, _ in test_data:
            print(model(make_bow_vector(instance, word_to_ix)))

    # The first parameter is the linear layer's weight matrix; take the "creo" column.
    print(next(model.parameters())[:, word_to_ix["creo"]])


# Training hyperparameters for this toy problem.
NUM_EPOCHS = 100
LEARNING_RATE = 0.1

# Baseline report before any training.
report_test_predictions(model, test_data, word_to_ix)

# The model emits log-probabilities, so negative log-likelihood is the matching loss.
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

for epoch in range(NUM_EPOCHS):
    for instance, label in data:
        # Gradients accumulate across backward() calls, so clear them per instance.
        model.zero_grad()

        # Build the input vector and the integer class target.
        bow_vec = make_bow_vector(instance, word_to_ix)
        target = make_target(label, label_to_ix)

        # Forward pass, loss, backward pass, parameter update.
        log_probs = model(bow_vec)
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()

# Same report after training, for comparison with the baseline above.
report_test_predictions(model, test_data, word_to_ix)

tensor([[-0.9694, -0.4769]])
tensor([[-0.6330, -0.7571]])
tensor(1.00000e-02 *
       [ 9.5798, -8.3422])
tensor([[-0.2270, -1.5941]])
tensor([[-2.4246, -0.0927]])
tensor([ 0.5134, -0.5010])
