In [36]:
import torch 
import torch.nn as nn 
import torch.nn.functional as F
import torch.optim as optim 


# Seed PyTorch's global random number generator so the random tensors and
# randomly initialised layers created in later cells are repeatable when the
# notebook is run top to bottom from a fresh kernel.
torch.manual_seed(1)

<torch._C.Generator at 0x1e72cb9ff90>

In [32]:
# nn.Linear(in_features, out_features) holds a weight matrix and a bias and
# applies an affine map to the last dimension of its input.
lin = nn.Linear(6 , 3)  # 6 input features -> 3 output features
print(lin)
# A batch of 2 rows with 6 features each; the layer output has shape (2, 3).
data = torch.randn(2 , 6)
print(lin(data))

Linear(in_features=6, out_features=3, bias=True)
tensor([[-0.3530,  0.3824,  0.1449],
        [ 0.3118, -1.1792, -1.1101]], grad_fn=<AddmmBackward0>)


In [22]:
# ReLU works element-wise: negative entries are replaced by 0 and
# non-negative entries pass through unchanged.
data = torch.randn(2, 2)
print(data)
print(F.relu(data))

tensor([[ 0.5848,  0.2149],
        [-0.4090, -0.1663]])
tensor([[0.5848, 0.2149],
        [0.0000, 0.0000]])


In [23]:
# Softmax is also in torch.nn.functional. `dim=0` normalises over the only
# axis of this 1-D tensor.
data = torch.randn(5)
print(data)
print(F.softmax(data, dim=0))
print(F.softmax(data, dim=0).sum())  # Sums to 1 because it is a distribution!
print(F.log_softmax(data, dim=0))  # There is also log_softmax: the log of the softmax values


tensor([ 0.6696,  0.1177, -0.3584,  0.6513, -1.0273])
tensor([0.3227, 0.1858, 0.1154, 0.3169, 0.0591])
tensor(1.)
tensor([-1.1310, -1.6829, -2.1590, -1.1493, -2.8279])


In [37]:
# Toy corpus: every example is a (list of tokens, language label) pair.
data = [
    ("me gusta comer en la cafeteria".split(), "SPANISH"),
    ("Give it to me".split(), "ENGLISH"),
    ("No creo que sea una buena idea".split(), "SPANISH"),
    ("No it is not a good idea to get lost at sea".split(), "ENGLISH"),
]

test_data = [
    ("Yo creo que si".split(), "SPANISH"),
    ("it is lost on me".split(), "ENGLISH"),
]

# word_to_ix gives every distinct token the next free integer, in order of
# first appearance across the training and test sentences. That integer is the
# token's position in the bag-of-words vector.
word_to_ix = {}
for sent, _ in data + test_data:  # `_` receives the label, which is not needed here
    for word in sent:
        # setdefault inserts only when the token is new, so an index that was
        # already assigned is never overwritten.
        word_to_ix.setdefault(word, len(word_to_ix))
print(word_to_ix)

NUM_LABELS = 2
VOCAB_SIZE = len(word_to_ix)


class BoWClassifier(nn.Module):
    """Bag-of-words classifier: a single linear layer followed by log-softmax.

    Args:
        num_labels: Number of output classes (size of the layer's output).
        vocab_size: Length of the bag-of-words input vector (size of the
            layer's input).
    """

    def __init__(self , num_labels , vocab_size):
        super(BoWClassifier , self).__init__()

        # Size the layer from the constructor arguments rather than from
        # module-level globals, so the class honours the sizes it is given and
        # works outside this notebook's global state.
        self.linear = nn.Linear(vocab_size , num_labels)

    def forward(self  , bow_vec):
        """Return log-probabilities over the labels for each input row.

        Args:
            bow_vec: Tensor with at least two dimensions whose last dimension
                is ``vocab_size``; callers in this notebook pass shape
                ``(1, vocab_size)``.

        Returns:
            Tensor of log-probabilities, normalised along dimension 1.
        """
        return F.log_softmax(self.linear(bow_vec) , dim = 1)
    
    

def make_bow_vector(sentence , word_to_ix):
    """Turn a tokenised sentence into a bag-of-words count vector.

    Args:
        sentence: Iterable of tokens; each one must be a key of ``word_to_ix``.
        word_to_ix: Mapping from token to its position in the vector.

    Returns:
        Float tensor of shape ``(1, len(word_to_ix))`` in which entry ``i``
        counts the occurrences of the token mapped to index ``i``.
    """
    counts = torch.zeros(len(word_to_ix))
    for index in (word_to_ix[token] for token in sentence):
        counts[index] += 1
    # Add a leading batch dimension of size 1.
    row_vector = counts.view(1 , -1)
    return row_vector

def make_target(label , label_to_ix):
    """Wrap the integer class index of ``label`` in a one-element LongTensor.

    Args:
        label: Key to look up in ``label_to_ix``.
        label_to_ix: Mapping from label to integer class index.

    Returns:
        ``torch.LongTensor`` of shape ``(1,)`` holding the class index.
    """
    class_index = label_to_ix[label]
    return torch.LongTensor([class_index])


# Build the classifier for this notebook's label count and vocabulary size.
model  = BoWClassifier(NUM_LABELS , VOCAB_SIZE)

# The model's only parameters belong to its linear layer: the weight matrix is
# printed first, then the bias vector.
for param in model.parameters():
    print(param)


# Sanity-check forward pass on the first training sentence. torch.no_grad()
# disables gradient tracking, which is not needed for inference.
with torch.no_grad():
    sample = data[0]
    bow_vector = make_bow_vector(sample[0] , word_to_ix)
    log_probs = model(bow_vector)
    print(log_probs)

{'me': 0, 'gusta': 1, 'comer': 2, 'en': 3, 'la': 4, 'cafeteria': 5, 'Give': 6, 'it': 7, 'to': 8, 'No': 9, 'creo': 10, 'que': 11, 'sea': 12, 'una': 13, 'buena': 14, 'idea': 15, 'is': 16, 'not': 17, 'a': 18, 'good': 19, 'get': 20, 'lost': 21, 'at': 22, 'Yo': 23, 'si': 24, 'on': 25}
Parameter containing:
tensor([[ 0.1011, -0.0866, -0.0380,  0.0921, -0.1846,  0.1176, -0.0403,  0.0998,
          0.0273, -0.0240,  0.0544,  0.0097,  0.0716, -0.0764, -0.0143, -0.0177,
          0.0284, -0.0008,  0.1714,  0.0610, -0.0730, -0.1184, -0.0329, -0.0846,
         -0.0628,  0.0094],
        [ 0.1169,  0.1066, -0.1917,  0.1216,  0.0548,  0.1860,  0.1294, -0.1787,
         -0.1865, -0.0946,  0.1722, -0.0327,  0.0839, -0.0911,  0.1924, -0.0830,
          0.1471,  0.0023, -0.1033,  0.1008, -0.1041,  0.0577, -0.0566, -0.0215,
         -0.1885, -0.0935]], requires_grad=True)
Parameter containing:
tensor([ 0.1064, -0.0477], requires_grad=True)
tensor([[-0.8195, -0.5810]])


In [38]:
# Map each language label to the integer class index used as the target, which
# is also the position of that label in the model's log-probability output.
label_to_ix = {"SPANISH": 0, "ENGLISH": 1}

In [39]:
# Run on test data before we train, just to see a before-and-after.
# Gradient tracking is switched off because this is inference only.
with torch.no_grad():
    for instance, label in test_data:
        bow_vec = make_bow_vector(instance, word_to_ix)
        log_probs = model(bow_vec)
        print(log_probs)

# Print the matrix column corresponding to "creo".
# next(model.parameters()) is the model's first parameter, the linear layer's
# weight matrix; the selected column holds one weight per label for that word.
print(next(model.parameters())[:, word_to_ix["creo"]])

tensor([[-0.6250, -0.7662]])
tensor([[-0.5870, -0.8119]])
tensor([0.0544, 0.1722], grad_fn=<SelectBackward0>)


In [40]:
# NLLLoss takes log-probabilities as input, which is what the model's
# log_softmax output provides. Plain SGD updates the model's parameters.
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Usually you want to pass over the training data several times.
# 100 is much bigger than on a real data set, but real datasets have far more
# than the handful of instances used here. Usually, somewhere between 5 and 30
# epochs is reasonable.
for epoch in range(100):
    for instance, label in data:
        # Step 1. Remember that PyTorch accumulates gradients.
        # We need to clear them out before each instance
        model.zero_grad()

        # Step 2. Make our BOW vector and also we must wrap the target in a
        # Tensor as an integer. For example, if the target is SPANISH, then
        # we wrap the integer 0. The loss function then knows that the 0th
        # element of the log probabilities is the log probability
        # corresponding to SPANISH
        bow_vec = make_bow_vector(instance, word_to_ix)
        target = make_target(label, label_to_ix)

        # Step 3. Run our forward pass.
        log_probs = model(bow_vec)

        # Step 4. Compute the loss, gradients, and update the parameters by
        # calling optimizer.step()
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()

# Re-run the test sentences after training, without gradient tracking, to
# compare against the log-probabilities printed before training.
with torch.no_grad():
    for instance, label in test_data:
        bow_vec = make_bow_vector(instance, word_to_ix)
        log_probs = model(bow_vec)
        print(log_probs)

# The weight for "creo" in the row for Spanish (index 0) goes up, and the one
# in the row for English (index 1) goes down!
print(next(model.parameters())[:, word_to_ix["creo"]])

tensor([[-0.1210, -2.1721]])
tensor([[-2.7767, -0.0643]])
tensor([ 0.5004, -0.2738], grad_fn=<SelectBackward0>)
