In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
torch.manual_seed(1)

<torch._C.Generator at 0x1bbbcfb21d0>

In [10]:
# A linear layer holds a weight matrix and a bias; here it maps 5 input features to 3 outputs.
lin = nn.Linear(in_features=5, out_features=3)
# Two samples of 5 features each, drawn uniformly from [0, 1).
data = torch.rand(2, 5)
projected = lin(data)
print(projected)

tensor([[-0.1146,  0.3167,  0.1321],
        [-0.2446,  0.2364, -0.0377]], grad_fn=<AddmmBackward0>)


In [11]:
# Weight matrix of `lin`; nn.Linear(5, 3) stores it as (out_features, in_features) = (3, 5).
print(lin.weight)

Parameter containing:
tensor([[ 0.2304, -0.1974, -0.0867,  0.2099, -0.4210],
        [ 0.2682, -0.0920,  0.2275,  0.0622, -0.0548],
        [ 0.1240,  0.0221,  0.1633, -0.1743, -0.0326]], requires_grad=True)


In [12]:
# The (2, 5) input batch that was passed through `lin`.
print(data)

tensor([[0.9371, 0.6556, 0.3138, 0.1980, 0.4162],
        [0.2843, 0.3398, 0.5239, 0.7981, 0.7718]])


In [13]:
# Bias vector of `lin`: one entry per output feature (3 in total).
print(lin.bias)

Parameter containing:
tensor([-0.0403,  0.0648, -0.0018], requires_grad=True)


In [14]:
# ReLU replaces negative entries with zero and leaves the others unchanged.
data = torch.randn(2, 2)
rectified = F.relu(data)
print(data)
print(rectified)

tensor([[ 0.4224,  0.2673],
        [-0.4212, -0.5107]])
tensor([[0.4224, 0.2673],
        [0.0000, 0.0000]])


In [18]:
# Softmax turns arbitrary scores into a probability distribution along `dim`.
data = torch.randn(5)
probs = F.softmax(data, dim=0)
print(data)
print(probs)
# The probabilities along the normalised dimension sum to 1.
print(probs.sum())
# log_softmax returns the logarithm of those probabilities.
print(F.log_softmax(data, dim=0))

tensor([-2.2590,  0.6067, -0.1383,  0.8310, -0.2477])
tensor([0.0177, 0.3117, 0.1480, 0.3900, 0.1326])
tensor(1.0000)
tensor([-4.0316, -1.1659, -1.9109, -0.9415, -2.0203])


In [None]:
# Training set: (tokenised sentence, language label) pairs.
data = [
    ("me gusta comer en la cafeteria".split(), "SPANISH"),
    ("Give it to me".split(), "ENGLISH"),
    ("No creo que sea una buena idea".split(), "SPANISH"),
    ("No it is not a good idea to get lost at sea".split(), "ENGLISH"),
]

# Held-out sentences used for the before/after comparison.
test_data = [
    ("Yo creo que si".split(), "SPANISH"),
    ("it is lost on me".split(), "ENGLISH"),
]

# Give every distinct word (train and test) the next free integer index,
# in order of first appearance.
word_to_ix = {}
for sent, _ in data + test_data:
    for word in sent:
        # setdefault only inserts when the word is new, so existing indices are kept.
        word_to_ix.setdefault(word, len(word_to_ix))
print(word_to_ix)

VOCAB_SIZE = len(word_to_ix)
NUM_LABELS = 2

class BowClassifier(nn.Module):
    """Bag-of-words classifier: one linear layer followed by log-softmax.

    Args:
        num_labels: number of output classes.
        vocab_size: length of the bag-of-words input vector.
    """

    def __init__(self, num_labels, vocab_size):
        super().__init__()
        # Maps a vocab_size-long count vector to one score per label.
        self.linear = nn.Linear(in_features=vocab_size, out_features=num_labels)

    def forward(self, bow_vec):
        """Return log-probabilities over the labels for each row of ``bow_vec``."""
        scores = self.linear(bow_vec)
        # dim=1 normalises across the label axis of the 2-D score matrix; see
        # https://discuss.pytorch.org/t/how-to-choose-dim-0-1-for-softmax-or-logsoftmax/52676
        return F.log_softmax(scores, dim=1)
    
def make_bow_vector(sentence,word_to_ix):
    """Build a (1, len(word_to_ix)) float tensor of word counts for ``sentence``.

    Every word must be a key of ``word_to_ix``; an unknown word raises KeyError.
    """
    indices = [word_to_ix[token] for token in sentence]
    counts = torch.zeros(len(word_to_ix))
    for idx in indices:
        counts[idx] += 1
    # Add a leading batch dimension of size 1.
    return counts.view(1, -1)

def make_target(label,label_to_ix):
    """Return the integer id of ``label`` wrapped in a one-element LongTensor."""
    label_id = label_to_ix[label]
    return torch.LongTensor([label_id])

# Instantiate the classifier; its parameters are randomly initialised.
model=BowClassifier(NUM_LABELS,VOCAB_SIZE)
# Show the parameters: the linear layer's weight matrix, then its bias vector.
for param in model.parameters():
    print(param)

# Sanity-check a forward pass on the first training sentence.
# no_grad: this is inspection only, so no autograd graph is needed.
with torch.no_grad():
    sample=data[0]
    # sample is a (words, label) pair; only the words are fed to the model.
    bow_vector=make_bow_vector(sample[0],word_to_ix)
    log_probs=model(bow_vector)
    print(log_probs)


{'me': 0, 'gusta': 1, 'comer': 2, 'en': 3, 'la': 4, 'cafeteria': 5, 'Give': 6, 'it': 7, 'to': 8, 'No': 9, 'creo': 10, 'que': 11, 'sea': 12, 'una': 13, 'buena': 14, 'idea': 15, 'is': 16, 'not': 17, 'a': 18, 'good': 19, 'get': 20, 'lost': 21, 'at': 22, 'Yo': 23, 'si': 24, 'on': 25}
Parameter containing:
tensor([[ 0.1887,  0.1260, -0.0973, -0.1513,  0.1100, -0.1074,  0.0900, -0.1680,
          0.1431,  0.0588, -0.0225,  0.0899, -0.1396, -0.0429,  0.0019, -0.1133,
         -0.0501, -0.0846,  0.1869,  0.1666,  0.1691,  0.1603, -0.1878,  0.0656,
          0.0723, -0.1438],
        [ 0.0795, -0.1064,  0.0312,  0.0650,  0.1692, -0.1029, -0.1543,  0.1815,
          0.0823,  0.0953,  0.0706, -0.0694,  0.1180, -0.1712,  0.0273,  0.0887,
          0.1340, -0.0979, -0.1326,  0.0735, -0.1557, -0.1630, -0.1764, -0.0951,
         -0.0374, -0.1161]], requires_grad=True)
Parameter containing:
tensor([-0.0829, -0.1737], requires_grad=True)
tensor([[-0.6813, -0.7051]])


In [38]:
# Integer class id for each language label.
label_to_ix = dict(SPANISH=0, ENGLISH=1)

In [39]:
# Training hyper-parameters, named so they can be tuned in one place.
NUM_EPOCHS = 100
LEARNING_RATE = 0.1


def report_test_predictions(model, test_data, word_to_ix, probe_word="creo"):
    """Print the model's current predictions so runs can be compared.

    Prints the log-probabilities for every sentence in ``test_data``, followed by
    the column of the model's first parameter (the linear weight matrix) that
    corresponds to ``probe_word`` -- one weight per label.

    Args:
        model: module mapping a bag-of-words vector to log-probabilities.
        test_data: iterable of (words, label) pairs; the label is not used here.
        word_to_ix: mapping from word to its index in the bag-of-words vector.
        probe_word: word whose weight column is printed.
    """
    with torch.no_grad():
        for instance, _ in test_data:
            bow_vec = make_bow_vector(instance, word_to_ix)
            print(model(bow_vec))
    # Deliberately outside no_grad so the printed tensor still shows its grad_fn.
    print(next(model.parameters())[:, word_to_ix[probe_word]])


# Run on test data before we train, just to see a before-and-after
report_test_predictions(model, test_data, word_to_ix)

# The model outputs log-probabilities (log_softmax), which is the input NLLLoss expects.
loss_function = nn.NLLLoss()
# Plain stochastic gradient descent over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

for epoch in range(NUM_EPOCHS):
    for instance, label in data:
        # Step 1. Remember that PyTorch accumulates gradients.
        # We need to clear them out before each instance
        model.zero_grad()

        # Step 2. Input preparation: bag-of-words vector and integer target.
        bow_vec = make_bow_vector(instance, word_to_ix)
        target = make_target(label, label_to_ix)

        # Step 3. Forward pass.
        log_probs = model(bow_vec)

        # Step 4. Compute the loss, back-propagate to get the gradients,
        # then let the optimizer apply one update step.
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()

# Same report after training, for the before/after comparison.
report_test_predictions(model, test_data, word_to_ix)





tensor([[-0.5021, -0.9295]])
tensor([[-0.7124, -0.6743]])
tensor([-0.0225,  0.0706], grad_fn=<SelectBackward0>)
tensor([[-0.0856, -2.5002]])
tensor([[-2.8329, -0.0606]])
tensor([ 0.4310, -0.3830], grad_fn=<SelectBackward0>)
