# MCM 2020

In [50]:
from utils import MyData
from torch import nn, Tensor
import torch
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

In [51]:
batch_size = 64
learning_rate = 1e-1
num_epochs = 10

class_num = 100

In [52]:
data = MyData('1.tsv')
train_data = data.train_data()
test_data = data.test_data()

In [53]:
def collate_fn(batch):
    sentence_matrix = [Tensor(item[0]).t() for item in batch]
    labels = [item[1]-1 for item in batch]
    labels = torch.Tensor(labels).long()
    return [sentence_matrix,labels]

train_loader = DataLoader(train_data, batch_size=128, shuffle=True,collate_fn=collate_fn)
test_loader = DataLoader(test_data, batch_size=128, shuffle=True,collate_fn=collate_fn)

In [56]:
# 线性模型实现
class LinerClassifier(nn.Module):
    def __init__(self):
        super(LinerClassifier, self).__init__()
        self.L1 = nn.Sequential(
            nn.Linear(100,20),
            nn.ReLU(True))
        self.L2 = nn.Sequential(
            nn.Linear(20,5),
            nn.LogSoftmax())
    
    def forward(self, x):
        sum_matrix = Tensor([])
        for each in x:
            # print(each.size())
            sum_matrix = torch.cat((sum_matrix, each.sum(1).reshape(1, 100)), 0)
        x = self.L1(sum_matrix)
        x = self.L2(x)
        return x

In [67]:
# LSTM模型实现
class LSTMClassifier(nn.Module):
    def __init__(self):
        super(LSTMClassifier, self).__init__()
        self.L1 = nn.LSTM(
            input_size=class_num,
            hidden_size=16,
            num_layers=1,
            batch_first=True,
            bias=False
        )
        self.L2 = nn.Sequential(
            nn.Linear(16 ,5),
            nn.LogSoftmax()
        )
    
    def forward(self, x):
        x = [each.t() for each in x]
        x = pad_sequence(x, batch_first=True, padding_value=0)
        output,(h_n,c_n) = self.L1(x)
        # print(output.size())
        output_in_last_timestep=h_n[-1,:,:]
        x = self.L2(output_in_last_timestep)
        return x

In [68]:
model = LSTMClassifier()
# criterion = nn.MSELoss()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

acc = []

for epoch in range(num_epochs):
    print('*' * 10)
    print(f'epoch {epoch+1}')
    running_loss, running_acc = .0, .0
    for sentence_matrixs, label in train_loader:
        out = model(sentence_matrixs)
        # print(out)
        # print(out.size())
        loss = criterion(out, label)
        running_loss += loss.item()
        _, pred = torch.max(out, 1)
        running_acc += (pred == label).float().mean()
        # 向后传播
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f'Train Loss: {running_loss/len(train_loader):.6f}, Acc: {running_acc/len(train_loader):.6f}')
    model.eval()
    eval_loss, eval_acc = .0, .0
    for sentence_matrixs, label in test_loader:
        with torch.no_grad():
            out = model(sentence_matrixs)
            loss = criterion(out, label)
        eval_loss += loss.item()
        _, pred = torch.max(out, 1)
        eval_acc += (pred == label).float().mean()
    acc.append(eval_acc/len(test_loader))
    print(f'Test Loss: {eval_loss/len(test_loader):.6f}, Acc: {eval_acc/len(test_loader):.6f}\n')
    
# 保存模型
torch.save(model.state_dict(), './ReNet-5.pth')

**********
epoch 1


KeyboardInterrupt: 

In [63]:
# from word2vec import load_model

w2v_model = load_model()
text = """
Great while it worked	This was a great hair dryer....for 8 months.  Yesterday the heating element stopped working and now it only blows cool air.  I thought since it was a Revlon product and it wasn't cheap that it would last for years.  I was wrong :(
"""
matrix = [Tensor([w2v_model[each] for each in text.split(' ') if each in w2v_model.wv.vocab]).t()]
# print(matrix.size())
out = model(matrix)
print(out)
_, pred = torch.max(out, 1)
print(pred)

tensor([[-2.2246, -2.6913, -2.2769, -1.6287, -0.6438]],
       grad_fn=<LogSoftmaxBackward>)
tensor([4])
  import sys


In [45]:
for sentence_matrixs, label in train_loader:
    print(sentence_matrixs[0].size())

torch.Size([100, 13])
torch.Size([100, 19])
torch.Size([100, 11])
torch.Size([100, 86])
torch.Size([100, 57])
torch.Size([100, 23])
torch.Size([100, 55])
torch.Size([100, 180])
torch.Size([100, 22])
torch.Size([100, 24])
torch.Size([100, 24])
torch.Size([100, 34])
torch.Size([100, 157])
torch.Size([100, 4])
torch.Size([100, 38])
torch.Size([100, 62])
torch.Size([100, 162])
torch.Size([100, 106])
torch.Size([100, 107])
torch.Size([100, 67])
torch.Size([100, 7])
torch.Size([100, 15])
torch.Size([100, 29])
torch.Size([100, 36])
torch.Size([100, 38])
torch.Size([100, 36])
torch.Size([100, 9])
torch.Size([100, 59])
torch.Size([100, 16])
torch.Size([100, 58])
torch.Size([100, 34])
torch.Size([100, 11])
torch.Size([100, 190])
torch.Size([100, 45])
torch.Size([100, 39])
torch.Size([100, 11])
torch.Size([100, 25])
torch.Size([100, 8])
torch.Size([100, 58])
torch.Size([100, 66])
torch.Size([100, 49])
torch.Size([100, 212])
torch.Size([100, 13])
torch.Size([100, 145])
torch.Size([100, 17])
torch.

* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
* 1
