## LSTM

In [None]:
from utils.MyData import MyData
from utils.tools import list_to_one_hot
from torch import nn, Tensor
import torch
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

In [None]:
# Optimisation hyperparameters for the training run.
num_epochs = 10
learning_rate = 0.1
batch_size = 64

# Number of rows in each one-hot sentence matrix; the models below use it as
# their input feature size.
class_num = 21_284

In [None]:
# Build the project dataset wrapper from the training CSV path, then fetch the
# two splits that are later handed to the DataLoaders.
data = MyData('./wbqg3541/train.csv')
train_data, test_data = data.train_data(), data.test_data()

def collate_fn(batch):
    """Collate dataset items into a list of sentence matrices and a label tensor.

    Args:
        batch: Sequence of items where ``item[0]`` is handed to
            ``list_to_one_hot`` and ``item[1]`` is the item's label.

    Returns:
        A two-element list ``[sentence_matrix, labels]``. ``sentence_matrix``
        is the list of per-item ``list_to_one_hot`` results, left unpadded
        (the models pad or reduce them themselves). ``labels`` is an int64
        tensor with one entry per item.
    """
    sentence_matrix = [list_to_one_hot(item[0]) for item in batch]
    # Build the integer tensor directly instead of creating a float32 tensor
    # and casting it, which is lossy for large integer values.
    labels = torch.tensor([item[1] for item in batch], dtype=torch.long)
    return [sentence_matrix, labels]

# Mini-batch loaders. The batch size comes from the shared ``batch_size``
# hyperparameter instead of a separate literal, so there is one place to tune.
# NOTE(review): the literal used here was 128 while ``batch_size`` is 64 —
# confirm which value is intended.
train_loader = DataLoader(
    train_data, batch_size=batch_size, shuffle=True, collate_fn=collate_fn
)
# Evaluation gains nothing from shuffling; a fixed order keeps the per-batch
# evaluation metrics reproducible from epoch to epoch.
test_loader = DataLoader(
    test_data, batch_size=batch_size, shuffle=False, collate_fn=collate_fn
)

In [None]:
# 线性模型实现
class LinerClassifier(nn.Module):
    """Bag-of-words linear classifier over one-hot sentence matrices.

    Every sentence matrix is collapsed along its sentence axis into a vector
    of per-word counts; the counts go through two linear layers (ReLU in
    between) and a log-softmax over the classes.

    Args:
        input_size: Length of the count vector, i.e. the number of rows of
            each one-hot matrix. Defaults to the module-level ``class_num``.
        hidden_size: Width of the hidden layer.
        num_classes: Number of output classes.
    """

    def __init__(self, input_size=None, hidden_size=20, num_classes=2):
        super(LinerClassifier, self).__init__()
        if input_size is None:
            # Resolved at construction time so the layer width and the data
            # share a single source of truth.
            input_size = class_num
        self.L1 = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(True))
        self.L2 = nn.Sequential(
            nn.Linear(hidden_size, num_classes),
            # Explicit dim: relying on the implicit dimension is deprecated
            # and emits a warning on every forward pass.
            nn.LogSoftmax(dim=1))

    def forward(self, x):
        """Return per-class log-probabilities for a batch of sentences.

        Args:
            x: List with one entry per sentence in the batch. Each entry is
                an ``input_size x sentence_len`` one-hot matrix; sentence
                lengths may differ between entries.

        Returns:
            Tensor of shape ``(len(x), num_classes)`` holding log-probabilities.
        """
        # This is a linear model, not a variable-length sequence model, so
        # each matrix is reduced to a fixed-size vector whose i-th element is
        # the number of times word i occurs in the sentence. The vectors are
        # stacked in one call rather than grown with repeated concatenation.
        counts = torch.stack([each.sum(1) for each in x])
        # Match the layer's parameter dtype (integer one-hot input would
        # otherwise be rejected by nn.Linear).
        counts = counts.to(self.L1[0].weight.dtype)
        hidden = self.L1(counts)
        return self.L2(hidden)

In [None]:
# LSTM模型实现
class LSTMClassifier(nn.Module):
    """Single-layer LSTM sentence classifier over one-hot sentence matrices.

    The LSTM reads each sentence one word (one-hot column) at a time; the
    hidden state at the sentence's last real word is mapped to class
    log-probabilities by a linear layer followed by log-softmax.

    Args:
        input_size: Size of each one-hot word vector. Defaults to the
            module-level ``class_num``.
        hidden_size: Size of the LSTM hidden state.
        num_classes: Number of output classes.
    """

    def __init__(self, input_size=None, hidden_size=16, num_classes=2):
        super(LSTMClassifier, self).__init__()
        if input_size is None:
            input_size = class_num
        self.L1 = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=True,
            bias=False,
        )
        self.L2 = nn.Sequential(
            nn.Linear(hidden_size, num_classes),
            # Explicit dim: relying on the implicit dimension is deprecated
            # and emits a warning on every forward pass.
            nn.LogSoftmax(dim=1),
        )

    def forward(self, x):
        """Return per-class log-probabilities for a batch of sentences.

        Args:
            x: List with one entry per sentence. Each entry is an
                ``input_size x sentence_len`` matrix; sentence lengths may
                differ between entries.

        Returns:
            Tensor of shape ``(len(x), num_classes)`` holding log-probabilities.
        """
        # Transpose to (sentence_len, input_size) so time is the leading axis,
        # then zero-pad to a common length: (batch, max_len, input_size).
        sequences = [each.t() for each in x]
        padded = pad_sequence(sequences, batch_first=True, padding_value=0)
        output, _ = self.L1(padded)

        # The final hidden state h_n is taken after the padding steps, where
        # the recurrent weights keep changing the state of shorter sentences.
        # Read the output at each sentence's own last timestep instead, so
        # the result does not depend on what else is in the batch.
        lengths = torch.tensor(
            [seq.size(0) for seq in sequences], device=output.device
        )
        last_step = (lengths - 1).clamp(min=0)
        batch_index = torch.arange(output.size(0), device=output.device)
        last_hidden = output[batch_index, last_step]
        return self.L2(last_hidden)

In [None]:
# Sanity check: pad the first training batch and print the resulting shape.
for sentence_matrixs, label in train_loader:
    transposed = [matrix.t() for matrix in sentence_matrixs]
    sentence_matrixs = pad_sequence(transposed, padding_value=0, batch_first=True)
    print(sentence_matrixs.shape)
    break

In [None]:
# Smoke test: run one training batch through a fresh LSTM classifier and
# print the shape of what comes back.
model = LSTMClassifier()

for sentence_matrixs, label in train_loader:
    batch_out = model(sentence_matrixs)
    print(batch_out.shape)
    break

In [None]:
model = LSTMClassifier()
# NOTE(review): LSTMClassifier already ends in LogSoftmax and CrossEntropyLoss
# applies log-softmax itself. Re-normalising log-probabilities leaves them
# unchanged, so the loss value is unaffected; nn.NLLLoss would state the
# intent more directly.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Test-set accuracy after each epoch, stored as plain Python floats.
acc = []

for epoch in range(num_epochs):
    print('*' * 10)
    print(f'epoch {epoch+1}')

    # Re-enter training mode every epoch: the evaluation pass below switches
    # the model to eval mode and nothing else switches it back.
    model.train()
    running_loss, running_correct, seen = 0.0, 0, 0
    for sentence_matrixs, label in train_loader:
        out = model(sentence_matrixs)
        loss = criterion(out, label)

        # Weight by batch size so a smaller final batch does not skew the
        # epoch averages.
        n_samples = label.size(0)
        running_loss += loss.item() * n_samples
        pred = out.argmax(dim=1)
        running_correct += (pred == label).sum().item()
        seen += n_samples

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    train_total = max(seen, 1)
    print(f'Train Loss: {running_loss/train_total:.6f}, Acc: {running_correct/train_total:.6f}')

    model.eval()
    eval_loss, eval_correct, eval_seen = 0.0, 0, 0
    # No gradients are needed anywhere in the evaluation pass.
    with torch.no_grad():
        for sentence_matrixs, label in test_loader:
            out = model(sentence_matrixs)
            loss = criterion(out, label)
            n_samples = label.size(0)
            eval_loss += loss.item() * n_samples
            pred = out.argmax(dim=1)
            eval_correct += (pred == label).sum().item()
            eval_seen += n_samples
    eval_total = max(eval_seen, 1)
    eval_acc = eval_correct / eval_total
    acc.append(eval_acc)
    print(f'Test Loss: {eval_loss/eval_total:.6f}, Acc: {eval_acc:.6f}\n')

# Save the trained model's parameters.
torch.save(model.state_dict(), './ReNet-5.pth')