In [1]:
# necessary imports
import os

import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader
from tqdm import tqdm

from dataload import MyDataset
from lgg_model import vanilla_LSTM

In [2]:
# Build the dataset / dataloader and select the compute device.
BS = 32                # mini-batch size handed to the DataLoader
input_word_count = 10  # forwarded to MyDataset as its second positional argument

# Prefer the first CUDA device when one is available, otherwise run on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Ask for the corpus name (without extension) and resolve it to texts/<name>.txt.
corpus_name = input("请输入语料库文件名(无需添加后缀)：")
data_path = f"texts/{corpus_name}.txt"

my_dataset = MyDataset(data_path, input_word_count, save_word_model=True)
my_dataloader = DataLoader(my_dataset, shuffle=True, batch_size=BS)

In [3]:
# Sanity check: display the dataset's `vocabulary_length` attribute
# (presumably the number of distinct tokens in the corpus — confirm in dataload.MyDataset).
# The same value is passed as the first argument of the model in the training cell.
my_dataset.vocabulary_length

709

In [4]:
# TensorBoard writer; the training cell logs the per-epoch loss through it.
# Constructed without arguments, so it uses SummaryWriter's default log directory.
from torch.utils import tensorboard
writer = tensorboard.SummaryWriter()

In [5]:
# some components in the training process
LR = 0.001 # the learning rate of 0.001 is still too large, maybe needs lr_decay or batch_norm
num_epoches = 200
# NOTE(review): the meaning of the positional arguments (100, 100, 4) is defined by
# lgg_model.vanilla_LSTM — confirm there before tuning them.
net = vanilla_LSTM(my_dataset.vocabulary_length, 100, 100, 4).to(device)
optimizer = optim.Adam(net.parameters(), lr=LR)
criterion = nn.CrossEntropyLoss()

# 2022/2/27 add a lr decay controller: every ctrl.step() multiplies the learning rate by gamma
ctrl = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98)

# Fail fast on an empty dataloader instead of hitting an undefined `loss`
# when the first epoch tries to log.
num_batches = len(my_dataloader)
if num_batches == 0:
    raise ValueError("my_dataloader yields no batches; check the corpus file and input_word_count")

# start training!
for epoch in tqdm(range(num_epoches)):
    running_loss = 0.0  # sum of the per-batch losses of this epoch
    for data in my_dataloader:
        # Move to the training device and cast to the integer dtype CrossEntropyLoss expects for targets.
        data = data.to(device=device, dtype=torch.long)
        # Next-token objective: the target at position t is the input token at position t+1 ...
        label = data[:, 1:]
        # ... so the prediction made at the last position has no target and is dropped.
        out = net(data)[:, :-1, :]
        # CrossEntropyLoss wants the class dimension at index 1, hence swap dims 1 and 2.
        out = torch.transpose(out, 2, 1)

        optimizer.zero_grad()
        loss = criterion(out, label)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Log the mean of the batch losses; a single (last) batch is a noisy sample
    # because the dataloader reshuffles every epoch.
    writer.add_scalar("Loss/Epoch", running_loss / num_batches, epoch+1)
    writer.flush()
    ctrl.step() # lr decay

print("Finish training!")

100%|██████████| 200/200 [30:36<00:00,  9.18s/it]

Finish training!





In [7]:
# if you want to save your language model...
# The name is kept in `model_name`; calling it `str` would shadow the builtin
# for every cell executed afterwards in this kernel.
model_name = input("请输入语言模型的名称：")
# Make sure the target directory exists so a long training run is not lost to a missing folder.
os.makedirs("lgg_model_paths", exist_ok=True)
# Saves the whole module object (not just a state_dict), so loading it later
# needs the vanilla_LSTM class to be importable.
torch.save(net, "lgg_model_paths/" + model_name)