In [1]:
# necessary imports
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
import numpy as np
from torch.utils.data import Dataset, DataLoader
from gensim.models import Word2Vec
from tqdm import tqdm

from encoder_decoder import *
from dataload import *

In [2]:
wv_classical = Word2Vec.load("word_model_paths/classical_wm_lunyu").wv
wv_modern = Word2Vec.load("word_model_paths/modern_wm_lunyu").wv

In [3]:
BS = 64
full_dataset = MyDataset_embed(wm_paths, data_path)

train_in_all = 0.8 # train_in_all is the proportion of the dataset that will be used for training

train_size = int(train_in_all * len(full_dataset))
val_size = len(full_dataset) - train_size

train_dataset, val_dataset = torch.utils.data.random_split(full_dataset, [train_size, val_size])
train_dataloader = DataLoader(train_dataset, batch_size=BS, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=BS, shuffle=True)

In [4]:
next(iter(train_dataloader))

[tensor([[[-0.7334,  0.1840, -0.3414,  ..., -0.9254, -0.0097, -0.1916],
          [-0.3039,  0.8708, -0.9427,  ..., -1.8476,  0.1600,  0.3162],
          [-0.8228, -1.4249, -1.0220,  ..., -0.7598, -0.5913,  0.2913],
          ...,
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],
 
         [[-0.8787, -2.2798,  0.0856,  ..., -0.4607,  0.7535,  0.3063],
          [ 0.0638, -0.0541,  0.9432,  ..., -0.0452, -1.8661,  0.3621],
          [ 2.0901, -1.1587, -0.5705,  ..., -0.8278, -0.6187,  1.0071],
          ...,
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],
 
         [[-0.2556, -0.0378, -0.2397,  ..., -0.4543,  0.4115, -0.2153],
          [-0.2912, -0.5154,

In [4]:
from datetime import datetime as dt

now = str(dt.now())
time_path = now[:10] + "_" + now[11:13] + "_" + now[14:16] + "_" + now[17:19]
print(time_path)
tb_path = "cc/" + time_path
tb_train_path = tb_path + "/train"
tb_val_path = tb_path + "/val"
tb_train_path, tb_val_path

2022-03-27_10_23_01


('cc/2022-03-27_10_23_01/train', 'cc/2022-03-27_10_23_01/val')

In [5]:
from torch.utils.tensorboard import SummaryWriter 
writer_train = SummaryWriter(tb_train_path)
writer_val = SummaryWriter(tb_val_path)

In [6]:
LR = 0.003
num_epoches = 100
net = LSTM_seq2seq(100, 100).to(device)
optimizer = optim.Adam(net.parameters(), lr=LR)
criterion = nn.MSELoss()
ctrl = optim.lr_scheduler.ExponentialLR(optimizer, 0.99)

In [8]:
# Start training!
for epoch in tqdm(range(num_epoches)):
    net.train()
    train_loss = 0
    for data in train_dataloader:
        data, label = data
        optimizer.zero_grad()
        output = net(data)

        if (output.shape[1] > label.shape[1]):
            for k in range(output.shape[1] - label.shape[1]):
                label = torch.cat((label, label[:,-1:,...]), dim=1)
        elif (output.shape[1] < label.shape[1]):
            for k in range(label.shape[1] - output.shape[1]):
                output = torch.cat((output, output[:,-1:,...]), dim=1)

        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    train_avg_loss = train_loss / len(train_dataloader)
    writer_train.add_scalar('Loss/Epoch', train_avg_loss, epoch+1) # epoch+1 because epoch starts from 0
    writer_train.flush()
    ctrl.step() # lr decay

    net.eval()
    val_loss = 0
    with torch.no_grad():
        for data in val_dataloader:
            data, label = data
            output = net(data)
            
            if (output.shape[1] > label.shape[1]):
                for k in range(output.shape[1] - label.shape[1]):
                    label = torch.cat((label, label[:,-1:,...]), dim=1)
            elif (output.shape[1] < label.shape[1]):
                for k in range(label.shape[1] - output.shape[1]):
                    output = torch.cat((output, output[:,-1:,...]), dim=1)

            loss = criterion(output, label)
            val_loss += loss.item()

    val_avg_loss = val_loss / len(val_dataloader)
    writer_val.add_scalar('Loss/Epoch', val_avg_loss, epoch+1) # epoch+1 because epoch starts from 0
    writer_val.flush()

print("Training finished!")

100%|██████████| 100/100 [22:04<00:00, 13.24s/it]

Training finished!





In [9]:
# if you want to save your language model...
model_name = input("请输入语言模型的名称：")
model_name = model_name + '_' + time_path
torch.save(net, "nmt_model_paths/" + model_name)

In [3]:
model_name = "论语_2022-03-27_10_23_01"
net = torch.load("nmt_model_paths/" + model_name)

In [28]:
from encoder_decoder import *

In [10]:
to_be_translated = input("Please input the sentence you want to translate: ")
# to_be_translated = "何谓也"
count = 10
# input = [wv_classical[i] for i in input]
# out = net.predict(to_be_translated, 20, wv_modern)
lst = []
for i in to_be_translated:
    lst.append(wv_classical[i])
input_ = torch.tensor(lst)
# 升高一维
input_ = input_.reshape((1, -1, 100))
data, hidden = net.encoder(input_)
for i in range(count):
    y = net.decoder(data, hidden)[0][-1]
    p = y.detach().numpy()
    p = softmax(p)

    idx = np.random.choice(np.arange(100), p=p)
    # idx = torch.argmax(y, dim=0)
    new_word = wv_modern.index_to_key[idx]
    print(new_word, end='')
    data = torch.cat((data, y.reshape((1, 1, 100))), dim=1)


下老回要也这地能于？