# Import Modules

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from data_loader import generate_conversation_dataset
from trans_model.torch_transformer import TransformerTransModel, generate_padding_mask, generate_square_subsequent_mask
from datetime import datetime
from utils import translate, save_checkpoint, load_checkpoint

# Report the installed PyTorch build, then select the compute device:
# the first CUDA GPU when one is visible to PyTorch, otherwise the CPU.
print("PyTorch version:[%s]." % torch.__version__)
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print("device:[%s]." % device)

PyTorch version:[1.8.1+cu111].
device:[cuda:0].


# Generate Dataset

In [5]:
# Mini-batch size, and the single seed used for both the dataset split and
# PyTorch's random number generator.
batch_size = 3
random_seed = 0

# validation_ratio=0 / test_ratio=0 presumably leave the whole corpus in
# train_set (TODO confirm in data_loader); only train_set is used below.
train_set, validation_set, test_set = generate_conversation_dataset(
    "../raw_data/conversations.csv",
    batch_size=batch_size,
    seed=random_seed,
    validation_ratio=0,
    test_ratio=0,
)

# Seed PyTorch's global RNG as well, so that weight initialisation and dropout
# are repeatable across "Restart & Run All". Done after the dataset call so the
# split produced by `seed=random_seed` is not affected.
torch.manual_seed(random_seed)

  0%|          | 5/4563 [00:00<01:52, 40.65it/s]Torkenizing and generating vocabs...
100%|██████████| 4563/4563 [00:49<00:00, 91.84it/s]Spliting dataset...
Indexing Senteces (Test set)...
Indexing Senteces (Validation set)...
Indexing Senteces (Train set)...



In [6]:
# Peek at the first batch only, as a sanity check of what the loader yields.
item = next(iter(train_set), None)
if item is not None:
    print(item)

(tensor([[   2,    2],
        [3674,  590],
        [   9, 2661],
        [  34,   67],
        [3674, 3194],
        [   9, 4179],
        [  43,  231],
        [3674,  627],
        [1990,  893],
        [2994, 4153],
        [1442, 3237],
        [2967, 2047],
        [4103, 4103],
        [  88, 2641],
        [1990, 2563],
        [2994,   13],
        [  56, 2047],
        [3674, 4103],
        [1990,  627],
        [3035,  744],
        [1263, 3035],
        [4103, 3579],
        [2596, 3194],
        [  13, 4179],
        [ 947, 3035],
        [1362, 1265],
        [1423,  289],
        [   9, 3035],
        [2057, 2028],
        [ 127, 1605],
        [3434, 2985],
        [2994, 1322],
        [3428, 2994],
        [4103, 2282],
        [ 378, 2928],
        [4023, 4103],
        [   9,  893],
        [2075, 1637],
        [ 691, 3035],
        [4103,  107],
        [ 893, 2216],
        [1990, 1090],
        [4251, 3183],
        [3587,  252],
        [  13,   13],
        [

# Define Models and Hyperparameters

In [None]:
# --- Training schedule ---
num_epochs = 32
learning_rate = 1e-4

# --- Vocabulary sizes, taken from the dataset's token-to-index mappings ---
src_vocab_size = len(train_set.kor2idx)
trg_vocab_size = len(train_set.eng2idx)

# --- Transformer architecture ---
d_model = 512
num_heads = 8
num_encoder_layers = 3
num_decoder_layers = 3
dim_feed_forward = 2048
dropout_rate = 0.1

# Padding index on the source (Korean) side; handed to the model.
padding_vocab = train_set.kor2idx["<pad>"]
# Padding index on the target (English) side. The loss is computed over target
# token ids, so this is the index it must ignore. Falls back to the source-side
# index when eng2idx has no "<pad>" entry.
trg_padding_vocab = train_set.eng2idx.get("<pad>", padding_vocab)

model = TransformerTransModel(
    src_vocab_size, trg_vocab_size,
    d_model=d_model,
    num_heads=num_heads,
    num_encoder_layers=num_encoder_layers,
    num_decoder_layers=num_decoder_layers,
    dim_feed_forward=dim_feed_forward,
    dropout=dropout_rate,
    padding_vocab_index=padding_vocab
).to(device)


# Checkpoint switches: resume from the last saved state / save after training.
load_model = True
save_model = True
last_checkpoint_path = "./checkpoint/checkpoint_last.pth.tar"

optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss(ignore_index=trg_padding_vocab)

if load_model:
    try:
        # map_location lets a checkpoint written on a GPU machine be restored
        # on whatever device this session is using (including CPU-only).
        checkpoint = torch.load(last_checkpoint_path, map_location=device)
    except FileNotFoundError:
        # First run on a fresh clone: nothing to resume from yet.
        print(f"No checkpoint found at {last_checkpoint_path}; starting from freshly initialised weights.")
    else:
        load_checkpoint(checkpoint, model, optimizer)

# Test Translation

체크포인트를 불러오지 않은 상태(`load_model = False`)에서는 모델이 아직 학습되지 않았으므로 번역 결과가 제대로 나오지 않는 것이 당연하다.

In [None]:
# Translate one sample sentence before training as a baseline.
# max_length=16 presumably caps the generated length (see utils.translate).
output = translate(
    model,
    "안녕하세요? 또 뵙는군요.",
    train_set.kor2idx,
    train_set.eng2idx,
    train_set.idx2eng,
    device=device,
    max_length=16,
)
# Blank line, label, the translation, then its length -- one item per line.
print("", "Final Output: ", output, len(output), sep="\n")

# Training

In [None]:
# Loss of every optimisation step, accumulated across all epochs.
losses = []
# Most recent step's loss as a plain float; NaN until the first step has run.
batch_loss = float("nan")

model.train()
for epoch in range(num_epochs):
    print(f"Start Epoch {epoch}: {datetime.now()}")
    for batch in train_set:
        source = batch[0].to(device)
        target = batch[1].to(device)

        optimizer.zero_grad()

        # Teacher forcing: the model is fed the target without its last
        # position and is scored against the target without its first position
        # (assumes dim 0 is the sequence axis -- TODO confirm in data_loader).
        output = model(source, target[:-1, :])

        # Collapse the leading dimensions so CrossEntropyLoss sees one row per
        # position; the last dimension (presumably vocabulary logits) is kept.
        output = output.reshape(-1, output.shape[2])
        target = target[1:].reshape(-1)

        loss = criterion(output, target)
        # Store a Python float rather than the tensor so the log below prints
        # a number and no autograd graph is kept alive.
        batch_loss = loss.item()
        losses.append(batch_loss)

        loss.backward()
        optimizer.step()

    # To keep a checkpoint per epoch, call save_checkpoint here with an
    # epoch-specific filename.

    # Progress report every 10th epoch, showing the last batch's loss.
    if epoch % 10 == 9:
        print(f"Loss({epoch} / {num_epochs}): {batch_loss:.4f}")
        print(f"End: {datetime.now()}")
        print("\n=================================\n")


if save_model:
    # Store all three vocabulary mappings that translate() takes, so a restored
    # model can be used without regenerating the dataset. The extra keys are
    # additive; existing readers of "state_dict" / "optimizer" / "kor2idx" are
    # unaffected (assumes load_checkpoint looks entries up by key).
    checkpoint = {
        "state_dict": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "kor2idx": train_set.kor2idx,
        "eng2idx": train_set.eng2idx,
        "idx2eng": train_set.idx2eng,
    }
    save_checkpoint(checkpoint, filename="./checkpoint/checkpoint_last.pth.tar")

# Check Result

In [None]:
# Translate a single hand-picked sentence with the trained model; evaluating
# the entire test data would take a while, so only one example is checked.
test_text = "오늘 수업은 뭔가요?"
output = translate(
    model,
    test_text,
    train_set.kor2idx,
    train_set.eng2idx,
    train_set.idx2eng,
    device=device,
    max_length=16,
)
# Blank line, input label and sentence, blank line, output label and result.
print("", "Input: ", test_text, "", "Final Output: ", output, sep="\n")

# 해야 할 일

Load가 이루어지면 vocab 사전도 함께 로드할 수 있도록 data_loader를 수정

Save시 vocab이 유지될 수 있도록 수정

BLEU Score 함수 작성 

프론트엔드, 백엔드 작성 (새 프로젝트)
