In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import torch.nn as nn
import torch.nn.utils as U
from torch.utils.data import Dataset, DataLoader
import json
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR

In [3]:
import sys
# Extend the import path with the project-local source directories so the two
# local modules imported below can be resolved. The paths are relative, so they
# are interpreted against the kernel's current working directory.
# NOTE(review): presumably `processor` lives under ./preprocessing and
# `rerank_model` under ./seq2seq -- confirm against the repository layout.
sys.path.append('./preprocessing')
sys.path.append('./seq2seq')
from processor import Code_Intent_Pairs
from rerank_model import ScoreNet, get_train_loader

### Load Data

In [4]:
# Read the rerank training data. The encoding is pinned to UTF-8 so decoding
# does not depend on the platform's locale default (JSON text is UTF-8 by
# specification; pure-ASCII files decode identically).
with open('./corpus/rerank_data.json', 'r', encoding='utf-8') as f:
    dataset = json.load(f)

In [5]:
hyperP = dict(
    # --- optimisation ---
    batch_size=32,
    lr=1e-3,
    max_epochs=20,
    lr_keep_rate=0.8,  # per-epoch lr multiplier; 1.0 disables the decay

    # --- encoder ---
    encoder_layers=2,
    encoder_embed_size=128,
    encoder_hidden_size=256,
    encoder_dropout_rate=0.3,

    # --- logging ---
    print_every=30,  # number of batches between progress prints
)

In [6]:
# Build the iterable of training batches from the loaded data and the
# hyper-parameter dict. `get_train_loader` is a project helper imported from
# rerank_model.
# NOTE(review): presumably it reads hyperP['batch_size'] and returns a
# torch DataLoader -- confirm in rerank_model.
trainloader = get_train_loader(dataset, hyperP)

In [7]:
# Restore the saved dictionaries from `path`, then read the two vocabulary
# sizes that are later handed to the model constructor.
path = 'vocab/'
code_intent_pair = Code_Intent_Pairs()
code_intent_pair.load_dict(path)
word_size, code_size = (
    code_intent_pair.get_word_size(),
    code_intent_pair.get_code_size(),
)

### Train

In [9]:
# Scoring network, placed on the GPU.
model = ScoreNet(word_size, code_size, hyperP).cuda()

# Adam over all model parameters, with weight decay.
optimizer = optim.Adam(
    model.parameters(),
    lr=hyperP['lr'],
    weight_decay=1e-4,
)

# Optional learning-rate decay: the scheduler is only created when the keep
# rate is not 1.0, so `scheduler` must only be used under the same condition.
lr_keep_rate = hyperP['lr_keep_rate']
if lr_keep_rate != 1.0:
    def lr_reduce_f(epoch):
        """Return the factor applied to the base learning rate at `epoch`."""
        return lr_keep_rate ** epoch

    scheduler = LambdaLR(optimizer, lr_lambda=lr_reduce_f)

# Predicted scores are fitted to the target scores with mean squared error.
loss_f = torch.nn.MSELoss()

In [10]:
def train(model, trainloader, optimizer, loss_f, hyperP, e):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: network called as ``model(intents, codes, slot_nums,
            intents_seq_order, codes_seq_order)``; its output is compared
            against the target scores.
        trainloader: sized iterable yielding 6-tuples ``(intents, codes,
            slot_nums, scores, intents_seq_order, codes_seq_order)``.
        optimizer: optimizer that updates the model's parameters.
        loss_f: callable ``loss_f(predictions, targets)`` returning a scalar
            tensor.
        hyperP: hyper-parameter dict; ``'print_every'`` (number of batches
            between progress prints) is the only key read here.
        e: epoch index, used only in the progress message.

    Returns:
        The sum of all batch losses divided by ``len(trainloader)``.
    """
    num_batches = len(trainloader)
    print_every = hyperP['print_every']
    model.train()
    total_loss = 0
    loss_sum = 0  # loss accumulated since the last progress print

    for i, (intents, codes, slot_nums, scores, intents_seq_order, codes_seq_order) in enumerate(trainloader):
        predict_scores = model(intents, codes, slot_nums, intents_seq_order, codes_seq_order)
        # Put the targets on whatever device the model produced its output on,
        # instead of hard-coding CUDA; this also lets the function run on CPU.
        loss = loss_f(predict_scores, scores.to(predict_scores.device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # show stats (read the scalar once; each .item() call syncs with the device)
        batch_loss = loss.item()
        loss_sum += batch_loss
        total_loss += batch_loss
        if (i + 1) % print_every == 0:
            # Progress is the share of batches processed so far; this equals
            # samples_seen / (num_batches * batch_size) because the nominal
            # batch size cancels out.
            print('Epoch {}, prograss {}%, \tTrain loss:{}\t'.format(e,
                100 * (i + 1) / num_batches, loss_sum / print_every), end='\r')
            loss_sum = 0

    print()

    return total_loss / num_batches

In [11]:
# Restore the model's state before training continues. `load` is a
# project-defined method of ScoreNet, not a torch.nn.Module built-in.
# NOTE(review): presumably this reads the checkpoint that model.save() writes;
# confirm in rerank_model, and skip this cell when training from scratch if no
# checkpoint exists yet.
model.load()

In [12]:
# One entry per epoch: the value returned by train() for that epoch.
losses = []
for e in range(hyperP['max_epochs']):
    loss = train(model, trainloader, optimizer, loss_f, hyperP, e)
    losses.append(loss)
    # Save after every epoch (model.save() is project-defined; presumably it
    # writes a checkpoint -- confirm in rerank_model).
    model.save()
    print('model saved')
    # `scheduler` is only created when lr decay is enabled, so it is stepped
    # under the same condition that guards its construction.
    if lr_keep_rate != 1.0:
        scheduler.step()

Epoch 0, prograss 99.57389549226284%, 	Train loss:0.03394944096604983			
model saved
Epoch 1, prograss 99.57389549226284%, 	Train loss:0.02907513485600551			
model saved
Epoch 2, prograss 99.57389549226284%, 	Train loss:0.03227131379147371			
model saved
Epoch 3, prograss 99.57389549226284%, 	Train loss:0.03264158957948287			
model saved
Epoch 4, prograss 99.57389549226284%, 	Train loss:0.030484154323736825		
model saved
Epoch 5, prograss 99.57389549226284%, 	Train loss:0.02767517122750481			
model saved
Epoch 6, prograss 99.57389549226284%, 	Train loss:0.027316372406979404		
model saved
Epoch 7, prograss 99.57389549226284%, 	Train loss:0.02615900772313277			
model saved
Epoch 8, prograss 99.57389549226284%, 	Train loss:0.02730207865436872			
model saved
Epoch 9, prograss 99.57389549226284%, 	Train loss:0.02611682554706931			
model saved
Epoch 10, prograss 99.57389549226284%, 	Train loss:0.021806607178101938		
model saved
Epoch 11, prograss 99.57389549226284%, 	Train loss:0.02287415172