In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to the local .py files are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys

# Put the project's local source directories on the import path (relative to
# the notebook's working directory). The next cell imports `processor`,
# `model` and `data`, which presumably live in these folders.
sys.path.extend(['./preprocessing', './seq2seq'])

In [3]:
from processor import Code_Intent_Pairs
from model import Seq2Seq
from data import get_train_loader, get_test_loader

### Define Hyperparameters

In [18]:
# Single configuration dict handed to the data loader, the model and train().
hyperP = dict(
    # ---- training ----
    batch_size=32,
    lr=1e-3,                         # initial learning rate given to Adam
    teacher_force_rate=0.90,
    max_epochs=24,                   # number of passes of the epoch loop
    lr_keep_rate=0.95,               # per-epoch lr multiplier; set to 1.0 to not decrease lr over time
    load_pretrain_code_embed=False,  # load a saved state dict into the decoder embedding
    freeze_embed=False,              # only consulted when load_pretrain_code_embed is True

    # ---- encoder architecture ----
    encoder_layers=2,
    encoder_embed_size=128,
    encoder_hidden_size=384,
    encoder_dropout_rate=0.3,

    # ---- decoder architecture ----
    decoder_layers=2,
    decoder_embed_size=128,
    decoder_hidden_size=384,
    decoder_dropout_rate=0.3,

    # ---- attention architecture ----
    attn_hidden_size=384,

    # ---- visualization ----
    print_every=10,                  # batches between two progress prints in train()
)

### Load Data

In [19]:
# Corpus helper from the local `processor` module. The cells below use it to
# load the vocabularies and the train/test entries, and to look up idx2code
# during decoding.
code_intent_pair = Code_Intent_Pairs()

In [20]:
# Load the saved dictionaries from `vocab/`, then read back the values the
# rest of the notebook needs.
path = 'vocab/'
code_intent_pair.load_dict(path)
# Dict of special-token entries; the decoding cells read the keys
# 'code_sos', 'code_eos' and 'code_unk' from it.
special_symbols = code_intent_pair.get_special_symbols()
# Passed to Seq2Seq as its first two arguments — presumably the intent-word
# and code-token vocabulary sizes; confirm against the model constructor.
word_size = code_intent_pair.get_word_size()
code_size = code_intent_pair.get_code_size()

In [21]:
# Read the preprocessed training corpus.
train_path = 'processed_corpus/train.json'
train_entries = code_intent_pair.load_entries(train_path)
# NOTE(review): pad() returns nothing that is used here, so it presumably
# pads the entries held by code_intent_pair (and hence train_entries) in
# place — confirm in processor.py.
code_intent_pair.pad()

In [22]:
# Batch iterator over the training entries. train() unpacks each batch as
# (inp_seq, original_out_seq, padded_out_seq, out_lens).
trainloader = get_train_loader(train_entries, special_symbols, hyperP)

### Define Model

In [23]:
# Build the sequence-to-sequence network from the two vocabulary sizes and
# the hyper-parameter dict.
model = Seq2Seq(word_size, code_size, hyperP)

In [24]:
import torch

# Optionally warm-start the first module of the decoder's embedding stack
# from a saved state dict, and optionally freeze it afterwards.
if hyperP['load_pretrain_code_embed']:
    # map_location='cpu' lets a checkpoint written on a GPU machine be read
    # on a CPU-only one. load_state_dict copies the values into the existing
    # parameters, so the device the layer lives on is unaffected.
    model.decoder.embed[0].load_state_dict(
        torch.load('./pretrain_code_lm/embedding-1556211835.t7', map_location='cpu')
    )
    # Freezing is only applied to a loaded embedding; 'freeze_embed' has no
    # effect when nothing was loaded.
    if hyperP['freeze_embed']:
        for param in model.decoder.embed[0].parameters():
            param.requires_grad = False

In [25]:
# model = model.cuda()

In [26]:
# inp_seq, original_out_seq, padded_out_seq, out_lens = next(iter(trainloader))

In [27]:
# logits = model(inp_seq, padded_out_seq, out_lens)

### Training

In [28]:
import torch
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR
# Adam over every model parameter, starting at hyperP['lr'], with a weight
# decay of 1e-4.
optimizer = optim.Adam(model.parameters(), lr=hyperP['lr'], weight_decay = 1e-4)
# NOTE(review): no ignore_index is set, so every target position counts
# toward the loss — confirm the targets passed to train() contain no padding.
loss_f = torch.nn.CrossEntropyLoss()

In [29]:
# Exponential learning-rate decay: after `epoch` scheduler steps the lr is the
# initial lr multiplied by lr_keep_rate ** epoch. With a keep rate of exactly
# 1.0 no scheduler is created at all, and the epoch loop skips the step.
lr_keep_rate = hyperP['lr_keep_rate']
if lr_keep_rate != 1.0:
    def lr_reduce_f(epoch):
        """Return the multiplier applied to the initial lr at `epoch`."""
        return lr_keep_rate ** epoch

    scheduler = LambdaLR(optimizer, lr_lambda=lr_reduce_f)

In [30]:
def train(model, trainloader, optimizer, loss_f, hyperP):
    """Run one training epoch and return the mean loss per batch.

    Args:
        model: called as ``model(inp_seq, padded_out_seq, out_lens)`` to get
            the logits; ``model.train()`` is invoked first.
        trainloader: iterable of ``(inp_seq, original_out_seq,
            padded_out_seq, out_lens)`` batches. It does not need to
            support ``len()``.
        optimizer: stepped once per batch.
        loss_f: criterion called as ``loss_f(logits, original_out_seq)``.
        hyperP: hyper-parameter dict; only ``'print_every'`` is read here.

    Returns:
        float: the sum of the per-batch losses divided by the number of
        batches processed, or ``nan`` if the loader yields no batch.
    """
    model.train()
    print_every = hyperP['print_every']

    total_loss = 0.0      # accumulated over the whole epoch
    num_batches = 0
    # Running statistics for the current reporting window; reset after
    # every print.
    window_loss = 0.0
    window_correct = 0
    window_size = 0

    for inp_seq, original_out_seq, padded_out_seq, out_lens in trainloader:
        logits = model(inp_seq, padded_out_seq, out_lens)
        loss = loss_f(logits, original_out_seq)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Read the scalar once and keep plain Python numbers, so no tensor
        # is carried from one iteration to the next.
        batch_loss = loss.item()
        total_loss += batch_loss
        window_loss += batch_loss
        num_batches += 1

        # Predicted class = index of the largest logit along dim 1.
        _, predictions = logits.max(dim=1)
        window_correct += (predictions == original_out_seq).sum().item()
        window_size += len(original_out_seq)

        if num_batches % print_every == 0:
            # '\r' makes successive reports overwrite each other on one line.
            print('Train: loss:{}\tacc:{}'.format(window_loss / print_every,
                                                  window_correct / window_size), end='\r')
            window_loss = 0.0
            window_correct = 0
            window_size = 0
    print()

    if num_batches == 0:
        # Nothing was trained on; the mean loss is undefined.
        return float('nan')
    return total_loss / num_batches

In [31]:
# NOTE(review): best_acc is initialised here but never read or updated in the
# cells visible in this notebook; the epoch loop saves the model
# unconditionally. Either wire it into checkpoint selection or remove it.
best_acc = 0.0

In [35]:
# Main training loop: one train() call per epoch, collecting the mean epoch
# loss so it can be inspected afterwards.
losses = []
for e in range(hyperP['max_epochs']):
    loss = train(model, trainloader, optimizer, loss_f, hyperP)
    losses.append(loss)
    # A checkpoint is written after every epoch, regardless of the loss.
    model.save()
    print('model saved')
    # `scheduler` only exists when lr decay is enabled, hence the same guard
    # as in the cell that creates it.
    if lr_keep_rate != 1.0:
        scheduler.step()

Train: loss:0.2351858139038086	acc:0.94243223794674988
model saved
Train: loss:0.22016897201538085	acc:0.9438714319980811
model saved
Train: loss:0.2359562799334526	acc:0.94483089469896858
model saved
Train: loss:0.21127869188785553	acc:0.9529863276565124
model saved
Train: loss:0.18552034795284272	acc:0.9637802830414968
model saved
Train: loss:0.20184051096439362	acc:0.9563444471096186
model saved
Train: loss:0.16487321853637696	acc:0.9688174622211562
model saved
Train: loss:0.17498648911714554	acc:0.9633005516910538
model saved
Train: loss:0.1664627216756344	acc:0.96905732789637834
model saved
Train: loss:0.21169093251228333	acc:0.9546204620462047

KeyboardInterrupt: 

In [None]:
# Presumably restores the weights written by model.save() in the training
# loop; the checkpoint location is decided inside the model class — confirm.
model.load()

### Decoding

In [27]:
from decoder import Decoder
from decoder import post_process_dummy
from evaluate import get_bleu_all, get_bleu_sent

In [28]:
# Wrap the model in the project's Decoder; its decode() method is called with
# a beam_width in the decoding loop below.
beam_decoder = Decoder(model)

In [29]:
# Switch the model to evaluation mode before decoding (assuming Seq2Seq is a
# torch.nn.Module, this turns off dropout).
model.eval()
# Special code-side tokens handed to the beam decoder — presumably the
# start-of-sequence, end-of-sequence and unknown-token entries.
sos = special_symbols['code_sos']
eos = special_symbols['code_eos']
unk = special_symbols['code_unk']

In [None]:
# Read the preprocessed test corpus and wrap it in a loader. The decoding
# loop unpacks each item as (src_seq, slot_map, code, intent).
test_path = 'processed_corpus/test.json'
test_entries = code_intent_pair.load_entries(test_path)
testloader = get_test_loader(test_entries)

#### Decoding Result Using Beam Search Alone

In [50]:
import torch

# Decode every test example with beam search and collect the post-processed
# prediction next to its reference code for BLEU scoring.
dummy_code_list = []
true_code_list = []

# Inference only: no_grad() stops autograd from recording the forward passes,
# which saves memory and time and does not change the decoded output.
with torch.no_grad():
    for src_seq, slot_map, code, intent in testloader:
        beams = beam_decoder.decode(src_seq, sos, eos, unk, beam_width=3)
        # Turn the beams back into code using the slot map and the
        # index-to-token table.
        dummy_code = post_process_dummy(slot_map, beams, code_intent_pair.idx2code)
        dummy_code_list.append(dummy_code)
        true_code_list.append(code)

In [51]:
# Score all beam-search predictions against their references with BLEU; the
# value is shown as the cell output.
get_bleu_all(dummy_code_list, true_code_list)

0.2686967398716274