In [1]:
# Enable IPython's autoreload extension in mode 2, so edits to the local
# modules imported below are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys

# Extend the module search path so modules stored in these two project
# directories can be imported by the cells below.
for local_dir in ('./preprocessing', './seq2seq'):
    sys.path.append(local_dir)

In [3]:
from processor import Code_Intent_Pairs
from model import Seq2Seq
from data import get_train_loader, get_test_loader

### Define Hyperparameters

In [4]:
# Single configuration dict; it is handed to the data loader, the model
# constructor and the training function further down.
hyperP = {}

# training parameters
hyperP.update(
    batch_size=32,
    lr=1e-3,
    teacher_force_rate=1.0,
    max_epochs=45,
    lr_keep_rate=0.97,  # per-epoch lr multiplier; set to 1.0 to keep the lr constant over time
    load_pretrain_code_embed=False,
    freeze_embed=False,
)

# encoder architecture
hyperP.update(
    encoder_layers=2,
    encoder_embed_size=128,
    encoder_hidden_size=384,
    encoder_dropout_rate=0.3,
)

# decoder architecture
hyperP.update(
    decoder_layers=2,
    decoder_embed_size=128,
    decoder_hidden_size=384,
    decoder_dropout_rate=0.3,
)

# attention architecture
hyperP.update(attn_hidden_size=384)

# visualization: number of batches between progress lines in train()
hyperP.update(print_every=10)

### Load Data

In [5]:
# Helper imported from the processor module; the following cells call its
# load_dict, load_entries and pad methods.
code_intent_pair = Code_Intent_Pairs()

In [6]:
# Load the saved dictionaries from the vocab/ directory, then read back the
# values that the data loader and the model constructor need.
path = 'vocab/'
code_intent_pair.load_dict(path)
# Passed to get_train_loader below.
special_symbols = code_intent_pair.get_special_symbols()
# Passed to Seq2Seq as its first two positional arguments.
# presumably the intent-word and code-token vocabulary sizes — confirm in processor.py
word_size = code_intent_pair.get_word_size()
code_size = code_intent_pair.get_code_size()

In [7]:
# Load the preprocessed training entries from JSON.
train_path = 'processed_corpus/train.json'
train_entries = code_intent_pair.load_entries(train_path)
# NOTE(review): pad() is called for its side effect only (the return value is
# discarded) — presumably it pads the loaded entries in place; confirm in processor.py.
code_intent_pair.pad()

In [8]:
# Batch iterator consumed by train(); hyperP (which contains batch_size) is
# passed through to the loader factory.
trainloader = get_train_loader(train_entries, special_symbols, hyperP)

### Define Model

In [9]:
# Build the Seq2Seq model; hyperP carries the encoder/decoder/attention settings.
model = Seq2Seq(word_size, code_size, hyperP)

In [10]:
import torch

# Optionally initialise the decoder's first embedding sub-module from a saved
# state dict, and optionally exclude its parameters from gradient updates.
if hyperP['load_pretrain_code_embed']:
    code_embedding = model.decoder.embed[0]
    pretrained_state = torch.load('./pretrain_code_lm/embedding-1556211835.t7')
    code_embedding.load_state_dict(pretrained_state)

    # Freezing only applies when the pretrained weights were loaded.
    if hyperP['freeze_embed']:
        for weight in code_embedding.parameters():
            weight.requires_grad = False

### Training

In [11]:
# torch is also imported by the pretrained-embedding cell; the repeat is harmless.
import torch
import torch.optim as optim
# LambdaLR is used by the scheduler cell that follows.
from torch.optim.lr_scheduler import LambdaLR
# Adam over all model parameters, starting from the learning rate in hyperP.
optimizer = optim.Adam(model.parameters(), lr=hyperP['lr'])
# Loss applied by train() to the model's logits and the target sequence.
loss_f = torch.nn.CrossEntropyLoss()

In [12]:
lr_keep_rate = hyperP['lr_keep_rate']

# A keep rate of exactly 1.0 means "no decay": in that case no scheduler is
# created, and the training loop checks the same condition before stepping it.
if lr_keep_rate != 1.0:
    def lr_reduce_f(epoch):
        """Return the factor applied to the initial lr at the given epoch."""
        return lr_keep_rate ** epoch

    scheduler = LambdaLR(optimizer, lr_lambda=lr_reduce_f)

In [13]:
def train(model, trainloader, optimizer, loss_f, hyperP):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: module called as ``model(inp_seq, padded_out_seq, out_lens)``
            that returns logits; it is switched to training mode first.
        trainloader: iterable yielding
            ``(inp_seq, original_out_seq, padded_out_seq, out_lens)`` batches.
            It does not need to support ``len()``.
        optimizer: optimizer whose gradients are cleared and which is stepped
            once per batch.
        loss_f: callable ``loss_f(logits, original_out_seq)`` returning a
            scalar tensor.
        hyperP: dict; only ``hyperP['print_every']`` (batches between
            progress lines) is read here.

    Returns:
        float: the sum of the batch losses divided by the number of batches,
        or ``nan`` when ``trainloader`` yields no batches.
    """
    model.train()
    print_every = hyperP['print_every']

    total_loss = 0.0
    num_batches = 0

    # Running statistics for the progress line; reset after every print.
    window_loss = 0.0
    window_correct = 0
    window_size = 0

    for i, (inp_seq, original_out_seq, padded_out_seq, out_lens) in enumerate(trainloader):
        logits = model(inp_seq, padded_out_seq, out_lens)
        loss = loss_f(logits, original_out_seq)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Read the scalar loss once and reuse it for both accumulators.
        batch_loss = loss.item()
        total_loss += batch_loss
        window_loss += batch_loss
        num_batches += 1

        # Predicted class = index of the largest logit along dim 1.
        _, predictions = torch.max(logits, dim=1)
        # .item() keeps the counter a plain Python int instead of a tensor.
        window_correct += (predictions == original_out_seq).sum().item()
        window_size += len(original_out_seq)

        if (i + 1) % print_every == 0:
            # '\r' makes successive progress lines overwrite each other.
            print('Train: loss:{}\tacc:{}'.format(window_loss / print_every,
                                                  float(window_correct) / window_size),
                  end='\r')
            window_loss = 0.0
            window_correct = 0
            window_size = 0

    # Move past the last '\r'-terminated progress line.
    print()

    if num_batches == 0:
        # The mean over zero batches is undefined; report nan rather than
        # raising ZeroDivisionError.
        return float('nan')
    return total_loss / num_batches

In [14]:
# NOTE(review): best_acc is initialised here but never read or updated in the
# visible cells — confirm whether it is still needed.
best_acc = 0.0

In [16]:
# Main training run: hyperP['max_epochs'] epochs. After each epoch the lr
# scheduler is stepped (when decay is enabled) and the teacher-forcing rate
# is multiplied by 0.99, never dropping below 0.7.
losses = []
teacher_force_rate = hyperP['teacher_force_rate']

# Zero-based epoch index -> checkpoint file written once that epoch finishes.
# The entry for index 49 is only reached when max_epochs is at least 50.
checkpoint_files = {
    19: 'model_20.t7',
    29: 'model_30.t7',
    39: 'model_40.t7',
    44: 'model_45.t7',
    49: 'model_50.t7',
}

for e in range(hyperP['max_epochs']):
    loss = train(model, trainloader, optimizer, loss_f, hyperP)
    losses.append(loss)
    if lr_keep_rate != 1.0:
        scheduler.step()

    # change teacher force rate
    teacher_force_rate = max(0.7, 0.99 * teacher_force_rate)
    model.change_teacher_force_rate(teacher_force_rate)

    if e in checkpoint_files:
        model.save(checkpoint_files[e])
        print('model saved')

Train: loss:2.6172940731048584	acc:0.40561285680019194
Train: loss:2.093934726715088	acc:0.490765171503957766
Train: loss:1.7993767738342286	acc:0.54065723195010864
Train: loss:1.6287524700164795	acc:0.5713600383785086
Train: loss:1.509039855003357	acc:0.58431278484048932
Train: loss:1.395941424369812	acc:0.61813384504677382
Train: loss:1.2572482585906983	acc:0.6380426960901895
Train: loss:1.1904706418514253	acc:0.6531542336291677
Train: loss:1.050810205936432	acc:0.68049892060446158
Train: loss:0.9978546142578125	acc:0.7011273686735429
Train: loss:0.8884929299354554	acc:0.7294315183497242
Train: loss:0.7955094099044799	acc:0.7555768769489086
Train: loss:0.7768161058425903	acc:0.7673302950347806
Train: loss:0.6650876104831696	acc:0.7970736387622931
Train: loss:0.7694241106510162	acc:0.7824418325737587
Train: loss:0.7301493734121323	acc:0.8076277284720557
Train: loss:0.5397615402936935	acc:0.8392899976013433
Train: loss:0.48029122650623324	acc:0.8615975053969777
Train: loss:0.5293080121

In [None]:
# Continue training for 50 more epochs with the teacher-forcing rate pinned
# at 0.7. The lr scheduler is not stepped in this loop, so the learning rate
# stays wherever the previous run left it.
#
# NOTE(review): the checkpoint names below (60, 70, ..., 100) match the epoch
# count only if 50 epochs were trained beforehand, but hyperP['max_epochs']
# is 45 in this notebook — confirm the intended labels.
teacher_force_rate = 0.7
model.change_teacher_force_rate(teacher_force_rate)

# Zero-based epoch index within this run -> checkpoint file to write.
extra_checkpoint_files = {
    9: 'model_60.t7',
    19: 'model_70.t7',
    29: 'model_80.t7',
    39: 'model_90.t7',
    44: 'model_95.t7',
    49: 'model_100.t7',
}

for e in range(50):
    loss = train(model, trainloader, optimizer, loss_f, hyperP)
    losses.append(loss)

    if e in extra_checkpoint_files:
        model.save(extra_checkpoint_files[e])
        print('model saved')