In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append('./preprocessing')
sys.path.append('./seq2seq')

In [3]:
from processor import Code_Intent_Pairs
from model import Seq2Seq
from data import get_test_loader

### Get Data Loader

In [4]:
code_intent_pair = Code_Intent_Pairs()

In [5]:
path = 'vocab/'
code_intent_pair.load_dict(path)
special_symbols = code_intent_pair.get_special_symbols()
word_size = code_intent_pair.get_word_size()
code_size = code_intent_pair.get_code_size()

In [6]:
test_path = 'processed_corpus/test.json'
test_entries = code_intent_pair.load_entries(test_path)

In [7]:
testloader = get_test_loader(test_entries)

### Get Model

In [8]:
hyperP = {
    ## training parameters
    'batch_size' : 32,
    'lr' : 1e-3,
    'teacher_force_rate' : 0.90,
    'max_epochs' : 50,
    'lr_keep_rate' : 0.95,  # set to 1.0 to not decrease lr overtime
    'load_pretrain_code_embed': False,
    'freeze_embed': False,
    
    ## encoder architecture
    'encoder_layers' : 2,
    'encoder_embed_size' : 128,
    'encoder_hidden_size' : 384,
    'encoder_dropout_rate' : 0.3,
    
    ## decoder architecture
    'decoder_layers' : 2,
    'decoder_embed_size' : 128,
    'decoder_hidden_size' : 384,
    'decoder_dropout_rate' : 0.3,
    
    ## attn architecture
    'attn_hidden_size' : 384,
    
    ## visualization
    'print_every': 10,
}

In [9]:
model = Seq2Seq(word_size, code_size, hyperP)

In [10]:
import torch
if hyperP['load_pretrain_code_embed']:
    model.decoder.embed[0].load_state_dict(torch.load('./pretrain_code_lm/embedding-1556211835.t7'))
    if hyperP['freeze_embed']:
        for param in model.decoder.embed[0].parameters():
            param.requires_grad = False

In [11]:
model.load()

### Test Decoding

In [17]:
from decoder import Decoder
from decoder import post_process_test, post_process_hand
from decoder import post_process_dummy, post_process_model
from evaluate import get_bleu_all, get_bleu_sent

In [18]:
beam_decoder = Decoder(model)

In [19]:
model.eval()
sos = special_symbols['code_sos']
eos = special_symbols['code_eos']
unk = special_symbols['code_unk']

In [20]:
idx2code = code_intent_pair.idx2code

In [26]:
intent2idx = code_intent_pair.intent2idx

#### Beam Search Results

In [15]:
dummy_code_list = []
true_code_list = []

for i, (src_seq, slot_map, code, intent) in enumerate(testloader):
    beams = beam_decoder.decode(src_seq, sos, eos, unk, beam_width=3)
    dummy_code =  post_process_dummy(slot_map, beams, idx2code)
    dummy_code_list.append(dummy_code)
    true_code_list.append(code)

In [16]:
get_bleu_all(dummy_code_list, true_code_list)

0.23920585053370877

#### Rerank with Hand Features

In [21]:
hand_code_list = []
true_code_list = []

for i, (src_seq, slot_map, code, intent) in enumerate(testloader):
    beams = beam_decoder.decode(src_seq, sos, eos, unk, beam_width=3)
    hand_code =  post_process_hand(intent, slot_map, beams, idx2code)
    hand_code_list.append(hand_code)
    true_code_list.append(code)

In [22]:
get_bleu_all(hand_code_list, true_code_list)

0.2572834545025351

#### Rerank with Neural Model

In [30]:
from processor import process_intent


model_code_list = []
true_code_list = []

for i, (src_seq, _, code, intent) in enumerate(testloader):
    beams = beam_decoder.decode(src_seq, sos, eos, unk, beam_width=3)
    model_code =  post_process_model(intent, beams, idx2code, None, process_intent, intent2idx)
    
    if i == 1:
        break
    
#     model_code_list.append(model_code)
#     true_code_list.append(code)

[([433, 3, 433, 5, 8, 17, 174, 283], [114, 22, 441, 1, 29, 17], 3), ([433, 3, 433, 5, 8, 17, 174, 283], [114, 22, 441, 22, 242, 1, 29, 17], 3), ([433, 3, 433, 5, 8, 17, 174, 283], [114, 22, 120, 1, 29, 17], 3)]
[([178, 3, 115, 13, 5, 8, 179, 155], [25, 22, 242, 1, 101, 17], 0), ([178, 3, 115, 13, 5, 8, 179, 155], [25, 22, 242, 1, 145, 17], 0), ([178, 3, 115, 13, 5, 8, 179, 155], [25, 22, 242, 1, 202, 17], 0)]


In [None]:
from data import write_answer_json
write_answer_json(model_code_list)