In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append('./preprocessing')
sys.path.append('./seq2seq')

In [3]:
from processor import Code_Intent_Pairs
from model import Seq2Seq
from data import get_test_loader

### Get Data Loader

In [4]:
code_intent_pair = Code_Intent_Pairs()

In [5]:
path = 'vocab/'
code_intent_pair.load_dict(path)
special_symbols = code_intent_pair.get_special_symbols()
word_size = code_intent_pair.get_word_size()
code_size = code_intent_pair.get_code_size()

In [6]:
test_path = 'processed_corpus/test.json'
test_entries = code_intent_pair.load_entries(test_path)

In [7]:
testloader = get_test_loader(test_entries)

### Get Model

In [8]:
hyperP = {
    ## training parameters
    'batch_size' : 32,
    'lr' : 1e-3,
    'teacher_force_rate' : 0.90,
    'max_epochs' : 50,
    'lr_keep_rate' : 0.95,  # set to 1.0 to not decrease lr overtime
    'load_pretrain_code_embed': False,
    'freeze_embed': False,
    
    ## encoder architecture
    'encoder_layers' : 2,
    'encoder_embed_size' : 128,
    'encoder_hidden_size' : 384,
    'encoder_dropout_rate' : 0.3,
    
    ## decoder architecture
    'decoder_layers' : 2,
    'decoder_embed_size' : 128,
    'decoder_hidden_size' : 384,
    'decoder_dropout_rate' : 0.3,
    
    ## attn architecture
    'attn_hidden_size' : 384,
    
    ## visualization
    'print_every': 10,
}

In [9]:
model = Seq2Seq(word_size, code_size, hyperP)

In [10]:
import torch
if hyperP['load_pretrain_code_embed']:
    model.decoder.embed[0].load_state_dict(torch.load('./pretrain_code_lm/embedding-1556211835.t7'))
    if hyperP['freeze_embed']:
        for param in model.decoder.embed[0].parameters():
            param.requires_grad = False

In [11]:
model.load()

### Test Decoding

In [12]:
from decoder import Decoder
from decoder import post_process_test, post_process_hand
from decoder import post_process_dummy, post_process_model
from evaluate import get_bleu_all, get_bleu_sent

In [13]:
beam_decoder = Decoder(model)

In [14]:
model.eval()
sos = special_symbols['code_sos']
eos = special_symbols['code_eos']
unk = special_symbols['code_unk']

In [15]:
idx2code = code_intent_pair.idx2code

In [16]:
intent2idx = code_intent_pair.intent2idx

#### Beam Search Results

In [17]:
dummy_code_list = []
true_code_list = []

for i, (src_seq, slot_map, code, intent) in enumerate(testloader):
    beams = beam_decoder.decode(src_seq, sos, eos, unk, beam_width=3)
    dummy_code =  post_process_dummy(slot_map, beams, idx2code)
    dummy_code_list.append(dummy_code)
    true_code_list.append(code)

In [18]:
get_bleu_all(dummy_code_list, true_code_list)

0.2642075877054355

#### Rerank with Hand Features

In [None]:
hand_code_list = []
true_code_list = []

for i, (src_seq, slot_map, code, intent) in enumerate(testloader):
    beams = beam_decoder.decode(src_seq, sos, eos, unk, beam_width=20)
    hand_code =  post_process_hand(intent, slot_map, beams, idx2code)
    hand_code_list.append(hand_code)
    true_code_list.append(code)

In [None]:
get_bleu_all(hand_code_list, true_code_list)

#### Test Hnad Features

In [237]:
src_seq, slot_map, code, intent = testloader[107]
beams = beam_decoder.decode(src_seq, sos, eos, unk, beam_width=10)
post_process_test(intent, slot_map, beams, idx2code, code)

joining data from dataframe `df1` with data from dataframe `df2` based on matching values of column 'Date_Time' in both dataframes
df1.merge(df2, on='Date_Time')
{'var_0': 'df1', 'var_1': 'df2', 'str_0': 'Date_Time'}
before process:
b_score:-0.34	score:0.31:	pd . merge ( df1 , df2 , how = 'Date_Time' , on = 'str_1' )
b_score:-0.33	score:0.10:	pd . concat ( [ df1 , df2 ] ) . merge ( ) . reset_index ( )
b_score:-0.32	score:0.28:	df1 . loc ( index = 'Date_Time' , columns = 'Date_Time' )
b_score:-0.32	score:0.08:	pd . concat ( [ df1 , df2 ] ) . merge ( ) . reset_index ( ) . reset_index ( )
b_score:-0.31	score:0.39:	pd . merge ( df1 , df2 , how = 'Date_Time' , on = 'Date_Time' )
b_score:-0.31	score:0.19:	pd . merge ( df1 , df2 , how = [ 'Date_Time' ] )
b_score:-0.31	score:0.11:	pd . concat ( [ df1 , df2 ] , axis = [ 'Date_Time' ] )
b_score:-0.30	score:0.22:	pd . concat ( [ df1 , df2 ] ) . merge ( ) . reset_index ( ) = 'Date_Time' )
b_score:-0.29	score:0.27:	pd . concat ( [ df1 , df2 ] , axi

In [170]:
lemmatizer = nltk.wordnet.WordNetLemmatizer()
lemmatizer.lemmatize('running')

'running'

#### Rerank with Neural Model

In [47]:
from rerank_model import ScoreNet

In [48]:
hyperP = {
    ## encoder architecture
    'encoder_layers': 2,
    'encoder_embed_size': 128,
    'encoder_hidden_size': 256,
    'encoder_dropout_rate': 0.3,
}

In [49]:
score_net = ScoreNet(word_size, code_size, hyperP).cuda()

In [50]:
score_net.load()

In [51]:
from processor import process_intent

model_code_list = []
true_code_list = []

for i, (src_seq, _, code, intent) in enumerate(testloader):
    beams = beam_decoder.decode(src_seq, sos, eos, unk, beam_width=20)
    model_code =  post_process_model(intent, beams, idx2code, score_net, process_intent, intent2idx)
    model_code_list.append(model_code)
    true_code_list.append(code)

In [62]:
get_bleu_all(hand_code_list, true_code_list)

0.301039007821032

#### Write Hand Featured Rerank

In [132]:
from data import write_answer_json
write_answer_json(hand_code_list)

In [68]:
!zip('answer.txt')

/bin/sh: 1: Syntax error: word unexpected (expecting ")")
