In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import json
import preprocess_temp as P
import model.parsers as M

### Load Data

In [3]:
directory = './conala-corpus/'
train_file = directory + 'train.json'
test_file = directory + 'test.json'

with open(train_file) as f:
    train_data = json.load(f)
    
with open(test_file) as f:
    test_data = json.load(f)

### Let's preprocess the data. Everything is in Preprocess.py

In [4]:
# intent processing includes lowercase, remove punctuation'?'
train_intent, train_codes = P.process_data(train_data)
test_intent, test_codes = P.process_data(test_data)

In [5]:
# this class is used for code2actions and actions2code
ast_action = P.Ast_Action()

In [6]:
train_actions = []

for code in train_codes:
    train_actions.append(ast_action.code2actions(code))

In [7]:
word_lst = P.vocab_list(train_intent, cut_freq=5)
act_lst, token_lst = P.action_list(train_actions, cut_freq=5)

In [8]:
word2num = dict(zip(word_lst, range(0,len(word_lst))))
act2num = dict(zip(act_lst, range(0,len(act_lst))))
token2num = dict(zip(token_lst, range(0,len(token_lst))))

In [9]:
train_loader = P.get_train_loader(train_intent, train_actions, word2num, act2num, token2num)

In [10]:
test_loader = P.get_test_loader(test_intent, word2num)

In [11]:
action_index_copy = act2num[P.GenTokenAction('copy')]
action_index_gen = act2num[P.GenTokenAction('token')]

### Model

In [12]:
from collections import namedtuple
hyperParamMap = {
    #### General configuration ####
    'cuda': True,      # Use gpu
    'asdl_file': '',   # Path to ASDL grammar specification
    'mode': 'train',   # train or test

    #### Modularized configuration ####
    'parser': 'default_parser',  # which parser model to use

    #### Model configuration ####
    'lstm': 'lstm',    # Type of LSTM used, currently only standard LSTM cell is supported

    #### Embedding sizes ####
    'embed_size': 128,         # Size of word embeddings
    'action_embed_size': 128,  # Size of ApplyRule/GenToken action embeddings
    'field_embed_size': 64,    # Embedding size of ASDL fields
    'type_embed_size': 64,     # Embeddings ASDL types

    #### Hidden sizes ####
    'hidden_size': 256,        # Size of LSTM hidden states
    'ptrnet_hidden_dim': 32,   # Hidden dimension used in pointer network
    'att_vec_size': 256,       # Size of attentional vector

    #### readout layer ####
    'no_query_vec_to_action_map': False,    # Do not use additional linear layer to transform the attentional vector for computing action probabilities
    'readout': 'identity',                  # Type of activation if using additional linear layer
    'query_vec_to_action_diff_map': False,  # Use different linear mapping 

    #### parent information switch for decoder LSTM ####
    'no_parent_production_embed': False,    # Do not use embedding of parent ASDL production to update decoder LSTM state
    'no_parent_field_embed': False,         # Do not use embedding of parent field to update decoder LSTM state
    'no_parent_field_type_embed': False,    # Do not use embedding of the ASDL type of parent field to update decoder LSTM state
    'no_parent_state': True,                # Do not use the parent hidden state to update decoder LSTM state
    'no_input_feed': False,                 # Do not use input feeding in decoder LSTM
    'no_copy': False,                       # Do not use copy mechanism

    #### Training ####
    'vocab': '',                            # Path of the serialized vocabulary
    'train_file': '',                       # path to the training target file
    'dev_file': '',                         # path to the dev source file
    'batch_size': 10,                       # Batch size
    'dropout': 0.,                          # dropout rate
    'word_dropout': 0.,                     # Word dropout rate
    'decoder_word_dropout': 0.,             # Word dropout rate on decoder
    'primitive_token_label_smoothing': 0.0, # Apply label smoothing when predicting primitive tokens
    'src_token_label_smoothing': 0.0,       # Apply label smoothing in reconstruction model when predicting source tokens
    'negative_sample_type': 'best',         # 

    #### training schedule details ####
    'valid_metric': 'acc',                # Metric used for validation
    'valid_every_epoch': 1,               # Perform validation every x epoch
    'log_every': 10,                      # Log training statistics every n iterations
    'save_to': 'model',                   # Save trained model to
    'save_all_models': False,             # Save all intermediate checkpoints
    'patience': 5,                        # Training patience
    'max_num_trial': 10,                  # Stop training after x number of trials
    'glorot_init': False,                 # Use glorot initialization
    'clip_grad': 5.,                      # Clip gradients
    'max_epoch': 10,                      # Maximum number of training epoches
    'optimizer': 'Adam',                  # optimizer
    'lr': 0.001,                          # Learning rate
    'lr_decay': 0.5,                      # decay learning rate if the validation performance drops
    'lr_decay_after_epoch': 0,            # Decay learning rate after x epoch
    'decay_lr_every_epoch': False,        # force to decay learning rate after each epoch
    'reset_optimizer': False,             # Whether to reset optimizer when loading the best checkpoint
    'verbose': False,                     # Verbose mode

    #### decoding/validation/testing ####
    'load_model': None,                   # Load a pre-trained model
    'beam_size': 5,                       # Beam size for beam search
    'decode_max_time_step': 100,          # Maximum number of time steps used in decoding and sampling
    'sample_size': 5,                     # Sample size
    'test_file': '',                      # Path to the test file
    'save_decode_to': None,               # Save decoding results to file
}

HyperParams = namedtuple('HyperParams', list(hyperParamMap.keys()), verbose=False)
hyperParams = HyperParams(**hyperParamMap)

In [63]:
model = M.Model(hyperParams, action_size=len(act_lst), token_size=len(token_lst), word_size=len(word_lst), 
                      action_index_copy=action_index_copy, action_index_gen=action_index_gen)

In [64]:
import torch
import time

In [65]:
optimizer = torch.optim.Adam(model.parameters(), lr = 3e-4)
lossFunc = torch.nn.CrossEntropyLoss()

In [76]:
epoch_begin = time.time()

for batch_ind, x in enumerate(train_loader):
    optimizer.zero_grad()

    (action_logits, action_labels), (copy_logits, copy_labels), (token_logits, token_labels) = model(x)
    
    loss1 = lossFunc(action_logits, action_labels)
    loss2 = torch.DoubleTensor([0.0])
    if len(copy_logits) > 0:
        loss2 = lossFunc(copy_logits, copy_labels)
    loss3 = torch.DoubleTensor([0.0])
    if len(token_logits) > 0:
        loss3 = lossFunc(token_logits, token_labels)

    total_loss = loss1 + loss2.double() + loss3.double()
    total_loss.backward()

    # clip gradient
    if hyperParams.clip_grad > 0.:
        grad_norm = torch.nn.utils.clip_grad_norm(model.parameters(), hyperParams.clip_grad)

    optimizer.step()

    if batch_ind % 10 == 0:
        print("Action loss: {}".format(loss1.data))
        print("Copy loss: {}".format(loss2.data))
        print("Token loss: {}".format(loss3.data))
        print("GenToken total loss: {}".format(loss2.data + loss3.data))
        report_loss = report_examples = 0.

print('[Epoch %d] epoch elapsed %ds' % (epoch, time.time() - epoch_begin))



Action loss: 4.318506553986384
Copy loss: 2.7684662342071533
Token loss: 5.924683570861816
GenToken total loss: 8.69314956665039
Action loss: 3.3948511498305707
Copy loss: 2.645263195037842
Token loss: 5.739128112792969
GenToken total loss: 8.384391784667969
Action loss: 2.327069810590407
Copy loss: 2.6278035640716553
Token loss: 4.679891109466553
GenToken total loss: 7.307694435119629
Action loss: 1.9776235216942923
Copy loss: 2.4266655445098877
Token loss: 4.631178855895996
GenToken total loss: 7.057844161987305
Action loss: 1.5254496265981778
Copy loss: 2.1510934829711914
Token loss: 4.460443019866943
GenToken total loss: 6.611536502838135
Action loss: 1.3542082856396551
Copy loss: 2.4200665950775146
Token loss: 4.084047794342041
GenToken total loss: 6.504114151000977
Action loss: 1.6463950478807117
Copy loss: 2.3245389461517334
Token loss: 4.207257270812988
GenToken total loss: 6.531796455383301
Action loss: 1.4776575757490935
Copy loss: 1.969358205795288
Token loss: 4.503574848175

NameError: name 'epoch' is not defined