In [4]:
import argparse
import numpy as np
import pickle
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.metrics import roc_auc_score

import transformer.Constants as Constants

from preprocess.Dataset import get_dataloader
from transformer.Models import Transformer
from tqdm import tqdm


def prepare_dataloader(opt):
    """ Load data and prepare dataloader. """

    def load_data(name, dict_name):
        with open(name, 'rb') as f:
            data = pickle.load(f, encoding='latin-1')
            num_types = data['dim_process']
            data = data[dict_name]
            return data, int(num_types)

    print('[Info] Loading train data...')
    train_data, num_types = load_data(opt.data + 'train.pkl', 'train')
    print('[Info] Loading dev data...')
    dev_data, _ = load_data(opt.data + 'dev.pkl', 'dev')
#     print('[Info] Loading test data...')
#     test_data, _ = load_data(opt.data + 'test.pkl', 'test')

    trainloader = get_dataloader(train_data[0:100], opt.batch_size, shuffle=True)
    devloader = get_dataloader(dev_data[0:64], opt.batch_size, shuffle=False)
    return trainloader, devloader, num_types


def train_epoch(model, training_data, optimizer, opt):
    """ Epoch operation in training phase. """

    model.train()

#     total_event_ll = 0  # cumulative event log-likelihood
#     total_time_se = 0  # cumulative time prediction squared-error
#     total_event_rate = 0  # cumulative number of correct prediction
#     total_num_event = 0  # number of total events
#     total_num_pred = 0  # number of predictions
    for batch in tqdm(training_data, mininterval=2,
                      desc='  - (Training)   ', leave=False):
        """ prepare data """
        event_time, time_gap, event_type = map(lambda x: x.to(opt.device), batch)

        """ forward """
        optimizer.zero_grad()
        
        event_type_0 = torch.hstack([torch.zeros(event_type.shape[0],1,44).int().to('cpu'),event_type])
        
        event_time_0 = torch.hstack([torch.zeros(event_time.shape[0],1).int().to('cpu'),event_time])

        time_gap_0 = torch.hstack([torch.zeros(time_gap.shape[0],1).int().to('cpu'),time_gap])

        enc_out, non_pad_mask = model(event_type_0, event_time_0)
        
        a,b,c = enc_out[:,:-1,:].shape[0],enc_out[:,:-1,:].shape[1],enc_out[:,:-1,:].shape[2]
        

        """ backward """
        # calculate P*(t_i+1) by mgn : log_loss_time: batch*len x 1
        log_loss_time = model.MGN.loss(enc_out[:,:-1,:].reshape(a*b,c), torch.log(time_gap_0[:,1:]+1e-9).reshape(a*b,1))

        # calculate P*(y_i+1) by mbn: 
     #   enc_out_time = torch.cat([enc_out[:,:-1,:], torch.log(time_gap_0[:,1:]+1e-9).unsqueeze(2)],dim=2)
        log_loss_type = model.MBN.loss(enc_out[:,:-1,:].reshape(a*b,c), event_type.reshape(a*b,model.num_types) )
        # sum log loss
        loss = torch.sum((log_loss_time + log_loss_type).reshape(a,b)  * non_pad_mask[:,1:,0])

#         loss = torch.sum((log_loss_type).reshape(a,b) * non_pad_mask[:,1:,0])
       # print("time loss {} type loss {}".format( torch.sum(log_loss_type.reshape(a,b) * non_pad_mask[:,1:,0]) ,torch.sum(log_loss_time.reshape(a,b) * non_pad_mask[:,1:,0])))
              
        loss.backward()

        """ update parameters """
        optimizer.step()

        """ note keeping """

    return loss


def eval_epoch(model, validation_data, opt):
    """ Epoch operation in evaluation phase. """

    model.eval()
    
    total_ll = 0
    total_time_se = 0
    total_num_pred = 0
    pred_label = []
    true_label = []

    with torch.no_grad():
        for batch in tqdm(validation_data, mininterval=2,
                          desc='  - (Validation) ', leave=False):
            """ prepare data """

            event_time, time_gap, event_type = map(lambda x: x.to(opt.device), batch)
            
            event_type_0 = torch.hstack([torch.zeros(event_type.shape[0],1,44).int().to('cpu'),event_type])

            event_time_0 = torch.hstack([torch.zeros(event_time.shape[0],1).int().to('cpu'),event_time])

            time_gap_0 = torch.hstack([torch.zeros(time_gap.shape[0],1).int().to('cpu'),time_gap])

            enc_out, non_pad_mask = model(event_type_0, event_time_0)

            a,b,c = enc_out[:,:-1,:].shape[0],enc_out[:,:-1,:].shape[1],enc_out[:,:-1,:].shape[2]


            # calculate P*(t_i+1) by mgn : log_loss_time: batch*len x 1
            log_loss_time = model.MGN.loss(enc_out[:,:-1,:].reshape(a*b,c), torch.log(time_gap_0[:,1:]+1e-9).reshape(a*b,1))
            
            log_timegap = model.MGN.mean(enc_out[:,:-1,:].reshape(a*b,c))
            
            diff = torch.exp(log_timegap) - time_gap.reshape(a*b,)
            se = torch.sum(diff * diff)
            
            # calculate P*(y_i+1) by mbn: 
            log_loss_type = model.MBN.loss(enc_out[:,:-1,:].reshape(a*b,c), event_type.reshape(a*b,model.num_types))
            
            pred_type = model.MBN.predict(enc_out[:,:-1,:].reshape(a*b,c) )[(non_pad_mask[:,1:,:].repeat(1,1, model.num_types)==1).reshape(a*b,model.num_types)]
            
            pred_label += list(pred_type.cpu().numpy())

            true_type = event_type[(non_pad_mask[:,1:,:].repeat(1,1, model.num_types)==1)] #.reshape(a*b,model.num_types).flatten()
           
            true_label += list(true_type.cpu().numpy())
           
            
            #  log loss
            loss = torch.sum((log_loss_time + log_loss_type).reshape(a,b) * non_pad_mask[:,1:,0])


            """ note keeping """
            total_ll += loss.item()
            total_time_se += se.item()
            total_num_pred += event_time.ne(Constants.PAD).sum().item() - event_time.shape[0]

                  
    roc_auc = roc_auc_score(y_true=true_label, y_score=pred_label,multi_class='ovo', average="samples")
    rmse = np.sqrt(total_time_se / total_num_pred)
    
    return total_ll, roc_auc, rmse

def train(model, training_data, validation_data, optimizer, scheduler, opt):
    """ Start training. """


    for epoch_i in range(opt.epoch):
        epoch = epoch_i + 1
        print('[ Epoch', epoch, ']')

        start = time.time()
        train_event = train_epoch(model, training_data, optimizer, opt)
        print('  - (Train)    negative loglikelihood: {ll: 8.4f}, '
              'elapse: {elapse:3.3f} min'
              .format(ll=train_event, elapse=(time.time() - start) / 60))

        start = time.time()
        valid_event, type_ll_seq, time_ll = eval_epoch(model, validation_data, opt)
        print('  - (dev)    nll: {ll: 8.4f}, '
              ' roc_auc :{type:8.4f},'
              'rmse :{time:8.4},'
              'elapse: {elapse:3.3f} min'
              
              .format(ll=valid_event, type=type_ll_seq,time=time_ll, elapse=(time.time() - start) / 60))

#         print('  - [Info] Maximum ll: {event: 8.5f}, '
#               'Maximum accuracy: {pred: 8.5f}, Minimum RMSE: {rmse: 8.5f}'
#               .format(event=max(valid_event_losses), pred=max(valid_pred_losses), rmse=min(valid_rmse)))

        # logging
#         with open(opt.log, 'a') as f:
#             f.write('{epoch}, {ll: 8.5f}, {acc: 8.5f}, {rmse: 8.5f}\n'
#                     .format(epoch=epoch, ll=valid_event, acc=valid_type, rmse=valid_time))

        scheduler.step()


# def main():
#     """ Main function. """

#     parser = argparse.ArgumentParser()

#     parser.add_argument('-data', required=True)

#     parser.add_argument('-epoch', type=int, default=30)
#     parser.add_argument('-batch_size', type=int, default=16)

#     parser.add_argument('-d_model', type=int, default=64)
#     parser.add_argument('-d_inner_hid', type=int, default=128)
#     parser.add_argument('-d_k', type=int, default=16)
#     parser.add_argument('-d_v', type=int, default=16)

#     parser.add_argument('-n_head', type=int, default=4)
#     parser.add_argument('-n_layers', type=int, default=4)

#     parser.add_argument('-dropout', type=float, default=0.1)
#     parser.add_argument('-lr', type=float, default=1e-4)

#     parser.add_argument('-log', type=str, default='log.txt')

 

# if __name__ == '__main__':
#     main()



In [5]:
import sys
sys.argv=['']
del sys
import argparse

parsed_args = argparse.ArgumentParser()
parsed_args.device = 1
parsed_args.data = "data/dunnhumby/split_1/"
parsed_args.batch_size = 32
parsed_args.n_head = 4
parsed_args.n_layers = 4
parsed_args.d_model = 64
parsed_args.d_inner = 32
parsed_args.d_k=32
parsed_args.d_v=32
parsed_args.ber_comps = 12
parsed_args.gau_comps = 12
parsed_args.dropout=0.1
parsed_args.lr=1e-3
parsed_args.epoch=1
parsed_args.log='log.txt'



In [6]:
opt = parsed_args

# default device is CUDA
opt.device = torch.device('cpu')

#     # setup the log file
#     with open(opt.log, 'w') as f:
#         f.write('Epoch, Log-likelihood, Accuracy, RMSE\n')

print('[Info] parameters: {}'.format(opt))

""" prepare dataloader """
trainloader, testloader, num_types = prepare_dataloader(opt)

""" prepare model """
model = Transformer(
    num_types=num_types,
    d_model=opt.d_model,
    d_inner=opt.d_inner,
    n_layers=opt.n_layers,
    n_head=opt.n_head,
    d_k=opt.d_k,
    d_v=opt.d_v,
    b_comps=opt.ber_comps,
    g_comps=opt.gau_comps,
    dropout=opt.dropout,
)
model.to(opt.device)

""" optimizer and scheduler """
optimizer = optim.Adam(filter(lambda x: x.requires_grad, model.parameters()),
                       opt.lr, betas=(0.9, 0.999), eps=1e-05)
scheduler = optim.lr_scheduler.StepLR(optimizer, 10, gamma=0.9)


""" number of parameters """
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('[Info] Number of parameters: {}'.format(num_params))

""" train the model """
train(model, trainloader, testloader, optimizer, scheduler, opt)



[Info] parameters: ArgumentParser(prog='', usage=None, description=None, formatter_class=<class 'argparse.HelpFormatter'>, conflict_handler='error', add_help=True)
[Info] Loading train data...
[Info] Loading dev data...


  - (Training)   :   0%|          | 0/4 [00:00<?, ?it/s]

[Info] Number of parameters: 206016
[ Epoch 1 ]


  - (Validation) :   0%|          | 0/2 [00:00<?, ?it/s]        

  - (Train)    negative loglikelihood:  8426.4121, elapse: 0.050 min


                                                        

  - (dev)    nll:  234033.9297,  nll per seq :  0.6915,nll by time :   10.76,elapse: 0.030 min


In [None]:
model.MGN.mean()

In [None]:
from torch.distributions import Normal, OneHotCategorical
m = Categorical(torch.tensor([ 0.25, 0.25, 0.25, 0.25 ]))
m.sample()  # equal probability of 0, 1, 2, 3