In [21]:
from train_functions import train_sahp
import torch
import pickle
import numpy as np

from utils.load_synth_data import process_loaded_sequences
from utils.util import get_batch,count_parameters
from utils.atten_optimizer import NoamOpt
from train_functions.train_sahp import make_model,eval_sahp,prediction_evaluation,MaskBatch
import seaborn as sns
import matplotlib.pyplot as plt

import torch.optim as optim
import random


## Synth Model

In [3]:
# Rebuild the SAHP network and restore the checkpoint trained on the synthetic
# data, mapping all tensors to the CPU.
# NOTE(review): 324 presumably equals max_sequence_length + 1 (the data cell
# reports 323 and the training cell passes max_sequence_length + 1) -- confirm.
model = train_sahp.make_model(max_sequence_length=324)
model_dict = torch.load('saved_models/sahp-synthetic_hidden16-20210622-205430', map_location=torch.device('cpu'))
# A freshly constructed nn.Module is in training mode; switch to inference mode
# so the Dropout layers are disabled when this checkpoint is evaluated.
model.eval()
# Kept as the last expression so the cell still displays the key-matching report.
model.load_state_dict(model_dict)

<All keys matched successfully>

In [2]:
# Read the pickled synthetic Hawkes dataset into memory.
with open('data/simulated/hawkes_synthetic_random_2d_20191130-180837.pkl', mode='rb') as f:
    loaded_hawkes_data = pickle.load(f)

In [6]:
# Display the 'types' entry of the first sequence in the loaded dataset.
loaded_hawkes_data['types'][0]

array([1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1,
       1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1,
       1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1,
       1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1,
       0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0],
      dtype=int64)

In [5]:
# Display the 'timestamps' entry of the first sequence in the loaded dataset.
loaded_hawkes_data['timestamps'][0]

array([  5.60763443,   6.10909592,   8.14957647,   8.34786877,
        11.74765717,  12.09120782,  12.7038931 ,  12.92263578,
        13.27106319,  13.9827004 ,  14.40075975,  14.40657523,
        15.65816619,  17.47021075,  22.1415383 ,  22.18185583,
        22.25583239,  25.19507215,  25.72558125,  26.73629369,
        27.66141843,  29.90981069,  30.74234429,  37.87818156,
        46.43551261,  46.92593361,  47.68628126,  48.4036156 ,
        49.27436552,  50.98216357,  51.41173408,  51.41306198,
        53.30754214,  54.47245528,  55.07976453,  55.58598659,
        55.60622739,  55.60733836,  55.79603789,  56.18765199,
        56.71815713,  57.69584592,  58.44575486,  58.44984819,
        59.61075839,  59.83157271,  59.91939916,  64.38615531,
        64.44525006,  65.45187781,  65.79173705,  67.33287862,
        68.40510542,  68.70232507,  69.15380095,  69.44215237,
        69.4541937 ,  69.51216464,  69.54023449,  70.02401056,
        70.16574478,  71.1551833 ,  71.27132983,  73.87

In [4]:
## Load Data Set
with open('data/simulated/hawkes_synthetic_random_2d_20191130-180837.pkl', 'rb') as f:
    loaded_hawkes_data = pickle.load(f)

tmax = loaded_hawkes_data['tmax']

seq_times, seq_types, seq_lengths, _ = process_loaded_sequences(loaded_hawkes_data, 2)

## Split sizes: 80% train, 10% dev; the test split is the trailing dev_size samples
total_sample_size = seq_times.size(0)
train_ratio, dev_ratio = 0.8, 0.1
train_size = int(train_ratio * total_sample_size)
dev_size = int(dev_ratio * total_sample_size)

## Split Training, Dev and Test Sets
train_slice = slice(None, train_size)
dev_slice = slice(train_size, train_size + dev_size)
test_slice = slice(-dev_size, None)

train_seq_times, train_seq_types, train_seq_lengths = (
    seq_times[train_slice], seq_types[train_slice], seq_lengths[train_slice])
dev_seq_times, dev_seq_types, dev_seq_lengths = (
    seq_times[dev_slice], seq_types[dev_slice], seq_lengths[dev_slice])
test_seq_times, test_seq_types, test_seq_lengths = (
    seq_times[test_slice], seq_types[test_slice], seq_lengths[test_slice])

## Reorder every split by descending sequence length
train_seq_lengths, reorder_indices_train = train_seq_lengths.sort(descending=True)
train_seq_times, train_seq_types = (
    train_seq_times[reorder_indices_train], train_seq_types[reorder_indices_train])

dev_seq_lengths, reorder_indices_dev = dev_seq_lengths.sort(descending=True)
dev_seq_times, dev_seq_types = (
    dev_seq_times[reorder_indices_dev], dev_seq_types[reorder_indices_dev])

test_seq_lengths, reorder_indices_test = test_seq_lengths.sort(descending=True)
test_seq_times, test_seq_types = (
    test_seq_times[reorder_indices_test], test_seq_types[reorder_indices_test])

# After the descending sort, element 0 of each lengths tensor is that split's longest sequence
max_sequence_length = max(train_seq_lengths[0], dev_seq_lengths[0], test_seq_lengths[0])
print('max_sequence_length: {}'.format(max_sequence_length))

max_sequence_length: 323


In [5]:
# Evaluation settings: CPU, batches of 32, fixed torch seed
device = 'cpu'
batch_size = 32
torch.manual_seed(42)
test_size = test_seq_times.size(0)
test_loop_range = list(range(0, test_size, batch_size))

## Test loss: accumulated loss divided by the event count returned by eval_sahp
test_event_num, epoch_test_loss = eval_sahp(
    batch_size, test_loop_range, test_seq_lengths, test_seq_times, test_seq_types,
    model, device, 0)
test_loss = epoch_test_loss / test_event_num

## Prediction metrics on the test split
avg_rmse, types_predict_score, results = prediction_evaluation(
    device, model, test_seq_lengths, test_seq_times, test_seq_types, test_size, tmax)

print(test_loss)


100%|████████████████████████████████████████████████████████████████████████████████| 400/400 [00:16<00:00, 23.91it/s]


rmse 17.319760118898063
Type prediction score: 0.5575
tensor(0.6343)


## Retweet

In [10]:
## Load Data Set
dataset = 'retweet'
process_dim = 3

# Locations of the three pre-split pickle files
train_path = 'data/' + dataset + '/train_manifold_format.pkl'
dev_path = 'data/' + dataset + '/dev_manifold_format.pkl'
test_path = 'data/' + dataset + '/test_manifold_format.pkl'

# Read each split, then convert it with process_loaded_sequences
with open(train_path, 'rb') as f:
    train_hawkes_data = pickle.load(f)
with open(dev_path, 'rb') as f:
    dev_hawkes_data = pickle.load(f)
with open(test_path, 'rb') as f:
    test_hawkes_data = pickle.load(f)

train_seq_times, train_seq_types, train_seq_lengths, train_tmax = process_loaded_sequences(
    train_hawkes_data, process_dim)
dev_seq_times, dev_seq_types, dev_seq_lengths, dev_tmax = process_loaded_sequences(
    dev_hawkes_data, process_dim)
test_seq_times, test_seq_types, test_seq_lengths, test_tmax = process_loaded_sequences(
    test_hawkes_data, process_dim)

# Largest tmax reported across the three splits
tmax = max(train_tmax, dev_tmax, test_tmax)


In [14]:
device = 'cpu'

# Report how many sequences each split holds
train_sample_size = train_seq_times.size(0)
print("Train sample size: {}".format(train_sample_size))

dev_sample_size = dev_seq_times.size(0)
print("Dev sample size: {}".format(dev_sample_size))

test_sample_size = test_seq_times.size(0)
print("Test sample size: {}".format(test_sample_size))


# Move the training data to the target device
train_seq_times, train_seq_types, train_seq_lengths = (
    train_seq_times.to(device), train_seq_types.to(device), train_seq_lengths.to(device))
print("No. of event tokens in training subset:", train_seq_lengths.sum())

# Move the development data to the target device
dev_seq_times, dev_seq_types, dev_seq_lengths = (
    dev_seq_times.to(device), dev_seq_types.to(device), dev_seq_lengths.to(device))
print("No. of event tokens in development subset:", dev_seq_lengths.sum())

# Move the test data to the target device
test_seq_times, test_seq_types, test_seq_lengths = (
    test_seq_times.to(device), test_seq_types.to(device), test_seq_lengths.to(device))
print("No. of event tokens in test subset:", test_seq_lengths.sum())


## Reorder every split by descending sequence length
train_seq_lengths, reorder_indices_train = train_seq_lengths.sort(descending=True)
train_seq_times, train_seq_types = (
    train_seq_times[reorder_indices_train], train_seq_types[reorder_indices_train])

dev_seq_lengths, reorder_indices_dev = dev_seq_lengths.sort(descending=True)
dev_seq_times, dev_seq_types = (
    dev_seq_times[reorder_indices_dev], dev_seq_types[reorder_indices_dev])

test_seq_lengths, reorder_indices_test = test_seq_lengths.sort(descending=True)
test_seq_times, test_seq_types = (
    test_seq_times[reorder_indices_test], test_seq_types[reorder_indices_test])

# After the descending sort, element 0 of each lengths tensor is that split's longest sequence
max_sequence_length = max(train_seq_lengths[0], dev_seq_lengths[0], test_seq_lengths[0])
print('max_sequence_length: {}'.format(max_sequence_length))

Train sample size: 20000
Dev sample size: 2000
Test sample size: 2000
No. of event tokens in training subset: tensor(2176116)
No. of event tokens in development subset: tensor(215521)
No. of event tokens in test subset: tensor(218465)
max_sequence_length: 264


In [18]:
# Rebuild the SAHP network for the 3-type retweet data and restore its
# checkpoint, mapping all tensors to the CPU.
model = train_sahp.make_model(max_sequence_length=max_sequence_length+1, process_dim=3)
model_dict = torch.load('replicated_models/sahp-retweet_hidden16-20210623-055702', map_location=torch.device('cpu'))
model.load_state_dict(model_dict)
# A freshly constructed nn.Module is in training mode; switch to inference mode
# so the Dropout layers are disabled for the evaluation that follows.
model.eval()

# Evaluation settings: fixed torch seed, batches of 32 over the whole test split
torch.manual_seed(42)
batch_size = 32
test_size = test_seq_times.size(0)
device = 'cpu'
test_loop_range = list(range(0, test_size, batch_size))


In [19]:

## Test loss: accumulated loss divided by the event count returned by eval_sahp
test_event_num, epoch_test_loss = eval_sahp(
    batch_size, test_loop_range, test_seq_lengths, test_seq_times, test_seq_types,
    model, device, 0)
test_loss = epoch_test_loss / test_event_num

## Prediction metrics on the test split
avg_rmse, types_predict_score, results = prediction_evaluation(
    device, model, test_seq_lengths, test_seq_times, test_seq_types, test_size, tmax)

print(test_loss)

100%|██████████████████████████████████████████████████████████████████████████████| 2000/2000 [00:57<00:00, 34.61it/s]


rmse 27389707.904998768
Type prediction score: 0.5155
tensor(4.2995)


In [59]:
# Square root of the mean of incr_errors restricted to keep_indices.
# NOTE(review): incr_errors and keep_indices are not defined in any cell shown
# here, so this cell relies on hidden kernel state -- restore their definitions
# (or remove the cell) so the notebook survives Restart & Run All.
np.sqrt(np.mean(incr_errors[keep_indices]))

234.04366

### Training

In [50]:
# --- Hyper-parameters ---
device = 'cpu'
process_dim = 2  # fallback, used only when the dataset carries no 'process_dim'
train_ratio = 0.8
dev_ratio = 0.1
lr = 5e-5
batch_size = 32

# --- Seed torch, random and numpy (in that order) ---
for seed_fn in (torch.manual_seed, random.seed, np.random.seed):
    seed_fn(42)

# --- Load the dataset ---
with open('data/simulated/hawkes_synthetic_random_2d_20191130-180837.pkl', 'rb') as f:
    loaded_hawkes_data = pickle.load(f)

process_dim = (loaded_hawkes_data['process_dim']
               if 'process_dim' in loaded_hawkes_data else process_dim)

seq_times, seq_types, seq_lengths, _ = process_loaded_sequences(loaded_hawkes_data, process_dim)
seq_times, seq_types, seq_lengths = (
    seq_times.to(device), seq_types.to(device), seq_lengths.to(device))

total_sample_size = seq_times.size(0)
print("Total sample size: {}".format(total_sample_size))

train_size = int(train_ratio * total_sample_size)
dev_size = int(dev_ratio * total_sample_size)
print("Train sample size: {:}/{:}".format(train_size, total_sample_size))
print("Dev sample size: {:}/{:}".format(dev_size, total_sample_size))

# --- Split: leading train_size samples, next dev_size samples, trailing dev_size samples ---
train_slice = slice(None, train_size)
dev_slice = slice(train_size, train_size + dev_size)
test_slice = slice(-dev_size, None)

# Define training data
train_seq_times, train_seq_types, train_seq_lengths = (
    seq_times[train_slice], seq_types[train_slice], seq_lengths[train_slice])
print("No. of event tokens in training subset:", train_seq_lengths.sum())

# Define development data
dev_seq_times, dev_seq_types, dev_seq_lengths = (
    seq_times[dev_slice], seq_types[dev_slice], seq_lengths[dev_slice])
print("No. of event tokens in development subset:", dev_seq_lengths.sum())

# Define test data
test_seq_times, test_seq_types, test_seq_lengths = (
    seq_times[test_slice], seq_types[test_slice], seq_lengths[test_slice])
print("No. of event tokens in test subset:", test_seq_lengths.sum())

# --- Reorder every split by descending sequence length ---
train_seq_lengths, reorder_indices_train = train_seq_lengths.sort(descending=True)
train_seq_times, train_seq_types = (
    train_seq_times[reorder_indices_train], train_seq_types[reorder_indices_train])

dev_seq_lengths, reorder_indices_dev = dev_seq_lengths.sort(descending=True)
dev_seq_times, dev_seq_types = (
    dev_seq_times[reorder_indices_dev], dev_seq_types[reorder_indices_dev])

test_seq_lengths, reorder_indices_test = test_seq_lengths.sort(descending=True)
test_seq_times, test_seq_types = (
    test_seq_times[reorder_indices_test], test_seq_types[reorder_indices_test])

max_sequence_length = max(train_seq_lengths[0], dev_seq_lengths[0], test_seq_lengths[0])
print('max_sequence_length: {}'.format(max_sequence_length))

# --- Model ---
d_model, atten_heads, dropout = 16, 1, 0.1

model = make_model(nLayers=1, d_model=d_model, atten_heads=atten_heads, dropout=dropout,
                   process_dim=process_dim, device=device, pe='add',
                   max_sequence_length=max_sequence_length + 1)
model = model.to(device)

print("the number of trainable parameters: " + str(count_parameters(model)))

# --- Optimiser: Adam wrapped in the NoamOpt schedule ---
optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.98), eps=1e-9, weight_decay=3e-4)
model_opt = NoamOpt(d_model, 1, 100, initial_lr=lr, optimizer=optimizer)

# --- Sizes of the splits and the batch start offsets for each ---
train_size, dev_size, test_size = (
    train_seq_times.size(0), dev_seq_times.size(0), test_seq_times.size(0))
tr_loop_range = list(range(0, train_size, batch_size))
de_loop_range = list(range(0, dev_size, batch_size))
test_loop_range = list(range(0, test_size, batch_size))

last_dev_loss = 0.0
early_step = 0

# Shuffled pool of the integers 0..999
random_seeds = list(range(1000))
random.shuffle(random_seeds)

# Last expression: puts the model in training mode and displays its structure
model.train()


Total sample size: 4000
Train sample size: 3200/4000
Dev sample size: 400/4000
No. of event tokens in training subset: tensor(498611)
No. of event tokens in development subset: tensor(63349)
No. of event tokens in test subset: tensor(61657)
max_sequence_length: 323
the number of trainable parameters: 4186


SAHP(
  (gelu): GELU()
  (type_emb): TypeEmbedding(3, 16, padding_idx=2)
  (position_emb): BiasedPositionalEmbedding(
    (Wt): Linear(in_features=1, out_features=8, bias=False)
  )
  (attention): MultiHeadedAttention(
    (linear_layers): ModuleList(
      (0): Linear(in_features=16, out_features=16, bias=True)
      (1): Linear(in_features=16, out_features=16, bias=True)
      (2): Linear(in_features=16, out_features=16, bias=True)
    )
    (output_linear): Linear(in_features=16, out_features=16, bias=True)
    (attention): Attention()
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (feed_forward): PositionwiseFeedForward(
    (w_1): Linear(in_features=16, out_features=64, bias=True)
    (w_2): Linear(in_features=64, out_features=16, bias=True)
    (dropout): Dropout(p=0.1, inplace=False)
    (activation): GELU()
  )
  (input_sublayer): SublayerConnection(
    (norm): LayerNorm()
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (output_sublayer): SublayerConnection(
    (norm):

In [88]:
# Fetch the batch starting at offset 0 and run a single forward pass through the model.
i_batch = 0
model_opt.optimizer.zero_grad()

batch_onehot, batch_seq_times, batch_dt, batch_seq_types, _, _, _, batch_seq_lengths = \
    get_batch(batch_size, i_batch, model, train_seq_lengths, train_seq_times, train_seq_types,
                   rnn=False)

# Drop the first column of the type sequence
batch_seq_types = batch_seq_types[:, 1:]

masked_seq_types = MaskBatch(batch_seq_types, pad=model.process_dim,
                             device=device)  # exclude the first added event
# Call the module itself rather than .forward(): nn.Module.__call__ runs any
# registered hooks, which a direct .forward() call silently skips.
model(batch_dt, masked_seq_types.src, masked_seq_types.src_mask)
# nll = model.compute_loss(batch_seq_times, batch_onehot)

In [181]:
### Loss Function
def compute_loss(model, seq_times, seq_onehot_types, n_mc_samples=20):
    """Return the summed negative log-likelihood of a batch of event sequences.

    The loss is ``sum(integral - log_sum)`` over the batch, where ``log_sum`` is
    the log-intensity of the observed type at each event and ``integral`` is a
    Monte Carlo estimate of the total intensity integrated over each
    inter-event gap.

    The function reads ``model.converge_point``, ``model.start_point`` and
    ``model.omega`` and calls ``model.state_decay`` and
    ``model.intensity_layer``. NOTE(review): those attributes are presumably
    set by a preceding forward pass -- confirm against the model class.

    Args:
        model: object exposing the attributes and methods listed above.
        seq_times: event-time tensor; differences along dim 1 give the gaps.
        seq_onehot_types: one-hot event types, indexed as (batch, event, type).
            Events whose one-hot row sums to 0 contribute nothing to the loss.
        n_mc_samples: number of uniform samples drawn inside every gap.

    Returns:
        A scalar tensor holding the loss summed over the batch.
    """
    inter_event = seq_times[:, 1:] - seq_times[:, :-1]
    hidden_at_events = model.state_decay(
        model.converge_point, model.start_point, model.omega, inter_event[:, :, None])

    batch_count = seq_times.size(0)
    gap_count = seq_times.size(1) - 1
    device = inter_event.device

    # One-hot types of every event after the first; the row sum is 1 for an
    # event with a type set and 0 for an all-zero row.
    types_after_first = seq_onehot_types[:, 1:]
    event_flag = seq_onehot_types[:, 1:, :].sum(dim=-1)

    # Event term: log-intensity at each event, kept only for the observed type.
    log_intensities = model.intensity_layer(hidden_at_events).log()
    log_intensities = log_intensities * event_flag.unsqueeze(-1)
    log_sum = (log_intensities * types_after_first).sum(dim=(2, 1))

    # Integral term: draw sample offsets uniformly inside each gap.
    unit_draws = torch.rand(batch_count, gap_count, 1, n_mc_samples).to(device)
    taus = inter_event[:, :, None, None] * unit_draws

    hidden_at_samples = model.state_decay(
        model.converge_point[:, :, :, None],
        model.start_point[:, :, :, None],
        model.omega[:, :, :, None],
        taus,
    ).transpose(2, 3)
    intens_at_samples = model.intensity_layer(hidden_at_samples).transpose(2, 3)
    intens_at_samples = intens_at_samples * event_flag.unsqueeze(-1).unsqueeze(-1)

    total_intens_samples = intens_at_samples.sum(dim=2)  # shape batch * N * MC
    integral_ = (inter_event * total_intens_samples.mean(dim=2)).sum(dim=1)

    return torch.sum(-log_sum + integral_)

## Training Other 2-D Samples

In [184]:
# Hyper-parameters for the second experiment
device = 'cpu'
process_dim = 2
train_ratio = 0.8
lr = 5e-5
batch_size = 32

# Seed torch, random and numpy (in that order)
for seed_fn in (torch.manual_seed, random.seed, np.random.seed):
    seed_fn(42)

# Read the pickled dataset
with open('data/simulated/hawkes_2d.pkl', mode='rb') as f:
    loaded_hawkes_data = pickle.load(f)

In [185]:
# Prefer the dimensionality stored in the dataset; otherwise keep the current value.
process_dim = (loaded_hawkes_data['process_dim']
               if 'process_dim' in loaded_hawkes_data else process_dim)


In [186]:
# Display the resolved process_dim.
# NOTE(review): the recorded output is 1 although the file is named
# hawkes_2d.pkl -- confirm the dataset's actual dimensionality before training.
process_dim

1