In [None]:
# !pip install ipython-autotime 

In [2]:
%load_ext autotime
# %unload_ext autotime

time: 394 µs (started: 2022-08-11 16:41:10 +05:30)


In [1]:
import tick

AttributeError: module 'tick' has no attribute '__version__'

In [3]:
from tick.hawkes import SimuHawkes, HawkesKernelExp
from tqdm.auto import tqdm
import numpy as np
import torch

2022-08-11 16:41:15.940483: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-08-11 16:41:15.940529: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
  from .autonotebook import tqdm as notebook_tqdm


time: 16.3 s (started: 2022-08-11 16:41:10 +05:30)


In [4]:
# Some code is from: https://github.com/babylonhealth/neuralTPPs/blob/831ed1c203c93b4e408b83b1d457af19372d6267/tpp/processes/multi_class_dataset.py#L181
# tick documentation: https://x-datainitiative.github.io/tick/modules/hawkes.html

def generate_points(mu, alpha, decay, window, seed, dt=0.01):
    """
    Simulate one realisation of a marked Hawkes process with the tick library.

    Args:
        mu: Baseline value per node; its length sets the number of nodes.
        alpha: Square nested sequence; ``alpha[i][j] / decay[i][j]`` is passed
            as the ``intensity`` of the exponential kernel for pair ``(i, j)``.
        decay: Square nested sequence; ``decay[i][j]`` is passed as the
            ``decay`` of the exponential kernel for pair ``(i, j)``.
        window: Passed to the simulator as ``end_time``.
        seed: Passed to the simulator as its random seed.
        dt: Step handed to ``track_intensity`` before simulating.

    Returns:
        The simulator's ``timestamps`` attribute after simulation (consumed
        downstream as one sequence of event times per node).
    """
    num_nodes = len(mu)
    simulator = SimuHawkes(n_nodes=num_nodes, end_time=window, verbose=False, seed=seed)

    for row, baseline in enumerate(mu):
        # Register every kernel of this row first, then the row's baseline.
        for col in range(num_nodes):
            rate = decay[row][col]
            kernel = HawkesKernelExp(intensity=alpha[row][col] / rate, decay=rate)
            simulator.set_kernel(i=row, j=col, kernel=kernel)
        simulator.set_baseline(row, baseline)

    simulator.track_intensity(dt)
    simulator.simulate()
    return simulator.timestamps


def hawkes_helper(mu, alpha, decay, window, in_seed, in_range):
    """
    Simulate ``in_range`` sequences and convert each one to a list of records.

    Sequence ``k`` (0-based) is simulated with seed ``in_seed + k``; a tqdm
    progress bar tracks the simulation loop. Returns one record list per
    simulated sequence, in simulation order.
    """
    simulated = []
    for offset in tqdm(range(in_range)):
        simulated.append(
            generate_points(mu=mu, alpha=alpha, decay=decay, window=window, seed=in_seed + offset)
        )

    # Conversion happens only after every sequence has been simulated.
    return [hawkes_seq_to_record(timestamps) for timestamps in simulated]


def hawkes_seq_to_record(seq):
    """
    Flatten per-node event times into one time-ordered list of event records.

    Args:
        seq: Sequence with one entry per node; entry ``i`` holds the event
            times of node ``i``.

    Returns:
        A list of ``{"time": float, "labels": (int,)}`` dicts sorted by time,
        where the single label is the index of the node that produced the
        event. Returns an empty list when ``seq`` contains no nodes or no
        events.
    """
    # np.concatenate raises on an empty list of arrays, so handle it up front.
    if len(seq) == 0:
        return []

    times = np.concatenate(seq)
    # One integer label per event; np.repeat keeps an integer dtype even when
    # some (or all) nodes have no events.
    labels = np.repeat(np.arange(len(seq)), [len(node_times) for node_times in seq])

    # A stable sort keeps events with identical timestamps in node order.
    order = np.argsort(times, kind="stable")
    return [
        {"time": float(t), "labels": (int(l),)}
        for t, l in zip(times[order], labels[order])
    ]


def combine_splits(d_train, d_val, d_test, t_start=0, t_end=100):
    """
    Merge the train/val/test record lists into one flat list of sequences.

    Args:
        d_train, d_val, d_test: Each a list of sequences, where a sequence is
            a list of ``{"time": ..., "labels": (mark, ...)}`` event dicts.
        t_start: Value stored under ``'t_start'`` for every sequence.
        t_end: Value stored under ``'t_end'`` for every sequence. Defaults to
            100 for backward compatibility; pass the simulation window so the
            stored interval matches the data.

    Returns:
        A list with one dict per sequence (train first, then val, then test)
        holding ``'t_start'``, ``'t_end'``, ``'arrival_times'`` and ``'marks'``.
        Only the first label of each event is kept as its mark.
    """
    sequences = []

    for dataset in (d_train, d_val, d_test):
        for events in dataset:
            sequences.append({
                't_start': t_start,
                't_end': t_end,
                'arrival_times': [event['time'] for event in events],
                'marks': [event['labels'][0] for event in events],
            })

    return sequences


def dataset_helper(mu, alpha, beta, window, seed, train_size, val_size, test_size, save_path):
    """
    Simulate train/val/test Hawkes sequences and save them with ``torch.save``.

    Args:
        mu, alpha, beta: Process parameters forwarded to ``hawkes_helper``
            (``beta`` is forwarded as its ``decay`` argument).
        window: Simulation end time; also stored as ``'t_end'`` of every
            saved sequence.
        seed: Base seed. The three splits use disjoint, consecutive seed
            ranges starting at ``seed``.
        train_size, val_size, test_size: Number of sequences per split.
        save_path: Destination passed to ``torch.save``.
    """
    # Offset each split's starting seed by the sizes of the previous splits so
    # that no two sequences share a seed.
    train_seed = seed
    val_seed = seed + train_size
    test_seed = seed + train_size + val_size

    d_train = hawkes_helper(mu, alpha, beta, window, train_seed, train_size)
    d_val = hawkes_helper(mu, alpha, beta, window, val_seed, val_size)
    d_test = hawkes_helper(mu, alpha, beta, window, test_seed, test_size)

    # Record the actual observation window instead of a hard-coded 100.
    sequences = combine_splits(d_train, d_val, d_test, t_end=window)
    dataset = {'sequences': sequences, 'num_marks': len(mu)}
    torch.save(dataset, save_path)

time: 6.04 ms (started: 2022-08-11 16:41:26 +05:30)


### Hawkes Ind.

In [5]:
# Two-mark process whose alpha matrix is diagonal (zero off-diagonal entries).
mu = [0.1, 0.05]
alpha = [
    [0.2, 0.0],
    [0.0, 0.4],
]
beta = [
    [1.0, 1.0],
    [1.0, 2.0],
]

# Simulation window, base seed, split sizes and output location.
window = 100
seed = 0
train_size, val_size, test_size = 14745, 4915, 4916
save_path = '../data/synth/hawkes_ind.pkl'

dataset_helper(
    mu=mu,
    alpha=alpha,
    beta=beta,
    window=window,
    seed=seed,
    train_size=train_size,
    val_size=val_size,
    test_size=test_size,
    save_path=save_path,
)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 14745/14745 [00:30<00:00, 477.14it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 4915/4915 [00:10<00:00, 482.02it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 4916/4916 [00:09<00:00, 503.92it/s]


time: 53.5 s (started: 2022-08-11 16:41:26 +05:30)


### Hawkes Dep. I

In [6]:
# Two-mark process in which every alpha entry (including off-diagonal) is non-zero.
mu = [0.1, 0.05]
alpha = [
    [0.2, 0.1],
    [0.2, 0.3],
]
beta = [
    [1.0, 1.0],
    [1.0, 1.0],
]

# Simulation window, base seed, split sizes and output location.
window = 100
seed = 0
train_size, val_size, test_size = 14745, 4915, 4917
save_path = '../data/synth/hawkes_dep_I.pkl'

dataset_helper(
    mu=mu,
    alpha=alpha,
    beta=beta,
    window=window,
    seed=seed,
    train_size=train_size,
    val_size=val_size,
    test_size=test_size,
    save_path=save_path,
)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 14745/14745 [00:32<00:00, 456.32it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 4915/4915 [00:11<00:00, 444.66it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 4917/4917 [00:10<00:00, 451.06it/s]


time: 57.2 s (started: 2022-08-11 16:42:19 +05:30)


### Hawkes Dep. II

In [7]:
# Five-mark process with a dense 5x5 alpha matrix and per-pair decay rates.
mu = [0.713, 0.057, 0.844, 0.254, 0.344]

alpha = [
    [0.689, 0.549, 0.066, 0.819, 0.007],
    [0.630, 0.000, 0.457, 0.622, 0.141],
    [0.134, 0.579, 0.821, 0.527, 0.795],
    [0.199, 0.556, 0.147, 0.030, 0.649],
    [0.353, 0.557, 0.892, 0.638, 0.836],
]

beta = [
    [9.325, 9.764, 2.581, 4.007, 9.319],
    [5.759, 8.742, 4.741, 7.320, 9.768],
    [2.841, 4.349, 6.920, 5.640, 3.839],
    [6.710, 7.460, 3.685, 4.052, 6.813],
    [2.486, 2.214, 8.718, 4.594, 2.642],
]

# Simulation window, base seed, split sizes and output location.
window = 100
seed = 0
train_size, val_size, test_size = 18000, 6000, 6000
save_path = '../data/synth/hawkes_dep_II.pkl'

dataset_helper(
    mu=mu,
    alpha=alpha,
    beta=beta,
    window=window,
    seed=seed,
    train_size=train_size,
    val_size=val_size,
    test_size=test_size,
    save_path=save_path,
)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 18000/18000 [02:37<00:00, 114.22it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 6000/6000 [00:52<00:00, 114.56it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 6000/6000 [00:52<00:00, 114.02it/s]


time: 5min 20s (started: 2022-08-11 16:43:17 +05:30)
