In [1]:
import gzip
from collections import defaultdict

import numpy as np


def _iter_edges(path):
    """Yield ``(src, dst, last_field)`` for every non-blank line of a gzipped edge list.

    Lines containing a comma are split on commas; all other lines are split on
    whitespace. The file is read in binary mode, so every yielded value is a
    ``bytes`` object. Blank / whitespace-only lines are skipped.
    """
    with gzip.open(path, 'rb') as in_file:
        for line in in_file:
            if not line.strip():
                # Tolerate empty lines (e.g. a trailing newline at end of file).
                continue
            fields = line.split(b',') if b',' in line else line.split()
            src, dst = fields[:2]
            yield src, dst, fields[-1]


def get_graph_stamps(path, top=None):
    """Build per-node event timestamps and an edge-count graph from a gzipped edge list.

    Each non-blank line describes one edge: the first two fields are the source
    and destination node, the last field is parsed as a float timestamp. The
    file is scanned twice: once to choose which nodes to keep, once to collect
    the data for those nodes.

    Parameters
    ----------
    path : str or path-like
        Path to the gzip-compressed edge list.
    top : int, optional
        If ``None`` (default), keep every node that appears as a source at
        least once. Otherwise keep at most ``top`` nodes, taken in decreasing
        order of how often they appear as a destination (ties broken by the
        larger node id first) and restricted to nodes that also appear as a
        source. ``top <= 0`` keeps no nodes.

    Returns
    -------
    timestamps : list of list of float
        ``timestamps[ids[node]]`` holds, in file order, the timestamps of the
        kept edges whose destination is ``node``.
    graph : dict
        ``graph[src][dst]`` is the number of kept edges from ``src`` to
        ``dst``. Its keys are exactly the keys of ``ids``; a node without
        outgoing kept edges maps to an empty dict.
    ids : dict
        Maps each node (``bytes``) that received at least one kept edge to its
        index in ``timestamps``.

    Raises
    ------
    ValueError
        If a non-blank line has fewer than two fields or its last field is not
        a valid float.

    Notes
    -----
    An edge is kept when both endpoints are selected nodes. A selected source
    that never receives an edge is removed from ``graph``, but the timestamps
    of its outgoing edges remain in the destination's list.
    """
    # Pass 1: in-degree of every node and the set of nodes that act as a source.
    count = defaultdict(int)
    srcs = set()
    for src, dst, _ in _iter_edges(path):
        count[dst] += 1
        srcs.add(src)

    if top is None:
        valid = srcs
    else:
        valid = set()
        # Highest in-degree first; equal counts are ordered by node id, descending.
        ranked = sorted(count.items(), key=lambda item: (item[1], item[0]), reverse=True)
        for node, _ in ranked:
            # Check the quota before adding so that top <= 0 selects nothing.
            if len(valid) >= top:
                break
            if node in srcs:
                valid.add(node)

    # Pass 2: collect edge counts and arrival timestamps between selected nodes.
    graph = {}
    ids = {}
    timestamps = []
    for src, dst, raw_stamp in _iter_edges(path):
        # Parsed before filtering so a malformed timestamp is reported on any line.
        stamp = float(raw_stamp)
        if src not in valid or dst not in valid:
            continue

        targets = graph.setdefault(src, {})
        targets[dst] = targets.get(dst, 0) + 1

        if dst not in ids:
            ids[dst] = len(timestamps)
            timestamps.append([])
        timestamps[ids[dst]].append(stamp)

    # Make graph's key set equal to ids' key set.
    for node in list(graph):
        if node not in ids:
            del graph[node]
    for node in ids:
        graph.setdefault(node, {})

    return timestamps, graph, ids


In [65]:
# Load the whole Bitcoin Alpha edge list (top=None applies no in-degree cut-off).
timestamps, graph, ids = get_graph_stamps('../../data/soc-sign-bitcoinalpha.csv.gz', top=None)
# One ascending array of event times per node, in the index order given by `ids`.
events = [np.array(sorted(stamps)) for stamps in timestamps]

In [66]:
# Number of event sequences.
dim = len(events)

# Shift all sequences so the earliest event overall sits at 1.0, then rescale.
# NOTE(review): 700019.0 is an unexplained scale factor -- confirm its origin.
# NOTE: `events` is rebound here, so re-running this cell rescales it again.
min_time = min(min(sequence) for sequence in events)
events = [(sequence - min_time + 1.0) / 700019.0 for sequence in events]
# Latest rescaled event time plus a small margin.
end_time = max(max(sequence) for sequence in events) + 1e-5

In [68]:
from tick.hawkes import HawkesADM4

# Fit a HawkesADM4 model to the rescaled event sequences, logging every iteration.
# NOTE(review): `end_time` computed earlier is not passed to fit() -- confirm intended.
model = HawkesADM4(
    decay=0.1,
    max_iter=300,
    verbose=True,
    print_every=1,
)
model.fit(events)

Launching the solver HawkesADM4...
  n_iter  |    obj    |  rel_obj  | rel_baseline | rel_adjacency
        1 | -2.44e+01 |  1.00e+00 |     1.00e+00 |      9.99e-01
        2 | -2.35e+01 |  3.73e-02 |     6.38e-02 |      4.03e-01
        3 | -2.28e+01 |  2.73e-02 |     4.53e-03 |      2.63e-01
        4 | -2.24e+01 |  1.96e-02 |     3.34e-03 |      1.69e-01
        5 | -2.21e+01 |  1.55e-02 |     2.08e-03 |      1.29e-01
        6 | -2.18e+01 |  1.25e-02 |     1.45e-03 |      9.97e-02
        7 | -2.15e+01 |  1.04e-02 |     1.10e-03 |      8.13e-02
        8 | -2.14e+01 |  8.92e-03 |     8.66e-04 |      6.93e-02
        9 | -2.12e+01 |  7.73e-03 |     6.22e-04 |      5.95e-02
       10 | -2.10e+01 |  6.84e-03 |     3.81e-04 |      5.26e-02
       11 | -2.09e+01 |  6.15e-03 |     2.46e-04 |      4.71e-02
       12 | -2.08e+01 |  5.59e-03 |     1.70e-04 |      4.27e-02
       13 | -2.07e+01 |  5.12e-03 |     1.25e-04 |      3.90e-02
       14 | -2.06e+01 |  4.69e-03 |     1.02e-04 |     

KeyboardInterrupt: 