In [4]:
import numpy as np, pandas as pd
from matplotlib import pyplot as plt
from scipy.optimize import minimize
import scipy.stats as stats
from matplotlib.ticker import FuncFormatter
import scipy.optimize as op
import os
from nltk import flatten

We reuse the helper from the mutually exciting process model of arrivals and departures at station i: it is used to filter the event times in process j that occurred before event time T at station i.

In [5]:
def prev_event(times, T):
    """
    Count the events of process j that occur no later than event time T of process i.

    : param times: a 1-D array sorted in ascending order, the full event times in process j
    : param T: a number, one event time in process i

    : return k: an integer, the number of entries of times that are <= T, so that
      times[:k] holds exactly the events of process j up to and including T
    """

    # side='right' places the insertion point after any entries equal to T,
    # so events that tie with T are counted as well.
    n_not_after = np.searchsorted(a=times, v=T, side='right')
    return n_not_after

As in the previous model, we use a dictionary to store the filtered event times of each process $j = 1, \dots, m$. We take $T$ to be the last event time observed in process $i$ for our chosen piece of data, and we label $i$ as $j = 1$.

In [6]:
def process_times(location_codes, times, i):
    """
    Truncate the event times of every station at the last event time observed at station i.

    : param location_codes: a list of length M, the location codes of all M stations, with i as the first.
    : param times: a dictionary, storing the event times of station j = 1, ..., M as values, \
    and the location codes of corresponding stations as keys.
    : param i: the location code of the station being studied (used as a key of times)

    : return new_times: a dictionary keyed by location code; station i keeps all of its event times, \
    every other station keeps only the events up to the last event of station i, and stations \
    with no such event are left out
    : return ind_list: a list, the location codes of the stations with no event up to that time
    """
    # get_station_index is defined in the ks_arr files
    # (presumably it returns the position of i within location_codes -- confirm there)
    station_pos = get_station_index(location_codes, i)
    cutoff = times[i][-1]
    truncated = {}
    empty_codes = []

    for pos, code in enumerate(location_codes):
        n_kept = prev_event(times[code], cutoff)

        # Record stations that have nothing to contribute before the cutoff.
        if n_kept == 0:
            empty_codes.append(code)

        if pos == station_pos:
            # The studied station always keeps its complete history.
            truncated[code] = times[code]
        elif n_kept != 0:
            truncated[code] = times[code][:n_kept]

    return truncated, empty_codes

For the log-likelihood we again follow the previous model. We first write a function that computes $A_{ii}(1)$, ..., $A_{ii}(h)$:

In [7]:
def A_ii(theta, times_i):
    """
    Evaluate the recursive self-excitation term A_ii at every event of process i.

    The recursion is A[0] = 0 and
    A[h] = exp(-theta * (t_h - t_{h-1})) * (1 + A[h-1]) for h >= 1.

    : param theta: a real number, the decay rate
    : param times_i: a 1-D array, the event times observed in process i

    : return A: a 1-D array with one entry per event time in times_i
    """

    n_events = len(times_i)
    values = np.zeros(n_events)

    # Carry the previous value forward in a scalar instead of re-reading the array.
    running = values[0] if n_events else 0.0
    for idx in range(1, n_events):
        gap = times_i[idx] - times_i[idx - 1]
        running = np.exp(-theta * gap) * (1 + running)
        values[idx] = running

    return values

And for $A_{ij}(1)$, ..., $A_{ij}(h)$

In [8]:
def A_ij(theta, times_i, times_j):
    """
    Evaluate the cross-excitation term A_ij at every event time of process i.

    Entry h is the sum of exp(-theta * (t_i[h] - s)) over the events s of process j
    counted by prev_event up to t_i[h]; it is built recursively from entry h-1.

    : param theta: a real number, the decay rate
    : param times_i: a 1-D array, the event times observed in process i
    : param times_j: a 1-D array, the event times observed in process j

    : return B: a 1-D array with one entry per event time in times_i
    """

    n_events = len(times_i)
    values = np.zeros(n_events)

    # Number of events of process j already accounted for.
    seen = prev_event(times_j, times_i[0])
    values[0] = np.sum(np.exp(-theta * (times_i[0] - times_j[:seen])))

    for h in range(1, n_events):
        # Decay everything accumulated so far over the gap between consecutive i-events.
        values[h] = np.exp(-theta * (times_i[h] - times_i[h - 1])) * values[h - 1]

        upto = prev_event(times_j, times_i[h])
        if upto == seen:
            continue

        # Add the events of process j that arrived since the previous i-event.
        values[h] += np.sum(np.exp(-theta * (times_i[h] - times_j[seen:upto])))
        seen = upto

    return values

We also write a function to find the excitation terms of each station

In [9]:
def excitation_j(theta, t, times_j):
    """
    Evaluate exp(-theta * (t - s)) - 1 for every event s of process j counted by prev_event up to t.

    : param theta: a real number, thetaj
    : param t: a number, the time at which the terms are evaluated
    : param times_j: a 1-D array, the event times observed in process j

    : return Ej: a 1-D array with one entry per event of process j up to t \
    (an empty array when there is no such event)
    """
    n_past = prev_event(times_j, t)

    if n_past == 0:
        return np.zeros(n_past)

    return np.exp(-theta * (t - times_j[:n_past])) - 1

And log likelihood is hence

In [10]:
def diff_comp(t, location_codes, event_times, beta, theta, lambda_b, dist, k): 
    """
    Evaluate lambda_b * t minus the weighted excitation terms of all stations at time t.

    The weight of station j is kappa_j * beta / theta with kappa = kappa_fun(dist, k), and its
    excitation term is the sum of excitation_j over its events up to t.
    NOTE(review): this looks like the compensator (integrated intensity) of the process
    at time t -- confirm against the model write-up.

    : param t: a number, the time at which to evaluate
    : param location_codes: a list of length M, the location codes of all M stations
    : param event_times: a dictionary, the event times of each station keyed by location code
    : param beta: a real number
    : param theta: a real number
    : param lambda_b: a real number, the baseline intensity
    : param dist: a 1-D array of length M, which stores the distances of each station
    : param k: a real number, passed to kappa_fun

    : return res: a real number
    """

    weights = kappa_fun(dist, k) * beta / theta

    excitation_total = 0

    for pos, code in enumerate(location_codes):
        station_times = event_times[code]
        n_past = prev_event(station_times, t)

        # Stations without any event up to t contribute nothing.
        if n_past == 0:
            continue

        excitation_total += weights[pos] * np.sum(excitation_j(theta, t, station_times[:n_past]))

    return lambda_b * t - excitation_total

In [11]:
def diff_log_likelihood(location_codes, event_times, beta, theta, lambda_b, k, dist, i): 
    """
    Finds the log-likelihood of the mutually exciting process between stations

    : param location_codes: a list of length M, the location codes of all M stations with i as the first
    : param event_times: a dictionary, the event times in all processes \
      that occurred before the last event in process i 
    : param beta: a real number
    : param theta: a real number
    : param lambda_b: a real number, the baseline intensity of station i
    : param k: a real number, passed to kappa_fun
    : param dist: a 1-D array of length M, which stores the distances of each station
    : param i: the location code of the station being studied (used as a key of event_times)

    : return res: a real number, the log-likelihood value
    """
    own_pos = get_station_index(location_codes, i)
    times_i = event_times[i]

    kappa = kappa_fun(dist, k)
    ratio = kappa * beta / theta
    horizon = times_i[-1]

    # Self-excitation of station i.
    excitation_at_events = kappa[own_pos] * beta * A_ii(theta, times_i)
    integrated_excitation = ratio[own_pos] * np.sum(excitation_j(theta, horizon, times_i))

    # Cross-excitation from every other station.
    for pos, code in enumerate(location_codes):
        if pos == own_pos:
            continue
        other_times = event_times[code]
        excitation_at_events += kappa[pos] * beta * A_ij(theta, times_i, other_times)
        integrated_excitation += ratio[pos] * np.sum(excitation_j(theta, horizon, other_times))

    return np.sum(np.log(lambda_b + excitation_at_events)) + integrated_excitation - lambda_b * horizon

In [12]:
def diff_log_likelihood2(location_codes, event_times, dist, beta, theta, k, lambda_b): 
    """
    Finds the separate components of the log-likelihood of the mutually exciting
    process between stations, without combining them into a single number.

    The station being studied is taken to be location_codes[0].

    : param location_codes: a list of length M, the location codes of all M stations with i as the first
    : param event_times: a dictionary, the event times in all processes \
      that occurred before the last event in process i 
    : param dist: a 1-D array of length M, which stores the distances of each station
    : param beta: a real number
    : param theta: a real number
    : param k: a real number, passed to kappa_fun
    : param lambda_b: a real number, the baseline intensity of station i

    : return log_terms: a 1-D array, log(lambda_b + A) at every event time of station i
    : return ex_terms: a 1-D array, the weighted excitation term of station i followed by \
      those of the other stations that have at least one event
    : return base_term: a real number, lambda_b * T with T the last event time of station i
    """

    times_i = event_times[location_codes[0]]
    ex_terms = []

    kappa = kappa_fun(dist, k)
    ratio = kappa*beta/theta
    T = times_i[-1]

    # Self-excitation of the studied station (position 0).
    A = A_ii(theta, times_i)
    A = kappa[0]*beta*A
    ex_terms.append(ratio[0] * np.sum(excitation_j(theta, T, times_i)))

    # Cross-excitation; stations without any events are skipped entirely.
    for j in range(1, len(location_codes)):
        if len(event_times[location_codes[j]]) > 0:
            A += kappa[j] * beta * A_ij(theta, times_i, event_times[location_codes[j]])
            ex_terms.append(ratio[j] * np.sum(excitation_j(theta, T, event_times[location_codes[j]])))

    # The original also computed an unused `res` by adding a scalar to the ex_terms
    # list; that dead computation has been removed. The components are returned
    # separately so the caller can inspect each one.
    return np.log(lambda_b +A), np.array(ex_terms), lambda_b*T

We also need a function that, for a given station, selects all the stations within its neighbourhood, i.e. those whose distance from the given station does not exceed a threshold.

In [13]:
def thres_fun(dist, thres, loc_codes):
    """
    Select the stations whose distance does not exceed a threshold.

    : param dist: a 1-D array (or list) of length M, the distance of each station
    : param thres: a number, the largest distance to keep (inclusive)
    : param loc_codes: a sequence of length M, the location codes matching dist entry by entry

    : return new_dist: a 1-D array, the distances that do not exceed thres
    : return new_loc_codes: a list, the location codes of the stations that were kept
    """
    dist = np.asarray(dist)
    codes = np.asarray(loc_codes)

    # A boolean mask replaces the former "-1" sentinel, which silently failed to
    # drop non-numeric (e.g. string) location codes and would also have dropped
    # any station whose distance or code was genuinely -1.
    # `~(dist > thres)` keeps exactly the entries the original kept.
    keep = ~(dist > thres)

    return dist[keep], list(codes[keep])

And we define the kappa function.

In [14]:
def kappa_fun(dist, k):
    """
    Compute the distance coefficient kappa = exp(-k * dist).

    : param dist: a number or a 1-D array (lists and tuples are accepted), the distances
    : param k: a real number, the decay rate applied to the distances

    : return kappa: exp(-k * dist), with the same shape as dist
    """
    # Multiplying a Python list by a number is list repetition, not arithmetic:
    # with an integer k it would silently give an empty result. Convert first.
    if isinstance(dist, (list, tuple)):
        dist = np.asarray(dist)

    return np.exp(-k * dist)

Some tests.

In [14]:
# Quick check of thres_fun: five stations at distances 1..5 with a threshold of 3.
# Stations at distance <= 3 are kept, so the displayed result should be
# (array([1, 2, 3]), [4, 5, 6]).
dist = np.array([1, 2, 3, 4, 5])
k = 1  # not passed to thres_fun below
thres = 3
loc_codes = [4, 5, 6, 7, 8]
thres_fun(dist, thres, loc_codes)

(array([1, 2, 3]), [4, 5, 6])