Import dependencies and configure settings.

In [1]:
import os
import csv
import random
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import scipy.stats as stats
import statsmodels.api as sm

import hawkes
import markDistributions as md
import scipy.optimize as op

from pylab import rcParams
from __future__ import division

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Display floating-point array entries with 10 digits of precision.
np.set_printoptions(precision=10)

Load data.

In [2]:
dat = 'GOOG_2012-06-21_34200000_57600000_message_10'

# One (column name, parser) pair per CSV field, in file order: the first
# field is parsed as a float and the remaining five as integers.
schema = (('Time', float), ('Type', int), ('Order ID', int),
          ('Volume', int), ('Price', int), ('Direction', int))

# Parse the whole file into a list of typed records, one per CSV row.
with open('../../data/'+dat+'.csv') as csvfile:
    reader = csv.reader(csvfile)
    records = [[parse(row[position])
                for position, (name, parse) in enumerate(schema)]
               for row in reader]

# Transpose the records into one NumPy array per named column.
orders = {name: np.asarray([record[position] for record in records])
          for position, (name, parse) in enumerate(schema)}
del schema, records

In [3]:
# Report how many messages carry each 'Type' code, as a count and as a
# percentage of all messages.
type_column = orders['Type']
n_messages = len(type_column)
print("{0} order messages total:".format(len(orders['Type'])))

# (type code, output line format) for every message type that is reported.
for type_code, line_format in (
        (1, "- {0} LO submissions ({1:.2f}%)"),
        (2, "- {0} partial LO cancellations ({1:.2f}%)"),
        (3, "- {0} LO cancellations ({1:.2f}%)"),
        (4, "- {0} visible LO execution ({1:.2f}%)"),
        (5, "- {0} hidden LO execution ({1:.2f}%)"),
        (7, "- {0} trading halts ({1:.2f}%)")):
    n_of_type = len(type_column[type_column == type_code])
    print(line_format.format(n_of_type, 100*n_of_type/n_messages))

# Keep the notebook namespace free of the scratch names used above.
del type_column, n_messages, type_code, line_format, n_of_type

147916 order messages total:
- 71258 LO submissions (48.17%)
- 19 partial LO cancellations (0.01%)
- 64961 LO cancellations (43.92%)
- 7765 visible LO execution (5.25%)
- 3913 hidden LO execution (2.65%)
- 0 trading halts (0.00%)


Extract the event times, volumes and component labels of the limit order submissions, cancellations and visible executions.

In [4]:
# Keep only limit order submissions (type 1), full cancellations (type 3)
# and visible executions (type 4).
LTC_mask = np.logical_or.reduce(
    [orders['Type'] == kept_type for kept_type in (1, 3, 4)])
t = orders['Time'][LTC_mask]
x = orders['Volume'][LTC_mask]

# Component label for every (type code, direction) pair. Labels are derived
# from the untouched type codes, so one assignment can never affect the
# condition of another. Events with any other direction keep their type code.
event_type = orders['Type'][LTC_mask]
event_side = orders['Direction'][LTC_mask]
j = event_type.copy()
for type_code, side, component in (
        (1, -1, 0),   # limit ask orders
        (1,  1, 1),   # limit bid orders
        (3, -1, 2),   # limit ask order cancellations
        (3,  1, 3),   # limit bid order cancellations
        (4,  1, 4),   # market ask orders
        (4, -1, 5)):  # market bid orders
    j[(event_type == type_code) & (event_side == side)] = component

# Keep the notebook namespace free of the scratch names used above.
del event_type, event_side, type_code, side, component

Initialise a 6-D Hawkes point process and set the mark distributions.

In [5]:
# Number of components of the process: one per event label 0-5 assigned above.
d = 6
# Hawkes process object that is configured and fitted in the cells below.
h = hawkes.Hawkes(d=d)

Set the parameters of the Pareto mark distribution for the volumes of each component. These were fitted separately in "mark_distribution_GOOG_10.ipynb".

In [None]:
# Pre-fitted mark distribution parameters: one two-element row per component,
# six rows in total.
# NOTE(review): rows are presumably ordered by component label 0-5, and the
# meaning of the two entries in each row is defined by the hawkes /
# markDistributions modules - confirm there.
markDistParam = [[128.56423972,3.275861426],
                [243.9044504278,4.3688862849],
                [97.8188721767,2.4801852406],
                [339.2756753572,5.8696885604],
                [460.4339503846,8.0904052855],
                [1115.865179198,18.8074665174]]
# Hand the parameters to the Hawkes object before fitting.
h.setMarkDistParam(markDistParam)

Fit model to data.

In [None]:
# Fit the model to the event times t, component labels j and volumes x,
# requesting the L-BFGS-B method. The fitted parameter vector is read from
# result["x"] in the next cell.
result = h.MLE(t,j,x,method='L-BFGS-B')

The fitted parameters and stability check:

In [None]:
# Print small values in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

arr = result["x"]

# (output format, start, stop) for each named slice of the fitted vector.
# NOTE(review): the slice boundaries assume a particular layout of the
# parameter vector returned by h.MLE - confirm against the hawkes module.
# NOTE(review): the "alph" label is reproduced as-is; check whether it is a
# typo or names a parameter distinct from "alpha".
for line_format, start, stop in (
        ("eta: {0}\n", 0, 6),
        ("Q: {0}\n", 6, 15),
        ("alpha: {0}\n", 15, 18),
        ("alph: {0}\n", 18, 21),
        ("beta: {0}\n", 21, 23)):
    print(line_format.format(arr[start:stop]))
del line_format, start, stop

# Stability check: report the spectral radius exposed by the fitted model.
print("Spr(Q) of estimated model: {0}".format(h.ibParam.getSpectralRadius()))