In [1]:
%load_ext autoreload

%autoreload 2
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score, roc_curve
from ast import literal_eval

import os
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegressionCV, LinearRegression

import warnings
import numpy as np
from collections import OrderedDict

from lob_data_utils import lob, db_result, overview, roc_results, model
from lob_data_utils.svm_calculation import lob_svm

# Apply seaborn's 'whitegrid' theme to every figure drawn in this notebook.
sns.set_style('whitegrid')
# NOTE(review): this suppresses *all* Python warnings for the whole kernel
# session, including deprecation and convergence warnings from the models
# fitted below — consider narrowing the filter.
warnings.filterwarnings('ignore')

  """)


In [2]:
# Leftover setting: no visible cell reads it (the loader below is called
# with length=None).
data_length = 10000 # TODO: not used
# Identifier of the stock under study; passed to the data loader and
# embedded in the name of the results CSV written at the end.
stock = '9265'

In [3]:
# Load the prepared frames for `stock` from the shared data directory.
# NOTE(review): the meaning of cv / include_test / length is defined by
# lob.load_prepared_data, which is not visible here — presumably "no CV
# split, also return the held-out test frame, no row limit"; confirm.
df, df_test = lob.load_prepared_data(
    stock, data_dir='../queue_imbalance/data/prepared', cv=False, include_test=True, length=None)
# Quick sanity check: number of rows in each returned frame.
print(len(df), len(df_test))

12789 3197


In [4]:
def prepare_df(df):
    """Return a cleaned, time-ordered copy of `df` with a lagged imbalance feature.

    The returned frame is built as follows:

    1. the 'Unnamed: 0' column is renamed to 'datetime';
    2. the 'bid' and 'ask' columns are dropped;
    3. 'datetime' becomes the index (the column itself is kept) and rows
       are sorted by it;
    4. 'prev_queue_imbalance' is added, holding the 'queue_imbalance' value
       of the preceding row;
    5. rows containing any missing value are dropped. This always removes
       the first row, which has no predecessor.

    The input frame is left untouched, so the function can safely be called
    again on the same raw frame.

    Args:
        df: raw frame containing at least 'Unnamed: 0' (or 'datetime'),
            'bid', 'ask' and 'queue_imbalance' columns.

    Returns:
        A new DataFrame indexed by 'datetime'.

    Raises:
        KeyError: if one of the required columns is missing.
    """
    prepared = (
        df.rename(columns={'Unnamed: 0': 'datetime'})
          .drop(columns=['bid', 'ask'])
          # drop=False keeps 'datetime' available as a regular column too.
          .set_index('datetime', drop=False)
          .sort_index()
    )
    # shift(1) must run after sorting so "previous" means previous in time.
    prepared['prev_queue_imbalance'] = prepared['queue_imbalance'].shift(1)
    return prepared.dropna()
# Run the same preparation on the training and the test frame, then preview
# the first training rows.
df, df_test = prepare_df(df), prepare_df(df_test)
df.head()

Unnamed: 0_level_0,datetime,bid_price,ask_price,mid_price,sum_sell_ask,sum_buy_bid,mid_price_indicator,queue_imbalance,prev_queue_imbalance
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2013-09-16 13:54:00,2013-09-16 13:54:00,3356.5,3357.0,3356.75,50.0,909.0,0.0,0.895725,0.308792
2013-09-16 13:55:00,2013-09-16 13:55:00,3356.0,3357.0,3356.5,1253.0,1093.0,1.0,-0.068201,0.895725
2013-09-16 13:56:00,2013-09-16 13:56:00,3358.0,3358.5,3358.25,2362.0,586.0,1.0,-0.602442,-0.068201
2013-09-16 13:57:00,2013-09-16 13:57:00,3358.5,3359.5,3359.0,1125.0,1929.0,0.0,0.263261,-0.602442
2013-09-16 13:58:00,2013-09-16 13:58:00,3358.5,3359.0,3358.75,304.0,2133.0,0.0,0.750513,0.263261


## Let's try to fit Logistic Regression

In [13]:
def convert_scores(df, column):
    """Average every cell of ``df[column]`` and return the means as a list.

    Each cell may be a sequence of numbers or a single number; it is reduced
    with ``np.mean``.

    Args:
        df: frame holding the scores.
        column: label of the column to average.

    Returns:
        A list with one mean per row of `df`, in row order.

    Raises:
        KeyError: if `column` is not present in `df`.
    """
    # Iterate the column directly; iterrows() would build a full Series for
    # every row only to read a single cell from it.
    return [np.mean(cell) for cell in df[column]]
# Score columns to average: the validation metrics first, then the matching
# 'train_' metrics in the same order.
scores_columns = (
    ['f1', 'kappa', 'matthews', 'precision', 'recall', 'roc_auc']
    + ['train_f1', 'train_kappa', 'train_matthews',
       'train_precision', 'train_recall', 'train_roc_auc']
)

In [14]:
# Validate logistic regression on both feature sets and start the shared
# results table with one row per feature set.
feature_sets = [['queue_imbalance', 'prev_queue_imbalance'], ['queue_imbalance']]

logistic_records = []
for features in feature_sets:
    res = model.validate_model(LogisticRegressionCV(),
                               df[features], df['mid_price_indicator'])
    res['features'] = ', '.join(features)
    res['method'] = 'logistic'
    logistic_records.append(res)

# Build the frame in one go: DataFrame.append was removed in pandas 2.0.
df_result = pd.DataFrame(logistic_records)

# Average the scores in a separate frame for display. df_result keeps the raw
# values returned by validate_model, so rows appended by the later SVM cells
# have the same shape as these and the saved CSV is not a mix of scalar means
# and raw score lists.
df_logistic_summary = df_result.assign(
    **{col: convert_scores(df_result, col) for col in scores_columns})
df_logistic_summary

Unnamed: 0,f1,features,kappa,matthews,method,precision,recall,roc_auc,train_f1,train_kappa,train_matthews,train_precision,train_recall,train_roc_auc
0,0.568289,"queue_imbalance, prev_queue_imbalance",0.102187,0.103514,logistic,0.558478,0.583579,0.551004,0.56851,0.108828,0.110824,0.550162,0.594969,0.554291
1,0.571894,queue_imbalance,0.106583,0.108163,logistic,0.559988,0.589789,0.553134,0.569199,0.108485,0.110602,0.549828,0.597351,0.554113


In [6]:
# Grid over gamma x C x coef0 for a sigmoid-kernel SVC, validated on both
# feature sets.
gammas = [0.1, 1, 10, 100]
cs = [0.1, 1, 10, 100]
coef0s = [0.1, 1, 10, 100]
feature_sets = [['queue_imbalance', 'prev_queue_imbalance'], ['queue_imbalance']]

sigmoid_records = []
for g in gammas:
    for c in cs:
        for coef in coef0s:
            for features in feature_sets:
                clf = SVC(kernel='sigmoid', gamma=g, C=c, coef0=coef)
                res = model.validate_model(clf, df[features], df['mid_price_indicator'])
                res['features'] = ', '.join(features)
                res['method'] = 'svm_sigmoid'
                res['gamma'] = g
                res['coef0'] = coef
                res['c'] = c
                sigmoid_records.append(res)

# Append all rows with a single concat: DataFrame.append was removed in
# pandas 2.0, and appending inside the loop re-copies the frame every time.
df_result = pd.concat([df_result, pd.DataFrame(sigmoid_records)], ignore_index=True)
df_result

Unnamed: 0,f1,features,kappa,matthews,method,precision,recall,roc_auc,train_f1,train_kappa,train_matthews,train_precision,train_recall,train_roc_auc,c,coef0,gamma
0,"[0.5019083969465649, 0.5253940455341506, 0.550...","queue_imbalance, prev_queue_imbalance","[0.09852330959373179, 0.06685197148123356, 0.1...","[0.09979087844087672, 0.0668523678054315, 0.10...",logistic,"[0.5502092050209205, 0.5263157894736842, 0.570...","[0.4614035087719298, 0.5244755244755245, 0.531...","[0.549113916548127, 0.5334242029157283, 0.5540...","[0.5237651444547997, 0.5229911751045052, 0.517...","[0.11758027171861574, 0.1137043864196412, 0.09...","[0.11819532735077906, 0.11430876547604042, 0.0...","[0.5542406311637081, 0.5535889872173058, 0.544...","[0.49646643109540634, 0.49559859154929575, 0.4...","[0.5586359001114616, 0.5567218546972068, 0.548...",,,
1,"[0.5019083969465649, 0.5300353356890459, 0.549...",queue_imbalance,"[0.09852330959373179, 0.08382438376628765, 0.1...","[0.09979087844087672, 0.08384228884321497, 0.1...",logistic,"[0.5502092050209205, 0.5357142857142857, 0.570...","[0.4614035087719298, 0.5244755244755245, 0.530...","[0.549113916548127, 0.5418987791869149, 0.5531...","[0.5232774674115457, 0.51986909770921, 0.51267...","[0.11589283909815551, 0.11346472427959087, 0.0...","[0.11647848454729114, 0.11421999882434147, 0.0...","[0.5531496062992126, 0.5543369890329013, 0.543...","[0.49646643109540634, 0.4894366197183099, 0.48...","[0.5577969739369649, 0.5565869967278418, 0.546...",,,
2,"[0.0, 0.5032021957913998, 0.5614035087719298, ...","queue_imbalance, prev_queue_imbalance","[0.0, 0.06390699689336876, 0.09556566989370052...","[0.0, 0.06415543824732879, 0.09556581170418815...",svm_sigmoid,"[0.0, 0.527831094049904, 0.5609348914858097, 0...","[0.0, 0.4807692307692308, 0.5618729096989966, ...","[0.5, 0.531910039113429, 0.547780426480704, 0....","[0.0, 0.49462365591397844, 0.5312777942484436,...","[0.0, 0.10581591614745045, 0.09212738430458212...","[0.0, 0.10789664507131229, 0.09215548262756261...","[0.0, 0.5560439560439561, 0.5381381381381382, ...","[0.0, 0.4454225352112676, 0.5245901639344263, ...","[0.5, 0.5526775975719638, 0.546040863744491, 0...",0.1,0.1,0.1
3,"[0.0, 0.4938967136150235, 0.5614035087719298, ...",queue_imbalance,"[0.0, 0.07010740299730667, 0.09556566989370052...","[0.0, 0.07076775986409495, 0.09556581170418815...",svm_sigmoid,"[0.0, 0.5334685598377282, 0.5609348914858097, ...","[0.0, 0.4597902097902098, 0.5618729096989966, ...","[0.5, 0.5349798506578167, 0.547780426480704, 0...","[0.0, 0.47600000000000003, 0.5323058684054536,...","[0.0, 0.09290158033990426, 0.09386055135578908...","[0.0, 0.09558844947424322, 0.09388786107756153...","[0.0, 0.5509259259259259, 0.539015606242497, 0...","[0.0, 0.41901408450704225, 0.5257611241217799,...","[0.5, 0.5462073789538578, 0.5469075586863118, ...",0.1,0.1,0.1
4,"[0.0, 0.0, 0.17604617604617603, 0.604465709728...","queue_imbalance, prev_queue_imbalance","[0.0, 0.0, 0.04069133631944677, 0.142266961133...","[0.0, 0.0, 0.07610648600210718, 0.142719231116...",svm_sigmoid,"[0.0, 0.0, 0.6421052631578947, 0.5830769230769...","[0.0, 0.0, 0.1020066889632107, 0.6274834437086...","[0.5, 0.5, 0.5208615005099742, 0.5709101806356...","[0.0, 0.0, 0.1830776843146957, 0.5481099656357...","[0.0, 0.0, 0.03691634434951285, 0.094742813218...","[0.0, 0.0, 0.06351585079514463, 0.094759795297...","[0.0, 0.0, 0.5910543130990416, 0.5429787234042...","[0.0, 0.0, 0.10831381733021077, 0.553339115351...","[0.5, 0.5, 0.5181614081026756, 0.5473783535765...",0.1,1.0,0.1
5,"[0.0, 0.0, 0.17366136034732274, 0.607313195548...",queue_imbalance,"[0.0, 0.0, 0.04078564262108331, 0.145491571444...","[0.0, 0.0, 0.07703417017597919, 0.146038803505...",svm_sigmoid,"[0.0, 0.0, 0.6451612903225806, 0.5840978593272...","[0.0, 0.0, 0.10033444816053512, 0.632450331125...","[0.5, 0.5, 0.5209119049313313, 0.5724975669966...","[0.0, 0.0, 0.18325903912828134, 0.549078439777...","[0.0, 0.0, 0.03806047600193985, 0.094772979385...","[0.0, 0.0, 0.0656723592030615, 0.0947985676872...","[0.0, 0.0, 0.594855305466238, 0.54279661016949...","[0.0, 0.0, 0.10831381733021077, 0.555507372072...","[0.5, 0.5, 0.5187238377989637, 0.5473950182311...",0.1,1.0,0.1
6,"[0.0, 0.0, 0.0, 0.0, 0.6795454545454546, 0.700...","queue_imbalance, prev_queue_imbalance","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.0, 0.0, 0.0, 0.0, 0.5146299483648882, 0.538...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.0, 0.0, 0.0, 0.0, 0.6674311926605505, 0.669...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.5008605851979346, 0.503...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.1,10.0,0.1
7,"[0.0, 0.0, 0.0, 0.0, 0.6795454545454546, 0.700...",queue_imbalance,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.0, 0.0, 0.0, 0.0, 0.5146299483648882, 0.538...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.0, 0.0, 0.0, 0.0, 0.6674311926605505, 0.669...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.5008605851979346, 0.503...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.1,10.0,0.1
8,"[0.0, 0.0, 0.0, 0.0, 0.6795454545454546, 0.700...","queue_imbalance, prev_queue_imbalance","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.0, 0.0, 0.0, 0.0, 0.5146299483648882, 0.538...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.0, 0.0, 0.0, 0.0, 0.6674311926605505, 0.669...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.5008605851979346, 0.503...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.1,100.0,0.1
9,"[0.0, 0.0, 0.0, 0.0, 0.6795454545454546, 0.700...",queue_imbalance,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.0, 0.0, 0.0, 0.0, 0.5146299483648882, 0.538...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.0, 0.0, 0.0, 0.0, 0.6674311926605505, 0.669...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.5008605851979346, 0.503...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.1,100.0,0.1


In [7]:
# Grid over gamma x C for an RBF-kernel SVC, validated on both feature sets.
gammas = [0.1, 1, 10, 100]
cs = [0.1, 1, 10, 100]
feature_sets = [['queue_imbalance', 'prev_queue_imbalance'], ['queue_imbalance']]

rbf_records = []
for g in gammas:
    for c in cs:
        for features in feature_sets:
            # Both feature sets must use the RBF kernel. The single-feature
            # run previously fitted kernel='sigmoid' while being recorded as
            # 'svm_rbf'.
            clf = SVC(kernel='rbf', gamma=g, C=c)
            res = model.validate_model(clf, df[features], df['mid_price_indicator'])
            res['features'] = ', '.join(features)
            res['method'] = 'svm_rbf'
            res['gamma'] = g
            res['c'] = c
            rbf_records.append(res)

# Append all rows with a single concat: DataFrame.append was removed in
# pandas 2.0, and appending inside the loop re-copies the frame every time.
df_result = pd.concat([df_result, pd.DataFrame(rbf_records)], ignore_index=True)
df_result

Unnamed: 0,f1,features,kappa,matthews,method,precision,recall,roc_auc,train_f1,train_kappa,train_matthews,train_precision,train_recall,train_roc_auc,c,coef0,gamma
0,"[0.5019083969465649, 0.5253940455341506, 0.550...","queue_imbalance, prev_queue_imbalance","[0.09852330959373179, 0.06685197148123356, 0.1...","[0.09979087844087672, 0.0668523678054315, 0.10...",logistic,"[0.5502092050209205, 0.5263157894736842, 0.570...","[0.4614035087719298, 0.5244755244755245, 0.531...","[0.549113916548127, 0.5334242029157283, 0.5540...","[0.5237651444547997, 0.5229911751045052, 0.517...","[0.11758027171861574, 0.1137043864196412, 0.09...","[0.11819532735077906, 0.11430876547604042, 0.0...","[0.5542406311637081, 0.5535889872173058, 0.544...","[0.49646643109540634, 0.49559859154929575, 0.4...","[0.5586359001114616, 0.5567218546972068, 0.548...",,,
1,"[0.5019083969465649, 0.5300353356890459, 0.549...",queue_imbalance,"[0.09852330959373179, 0.08382438376628765, 0.1...","[0.09979087844087672, 0.08384228884321497, 0.1...",logistic,"[0.5502092050209205, 0.5357142857142857, 0.570...","[0.4614035087719298, 0.5244755244755245, 0.530...","[0.549113916548127, 0.5418987791869149, 0.5531...","[0.5232774674115457, 0.51986909770921, 0.51267...","[0.11589283909815551, 0.11346472427959087, 0.0...","[0.11647848454729114, 0.11421999882434147, 0.0...","[0.5531496062992126, 0.5543369890329013, 0.543...","[0.49646643109540634, 0.4894366197183099, 0.48...","[0.5577969739369649, 0.5565869967278418, 0.546...",,,
2,"[0.0, 0.5032021957913998, 0.5614035087719298, ...","queue_imbalance, prev_queue_imbalance","[0.0, 0.06390699689336876, 0.09556566989370052...","[0.0, 0.06415543824732879, 0.09556581170418815...",svm_sigmoid,"[0.0, 0.527831094049904, 0.5609348914858097, 0...","[0.0, 0.4807692307692308, 0.5618729096989966, ...","[0.5, 0.531910039113429, 0.547780426480704, 0....","[0.0, 0.49462365591397844, 0.5312777942484436,...","[0.0, 0.10581591614745045, 0.09212738430458212...","[0.0, 0.10789664507131229, 0.09215548262756261...","[0.0, 0.5560439560439561, 0.5381381381381382, ...","[0.0, 0.4454225352112676, 0.5245901639344263, ...","[0.5, 0.5526775975719638, 0.546040863744491, 0...",0.1,0.1,0.1
3,"[0.0, 0.4938967136150235, 0.5614035087719298, ...",queue_imbalance,"[0.0, 0.07010740299730667, 0.09556566989370052...","[0.0, 0.07076775986409495, 0.09556581170418815...",svm_sigmoid,"[0.0, 0.5334685598377282, 0.5609348914858097, ...","[0.0, 0.4597902097902098, 0.5618729096989966, ...","[0.5, 0.5349798506578167, 0.547780426480704, 0...","[0.0, 0.47600000000000003, 0.5323058684054536,...","[0.0, 0.09290158033990426, 0.09386055135578908...","[0.0, 0.09558844947424322, 0.09388786107756153...","[0.0, 0.5509259259259259, 0.539015606242497, 0...","[0.0, 0.41901408450704225, 0.5257611241217799,...","[0.5, 0.5462073789538578, 0.5469075586863118, ...",0.1,0.1,0.1
4,"[0.0, 0.0, 0.17604617604617603, 0.604465709728...","queue_imbalance, prev_queue_imbalance","[0.0, 0.0, 0.04069133631944677, 0.142266961133...","[0.0, 0.0, 0.07610648600210718, 0.142719231116...",svm_sigmoid,"[0.0, 0.0, 0.6421052631578947, 0.5830769230769...","[0.0, 0.0, 0.1020066889632107, 0.6274834437086...","[0.5, 0.5, 0.5208615005099742, 0.5709101806356...","[0.0, 0.0, 0.1830776843146957, 0.5481099656357...","[0.0, 0.0, 0.03691634434951285, 0.094742813218...","[0.0, 0.0, 0.06351585079514463, 0.094759795297...","[0.0, 0.0, 0.5910543130990416, 0.5429787234042...","[0.0, 0.0, 0.10831381733021077, 0.553339115351...","[0.5, 0.5, 0.5181614081026756, 0.5473783535765...",0.1,1.0,0.1
5,"[0.0, 0.0, 0.17366136034732274, 0.607313195548...",queue_imbalance,"[0.0, 0.0, 0.04078564262108331, 0.145491571444...","[0.0, 0.0, 0.07703417017597919, 0.146038803505...",svm_sigmoid,"[0.0, 0.0, 0.6451612903225806, 0.5840978593272...","[0.0, 0.0, 0.10033444816053512, 0.632450331125...","[0.5, 0.5, 0.5209119049313313, 0.5724975669966...","[0.0, 0.0, 0.18325903912828134, 0.549078439777...","[0.0, 0.0, 0.03806047600193985, 0.094772979385...","[0.0, 0.0, 0.0656723592030615, 0.0947985676872...","[0.0, 0.0, 0.594855305466238, 0.54279661016949...","[0.0, 0.0, 0.10831381733021077, 0.555507372072...","[0.5, 0.5, 0.5187238377989637, 0.5473950182311...",0.1,1.0,0.1
6,"[0.0, 0.0, 0.0, 0.0, 0.6795454545454546, 0.700...","queue_imbalance, prev_queue_imbalance","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.0, 0.0, 0.0, 0.0, 0.5146299483648882, 0.538...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.0, 0.0, 0.0, 0.0, 0.6674311926605505, 0.669...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.5008605851979346, 0.503...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.1,10.0,0.1
7,"[0.0, 0.0, 0.0, 0.0, 0.6795454545454546, 0.700...",queue_imbalance,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.0, 0.0, 0.0, 0.0, 0.5146299483648882, 0.538...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.0, 0.0, 0.0, 0.0, 0.6674311926605505, 0.669...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.5008605851979346, 0.503...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.1,10.0,0.1
8,"[0.0, 0.0, 0.0, 0.0, 0.6795454545454546, 0.700...","queue_imbalance, prev_queue_imbalance","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.0, 0.0, 0.0, 0.0, 0.5146299483648882, 0.538...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.0, 0.0, 0.0, 0.0, 0.6674311926605505, 0.669...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.5008605851979346, 0.503...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.1,100.0,0.1
9,"[0.0, 0.0, 0.0, 0.0, 0.6795454545454546, 0.700...",queue_imbalance,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.0, 0.0, 0.0, 0.0, 0.5146299483648882, 0.538...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.0, 0.0, 0.0, 0.0, 0.6674311926605505, 0.669...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.5008605851979346, 0.503...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.1,100.0,0.1


In [8]:
# Sweep C for a linear-kernel SVC, validated on both feature sets.
cs = [0.1, 1, 10, 100]
feature_sets = [['queue_imbalance', 'prev_queue_imbalance'], ['queue_imbalance']]

linear_records = []
for c in cs:
    for features in feature_sets:
        # Both feature sets must use the linear kernel. The single-feature run
        # previously fitted kernel='sigmoid' with a `g` left over from an
        # earlier cell's loop, while being recorded as 'svm_linear'. The
        # linear kernel has no gamma, so only C is set and stored.
        clf = SVC(kernel='linear', C=c)
        res = model.validate_model(clf, df[features], df['mid_price_indicator'])
        res['features'] = ', '.join(features)
        res['method'] = 'svm_linear'
        res['c'] = c
        linear_records.append(res)

# Append all rows with a single concat: DataFrame.append was removed in
# pandas 2.0, and appending inside the loop re-copies the frame every time.
df_result = pd.concat([df_result, pd.DataFrame(linear_records)], ignore_index=True)
df_result

Unnamed: 0,f1,features,kappa,matthews,method,precision,recall,roc_auc,train_f1,train_kappa,train_matthews,train_precision,train_recall,train_roc_auc,c,coef0,gamma
0,"[0.5019083969465649, 0.5253940455341506, 0.550...","queue_imbalance, prev_queue_imbalance","[0.09852330959373179, 0.06685197148123356, 0.1...","[0.09979087844087672, 0.0668523678054315, 0.10...",logistic,"[0.5502092050209205, 0.5263157894736842, 0.570...","[0.4614035087719298, 0.5244755244755245, 0.531...","[0.549113916548127, 0.5334242029157283, 0.5540...","[0.5237651444547997, 0.5229911751045052, 0.517...","[0.11758027171861574, 0.1137043864196412, 0.09...","[0.11819532735077906, 0.11430876547604042, 0.0...","[0.5542406311637081, 0.5535889872173058, 0.544...","[0.49646643109540634, 0.49559859154929575, 0.4...","[0.5586359001114616, 0.5567218546972068, 0.548...",,,
1,"[0.5019083969465649, 0.5300353356890459, 0.549...",queue_imbalance,"[0.09852330959373179, 0.08382438376628765, 0.1...","[0.09979087844087672, 0.08384228884321497, 0.1...",logistic,"[0.5502092050209205, 0.5357142857142857, 0.570...","[0.4614035087719298, 0.5244755244755245, 0.530...","[0.549113916548127, 0.5418987791869149, 0.5531...","[0.5232774674115457, 0.51986909770921, 0.51267...","[0.11589283909815551, 0.11346472427959087, 0.0...","[0.11647848454729114, 0.11421999882434147, 0.0...","[0.5531496062992126, 0.5543369890329013, 0.543...","[0.49646643109540634, 0.4894366197183099, 0.48...","[0.5577969739369649, 0.5565869967278418, 0.546...",,,
2,"[0.0, 0.5032021957913998, 0.5614035087719298, ...","queue_imbalance, prev_queue_imbalance","[0.0, 0.06390699689336876, 0.09556566989370052...","[0.0, 0.06415543824732879, 0.09556581170418815...",svm_sigmoid,"[0.0, 0.527831094049904, 0.5609348914858097, 0...","[0.0, 0.4807692307692308, 0.5618729096989966, ...","[0.5, 0.531910039113429, 0.547780426480704, 0....","[0.0, 0.49462365591397844, 0.5312777942484436,...","[0.0, 0.10581591614745045, 0.09212738430458212...","[0.0, 0.10789664507131229, 0.09215548262756261...","[0.0, 0.5560439560439561, 0.5381381381381382, ...","[0.0, 0.4454225352112676, 0.5245901639344263, ...","[0.5, 0.5526775975719638, 0.546040863744491, 0...",0.1,0.1,0.1
3,"[0.0, 0.4938967136150235, 0.5614035087719298, ...",queue_imbalance,"[0.0, 0.07010740299730667, 0.09556566989370052...","[0.0, 0.07076775986409495, 0.09556581170418815...",svm_sigmoid,"[0.0, 0.5334685598377282, 0.5609348914858097, ...","[0.0, 0.4597902097902098, 0.5618729096989966, ...","[0.5, 0.5349798506578167, 0.547780426480704, 0...","[0.0, 0.47600000000000003, 0.5323058684054536,...","[0.0, 0.09290158033990426, 0.09386055135578908...","[0.0, 0.09558844947424322, 0.09388786107756153...","[0.0, 0.5509259259259259, 0.539015606242497, 0...","[0.0, 0.41901408450704225, 0.5257611241217799,...","[0.5, 0.5462073789538578, 0.5469075586863118, ...",0.1,0.1,0.1
4,"[0.0, 0.0, 0.17604617604617603, 0.604465709728...","queue_imbalance, prev_queue_imbalance","[0.0, 0.0, 0.04069133631944677, 0.142266961133...","[0.0, 0.0, 0.07610648600210718, 0.142719231116...",svm_sigmoid,"[0.0, 0.0, 0.6421052631578947, 0.5830769230769...","[0.0, 0.0, 0.1020066889632107, 0.6274834437086...","[0.5, 0.5, 0.5208615005099742, 0.5709101806356...","[0.0, 0.0, 0.1830776843146957, 0.5481099656357...","[0.0, 0.0, 0.03691634434951285, 0.094742813218...","[0.0, 0.0, 0.06351585079514463, 0.094759795297...","[0.0, 0.0, 0.5910543130990416, 0.5429787234042...","[0.0, 0.0, 0.10831381733021077, 0.553339115351...","[0.5, 0.5, 0.5181614081026756, 0.5473783535765...",0.1,1.0,0.1
5,"[0.0, 0.0, 0.17366136034732274, 0.607313195548...",queue_imbalance,"[0.0, 0.0, 0.04078564262108331, 0.145491571444...","[0.0, 0.0, 0.07703417017597919, 0.146038803505...",svm_sigmoid,"[0.0, 0.0, 0.6451612903225806, 0.5840978593272...","[0.0, 0.0, 0.10033444816053512, 0.632450331125...","[0.5, 0.5, 0.5209119049313313, 0.5724975669966...","[0.0, 0.0, 0.18325903912828134, 0.549078439777...","[0.0, 0.0, 0.03806047600193985, 0.094772979385...","[0.0, 0.0, 0.0656723592030615, 0.0947985676872...","[0.0, 0.0, 0.594855305466238, 0.54279661016949...","[0.0, 0.0, 0.10831381733021077, 0.555507372072...","[0.5, 0.5, 0.5187238377989637, 0.5473950182311...",0.1,1.0,0.1
6,"[0.0, 0.0, 0.0, 0.0, 0.6795454545454546, 0.700...","queue_imbalance, prev_queue_imbalance","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.0, 0.0, 0.0, 0.0, 0.5146299483648882, 0.538...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.0, 0.0, 0.0, 0.0, 0.6674311926605505, 0.669...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.5008605851979346, 0.503...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.1,10.0,0.1
7,"[0.0, 0.0, 0.0, 0.0, 0.6795454545454546, 0.700...",queue_imbalance,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.0, 0.0, 0.0, 0.0, 0.5146299483648882, 0.538...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.0, 0.0, 0.0, 0.0, 0.6674311926605505, 0.669...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.5008605851979346, 0.503...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.1,10.0,0.1
8,"[0.0, 0.0, 0.0, 0.0, 0.6795454545454546, 0.700...","queue_imbalance, prev_queue_imbalance","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.0, 0.0, 0.0, 0.0, 0.5146299483648882, 0.538...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.0, 0.0, 0.0, 0.0, 0.6674311926605505, 0.669...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.5008605851979346, 0.503...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.1,100.0,0.1
9,"[0.0, 0.0, 0.0, 0.0, 0.6795454545454546, 0.700...",queue_imbalance,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.0, 0.0, 0.0, 0.0, 0.5146299483648882, 0.538...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.0, 0.0, 0.0, 0.0, 0.6674311926605505, 0.669...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.5008605851979346, 0.503...","[0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.1,100.0,0.1


In [9]:
# Persist every collected validation result for this stock. The path is
# relative, so the file is written to the current working directory.
df_result.to_csv('res_{}_prev_queue_imbalance.csv'.format(stock))