In [1]:
%load_ext autoreload

%autoreload 2
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score, roc_curve
from ast import literal_eval

import os
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegressionCV, LinearRegression

import warnings
import numpy as np
from collections import OrderedDict

from lob_data_utils import lob, db_result, overview, roc_results, model
from lob_data_utils.svm_calculation import lob_svm

# Use seaborn's 'whitegrid' style for plots drawn in this notebook.
sns.set_style('whitegrid')
# NOTE(review): this silences every warning for the rest of the session, so
# library deprecation or fitting warnings will not be shown — consider
# narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [2]:
# Notebook-level configuration.
# `data_length` is not referenced by any later cell; the load call below
# passes `length=None` instead.
data_length = 10000 # TODO: not used
# Stock identifier: passed to the data loader and embedded in the name of the
# results CSV written by the last cell.
stock = '9064'

In [3]:
# Load the prepared frames for `stock`: the training frame and, because
# include_test=True, a separate test frame.
df, df_test = lob.load_prepared_data(
    stock,
    data_dir='../queue_imbalance/data/prepared',
    cv=False,
    include_test=True,
    length=None,
)
# Row counts of the two frames, as a quick sanity check.
print(len(df), len(df_test))

14141 3535


In [4]:
def prepare_df(df):
    """Return a time-ordered copy of ``df`` with a lagged queue-imbalance column.

    The input frame is left untouched; all work happens on a copy.

    Steps:
      1. rename the ``'Unnamed: 0'`` column to ``'datetime'``;
      2. drop the ``'bid'`` and ``'ask'`` columns;
      3. index the frame by ``'datetime'`` (the column itself is kept) and
         sort by that index;
      4. add ``'prev_queue_imbalance'``: the ``'queue_imbalance'`` value of the
         preceding row in sorted order;
      5. drop every row that contains a missing value in any column, which
         always includes the first row because it has no predecessor.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'bid'``, ``'ask'`` and ``'queue_imbalance'`` columns and
        a timestamp column named ``'Unnamed: 0'`` (or already ``'datetime'``).

    Returns
    -------
    pandas.DataFrame
        A new, prepared frame.

    Raises
    ------
    KeyError
        If ``'bid'``/``'ask'`` (or the other required columns) are missing.
        The drop is deliberately strict so that preparing an already prepared
        frame fails loudly instead of silently shifting and dropping again.
    """
    # rename/drop without inplace=True return new objects, so the caller's
    # frame keeps its original columns and index.
    prepared = (
        df.rename(columns={'Unnamed: 0': 'datetime'})
        .drop(columns=['bid', 'ask'])
    )
    prepared.index = prepared['datetime']
    prepared = prepared.sort_index()
    # shift(1) moves each value down one row and leaves NaN in the first row.
    prepared['prev_queue_imbalance'] = prepared['queue_imbalance'].shift(1)
    return prepared.dropna()
# Apply the same preparation to the training and the test frame.
# NOTE: not re-runnable without reloading the data — `df`/`df_test` are
# rebound to frames that no longer have the 'bid'/'ask' columns that
# prepare_df drops, so a second run raises KeyError.
df = prepare_df(df)
df_test = prepare_df(df_test)
# Preview the first rows of the prepared training frame.
df.head()

Unnamed: 0_level_0,datetime,bid_price,ask_price,mid_price,sum_sell_ask,sum_buy_bid,mid_price_indicator,queue_imbalance,prev_queue_imbalance
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2013-09-16 12:24:00,2013-09-16 12:24:00,77.08,77.1,77.09,53636.0,100.0,1.0,-0.996278,0.261169
2013-09-16 12:25:00,2013-09-16 12:25:00,77.09,77.12,77.105,26303.0,31203.0,1.0,0.085208,-0.996278
2013-09-16 12:26:00,2013-09-16 12:26:00,77.13,77.16,77.145,3400.0,25554.0,1.0,0.765145,0.085208
2013-09-16 12:27:00,2013-09-16 12:27:00,77.16,77.19,77.175,6735.0,30903.0,1.0,0.642117,0.765145
2013-09-16 12:28:00,2013-09-16 12:28:00,77.17,77.19,77.18,4610.0,17290.0,1.0,0.578995,0.642117


## Let's try to fit Logistic Regression

In [5]:
# Collects one row per evaluated model; later cells keep appending to it.
df_result = pd.DataFrame()

# Logistic regression is evaluated twice: first on the current and previous
# queue imbalance together, then on the current queue imbalance alone.
for features in (['queue_imbalance', 'prev_queue_imbalance'], ['queue_imbalance']):
    res = model.validate_model(
        LogisticRegressionCV(), df[features], df['mid_price_indicator'])
    # Tag the scores with the configuration that produced them.
    res['features'] = ', '.join(features)
    res['method'] = 'logistic'
    df_result = df_result.append(res, ignore_index=True)
df_result

Unnamed: 0,f1,features,kappa,matthews,method,precision,recall,roc_auc,train_f1,train_kappa,train_matthews,train_precision,train_recall,train_roc_auc
0,"[0.5894279507603185, 0.5661764705882353, 0.572...","queue_imbalance, prev_queue_imbalance","[0.11527276038978784, 0.08072053370711585, 0.0...","[0.11598674111000024, 0.08109983227920431, 0.0...",logistic,"[0.5606060606060606, 0.5414908579465542, 0.552...","[0.6213740458015267, 0.5932203389830508, 0.593...","[0.5575124197261602, 0.5403208613154248, 0.548...","[0.5652487382840664, 0.5905429071803853, 0.563...","[0.05910645743754639, 0.08680813709261193, 0.0...","[0.05956070185810295, 0.08833523169395377, 0.0...","[0.5347885402455662, 0.5452781371280725, 0.543...","[0.599388379204893, 0.6440030557677616, 0.5858...","[0.5294881674154417, 0.5432545017141742, 0.538..."
1,"[0.5818181818181819, 0.539291217257319, 0.5659...",queue_imbalance,"[0.1029532279114942, 0.06916543109379691, 0.08...","[0.10348668127725809, 0.06916543109379694, 0.0...",logistic,"[0.5555555555555556, 0.539291217257319, 0.5472...","[0.6106870229007634, 0.539291217257319, 0.5858...","[0.5513752574821278, 0.5345827155468985, 0.543...","[0.5728643216080402, 0.5756698044895004, 0.567...","[0.07143368962330265, 0.08570519665880072, 0.0...","[0.07207060421073702, 0.08625114786058516, 0.0...","[0.5399188092016238, 0.547143840330351, 0.5455...","[0.6100917431192661, 0.6073338426279603, 0.590...","[0.5356322423995696, 0.5427628769047811, 0.541..."


In [6]:
# Hyper-parameter grid for the sigmoid-kernel SVM.
gammas = [0.01, 0.1, 1, 10, 100, 1000]
cs = [0.01, 0.1, 1, 10, 100, 1000]
coef0s = [0.1, 1, 10, 100]

# For every (gamma, C, coef0) combination, validate one classifier on both
# imbalance features and then one on the current queue imbalance alone.
for g in gammas:
    for c in cs:
        for coef in coef0s:
            for features in (['queue_imbalance', 'prev_queue_imbalance'],
                             ['queue_imbalance']):
                clf = SVC(kernel='sigmoid', gamma=g, C=c, coef0=coef)
                res = model.validate_model(
                    clf, df[features], df['mid_price_indicator'])
                # Record which configuration these scores belong to.
                res['features'] = ', '.join(features)
                res['method'] = 'svm_sigmoid'
                res['gamma'] = g
                res['coef0'] = coef
                res['c'] = c
                df_result = df_result.append(res, ignore_index=True)
df_result

Unnamed: 0,f1,features,kappa,matthews,method,precision,recall,roc_auc,train_f1,train_kappa,train_matthews,train_precision,train_recall,train_roc_auc,c,coef0,gamma
0,"[0.5894279507603185, 0.5661764705882353, 0.572...","queue_imbalance, prev_queue_imbalance","[0.11527276038978784, 0.08072053370711585, 0.0...","[0.11598674111000024, 0.08109983227920431, 0.0...",logistic,"[0.5606060606060606, 0.5414908579465542, 0.552...","[0.6213740458015267, 0.5932203389830508, 0.593...","[0.5575124197261602, 0.5403208613154248, 0.548...","[0.5652487382840664, 0.5905429071803853, 0.563...","[0.05910645743754639, 0.08680813709261193, 0.0...","[0.05956070185810295, 0.08833523169395377, 0.0...","[0.5347885402455662, 0.5452781371280725, 0.543...","[0.599388379204893, 0.6440030557677616, 0.5858...","[0.5294881674154417, 0.5432545017141742, 0.538...",,,
1,"[0.5818181818181819, 0.539291217257319, 0.5659...",queue_imbalance,"[0.1029532279114942, 0.06916543109379691, 0.08...","[0.10348668127725809, 0.06916543109379694, 0.0...",logistic,"[0.5555555555555556, 0.539291217257319, 0.5472...","[0.6106870229007634, 0.539291217257319, 0.5858...","[0.5513752574821278, 0.5345827155468985, 0.543...","[0.5728643216080402, 0.5756698044895004, 0.567...","[0.07143368962330265, 0.08570519665880072, 0.0...","[0.07207060421073702, 0.08625114786058516, 0.0...","[0.5399188092016238, 0.547143840330351, 0.5455...","[0.6100917431192661, 0.6073338426279603, 0.590...","[0.5356322423995696, 0.5427628769047811, 0.541...",,,
2,"[0.6752577319587628, 0.671147880041365, 0.6732...","queue_imbalance, prev_queue_imbalance","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.01,0.1,0.01
3,"[0.6752577319587628, 0.671147880041365, 0.6732...",queue_imbalance,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.01,0.1,0.01
4,"[0.6752577319587628, 0.671147880041365, 0.6732...","queue_imbalance, prev_queue_imbalance","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.01,1.0,0.01
5,"[0.6752577319587628, 0.671147880041365, 0.6732...",queue_imbalance,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.01,1.0,0.01
6,"[0.6752577319587628, 0.671147880041365, 0.6732...","queue_imbalance, prev_queue_imbalance","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.01,10.0,0.01
7,"[0.6752577319587628, 0.671147880041365, 0.6732...",queue_imbalance,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.01,10.0,0.01
8,"[0.6752577319587628, 0.671147880041365, 0.6732...","queue_imbalance, prev_queue_imbalance","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.01,100.0,0.01
9,"[0.6752577319587628, 0.671147880041365, 0.6732...",queue_imbalance,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.01,100.0,0.01


In [7]:
# Hyper-parameter grid for the RBF-kernel SVM.
gammas = [0.01, 0.1, 1, 10, 100, 1000]
cs = [0.01, 0.1, 1, 10, 100, 1000]

# For every (gamma, C) pair, validate an RBF SVM on both imbalance features
# and then on the current queue imbalance alone.
# Fix: the single-feature run used to build SVC(kernel='sigmoid', ...) while
# labelling its row 'svm_rbf'; both runs now really use the RBF kernel.
for g in gammas:
    for c in cs:
        for features in (['queue_imbalance', 'prev_queue_imbalance'],
                         ['queue_imbalance']):
            clf = SVC(kernel='rbf', gamma=g, C=c)
            res = model.validate_model(
                clf, df[features], df['mid_price_indicator'])
            # Record which configuration these scores belong to.
            res['features'] = ', '.join(features)
            res['method'] = 'svm_rbf'
            res['gamma'] = g
            res['c'] = c
            df_result = df_result.append(res, ignore_index=True)
df_result

Unnamed: 0,f1,features,kappa,matthews,method,precision,recall,roc_auc,train_f1,train_kappa,train_matthews,train_precision,train_recall,train_roc_auc,c,coef0,gamma
0,"[0.5894279507603185, 0.5661764705882353, 0.572...","queue_imbalance, prev_queue_imbalance","[0.11527276038978784, 0.08072053370711585, 0.0...","[0.11598674111000024, 0.08109983227920431, 0.0...",logistic,"[0.5606060606060606, 0.5414908579465542, 0.552...","[0.6213740458015267, 0.5932203389830508, 0.593...","[0.5575124197261602, 0.5403208613154248, 0.548...","[0.5652487382840664, 0.5905429071803853, 0.563...","[0.05910645743754639, 0.08680813709261193, 0.0...","[0.05956070185810295, 0.08833523169395377, 0.0...","[0.5347885402455662, 0.5452781371280725, 0.543...","[0.599388379204893, 0.6440030557677616, 0.5858...","[0.5294881674154417, 0.5432545017141742, 0.538...",,,
1,"[0.5818181818181819, 0.539291217257319, 0.5659...",queue_imbalance,"[0.1029532279114942, 0.06916543109379691, 0.08...","[0.10348668127725809, 0.06916543109379694, 0.0...",logistic,"[0.5555555555555556, 0.539291217257319, 0.5472...","[0.6106870229007634, 0.539291217257319, 0.5858...","[0.5513752574821278, 0.5345827155468985, 0.543...","[0.5728643216080402, 0.5756698044895004, 0.567...","[0.07143368962330265, 0.08570519665880072, 0.0...","[0.07207060421073702, 0.08625114786058516, 0.0...","[0.5399188092016238, 0.547143840330351, 0.5455...","[0.6100917431192661, 0.6073338426279603, 0.590...","[0.5356322423995696, 0.5427628769047811, 0.541...",,,
2,"[0.6752577319587628, 0.671147880041365, 0.6732...","queue_imbalance, prev_queue_imbalance","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.01,0.1,0.01
3,"[0.6752577319587628, 0.671147880041365, 0.6732...",queue_imbalance,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.01,0.1,0.01
4,"[0.6752577319587628, 0.671147880041365, 0.6732...","queue_imbalance, prev_queue_imbalance","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.01,1.0,0.01
5,"[0.6752577319587628, 0.671147880041365, 0.6732...",queue_imbalance,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.01,1.0,0.01
6,"[0.6752577319587628, 0.671147880041365, 0.6732...","queue_imbalance, prev_queue_imbalance","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.01,10.0,0.01
7,"[0.6752577319587628, 0.671147880041365, 0.6732...",queue_imbalance,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.01,10.0,0.01
8,"[0.6752577319587628, 0.671147880041365, 0.6732...","queue_imbalance, prev_queue_imbalance","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.01,100.0,0.01
9,"[0.6752577319587628, 0.671147880041365, 0.6732...",queue_imbalance,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.01,100.0,0.01


In [8]:
# Regularisation strengths for the linear-kernel SVM.
cs = [0.001, 0.01, 0.1, 1, 10, 100, 1000]

# For every C, validate a linear SVM on both imbalance features and then on
# the current queue imbalance alone.
# Fix: the single-feature run used to pass gamma=g, where `g` was only a
# leftover loop variable from an earlier cell (NameError if that cell was not
# run). scikit-learn documents gamma as a coefficient for the rbf/poly/sigmoid
# kernels only, so it is dropped here.
for c in cs:
    for features in (['queue_imbalance', 'prev_queue_imbalance'],
                     ['queue_imbalance']):
        clf = SVC(kernel='linear', C=c)
        res = model.validate_model(
            clf, df[features], df['mid_price_indicator'])
        # Record which configuration these scores belong to.
        res['features'] = ', '.join(features)
        res['method'] = 'svm_linear'
        res['c'] = c
        df_result = df_result.append(res, ignore_index=True)
df_result

Unnamed: 0,f1,features,kappa,matthews,method,precision,recall,roc_auc,train_f1,train_kappa,train_matthews,train_precision,train_recall,train_roc_auc,c,coef0,gamma
0,"[0.5894279507603185, 0.5661764705882353, 0.572...","queue_imbalance, prev_queue_imbalance","[0.11527276038978784, 0.08072053370711585, 0.0...","[0.11598674111000024, 0.08109983227920431, 0.0...",logistic,"[0.5606060606060606, 0.5414908579465542, 0.552...","[0.6213740458015267, 0.5932203389830508, 0.593...","[0.5575124197261602, 0.5403208613154248, 0.548...","[0.5652487382840664, 0.5905429071803853, 0.563...","[0.05910645743754639, 0.08680813709261193, 0.0...","[0.05956070185810295, 0.08833523169395377, 0.0...","[0.5347885402455662, 0.5452781371280725, 0.543...","[0.599388379204893, 0.6440030557677616, 0.5858...","[0.5294881674154417, 0.5432545017141742, 0.538...",,,
1,"[0.5818181818181819, 0.539291217257319, 0.5659...",queue_imbalance,"[0.1029532279114942, 0.06916543109379691, 0.08...","[0.10348668127725809, 0.06916543109379694, 0.0...",logistic,"[0.5555555555555556, 0.539291217257319, 0.5472...","[0.6106870229007634, 0.539291217257319, 0.5858...","[0.5513752574821278, 0.5345827155468985, 0.543...","[0.5728643216080402, 0.5756698044895004, 0.567...","[0.07143368962330265, 0.08570519665880072, 0.0...","[0.07207060421073702, 0.08625114786058516, 0.0...","[0.5399188092016238, 0.547143840330351, 0.5455...","[0.6100917431192661, 0.6073338426279603, 0.590...","[0.5356322423995696, 0.5427628769047811, 0.541...",,,
2,"[0.6752577319587628, 0.671147880041365, 0.6732...","queue_imbalance, prev_queue_imbalance","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.010,0.1,0.01
3,"[0.6752577319587628, 0.671147880041365, 0.6732...",queue_imbalance,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.010,0.1,0.01
4,"[0.6752577319587628, 0.671147880041365, 0.6732...","queue_imbalance, prev_queue_imbalance","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.010,1.0,0.01
5,"[0.6752577319587628, 0.671147880041365, 0.6732...",queue_imbalance,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.010,1.0,0.01
6,"[0.6752577319587628, 0.671147880041365, 0.6732...","queue_imbalance, prev_queue_imbalance","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.010,10.0,0.01
7,"[0.6752577319587628, 0.671147880041365, 0.6732...",queue_imbalance,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.010,10.0,0.01
8,"[0.6752577319587628, 0.671147880041365, 0.6732...","queue_imbalance, prev_queue_imbalance","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.010,100.0,0.01
9,"[0.6752577319587628, 0.671147880041365, 0.6732...",queue_imbalance,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]",svm_sigmoid,"[0.5097276264591439, 0.5050583657587548, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]","[0.6745745229499742, 0.6749162155194638, 0.673...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0.5089494163424124, 0.5093385214007782, 0.507...","[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]","[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]",0.010,100.0,0.01


In [9]:
# Persist all collected results to a per-stock CSV in the working directory.
results_path = 'res_{}_prev_queue_imbalance.csv'.format(stock)
df_result.to_csv(results_path)