In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

import numpy as np
import pandas as pd
import joblib

import os
from tqdm import tqdm
from glob import glob
import matplotlib.pyplot as plt


import pathlib
DATA_DIR = pathlib.Path.cwd()/'data/input'
OUT_DIR = pathlib.Path.cwd()/'data/output'

import sys 
sys.path.append(str(pathlib.Path.cwd()/'utils'))
from utils.misc_utils import fullrange, realized_volatility, log_return, rmspe, get_stock_path, load_parquet_file, load_parquet_files, load_train_test
from utils.feature_engineering_utils import full_feature_engineering, groupby_and_aggregate, generate_features_book_data, generate_features_trade_data, full_feature_engineering_by_cutoff

book_aggregation = {
    'wap1': [np.mean, np.std, fullrange], 
    'wap2': [np.mean, np.std, fullrange], 
    'log_return_1': [fullrange, np.sum, np.mean, realized_volatility], 
    'log_return_2': [fullrange, np.sum, np.mean, realized_volatility], 
    'bid_ask_price_spread_1': [np.mean, np.std, fullrange],
    'bid_ask_price_spread_2': [np.mean, np.std, fullrange],
    'bid_ask_size_spread_1': [np.mean, np.std, fullrange],
    'bid_ask_size_spread_2': [np.mean, np.std, fullrange]
    }

trade_aggregation = {
    'volume': [np.mean, np.sum, np.std], 
    'price': [np.mean, np.std], 
    'order_count': [np.mean, np.sum, np.std]
    }

time_agg_trade = {
    'volume_mean': [np.mean, np.sum, np.std], 
    'price_mean': [np.mean, np.std], 
    'order_count_mean': [np.mean, np.sum, np.std]
    }

time_agg_book = {
    'wap1_std': [np.mean], 
    'wap2_std': [np.mean], 
    'log_return_1_realized_volatility': [np.mean], 
    'log_return_2_realized_volatility': [np.mean], 
    'log_return_1_sum': [np.mean], 
    'log_return_2_sum': [np.mean] 
    }

stock_agg_trade = {
    'volume_mean': [np.mean,np.std], 
    'price_mean': [np.mean, np.std], 
    'order_count_mean': [np.mean, np.std]
    }

stock_agg_book = {
    'wap1_std': [np.std], 
    'wap2_std': [np.std], 
    'log_return_1_realized_volatility': [np.std], 
    'log_return_2_realized_volatility': [np.std], 
    'log_return_1_sum': [np.std], 
    'log_return_2_sum': [np.std] 
    }

pd.set_option('display.max_columns', None)

In [3]:
training_target = pd.read_csv(DATA_DIR/'train.csv')
book = load_parquet_files(stock_ids=[0, 126], file_type='book')
trade = load_parquet_files(stock_ids=[0, 126], file_type='trade')

100%|██████████| 2/2 [00:02<00:00,  1.18s/it]
100%|██████████| 2/2 [00:00<00:00,  6.37it/s]


In [4]:
book = generate_features_book_data(book)
book.head()

Unnamed: 0,time_id,seconds_in_bucket,bid_price1,ask_price1,bid_price2,ask_price2,bid_size1,ask_size1,bid_size2,ask_size2,stock_id,id,wap1,wap2,log_return_1,log_return_2,bid_ask_price_spread_1,bid_ask_price_spread_2,bid_ask_size_spread_1,bid_ask_size_spread_2
0,5,0,1.001422,1.002301,1.00137,1.002353,3,226,2,100,0,0-5,1.001434,1.00139,0.0,0.0,0.000879,0.000983,223,98
1,5,1,1.001422,1.002301,1.00137,1.002353,3,100,2,100,0,0-5,1.001448,1.00139,1.4e-05,0.0,0.000879,0.000983,97,98
2,5,5,1.001422,1.002301,1.00137,1.002405,3,100,2,100,0,0-5,1.001448,1.001391,0.0,1e-06,0.000879,0.001034,97,98
3,5,6,1.001422,1.002301,1.00137,1.002405,3,126,2,100,0,0-5,1.001443,1.001391,-5e-06,0.0,0.000879,0.001034,123,98
4,5,7,1.001422,1.002301,1.00137,1.002405,3,126,2,100,0,0-5,1.001443,1.001391,0.0,0.0,0.000879,0.001034,123,98


In [5]:
agg_book_data = groupby_and_aggregate(book, agg_col = 'id', agg_dict=book_aggregation)
agg_book_data.head()

Unnamed: 0_level_0,wap1_mean,wap1_std,wap1_fullrange,wap2_mean,wap2_std,wap2_fullrange,log_return_1_fullrange,log_return_1_sum,log_return_1_mean,log_return_1_realized_volatility,log_return_2_fullrange,log_return_2_sum,log_return_2_mean,log_return_2_realized_volatility,bid_ask_price_spread_1_mean,bid_ask_price_spread_1_std,bid_ask_price_spread_1_fullrange,bid_ask_price_spread_2_mean,bid_ask_price_spread_2_std,bid_ask_price_spread_2_fullrange,bid_ask_size_spread_1_mean,bid_ask_size_spread_1_std,bid_ask_size_spread_1_fullrange,bid_ask_size_spread_2_mean,bid_ask_size_spread_2_std,bid_ask_size_spread_2_fullrange
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
0-1000,0.9988,0.000531,0.001829,0.998738,0.000538,0.002868,0.001286,-0.000944,-6e-06,0.001781,0.002234,-0.001198,-7e-06,0.004389,0.000443,0.000173,0.000878,0.000765,0.000249,0.001478,11.695122,171.482507,1116,28.609756,83.79463,599
0-10000,0.999884,0.000395,0.001409,0.999912,0.000488,0.00208,0.001733,0.000546,2e-06,0.00289,0.001641,0.000686,2e-06,0.004111,0.00046,0.000232,0.001022,0.000739,0.000239,0.001314,10.143357,152.601616,1045,-17.912587,130.656572,747
0-10005,1.001301,0.001809,0.00713,1.00116,0.001676,0.007296,0.004685,0.002516,1.4e-05,0.008674,0.006666,0.000928,5e-06,0.013725,0.002107,0.000602,0.003449,0.002743,0.000602,0.003025,-11.918478,103.390706,532,28.766304,107.369872,694
0-10017,0.996141,0.004763,0.0173,0.99603,0.004678,0.01804,0.016086,-0.002971,-1.3e-05,0.017629,0.01851,-0.003512,-1.5e-05,0.021224,0.003538,0.001323,0.007071,0.004959,0.002062,0.010697,60.934211,158.634259,860,-0.675439,151.850436,700
0-10030,0.999464,0.000433,0.001729,0.999477,0.000498,0.002206,0.001503,0.00058,3e-06,0.002551,0.004305,0.003158,1.6e-05,0.005463,0.000623,0.000246,0.001085,0.00104,0.000305,0.001463,33.57732,83.867072,364,6.876289,85.745528,416


In [6]:
trade = generate_features_trade_data(trade)
trade.head()

Unnamed: 0,time_id,seconds_in_bucket,price,size,order_count,stock_id,id,volume
0,5,21,1.002301,326,12,0,0-5,326.750244
1,5,46,1.002778,128,4,0,0-5,128.355591
2,5,50,1.002818,55,1,0,0-5,55.155014
3,5,57,1.003155,121,5,0,0-5,121.381798
4,5,68,1.003646,4,1,0,0-5,4.014584


In [7]:
agg_trade_data =  groupby_and_aggregate(trade, agg_col = 'id', agg_dict=trade_aggregation)
agg_trade_data.head()

Unnamed: 0_level_0,volume_mean,volume_sum,volume_std,price_mean,price_std,order_count_mean,order_count_sum,order_count_std
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0-1000,29.961796,898.853882,45.738499,0.998908,0.000554,2.133333,64,2.029665
0-10000,80.825371,1939.80896,132.549789,0.999931,0.000373,2.916667,70,2.500725
0-10005,81.59082,2366.133789,119.623756,1.001232,0.001756,3.137931,91,3.502286
0-10017,89.54705,3760.976074,133.678314,0.996374,0.004358,3.166667,133,3.875606
0-10030,98.03035,2940.910645,103.179214,0.999314,0.000493,2.333333,70,1.667816


In [8]:
output_df = agg_book_data.merge(agg_trade_data, left_index=True, right_index=True).reset_index()
output_df['stock_id'] = output_df['id'].apply(lambda x: int(x.split('-')[0]))
output_df['time_id'] = output_df['id'].apply(lambda x: int(x.split('-')[1]))
output_df.head()

Unnamed: 0,id,wap1_mean,wap1_std,wap1_fullrange,wap2_mean,wap2_std,wap2_fullrange,log_return_1_fullrange,log_return_1_sum,log_return_1_mean,log_return_1_realized_volatility,log_return_2_fullrange,log_return_2_sum,log_return_2_mean,log_return_2_realized_volatility,bid_ask_price_spread_1_mean,bid_ask_price_spread_1_std,bid_ask_price_spread_1_fullrange,bid_ask_price_spread_2_mean,bid_ask_price_spread_2_std,bid_ask_price_spread_2_fullrange,bid_ask_size_spread_1_mean,bid_ask_size_spread_1_std,bid_ask_size_spread_1_fullrange,bid_ask_size_spread_2_mean,bid_ask_size_spread_2_std,bid_ask_size_spread_2_fullrange,volume_mean,volume_sum,volume_std,price_mean,price_std,order_count_mean,order_count_sum,order_count_std,stock_id,time_id
0,0-1000,0.9988,0.000531,0.001829,0.998738,0.000538,0.002868,0.001286,-0.000944,-6e-06,0.001781,0.002234,-0.001198,-7e-06,0.004389,0.000443,0.000173,0.000878,0.000765,0.000249,0.001478,11.695122,171.482507,1116,28.609756,83.79463,599,29.961796,898.853882,45.738499,0.998908,0.000554,2.133333,64,2.029665,0,1000
1,0-10000,0.999884,0.000395,0.001409,0.999912,0.000488,0.00208,0.001733,0.000546,2e-06,0.00289,0.001641,0.000686,2e-06,0.004111,0.00046,0.000232,0.001022,0.000739,0.000239,0.001314,10.143357,152.601616,1045,-17.912587,130.656572,747,80.825371,1939.80896,132.549789,0.999931,0.000373,2.916667,70,2.500725,0,10000
2,0-10005,1.001301,0.001809,0.00713,1.00116,0.001676,0.007296,0.004685,0.002516,1.4e-05,0.008674,0.006666,0.000928,5e-06,0.013725,0.002107,0.000602,0.003449,0.002743,0.000602,0.003025,-11.918478,103.390706,532,28.766304,107.369872,694,81.59082,2366.133789,119.623756,1.001232,0.001756,3.137931,91,3.502286,0,10005
3,0-10017,0.996141,0.004763,0.0173,0.99603,0.004678,0.01804,0.016086,-0.002971,-1.3e-05,0.017629,0.01851,-0.003512,-1.5e-05,0.021224,0.003538,0.001323,0.007071,0.004959,0.002062,0.010697,60.934211,158.634259,860,-0.675439,151.850436,700,89.54705,3760.976074,133.678314,0.996374,0.004358,3.166667,133,3.875606,0,10017
4,0-10030,0.999464,0.000433,0.001729,0.999477,0.000498,0.002206,0.001503,0.00058,3e-06,0.002551,0.004305,0.003158,1.6e-05,0.005463,0.000623,0.000246,0.001085,0.00104,0.000305,0.001463,33.57732,83.867072,364,6.876289,85.745528,416,98.03035,2940.910645,103.179214,0.999314,0.000493,2.333333,70,1.667816,0,10030


## Time Period Aggregation

In [9]:
time_agg_trade_data = groupby_and_aggregate(output_df, agg_col = 'time_id', agg_dict=time_agg_trade, suffix='_period')
time_agg_trade_data.head()

Unnamed: 0_level_0,volume_mean_mean_period,volume_mean_sum_period,volume_mean_std_period,price_mean_mean_period,price_mean_std_period,order_count_mean_mean_period,order_count_mean_sum_period,order_count_mean_std_period
time_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
5,63.489216,126.978432,23.001141,1.002913,0.001144,2.669118,5.338235,0.114385
11,75.123787,150.247574,45.460377,1.000473,0.000377,2.709259,5.418519,1.144465
16,109.012894,218.025787,32.058147,0.999548,0.000486,2.744615,5.489231,0.034811
31,81.574921,163.149841,69.389015,0.998861,0.000225,3.05,6.1,1.249222
62,67.259079,134.518158,19.963213,0.999277,0.000483,3.320025,6.640049,1.025913


In [10]:
time_agg_book_data = groupby_and_aggregate(output_df, agg_col = 'time_id', agg_dict=time_agg_book, suffix='_period')
time_agg_book_data.head()

Unnamed: 0_level_0,wap1_std_mean_period,wap2_std_mean_period,log_return_1_realized_volatility_mean_period,log_return_2_realized_volatility_mean_period,log_return_1_sum_mean_period,log_return_2_sum_mean_period
time_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
5,0.000876,0.00099,0.006064,0.009029,0.002922,0.00275
11,0.000503,0.000533,0.003823,0.00454,-0.00206,-0.001627
16,0.000802,0.00084,0.002699,0.00501,-0.002588,-0.002912
31,0.000809,0.000708,0.004144,0.004617,-0.000918,-0.000466
62,0.000413,0.000478,0.003319,0.004168,0.001852,0.001475


In [11]:
time_agg = time_agg_book_data.merge(time_agg_trade_data, left_index=True, right_index=True)
output_df_with_time = output_df.merge(time_agg, on='time_id')
output_df_with_time = output_df_with_time.merge(training_target, on=['stock_id', 'time_id']).head()

In [12]:
train, test = load_train_test()
train_id = train.stock_id.unique()
test_id = test.stock_id.unique()

In [13]:
#final_training_data = full_feature_engineering(stock_ids = [], train=True, training_target= training_target)
#final_test_data = full_feature_engineering(stock_ids = [], train=False)

cutoffs =  [(0,150), (150, 300), (300, 450), (450, 600)]
cutoffs =  [(0,100), (100, 200), (200, 300), (300, 400), (400, 500), (500, 600)]

final_training_data = full_feature_engineering_by_cutoff(
    cutoffs = cutoffs,
    stock_ids = train_id,
    training=True)

final_test_data = full_feature_engineering_by_cutoff(
    cutoffs = cutoffs,
    stock_ids = test_id,
    training=False)

#final_training_data = train.merge(final_training_data, on = ['id', 'stock_id', 'time_id'], how='left')
#final_test_data = test.merge(final_test_data, on = ['id', 'stock_id', 'time_id'], how='left')

final_training_data.to_pickle(OUT_DIR/'final_training_data_finer_buckets.pkl')
final_test_data.to_pickle(OUT_DIR/'final_test_data_finer_buckets.pkl')


100%|██████████| 112/112 [09:40<00:00,  5.19s/it]
100%|██████████| 112/112 [09:36<00:00,  5.15s/it]
100%|██████████| 112/112 [09:36<00:00,  5.15s/it]
100%|██████████| 112/112 [09:39<00:00,  5.17s/it]
100%|██████████| 112/112 [09:41<00:00,  5.20s/it]
100%|██████████| 112/112 [09:44<00:00,  5.22s/it]
100%|██████████| 1/1 [00:00<00:00, 17.86it/s]
100%|██████████| 1/1 [00:00<00:00, 20.84it/s]
100%|██████████| 1/1 [00:00<00:00, 21.28it/s]
100%|██████████| 1/1 [00:00<00:00, 20.83it/s]
100%|██████████| 1/1 [00:00<00:00, 21.74it/s]
100%|██████████| 1/1 [00:00<00:00, 21.74it/s]


In [15]:
final_training_data.head()

Unnamed: 0,id,time_id,stock_id,target,wap1_mean_0_100,wap1_std_0_100,wap1_fullrange_0_100,wap2_mean_0_100,wap2_std_0_100,wap2_fullrange_0_100,log_return_1_fullrange_0_100,log_return_1_sum_0_100,log_return_1_mean_0_100,log_return_1_realized_volatility_0_100,log_return_2_fullrange_0_100,log_return_2_sum_0_100,log_return_2_mean_0_100,log_return_2_realized_volatility_0_100,bid_ask_price_spread_1_mean_0_100,bid_ask_price_spread_1_std_0_100,bid_ask_price_spread_1_fullrange_0_100,bid_ask_price_spread_2_mean_0_100,bid_ask_price_spread_2_std_0_100,bid_ask_price_spread_2_fullrange_0_100,bid_ask_size_spread_1_mean_0_100,bid_ask_size_spread_1_std_0_100,bid_ask_size_spread_1_fullrange_0_100,bid_ask_size_spread_2_mean_0_100,bid_ask_size_spread_2_std_0_100,bid_ask_size_spread_2_fullrange_0_100,volume_mean_0_100,volume_sum_0_100,volume_std_0_100,price_mean_0_100,price_std_0_100,order_count_mean_0_100,order_count_sum_0_100,order_count_std_0_100,wap1_std_mean_period_0_100,wap2_std_mean_period_0_100,log_return_1_realized_volatility_mean_period_0_100,log_return_2_realized_volatility_mean_period_0_100,log_return_1_sum_mean_period_0_100,log_return_2_sum_mean_period_0_100,volume_mean_mean_period_0_100,volume_mean_sum_period_0_100,volume_mean_std_period_0_100,price_mean_mean_period_0_100,price_mean_std_period_0_100,order_count_mean_mean_period_0_100,order_count_mean_sum_period_0_100,order_count_mean_std_period_0_100,wap1_std_std_stock_0_100,wap2_std_std_stock_0_100,log_return_1_realized_volatility_std_stock_0_100,log_return_2_realized_volatility_std_stock_0_100,log_return_1_sum_std_stock_0_100,log_return_2_sum_std_stock_0_100,volume_mean_mean_stock_0_100,volume_mean_std_stock_0_100,price_mean_mean_stock_0_100,price_mean_std_stock_0_100,order_count_mean_mean_stock_0_100,order_count_mean_std_stock_0_100,wap1_mean_100_200,wap1_std_100_200,wap1_fullrange_100_200,wap2_mean_100_200,wap2_std_100_200,wap2_fullrange_100_200,log_return_1_fullrange_100_200,log_return_1_sum_100_200,log_return_1_mean_100_200,log_return_1_realized_volatility_100_200,log_return_2_fullrange_100_200,log_return_2_sum_100_200,log_return_2_mean_100_200,log_return_2_realized_volatility_100_200,bid_ask_price_spread_1_mean_100_200,bid_ask_price_spread_1_std_100_200,bid_ask_price_spread_1_fullrange_100_200,bid_ask_price_spread_2_mean_100_200,bid_ask_price_spread_2_std_100_200,bid_ask_price_spread_2_fullrange_100_200,bid_ask_size_spread_1_mean_100_200,bid_ask_size_spread_1_std_100_200,bid_ask_size_spread_1_fullrange_100_200,bid_ask_size_spread_2_mean_100_200,bid_ask_size_spread_2_std_100_200,bid_ask_size_spread_2_fullrange_100_200,volume_mean_100_200,volume_sum_100_200,volume_std_100_200,price_mean_100_200,price_std_100_200,order_count_mean_100_200,order_count_sum_100_200,order_count_std_100_200,wap1_std_mean_period_100_200,wap2_std_mean_period_100_200,log_return_1_realized_volatility_mean_period_100_200,log_return_2_realized_volatility_mean_period_100_200,log_return_1_sum_mean_period_100_200,log_return_2_sum_mean_period_100_200,volume_mean_mean_period_100_200,volume_mean_sum_period_100_200,volume_mean_std_period_100_200,price_mean_mean_period_100_200,price_mean_std_period_100_200,order_count_mean_mean_period_100_200,order_count_mean_sum_period_100_200,order_count_mean_std_period_100_200,wap1_std_std_stock_100_200,wap2_std_std_stock_100_200,log_return_1_realized_volatility_std_stock_100_200,log_return_2_realized_volatility_std_stock_100_200,log_return_1_sum_std_stock_100_200,log_return_2_sum_std_stock_100_200,volume_mean_mean_stock_100_200,volume_mean_std_stock_100_200,price_mean_mean_stock_100_200,price_mean_std_stock_100_200,order_count_mean_mean_stock_100_200,order_count_mean_std_stock_100_200,wap1_mean_200_300,wap1_std_200_300,wap1_fullrange_200_300,wap2_mean_200_300,wap2_std_200_300,wap2_fullrange_200_300,log_return_1_fullrange_200_300,log_return_1_sum_200_300,log_return_1_mean_200_300,log_return_1_realized_volatility_200_300,log_return_2_fullrange_200_300,log_return_2_sum_200_300,log_return_2_mean_200_300,log_return_2_realized_volatility_200_300,bid_ask_price_spread_1_mean_200_300,bid_ask_price_spread_1_std_200_300,bid_ask_price_spread_1_fullrange_200_300,bid_ask_price_spread_2_mean_200_300,bid_ask_price_spread_2_std_200_300,bid_ask_price_spread_2_fullrange_200_300,bid_ask_size_spread_1_mean_200_300,bid_ask_size_spread_1_std_200_300,bid_ask_size_spread_1_fullrange_200_300,bid_ask_size_spread_2_mean_200_300,bid_ask_size_spread_2_std_200_300,bid_ask_size_spread_2_fullrange_200_300,volume_mean_200_300,volume_sum_200_300,volume_std_200_300,price_mean_200_300,price_std_200_300,order_count_mean_200_300,order_count_sum_200_300,order_count_std_200_300,wap1_std_mean_period_200_300,wap2_std_mean_period_200_300,log_return_1_realized_volatility_mean_period_200_300,log_return_2_realized_volatility_mean_period_200_300,log_return_1_sum_mean_period_200_300,log_return_2_sum_mean_period_200_300,volume_mean_mean_period_200_300,volume_mean_sum_period_200_300,volume_mean_std_period_200_300,price_mean_mean_period_200_300,price_mean_std_period_200_300,order_count_mean_mean_period_200_300,order_count_mean_sum_period_200_300,order_count_mean_std_period_200_300,wap1_std_std_stock_200_300,wap2_std_std_stock_200_300,log_return_1_realized_volatility_std_stock_200_300,log_return_2_realized_volatility_std_stock_200_300,log_return_1_sum_std_stock_200_300,log_return_2_sum_std_stock_200_300,volume_mean_mean_stock_200_300,volume_mean_std_stock_200_300,price_mean_mean_stock_200_300,price_mean_std_stock_200_300,order_count_mean_mean_stock_200_300,order_count_mean_std_stock_200_300,wap1_mean_300_400,wap1_std_300_400,wap1_fullrange_300_400,wap2_mean_300_400,wap2_std_300_400,wap2_fullrange_300_400,log_return_1_fullrange_300_400,log_return_1_sum_300_400,log_return_1_mean_300_400,log_return_1_realized_volatility_300_400,log_return_2_fullrange_300_400,log_return_2_sum_300_400,log_return_2_mean_300_400,log_return_2_realized_volatility_300_400,bid_ask_price_spread_1_mean_300_400,bid_ask_price_spread_1_std_300_400,bid_ask_price_spread_1_fullrange_300_400,bid_ask_price_spread_2_mean_300_400,bid_ask_price_spread_2_std_300_400,bid_ask_price_spread_2_fullrange_300_400,bid_ask_size_spread_1_mean_300_400,bid_ask_size_spread_1_std_300_400,bid_ask_size_spread_1_fullrange_300_400,bid_ask_size_spread_2_mean_300_400,bid_ask_size_spread_2_std_300_400,bid_ask_size_spread_2_fullrange_300_400,volume_mean_300_400,volume_sum_300_400,volume_std_300_400,price_mean_300_400,price_std_300_400,order_count_mean_300_400,order_count_sum_300_400,order_count_std_300_400,wap1_std_mean_period_300_400,wap2_std_mean_period_300_400,log_return_1_realized_volatility_mean_period_300_400,log_return_2_realized_volatility_mean_period_300_400,log_return_1_sum_mean_period_300_400,log_return_2_sum_mean_period_300_400,volume_mean_mean_period_300_400,volume_mean_sum_period_300_400,volume_mean_std_period_300_400,price_mean_mean_period_300_400,price_mean_std_period_300_400,order_count_mean_mean_period_300_400,order_count_mean_sum_period_300_400,order_count_mean_std_period_300_400,wap1_std_std_stock_300_400,wap2_std_std_stock_300_400,log_return_1_realized_volatility_std_stock_300_400,log_return_2_realized_volatility_std_stock_300_400,log_return_1_sum_std_stock_300_400,log_return_2_sum_std_stock_300_400,volume_mean_mean_stock_300_400,volume_mean_std_stock_300_400,price_mean_mean_stock_300_400,price_mean_std_stock_300_400,order_count_mean_mean_stock_300_400,order_count_mean_std_stock_300_400,wap1_mean_400_500,wap1_std_400_500,wap1_fullrange_400_500,wap2_mean_400_500,wap2_std_400_500,wap2_fullrange_400_500,log_return_1_fullrange_400_500,log_return_1_sum_400_500,log_return_1_mean_400_500,log_return_1_realized_volatility_400_500,log_return_2_fullrange_400_500,log_return_2_sum_400_500,log_return_2_mean_400_500,log_return_2_realized_volatility_400_500,bid_ask_price_spread_1_mean_400_500,bid_ask_price_spread_1_std_400_500,bid_ask_price_spread_1_fullrange_400_500,bid_ask_price_spread_2_mean_400_500,bid_ask_price_spread_2_std_400_500,bid_ask_price_spread_2_fullrange_400_500,bid_ask_size_spread_1_mean_400_500,bid_ask_size_spread_1_std_400_500,bid_ask_size_spread_1_fullrange_400_500,bid_ask_size_spread_2_mean_400_500,bid_ask_size_spread_2_std_400_500,bid_ask_size_spread_2_fullrange_400_500,volume_mean_400_500,volume_sum_400_500,volume_std_400_500,price_mean_400_500,price_std_400_500,order_count_mean_400_500,order_count_sum_400_500,order_count_std_400_500,wap1_std_mean_period_400_500,wap2_std_mean_period_400_500,log_return_1_realized_volatility_mean_period_400_500,log_return_2_realized_volatility_mean_period_400_500,log_return_1_sum_mean_period_400_500,log_return_2_sum_mean_period_400_500,volume_mean_mean_period_400_500,volume_mean_sum_period_400_500,volume_mean_std_period_400_500,price_mean_mean_period_400_500,price_mean_std_period_400_500,order_count_mean_mean_period_400_500,order_count_mean_sum_period_400_500,order_count_mean_std_period_400_500,wap1_std_std_stock_400_500,wap2_std_std_stock_400_500,log_return_1_realized_volatility_std_stock_400_500,log_return_2_realized_volatility_std_stock_400_500,log_return_1_sum_std_stock_400_500,log_return_2_sum_std_stock_400_500,volume_mean_mean_stock_400_500,volume_mean_std_stock_400_500,price_mean_mean_stock_400_500,price_mean_std_stock_400_500,order_count_mean_mean_stock_400_500,order_count_mean_std_stock_400_500,wap1_mean_500_600,wap1_std_500_600,wap1_fullrange_500_600,wap2_mean_500_600,wap2_std_500_600,wap2_fullrange_500_600,log_return_1_fullrange_500_600,log_return_1_sum_500_600,log_return_1_mean_500_600,log_return_1_realized_volatility_500_600,log_return_2_fullrange_500_600,log_return_2_sum_500_600,log_return_2_mean_500_600,log_return_2_realized_volatility_500_600,bid_ask_price_spread_1_mean_500_600,bid_ask_price_spread_1_std_500_600,bid_ask_price_spread_1_fullrange_500_600,bid_ask_price_spread_2_mean_500_600,bid_ask_price_spread_2_std_500_600,bid_ask_price_spread_2_fullrange_500_600,bid_ask_size_spread_1_mean_500_600,bid_ask_size_spread_1_std_500_600,bid_ask_size_spread_1_fullrange_500_600,bid_ask_size_spread_2_mean_500_600,bid_ask_size_spread_2_std_500_600,bid_ask_size_spread_2_fullrange_500_600,volume_mean_500_600,volume_sum_500_600,volume_std_500_600,price_mean_500_600,price_std_500_600,order_count_mean_500_600,order_count_sum_500_600,order_count_std_500_600,wap1_std_mean_period_500_600,wap2_std_mean_period_500_600,log_return_1_realized_volatility_mean_period_500_600,log_return_2_realized_volatility_mean_period_500_600,log_return_1_sum_mean_period_500_600,log_return_2_sum_mean_period_500_600,volume_mean_mean_period_500_600,volume_mean_sum_period_500_600,volume_mean_std_period_500_600,price_mean_mean_period_500_600,price_mean_std_period_500_600,order_count_mean_mean_period_500_600,order_count_mean_sum_period_500_600,order_count_mean_std_period_500_600,wap1_std_std_stock_500_600,wap2_std_std_stock_500_600,log_return_1_realized_volatility_std_stock_500_600,log_return_2_realized_volatility_std_stock_500_600,log_return_1_sum_std_stock_500_600,log_return_2_sum_std_stock_500_600,volume_mean_mean_stock_500_600,volume_mean_std_stock_500_600,price_mean_mean_stock_500_600,price_mean_std_stock_500_600,order_count_mean_mean_stock_500_600,order_count_mean_std_stock_500_600
0,0-5,5,0,0.004136,1.002747,0.001098,0.002706,1.002567,0.0011,0.002806,0.001571,0.002687,6.718433e-05,0.001978,0.002639,0.002412,6e-05,0.002421,0.000877,0.000195,0.000983,0.001147,0.000198,0.000672,-16.625,126.441775,536,50.025,95.018618,423,128.360229,770.161377,109.786964,1.003077,0.000558,4.666667,28.0,4.033196,0.00057,0.00062,0.001911,0.002547,0.000877,0.000929,334.291229,37440.617188,1191.232788,0.992713,0.094657,3.989378,446.810358,2.228897,0.000482,0.000621,0.004111,0.004679,0.005242,0.005307,106.060745,96.844856,0.976031,0.153171,3.309269,2.135482,1.003984,0.000396,0.001471,1.004098,0.0005,0.001667,0.001275,-0.000127,-2e-06,0.002181,0.002416,-0.000346,-5e-06,0.003073,0.000804,0.000111,0.000465,0.00107,0.000202,0.000931,-2.115942,111.257324,654.0,-54.623188,124.024678,511.0,73.157021,512.099121,95.958725,1.00417,0.00026,1.857143,13.0,1.214986,0.000502,0.000565,0.001916,0.002529,0.000448,0.000454,258.615356,28964.917969,700.929749,1.002169,0.00161,3.960038,443.52421,2.311941,0.000454,0.000545,0.001721,0.002503,0.00146,0.001638,104.995697,99.563965,0.979401,0.142188,3.261583,2.002002,1.004047,0.000327,0.001244,1.003895,0.000394,0.001495,0.000959,-0.000426,-8e-06,0.001689,0.002236,-1.4e-05,-2.617965e-07,0.003165,0.000981,0.000205,0.000672,0.001275,0.000177,0.000672,-12.018519,134.686427,577.0,43.166667,107.575467,353.0,52.542999,315.257996,47.269367,1.003998,0.000283,2.5,15.0,1.643168,0.000483,0.000522,0.001945,0.002528,0.000366,0.00035,231.03125,25875.5,312.769104,0.993762,0.094762,3.705772,415.046415,1.479202,0.000452,0.000511,0.001641,0.002337,0.001411,0.001596,100.441055,88.929283,0.977584,0.148221,3.219615,2.044801,1.004042,0.000311,0.001228,1.003886,0.000375,0.001274,0.001511,-5e-05,-1.208158e-06,0.001853,0.001497,0.001024,2.5e-05,0.00161,0.000918,0.000321,0.000983,0.001255,0.000177,0.000672,47.731707,118.079004,398.0,43.121951,99.930024,393.0,108.848648,544.243225,186.449783,1.004329,0.000226,3.0,15.0,3.391165,0.000414,0.000478,0.001716,0.002252,-0.000254,-0.000174,250.838974,28093.964844,604.42395,1.002663,0.001755,3.739298,418.801338,1.509354,0.000426,0.000484,0.001464,0.00219,0.001406,0.001558,99.468056,95.945068,0.972571,0.16335,3.157341,1.996562,1.003908,0.00022,0.001007,1.0039,0.000488,0.001956,0.001531,0.000469,1e-05,0.001778,0.002842,-0.000478,-1e-05,0.003457,0.000789,0.000184,0.000776,0.001165,0.000206,0.000931,-50.520833,71.290532,302.0,-30.666667,111.743501,498.0,51.51469,309.088135,83.239075,1.003754,0.000173,2.166667,13.0,2.401388,0.000434,0.000477,0.001725,0.002294,-0.000119,-0.000218,214.066956,23975.5,324.479584,1.002503,0.001853,3.967582,444.369132,1.612947,0.000406,0.000476,0.00154,0.002123,0.001306,0.001439,98.857071,88.278702,0.975729,0.153994,3.18682,1.988772,1.003369,0.000546,0.001947,1.003265,0.000602,0.002069,0.001104,-0.000263,-5.253526e-06,0.001459,0.002206,-0.000272,-5e-06,0.003018,0.000784,0.00018,0.000672,0.001217,0.000244,0.000776,16.3,96.183975,435.0,36.52,93.827292,398.0,73.928932,739.289307,156.900833,1.003308,0.000516,2.6,26.0,1.776388,0.0005,0.000543,0.001728,0.002252,-0.000499,-0.000469,217.899887,24404.787109,262.424286,1.002144,0.001949,3.97751,445.481109,1.586054,0.000364,0.000413,0.001376,0.001908,0.001251,0.001361,99.670937,90.517441,0.974452,0.157978,3.162263,1.998994
1,0-11,11,0,0.001445,0.999884,0.000131,0.000315,0.999922,0.000136,0.000437,0.00398,-0.003979,-0.0003061067,0.003799,0.004146,-0.003819,-0.000294,0.003937,0.000633,0.000159,0.000552,0.000934,4.8e-05,0.000201,-54.692308,80.211579,299,-1.846154,30.797311,139,23.000399,115.001991,48.63773,0.999885,0.000114,1.4,7.0,0.547723,0.000273,0.000333,0.00245,0.002661,-0.001424,-0.001427,225.905731,25301.441406,203.319778,0.982456,0.133072,4.080183,456.980493,2.198129,0.000482,0.000621,0.004111,0.004679,0.005242,0.005307,106.060745,96.844856,0.976031,0.153171,3.309269,2.135482,0.999965,0.000162,0.000501,1.000041,0.000189,0.000748,0.000505,0.000229,7e-06,0.000521,0.001093,-0.00024,-7e-06,0.001218,0.000506,0.000181,0.000652,0.000836,0.000191,0.000602,-4.257143,127.6895,459.0,-27.257143,98.451643,379.0,16.665712,49.997139,14.29415,0.99993,5.8e-05,1.666667,5.0,1.154701,0.000258,0.000306,0.000945,0.001344,2.3e-05,3.4e-05,295.51947,33098.179688,582.604858,0.991544,0.094541,4.108801,460.185736,2.723966,0.000454,0.000545,0.001721,0.002503,0.00146,0.001638,104.995697,99.563965,0.979401,0.142188,3.261583,2.002002,1.000131,4.9e-05,0.00019,1.000027,0.000185,0.000468,0.000233,0.000235,6e-06,0.000258,0.000729,0.000537,1.452692e-05,0.000631,0.000332,9.3e-05,0.000201,0.000518,0.000103,0.000301,-164.378378,161.450603,339.0,50.594595,43.95102,133.0,37.338062,224.028381,84.607689,1.000117,0.000249,1.5,9.0,0.83666,0.000263,0.000312,0.000877,0.001325,6e-05,4e-06,251.16713,28130.71875,417.529419,0.991581,0.094549,4.015329,449.716808,2.358766,0.000452,0.000511,0.001641,0.002337,0.001411,0.001596,100.441055,88.929283,0.977584,0.148221,3.219615,2.044801,1.000253,5.8e-05,0.000188,1.000338,0.000182,0.000641,0.000339,3.5e-05,8.25763e-07,0.0003,0.000561,0.000176,4e-06,0.00063,0.000329,0.000102,0.000351,0.00056,9.8e-05,0.000402,-50.52381,183.822188,663.0,-108.52381,138.56305,424.0,14.203468,71.017342,10.087463,1.000222,9.2e-05,2.6,13.0,1.341641,0.000247,0.000293,0.000777,0.001177,6.4e-05,0.000137,235.074661,26328.361328,298.945557,0.982696,0.133109,3.762298,421.377427,2.31357,0.000426,0.000484,0.001464,0.00219,0.001406,0.001558,99.468056,95.945068,0.972571,0.16335,3.157341,1.996562,1.000346,8e-05,0.000316,1.000171,0.000176,0.000768,0.000365,7.5e-05,2e-06,0.000371,0.001046,-0.000338,-9e-06,0.001256,0.000396,7.3e-05,0.000301,0.000587,0.000111,0.000452,-106.305556,154.486212,558.0,78.444444,111.191841,542.0,70.755203,283.020813,139.509903,1.000212,9.7e-05,2.25,9.0,2.5,0.000247,0.000295,0.000766,0.00114,2.7e-05,-7.3e-05,221.393356,24796.054688,321.4487,0.991671,0.094559,3.748235,419.802268,2.029273,0.000406,0.000476,0.00154,0.002123,0.001306,0.001439,98.857071,88.278702,0.975729,0.153994,3.18682,1.988772,1.00061,0.00023,0.000544,1.000524,0.000231,0.001037,0.000872,-1.3e-05,-3.607373e-07,0.000857,0.001319,0.000575,1.6e-05,0.001435,0.000339,0.000163,0.000552,0.000783,0.000207,0.000552,-77.648649,103.960253,393.0,19.72973,140.092558,409.0,78.041107,546.28772,92.317375,1.000616,0.000213,2.0,14.0,1.914854,0.000229,0.000282,0.000802,0.001173,-2e-05,3e-05,300.718781,33680.503906,650.013489,0.991664,0.094562,4.240549,474.941532,3.710251,0.000364,0.000413,0.001376,0.001908,0.001251,0.001361,99.670937,90.517441,0.974452,0.157978,3.162263,1.998994
2,0-16,16,0,0.002168,1.000159,0.000176,0.000643,1.00017,0.000352,0.001319,0.0011,1e-05,2.300308e-07,0.00107,0.001838,-6.7e-05,-2e-06,0.002782,0.000877,0.000109,0.000383,0.00134,0.000209,0.000718,-60.72093,66.847846,215,-59.930233,119.040688,498,64.477417,128.954834,88.355804,0.999928,0.000406,2.5,5.0,2.12132,0.000279,0.000319,0.001919,0.002219,-0.000919,-0.000941,233.294342,26128.964844,249.398911,0.999732,0.000895,3.422477,383.317374,3.133745,0.000482,0.000621,0.004111,0.004679,0.005242,0.005307,106.060745,96.844856,0.976031,0.153171,3.309269,2.135482,1.000312,0.000238,0.001038,1.00039,0.000364,0.00107,0.00088,-0.000475,-1.1e-05,0.001049,0.000894,-0.000237,-5e-06,0.001282,0.000673,6.3e-05,0.000335,0.000968,0.00014,0.000527,-42.159091,57.631929,213.0,-60.318182,53.90119,189.0,68.208389,341.041962,124.434013,1.000215,0.000232,2.4,12.0,2.607681,0.000278,0.000332,0.000917,0.001325,4.3e-05,7.3e-05,389.145111,43584.25,2011.265869,0.981829,0.132989,3.204913,358.950234,1.758573,0.000454,0.000545,0.001721,0.002503,0.00146,0.001638,104.995697,99.563965,0.979401,0.142188,3.261583,2.002002,0.999476,0.000299,0.001063,0.999632,0.000383,0.001478,0.001464,0.000351,1.1e-05,0.001445,0.0021,-0.000884,-2.680239e-05,0.002236,0.00067,0.000199,0.000718,0.001117,0.000329,0.001245,102.333333,148.32095,589.0,2.242424,95.515388,406.0,83.622787,501.736725,97.398666,0.999512,0.000314,2.166667,13.0,1.47196,0.000263,0.00031,0.000905,0.00129,7.1e-05,2.2e-05,251.082687,28121.261719,448.780945,0.990812,0.094474,3.308289,370.528322,1.802929,0.000452,0.000511,0.001641,0.002337,0.001411,0.001596,100.441055,88.929283,0.977584,0.148221,3.219615,2.044801,0.999672,0.000293,0.000894,0.999749,0.000335,0.000899,0.000588,-0.000895,-5.261982e-05,0.000535,0.001017,0.000783,4.6e-05,0.001004,0.000881,0.000111,0.000335,0.001332,0.000218,0.000622,-6.176471,86.791586,398.0,-71.352941,134.140291,421.0,50.974537,101.949074,70.674767,0.999712,0.000305,2.0,4.0,1.414214,0.000267,0.0003,0.000899,0.001267,-8.5e-05,-2.2e-05,240.997665,26991.738281,270.412506,0.999775,0.001317,3.497083,391.673332,2.017961,0.000426,0.000484,0.001464,0.00219,0.001406,0.001558,99.468056,95.945068,0.972571,0.16335,3.157341,1.996562,0.998697,0.000396,0.001088,0.999066,0.000701,0.002101,0.001066,-0.000815,-2.5e-05,0.00099,0.001252,-0.001837,-5.7e-05,0.001706,0.000667,0.000115,0.000479,0.000913,0.000234,0.000766,145.5,131.640517,504.0,-22.6875,89.102492,305.0,70.899155,425.394928,156.593857,0.998646,0.00052,3.333333,20.0,3.614784,0.000284,0.000332,0.000924,0.001324,-0.000111,-0.00012,329.021271,36850.382812,965.865906,0.999687,0.001465,3.697543,414.12477,2.66661,0.000406,0.000476,0.00154,0.002123,0.001306,0.001439,98.857071,88.278702,0.975729,0.153994,3.18682,1.988772,0.997783,0.000365,0.001045,0.997985,0.000632,0.002153,0.000695,-0.000882,-4.641933e-05,0.00064,0.003048,-0.000495,-2.6e-05,0.002509,0.000552,9.2e-05,0.000287,0.001134,0.000365,0.001245,163.157895,113.287375,380.0,-21.052632,99.439246,370.0,164.882858,659.531433,104.679047,0.997698,0.000343,3.5,14.0,1.732051,0.000281,0.000331,0.000925,0.001346,-0.000278,-0.000298,219.356705,24567.951172,225.724548,0.963771,0.186318,3.110471,348.372721,1.698501,0.000364,0.000413,0.001376,0.001908,0.001251,0.001361,99.670937,90.517441,0.974452,0.157978,3.162263,1.998994
3,0-31,31,0,0.002195,1.000312,0.000112,0.000399,0.999829,0.000109,0.000377,0.003002,0.002413,0.0001419266,0.002737,0.002193,0.001919,0.000113,0.001978,0.000623,4.4e-05,0.000139,0.000748,5e-05,0.000185,-147.411765,94.041253,363,136.411765,78.374947,333,14.503355,29.00671,2.121811,1.000231,0.0,1.0,2.0,0.0,0.000304,0.000355,0.001976,0.002313,0.000427,0.000451,223.275696,25006.876953,226.149033,0.999805,0.000916,3.940881,441.378638,2.078866,0.000482,0.000621,0.004111,0.004679,0.005242,0.005307,106.060745,96.844856,0.976031,0.153171,3.309269,2.135482,0.99882,0.000489,0.00159,0.998486,0.000486,0.00194,0.001619,-0.001658,-4.9e-05,0.001752,0.001548,-0.001646,-4.8e-05,0.002126,0.000988,0.000293,0.000879,0.001476,0.000309,0.001018,70.058824,114.344758,450.0,115.441176,90.331441,400.0,123.920372,371.761108,69.223427,0.999128,0.000692,3.333333,10.0,2.516611,0.000315,0.000364,0.001019,0.001446,-6.7e-05,-2e-05,223.168121,24994.830078,267.30072,0.990771,0.094469,3.596127,402.766241,1.784226,0.000454,0.000545,0.001721,0.002503,0.00146,0.001638,104.995697,99.563965,0.979401,0.142188,3.261583,2.002002,0.998593,0.000164,0.000397,0.998262,0.000202,0.000825,0.000669,0.000312,2e-05,0.000535,0.001124,-1.6e-05,-1.022636e-06,0.000968,0.000928,0.000233,0.000555,0.001154,0.000247,0.000694,-49.0,142.686603,372.0,105.3125,91.99255,365.0,4.992135,4.992135,0.0,0.998427,0.0,1.0,1.0,0.0,0.000309,0.000361,0.001097,0.001463,-0.000114,-0.000159,203.426041,22783.716797,294.239624,0.963894,0.186339,3.508387,392.939385,2.294911,0.000452,0.000511,0.001641,0.002337,0.001411,0.001596,100.441055,88.929283,0.977584,0.148221,3.219615,2.044801,0.99871,0.000409,0.001343,0.998487,0.000535,0.002065,0.001929,-0.000461,-1.537818e-05,0.001466,0.002461,0.000134,4e-06,0.002252,0.000666,0.000263,0.001295,0.000958,0.000344,0.001064,-52.333333,156.114753,484.0,31.366667,94.823787,303.0,173.480621,1040.883789,146.203445,0.999118,0.000319,5.833333,35.0,5.269409,0.000297,0.000346,0.001004,0.001402,2.3e-05,-2e-06,246.937546,27657.005859,572.643799,0.990648,0.094462,3.647547,408.525303,2.099352,0.000426,0.000484,0.001464,0.00219,0.001406,0.001558,99.468056,95.945068,0.972571,0.16335,3.157341,1.996562,0.998084,5.4e-05,0.00011,0.998585,0.000274,0.000787,0.000218,-8.3e-05,-8e-06,0.000177,0.00075,0.000283,2.6e-05,0.000663,0.001035,2.3e-05,4.6e-05,0.001262,0.000201,0.000509,182.818182,17.747215,61.0,-83.181818,106.452636,308.0,4.991672,4.991672,0.0,0.998334,0.0,1.0,1.0,0.0,0.000275,0.00032,0.00093,0.001314,8.6e-05,8.5e-05,285.935883,32024.820312,921.911438,0.954999,0.207375,3.49487,391.425438,2.4128,0.000406,0.000476,0.00154,0.002123,0.001306,0.001439,98.857071,88.278702,0.975729,0.153994,3.18682,1.988772,0.998074,0.000534,0.001153,0.998257,0.000599,0.001321,0.001211,-0.000635,-5.289089e-05,0.000987,0.001581,-0.000857,-7.1e-05,0.00136,0.00106,8.9e-05,0.000185,0.001241,0.000134,0.000278,102.333333,122.560066,354.0,-38.5,98.932943,250.0,253.985077,507.970154,275.916534,0.997991,2.7e-05,5.0,10.0,4.242641,0.000276,0.000321,0.000948,0.001332,-9.7e-05,-6.5e-05,261.154877,29249.347656,497.812988,0.981729,0.132983,3.545395,397.084239,2.132017,0.000364,0.000413,0.001376,0.001908,0.001251,0.001361,99.670937,90.517441,0.974452,0.157978,3.162263,1.998994
4,0-62,62,0,0.001747,0.99966,0.00032,0.000915,0.999598,0.000375,0.001147,0.003341,0.002056,7.908193e-05,0.002773,0.002927,0.001416,5.4e-05,0.002656,0.000443,0.000101,0.000419,0.000764,0.000112,0.000513,-21.576923,96.517013,403,6.346154,85.114954,315,54.237686,216.950745,98.642136,0.999643,0.000341,3.5,14.0,3.785939,0.000285,0.000311,0.001868,0.002143,8.9e-05,6.1e-05,218.700104,24494.412109,309.453186,0.963971,0.186352,3.868412,433.262169,2.243512,0.000482,0.000621,0.004111,0.004679,0.005242,0.005307,106.060745,96.844856,0.976031,0.153171,3.309269,2.135482,0.999727,0.000154,0.000512,0.999534,0.000372,0.000964,0.000392,0.000259,7e-06,0.000383,0.001701,0.000915,2.5e-05,0.001672,0.000311,8e-05,0.00042,0.000653,0.000205,0.000653,-125.666667,124.795604,394.0,-16.388889,112.71336,477.0,28.991388,115.965553,52.665947,0.999641,0.00016,3.25,13.0,3.86221,0.000225,0.000284,0.000839,0.001265,-4.4e-05,-2e-05,227.097214,25434.886719,351.262665,0.954992,0.20737,3.847491,430.919035,2.947905,0.000454,0.000545,0.001721,0.002503,0.00146,0.001638,104.995697,99.563965,0.979401,0.142188,3.261583,2.002002,0.999883,0.000187,0.000589,0.999829,0.000233,0.000834,0.000408,-0.000371,-1.5e-05,0.000399,0.001272,-0.000108,-4.33223e-06,0.001126,0.000375,0.000119,0.000373,0.00055,0.000167,0.000559,-130.08,128.092714,503.0,-12.08,104.436552,504.0,79.638718,238.916153,131.900574,0.999788,0.000128,2.666667,8.0,2.081666,0.000235,0.000292,0.000781,0.001175,5.2e-05,6.4e-05,249.213577,27911.919922,501.250153,0.963986,0.186356,3.539152,396.385075,1.839875,0.000452,0.000511,0.001641,0.002337,0.001411,0.001596,100.441055,88.929283,0.977584,0.148221,3.219615,2.044801,0.999514,0.00012,0.000467,0.99951,0.000251,0.000769,0.000414,-0.000142,-4.171483e-06,0.000502,0.001349,-0.000635,-1.9e-05,0.001578,0.000354,0.000103,0.000326,0.000627,0.000129,0.000373,28.264706,134.415301,426.0,-16.205882,86.892403,420.0,211.298492,1056.492432,152.340744,0.99955,7.9e-05,6.4,32.0,6.188699,0.000205,0.000266,0.000752,0.001154,-4.6e-05,-7.2e-05,287.07312,32152.189453,671.476624,0.97287,0.162129,3.848408,431.021746,2.56893,0.000426,0.000484,0.001464,0.00219,0.001406,0.001558,99.468056,95.945068,0.972571,0.16335,3.157341,1.996562,0.999481,0.000212,0.00078,0.999698,0.000297,0.000893,0.000947,0.000214,6e-06,0.000891,0.001221,0.000606,1.6e-05,0.0013,0.000493,0.00016,0.000559,0.000829,0.000187,0.000606,31.657895,75.562495,309.0,-101.552632,83.672688,223.0,40.647106,121.941315,66.07637,0.999574,6.9e-05,3.333333,10.0,2.309401,0.000213,0.000273,0.000737,0.001133,-2.2e-05,2.1e-05,216.758667,24276.970703,290.775879,0.99066,0.09446,3.698044,414.180912,2.264936,0.000406,0.000476,0.00154,0.002123,0.001306,0.001439,98.857071,88.278702,0.975729,0.153994,3.18682,1.988772,0.999454,0.000307,0.000925,0.99964,0.00018,0.000642,0.001233,0.000573,3.37012e-05,0.001124,0.000809,-0.000173,-1e-05,0.00078,0.000414,7.2e-05,0.000373,0.000749,7.5e-05,0.000233,38.588235,103.169193,410.0,-119.823529,122.337768,392.0,13.329403,39.988209,18.005182,0.999543,0.00018,4.0,12.0,5.196152,0.000224,0.000271,0.000764,0.001113,4.8e-05,2.6e-05,272.70578,30543.048828,544.022766,0.981832,0.132992,4.039177,452.387796,2.807819,0.000364,0.000413,0.001376,0.001908,0.001251,0.001361,99.670937,90.517441,0.974452,0.157978,3.162263,1.998994


In [16]:
final_training_data.isna().sum().sort_values()

id                                    0
price_std_0_100                       0
order_count_mean_0_100                0
order_count_sum_0_100                 0
order_count_std_0_100                 0
                                     ..
wap1_std_400_500                      4
wap1_fullrange_400_500                4
wap2_mean_400_500                     4
wap2_fullrange_400_500                4
order_count_mean_std_stock_500_600    4
Length: 364, dtype: int64

In [9]:
import mlflow
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from lofo import LOFOImportance, Dataset, plot_importance
from sklearn.metrics import make_scorer
from sklearn.model_selection import train_test_split
import lightgbm as lgb
from utils.misc_utils import rmspe_eval, rmspe_obj, rmspe
from utils.feature_engineering_utils import full_feature_engineering_by_cutoff, load_train_test
from utils.logging_utils import create_logger

rmspe_scorer = make_scorer(rmspe, greater_is_better=False)
logger = create_logger(export_log=False)

def optiver_train_and_log_experiment(run_name, cutoffs):
    #mlflow.set_tracking_uri("https://localhost:1111")
    print(f'...Currently Running {run_name}...')

    mlflow_experiment_id = 0
    mlflow.start_run(run_name = run_name, experiment_id=mlflow_experiment_id)

    train, test = load_train_test()
    train_id = train.stock_id.unique()
    test_id = test.stock_id.unique()
    
    print('...Currently Feature Engineering...')
    final_training_data = full_feature_engineering_by_cutoff(
        cutoffs = cutoffs,
        stock_ids = train_id,
        training=True)

    mlflow.log_param('Cutoffs', cutoffs)
    mlflow.log_param('Train Dataset Row/Columns', final_training_data.shape)
    mlflow.log_param('Test Dataset Row/Columns', final_training_data.shape)


    ########################Initial Model Training########################
    print('...Currently Training First LGBM...')
    model_col = [col for col in final_training_data.columns if ('id' not in col) & ('target' not in col)]

    X_train, X_test, y_train, y_test = train_test_split(
                                            final_training_data.drop('target', axis=1)[model_col],
                                            final_training_data['target'],
                                            test_size=0.1
                                            )

    X_train, X_valid, y_train, y_valid = train_test_split(
                                            X_train,
                                            y_train,
                                            test_size=0.1
                                            )

    train_data = lgb.Dataset(X_train, label=y_train)
    valid_data = lgb.Dataset(X_valid, label=y_valid)
    test_data = lgb.Dataset(X_test, label=y_test)

    parameters = {'verbosity': 0,
                    'n_jobs': -1,
                    'seed': 123}

    model = lgb.train(parameters,
                        train_data,
                        valid_sets=valid_data,
                        fobj = rmspe_obj,
                        feval = rmspe_eval,
                        num_boost_round=50000,
                        early_stopping_rounds=200)

    mlflow.log_metric('Test Score Full Model', rmspe(y_test, model.predict(X_test)))

    ########################LOFO Feature Selection#######################
    print('...Currently Performing LOFO...')

    # extract a sample of the data
    sample_df = X_test.copy() 
    sample_df['target'] = y_test
    sample_df = sample_df.sample(frac=0.01, random_state=0)

    cv = KFold(n_splits=4, shuffle=True, random_state=0)
    dataset = Dataset(df=sample_df, target="target", features=[col for col in sample_df.columns if col != 'target'])
    lofo_imp = LOFOImportance(dataset, cv=cv, scoring=rmspe_scorer)
    importance_df = lofo_imp.get_importance()
    selected_lofo_features = importance_df.loc[importance_df.importance_mean>0.001]['feature'].to_list()

    mlflow.log_param('Feature Importance DF', importance_df)
    mlflow.log_param('LOFO Selected Features', selected_lofo_features)

    ########################Final Model Training#######################
    print('...Currently Training Final Model...\n')
    selected_lofo_features = importance_df.loc[importance_df.importance_mean>0.001]['feature'].to_list()
    X_train_lofo, X_test_lofo, y_train_lofo, y_test_lofo = train_test_split(
                                            final_training_data.drop('target', axis=1)[selected_lofo_features],
                                            final_training_data['target'],
                                            test_size=0.1
                                            )

    X_train_lofo, X_valid_lofo, y_train_lofo, y_valid_lofo = train_test_split(
                                            X_train_lofo,
                                            y_train_lofo,
                                            test_size=0.1
                                            )

    train_data_lofo = lgb.Dataset(X_train_lofo, label=y_train_lofo)
    valid_data_lofo = lgb.Dataset(X_valid_lofo, label=y_valid_lofo)
    test_data_lofo = lgb.Dataset(X_test_lofo, label=y_test_lofo)

    parameters = {'verbosity': 0,
                    'n_jobs': -1,
                    'seed': 123}

    model = lgb.train(parameters,
                        train_data_lofo,
                        valid_sets=valid_data_lofo,
                        fobj = rmspe_obj,
                        feval = rmspe_eval,
                        num_boost_round=50000,
                        early_stopping_rounds=200)


    mlflow.log_metric('Test Score Feature-Selected Model', rmspe(y_test_lofo, model.predict(X_test_lofo)))
    mlflow.end_run()

In [10]:
cutoffs =  [(0,150), (150, 300), (300, 450), (450, 600)]
run_name = f'LOFO_{cutoffs}'

cutoffs_list =  [
    [(0,600)], 
    [(0,300), (300, 600)], 
    [(0,200), (200, 400), (400, 600)],  
    [(0,100), (100, 200), (200, 300), (300, 400), (400, 500), (500, 600)],
    [(0,600), (150, 600), (300, 600), (450, 600)]
]

for cutoffs in cutoffs_list:
    run_name = f'LOFO_{cutoffs}'
    optiver_train_and_log_experiment(
        run_name=run_name, 
        cutoffs=cutoffs
    )

  0%|          | 0/112 [00:00<?, ?it/s]

...Currently Feature Engineering...


100%|██████████| 112/112 [10:08<00:00,  5.43s/it]
100%|██████████| 112/112 [09:39<00:00,  5.18s/it]
100%|██████████| 112/112 [09:42<00:00,  5.20s/it]
100%|██████████| 112/112 [09:38<00:00,  5.17s/it]


...Currently Training First LGBM...
[1]	valid_0's rmspe: 0.907528
Training until validation scores don't improve for 200 rounds
[2]	valid_0's rmspe: 0.824881
[3]	valid_0's rmspe: 0.751048
[4]	valid_0's rmspe: 0.685383
[5]	valid_0's rmspe: 0.626776
[6]	valid_0's rmspe: 0.574917
[7]	valid_0's rmspe: 0.529015
[8]	valid_0's rmspe: 0.488529
[9]	valid_0's rmspe: 0.452821
[10]	valid_0's rmspe: 0.421618
[11]	valid_0's rmspe: 0.394269
[12]	valid_0's rmspe: 0.370463
[13]	valid_0's rmspe: 0.349818
[14]	valid_0's rmspe: 0.331936
[15]	valid_0's rmspe: 0.316642
[16]	valid_0's rmspe: 0.303497
[17]	valid_0's rmspe: 0.29225
[18]	valid_0's rmspe: 0.282625
[19]	valid_0's rmspe: 0.274491
[20]	valid_0's rmspe: 0.267584
[21]	valid_0's rmspe: 0.261713
[22]	valid_0's rmspe: 0.256691
[23]	valid_0's rmspe: 0.252418
[24]	valid_0's rmspe: 0.248755
[25]	valid_0's rmspe: 0.245643
[26]	valid_0's rmspe: 0.243034
[27]	valid_0's rmspe: 0.240829
[28]	valid_0's rmspe: 0.238956
[29]	valid_0's rmspe: 0.237341
[30]	valid_0'

100%|██████████| 192/192 [02:10<00:00,  1.47it/s]


...Currently Training Final Model...
[1]	valid_0's rmspe: 0.90747
Training until validation scores don't improve for 200 rounds
[2]	valid_0's rmspe: 0.824703
[3]	valid_0's rmspe: 0.750972
[4]	valid_0's rmspe: 0.685307
[5]	valid_0's rmspe: 0.626897
[6]	valid_0's rmspe: 0.575131
[7]	valid_0's rmspe: 0.529388
[8]	valid_0's rmspe: 0.489037
[9]	valid_0's rmspe: 0.453582
[10]	valid_0's rmspe: 0.422625
[11]	valid_0's rmspe: 0.395576
[12]	valid_0's rmspe: 0.372019
[13]	valid_0's rmspe: 0.35162
[14]	valid_0's rmspe: 0.334201
[15]	valid_0's rmspe: 0.319096
[16]	valid_0's rmspe: 0.306206
[17]	valid_0's rmspe: 0.295229
[18]	valid_0's rmspe: 0.28587
[19]	valid_0's rmspe: 0.27805
[20]	valid_0's rmspe: 0.271485
[21]	valid_0's rmspe: 0.265928
[22]	valid_0's rmspe: 0.261048
[23]	valid_0's rmspe: 0.256983
[24]	valid_0's rmspe: 0.253609
[25]	valid_0's rmspe: 0.250816
[26]	valid_0's rmspe: 0.248278
[27]	valid_0's rmspe: 0.246187
[28]	valid_0's rmspe: 0.244487
[29]	valid_0's rmspe: 0.242791
[30]	valid_0's 