In [3]:
import os, sys
import pandas as pd
import numpy as np

In [4]:
EPS = 1e-8

f_data='taq.csv'
df_data = pd.read_csv(f_data)
df_data['time']=pd.to_datetime(df_data['time'])
df_data = df_data.set_index('time')

t_start = '2021-04-19 06:00:00'
t_end = '2021-04-19 06:30:00'
df_data = df_data.loc[t_start: t_end].sort_index()

In [7]:
def infer_order_flow(df_data):

    df_trade = df_data[['amount', 'price', 'side']].dropna()
    df_ask = df_data[['ask', 'ask_size']].dropna()
    df_bid = df_data[['bid', 'bid_size']].dropna()
    df_ask=df_ask[df_ask['ask_size'].diff()!=0]
    df_bid=df_bid[df_bid['bid_size'].diff()!=0]

    #ask insertion
    df_ask['ask_price_delta']=df_ask['ask'].diff()
    df_ask['ask_size_delta']=df_ask['ask_size'].diff()
    df_ask_insertion=df_ask[((df_ask['ask_price_delta'].abs()<EPS) &(df_ask['ask_size_delta']>0))|(df_ask['ask_price_delta']<0)].copy()
    df_ask_insertion['size'] = df_ask_insertion['ask_size_delta']
    df_ask_insertion['direction']=0
    df_ask_insertion['action']='insertion'
    df_ask_insertion['price']=df_ask_insertion['ask']

    #bid insertion
    df_bid['bid_price_delta']=df_bid['bid'].diff()
    df_bid['bid_size_delta']=df_bid['bid_size'].diff()
    df_bid_insertion=df_bid[((df_bid['bid_price_delta']==0) &(df_bid['bid_size_delta']>0))|(df_bid['bid_price_delta']>0)].copy()
    df_bid_insertion['size'] = df_bid_insertion['bid_size_delta']
    df_bid_insertion['direction']=1
    df_bid_insertion['action']='insertion'
    df_bid_insertion['price']=df_bid_insertion['bid']
    
    #ask reduce (can be buy, or cancel ask)
    df_ask_reduce=df_ask[((df_ask['ask_price_delta']==0) &(df_ask['ask_size_delta']<0))|(df_ask['ask_price_delta']>0)]
    df_ask_reduce_merge = pd.concat([df_trade[df_trade['side']=='buy'], df_ask_reduce], axis=1)
    df_ask_reduce_merge[['amount', 'price']] = df_ask_reduce_merge[['amount', 'price']].fillna(method='ffill')
    df_ask_reduce_merge['direction'] = 1

    # ask cancellation.  cancel amount=ask_size_delta
    df_ask_cancel1 = df_ask_reduce_merge[(df_ask_reduce_merge['price']!=df_ask_reduce_merge['ask'])&(df_ask_reduce_merge['ask'].notnull())]

    # ask reduce, can be sell trade or bid cancel
    df_ask_reduce_temp = df_ask_reduce_merge[df_ask_reduce_merge['price']==df_ask_reduce_merge['ask']]
    df_ask_reduce_temp['trade_cum'] = df_ask_reduce_temp['amount'][(df_ask_reduce_temp['price'].diff()!=0)|(df_ask_reduce_temp['amount'].diff()!=0)]
    #df_ask_reduce_temp['trade_cum'] = df_ask_reduce_temp['trade_cum'].fillna(0)+df_ask_reduce_temp['ask_size_delta']
    #df_ask_reduce_temp['trade_cum'] = df_ask_reduce_temp['trade_cum'].round(8)

    prev_trade_cum = df_ask_reduce_temp['trade_cum'].iloc[0]
    for ix,row in df_ask_reduce_temp.iterrows():
        x = row['trade_cum']
        if np.isnan(x):
            x = prev_trade_cum + row['ask_size_delta']
        else:
            x = x + row['ask_size_delta']
        x = round(x,8)
        df_ask_reduce_temp.loc[ix, 'trade_cum'] = x
        prev_trade_cum = x
    df_ask_reduce_temp['is_trade'] = df_ask_reduce_temp['trade_cum']>-EPS

    # ask reduce caused by trade
    df_ask_trade = df_ask_reduce_temp[df_ask_reduce_temp['is_trade']]
    df_ask_cancel2 = df_ask_reduce_temp[~df_ask_reduce_temp['is_trade']]
    df_ask_trade['size'] = df_ask_trade['ask_size_delta'].abs()

    # ask redue due to cancellation
    df_ask_cancel = pd.concat([df_ask_cancel1, df_ask_cancel2], sort=False).sort_index()
    df_ask_cancel['size'] = df_ask_cancel['ask_size_delta'].abs()
    df_ask_cancel['action'] = 'cancellation'

    # ask reduce due to trade
    df_ask_trade['direction']=1
    df_ask_trade['action']='trade'

    #bid reduce (can be sell, or cancel bid)
    df_bid_reduce=df_bid[((df_bid['bid_price_delta']==0) &(df_bid['bid_size_delta']<0))|(df_bid['bid_price_delta']<0)]
    df_bid_reduce_merge = pd.concat([df_trade[df_trade['side']=='sell'], df_bid_reduce], axis=1)
    df_bid_reduce_merge[['amount', 'price']] = df_bid_reduce_merge[['amount', 'price']].fillna(method='ffill')
    df_bid_reduce_merge['direction'] = 0
    # bid cancellation.  cancel amount=bid_size_delta
    df_bid_cancel1 = df_bid_reduce_merge[(df_bid_reduce_merge['price']!=df_bid_reduce_merge['bid'])&(df_bid_reduce_merge['bid'].notnull())]

    # ask reduce, can be sell trade or bid cancel
    df_bid_reduce_temp = df_bid_reduce_merge[df_bid_reduce_merge['price']==df_bid_reduce_merge['bid']]
    df_bid_reduce_temp['trade_cum'] = df_bid_reduce_temp['amount'][(df_bid_reduce_merge['price'].diff()!=0)|(df_bid_reduce_merge['amount'].diff()!=0)]


    prev_trade_cum = df_bid_reduce_temp['trade_cum'].iloc[0]
    for ix,row in df_bid_reduce_temp.iterrows():
        x = row['trade_cum']
        if np.isnan(x):
            x = prev_trade_cum + row['bid_size_delta']
        else:
            x = x + row['bid_size_delta']
        x = round(x,8)
        df_bid_reduce_temp.loc[ix, 'trade_cum'] = x
        prev_trade_cum = x
    df_bid_reduce_temp['is_trade'] = df_bid_reduce_temp['trade_cum']>-EPS

    # bid reduce caused by trade
    df_bid_trade = df_bid_reduce_temp[df_bid_reduce_temp['is_trade']].copy()
    df_bid_cancel2 = df_bid_reduce_temp[~df_bid_reduce_temp['is_trade']]
    df_bid_trade['size'] = df_bid_trade['bid_size_delta'].abs()

    # bid redue due to cancellation
    df_bid_cancel = pd.concat([df_bid_cancel1, df_bid_cancel2], sort=False).sort_index()
    df_bid_cancel['size'] = df_bid_cancel['bid_size_delta'].abs()
    df_bid_cancel['action'] = 'cancellation'

    # bid reduce due to trade
    df_bid_trade['direction']=0
    df_bid_trade['action']='trade'

    list_actions = [df_ask_insertion, df_bid_insertion, df_ask_cancel, df_ask_trade, df_bid_cancel, df_bid_trade]
    df_res = pd.concat(list_actions, sort=False)[['action', 'price', 'direction', 'size']].sort_index()

    return df_res

In [8]:
infer_order_flow(df_data)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_gui

Unnamed: 0_level_0,action,price,direction,size
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-04-19 06:00:00.038000128+00:00,insertion,56507.70,0,0.021
2021-04-19 06:00:00.045000192+00:00,insertion,56507.70,0,0.091
2021-04-19 06:00:00.101000192+00:00,trade,56507.70,1,0.030
2021-04-19 06:00:00.102999808+00:00,insertion,56507.70,0,0.004
2021-04-19 06:00:00.220999936+00:00,cancellation,56507.69,0,0.001
...,...,...,...,...
2021-04-19 06:30:00.969000192+00:00,insertion,56749.17,0,0.076
2021-04-19 06:30:00.969000192+00:00,insertion,56746.96,1,-0.037
2021-04-19 06:30:00.980000+00:00,insertion,56749.16,0,-0.109
2021-04-19 06:30:00.997999872+00:00,insertion,56748.73,0,0.758
