In [0]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [0]:
# Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [0]:
import pandas as pd
import numpy as np
import datetime as dt
import timeit
import os.path

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
IH_dir = '/content/drive/My Drive/DL_Project/IH/'

In [0]:
front_month_map = {1: '1802', 2: '1803', 3: '1804', 4: '1805', 5: '1806', 6: '1807', 
           7: '1808', 8: '1809', 9: '1810', 10: '1811', 11: '1812', 12: '1901'}

In [0]:
Morning_Start = dt.timedelta(hours=9, minutes=30)
Morning_End = dt.timedelta(hours=11, minutes=30)
Afternoon_Start = dt.timedelta(hours=13)
Afternoon_End = dt.timedelta(hours=15)

In [0]:
def regularize(df, fill_last, fill_zero, reg_col = 'datetime', reg_str='0.5S'):
  df = df.set_index(reg_col)
  df = df.resample(reg_str).last()
  # ffill: propagate last valid observation forward to next valid
  df[fill_last] = df[fill_last].fillna(method='ffill')
  df[fill_zero] = df[fill_zero].fillna(0)
  return df

In [0]:
def add_features(df):
  
  # helper start:
  df['mid'] = 0.5*(df['bid1'] + df['ask1'])
  df['wmid'] = (df['bid1']*df['askv1'] + df['ask1']*df['bidv1']) / (df['askv1'] + df['bidv1'])
  
  df['prev_bid'] = df['bid1'].shift(1)
  df['prev_ask'] = df['ask1'].shift(1)

  df['ask_tick'] = 0 # ask order change
  # 1) ask was lifted to current level:
  df.loc[df['ask1'] > df['prev_ask'], 'ask_tick'] = -1
  # 2) approximation: assume all trades happened at previous ask 
  #   level if the most recent trade price was previous ask price
  df.loc[(df['prev_ask']==df['last']) & (df['volume']>0), 'ask_tick'] = \
      df.loc[(df['prev_ask']==df['last']) & (df['volume']>0), 'volume']
  

  df['bid_tick'] = 0 # bid order change
  # 1) bid was hit to current level:
  df.loc[df['bid1'] < df['prev_bid'], 'bid_tick'] = -1
  # 2) approximation: assume all trades happened at previous bid 
  #   level if the most recent trade price was previous bid price
  df.loc[(df['prev_bid']==df['last']) & (df['volume']>0), 'bid_tick'] = \
     df.loc[(df['prev_bid']==df['last']) & (df['volume']>0), 'volume']

  df['trade_dir'] = 0 # approximation of trade direction
  df.loc[((df['mid'] > df['mid'].shift(1)) | 
      (df['last'] >= df['ask1'].shift(1)) ) & (df['volume'] > 0), 'trade_dir'] = 1
  df.loc[((df['mid'] < df['mid'].shift(1)) | 
      (df['last'] <= df['bid1'].shift(1)) ) & (df['volume'] > 0), 'trade_dir'] = -1

  return df

In [0]:
# simple mm strategy, place bid and offer at same time
# if filled on one side (e.g. offer), stop placing 
# new order on that side (e.g. offer)

def add_trade_strategy(df):
  max_pos = 3
  pos = 0
  cash = 0.0
  action = 0

  bid_order = None # bid price, size ahead of it in the queue
  ask_order = None # ask price, size ahead of it in the queue

  # enter_price = np.nan
  df['pos'] = 0.0
  df['cash'] = 0.0
  df['action'] = 0

  for i in df.index:
    action = 0

    # Fill/cancel existing bid order if market moved
    if bid_order:
      # order filled in full:
      if ((df.loc[i, 'bid_tick'] < 0) or 
          (df.loc[i, 'bid_tick'] > bid_order[1])):
        pos += 1
        cash -= bid_order[0]
        action = 1
        bid_order = None 
      # order move forward in queue:
      elif df.loc[i, 'bid_tick'] > 0: 
        bid_order[1] -= df.loc[i, 'bid_tick']
      # bid ticked up, order is obsolete and canceled
      elif df.loc[i, 'bid1'] > bid_order[0]:
        bid_order = None

    # Fill/cancel existing ask order if market moved
    if ask_order:
      # order filled in full:
      if ((df.loc[i, 'ask_tick'] < 0) or 
          (df.loc[i, 'ask_tick'] > ask_order[1])):
        pos -= 1
        cash += ask_order[0]
        action = -1
        ask_order = None 
      # order move forward in queue:
      elif df.loc[i, 'ask_tick'] > 0: 
        ask_order[1] -= df.loc[i, 'ask_tick']
      # ask ticked down, order is obsolete and canceled
      elif df.loc[i, 'ask1'] < ask_order[0]:
        ask_order = None

    # place new order
    if not bid_order and pos < max_pos:
      bid_order = [df.loc[i, 'bid1'], df.loc[i, 'bidv1']] 
    if not ask_order and pos > -max_pos:
      ask_order = [df.loc[i, 'ask1'], df.loc[i, 'askv1']]

    df.loc[i, 'pos'] = pos
    df.loc[i, 'cash'] = cash
    df.loc[i, 'action'] = action
  
  # assume can offload positions at EOD for 0 transaction costs
  df['cum_pnl'] = df['pos'] * df['mid'] + df['cash'] 

  return df


In [0]:
def build_one_day_IH(df, morning_session_start, morning_session_end,
           afternoon_session_start, afternoon_session_end,
           pnl_dict):
  df.rename(columns = {' instrument': 'instrument',
                     ' datetime': 'datetime',
                     ' last': 'last',
                     ' opi': 'opi',
                     ' turnover': 'turnover',
                     ' volume': 'volume',
                     ' bid1': 'bid1',
                     ' ask1': 'ask1',
                     ' bidv1': 'bidv1',
                     ' askv1': 'askv1'}, inplace = True)
  
  df = df[['datetime', 'last', 'opi', 'turnover', 'volume', 'bid1', 'ask1', 'bidv1', 'askv1']]
  fill_last_cols = ['last', 'opi', 'bid1', 'ask1', 'bidv1', 'askv1']
  fill_zero_cols = ['turnover', 'volume']

  df['datetime'] = pd.to_datetime(df['datetime'])

  df_am = df[(df['datetime'] >= morning_session_start) & 
             (df['datetime'] <= morning_session_end)]
  df_pm = df[(df['datetime'] >= afternoon_session_start) & 
             (df['datetime'] <= afternoon_session_end)]
  
  # df_am = regularize(df_am, fill_last_cols, fill_zero_cols)
  # df_pm = regularize(df_pm, fill_last_cols, fill_zero_cols)
  # df_am = df_am.set_index('datetime')
  # df_pm = df_pm.set_index('datetime')
  
  df_am = add_features(df_am)
  df_pm = add_features(df_pm)

  df_am = add_trade_strategy(df_am)
  df_pm = add_trade_strategy(df_pm)
  
  pnl_dict['am'] = df_am['cum_pnl'].iloc[-1]
  pnl_dict['pm'] = df_pm['cum_pnl'].iloc[-1]

  # merge rows
  df = pd.concat([df_am, df_pm])
  return df
  

In [0]:
start_time = timeit.default_timer()

pnl_summary = {'date': [], 'am': [], 'pm': []}

prev_am_pnl = 0.0
prev_pm_pnl = 0.0
total_pnl = 0.0

for trade_date in pd.date_range('20180101', '20181231'):
  contract = 'IH' + front_month_map[trade_date.month] + '_' + trade_date.strftime('%Y%m%d')
  
  if not os.path.exists(IH_dir + contract + '.csv'):
    # print(IH_dir + contract + '.csv', ' not found')
    continue
  
  print('Processing', trade_date.date(), ' Contract:', contract,
     'Prev AM PnL', prev_am_pnl,
     'Prev PM PnL', prev_pm_pnl,
     'Prev Day PnL', prev_am_pnl + prev_pm_pnl,
      ' Total PnL:', total_pnl)
  
  IH = pd.read_csv(IH_dir + contract + '.csv')
  
  morning_start = trade_date + Morning_Start
  morning_end = trade_date + Morning_End
  afternoon_start = trade_date + Afternoon_Start
  afternoon_end = trade_date + Afternoon_End
  
  pnl_cur = {'am': 0.0, 'pm': 0.0}

  IH = build_one_day_IH(IH, morning_start, morning_end,
              afternoon_start, afternoon_end, pnl_cur)
  
  pnl_summary['date'].append(trade_date.date())
  pnl_summary['am'].append(pnl_cur['am'])
  pnl_summary['pm'].append(pnl_cur['pm'])

  prev_am_pnl = pnl_cur['am']
  prev_pm_pnl = pnl_cur['pm']
  total_pnl += (prev_am_pnl + prev_pm_pnl)

  # IH_dropna = IH.dropna()
  IH.to_csv('/content/drive/My Drive/DL_Project/mm_strategy_om/mm_backtest_' + 
            trade_date.strftime('%Y%m%d') + '.csv')

pnl_summary = pd.DataFrame.from_dict(pnl_summary)
pnl_summary['cum_pnl'] = pnl_summary['am'] + pnl_summary['pm']
pnl_summary.to_csv('/content/drive/My Drive/DL_Project/mm_strategy_om/pnl_summary.csv')
print('Time took: ', timeit.default_timer() - start_time)

Processing 2018-01-02  Contract: IH1802_20180102 Prev AM PnL 0.0 Prev PM PnL 0.0 Prev Day PnL 0.0  Total PnL: 0.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http:/

Processing 2018-01-03  Contract: IH1802_20180103 Prev AM PnL 492.2000000000062 Prev PM PnL 357.60000000001173 Prev Day PnL 849.8000000000179  Total PnL: 849.8000000000179


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http:/

Processing 2018-01-04  Contract: IH1802_20180104 Prev AM PnL 588.7000000000135 Prev PM PnL 563.4000000000051 Prev Day PnL 1152.1000000000186  Total PnL: 2001.9000000000365
Processing 2018-01-05  Contract: IH1802_20180105 Prev AM PnL 577.7000000000075 Prev PM PnL 562.8000000000256 Prev Day PnL 1140.5000000000332  Total PnL: 3142.4000000000697
Processing 2018-01-08  Contract: IH1802_20180108 Prev AM PnL 445.6000000000022 Prev PM PnL 370.09999999998945 Prev Day PnL 815.6999999999916  Total PnL: 3958.1000000000613
Processing 2018-01-09  Contract: IH1802_20180109 Prev AM PnL 668.4999999999873 Prev PM PnL 332.6000000000022 Prev Day PnL 1001.0999999999894  Total PnL: 4959.200000000051
Processing 2018-01-10  Contract: IH1802_20180110 Prev AM PnL 528.0999999999876 Prev PM PnL 474.39999999999327 Prev Day PnL 1002.4999999999809  Total PnL: 5961.700000000032
Processing 2018-01-11  Contract: IH1802_20180111 Prev AM PnL 364.2000000000007 Prev PM PnL 460.6000000000022 Prev Day PnL 824.8000000000029  