In [0]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [0]:
# Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
import pandas as pd
import numpy as np
import datetime as dt
import timeit
import os.path

import torch
import torch.nn as nn
import torch.nn.functional as F

In [0]:
IH_input_dir = '/content/drive/My Drive/DL_Project/input_df_cross_assets_v2/'
front_month_map = {1: '1802', 2: '1803', 3: '1804', 4: '1805', 5: '1806', 6: '1807', 
           7: '1808', 8: '1809', 9: '1810', 10: '1811', 11: '1812', 12: '1901'}

In [0]:
class Input_Sampler:
  def __init__(self, x_col_names=['mid_lag_01s', 'mid_lag_05s'], 
         ref_col_names=['mid', 'bid1', 'ask1'], 
         sample_start_str='20180101',
         sample_end_str='20181031'):
    self.x_col_names = x_col_names  # actual predictive features (x)
    self.ref_col_names = ref_col_names # reference data
    self.x_col_len = len(self.x_col_names)
    self.state_len = self.x_col_len + 1 # x features, pos holding (actual state for model)
    # x features, pos holding, ref data, 
    # initial time entering position, cash
    # input to NN (only the first self.state_len elements actually used in NN fitting)
    self.input_len = self.state_len + len(self.ref_col_names) + 2 
    self.sample_start_str = sample_start_str
    self.sample_end_str = sample_end_str

    self.morning_start = dt.timedelta(hours=9, minutes=30)
    self.morning_end = dt.timedelta(hours=11, minutes=30)
    self.afternoon_start = dt.timedelta(hours=13)
    self.afternoon_end = dt.timedelta(hours=15)

    self.sample_morning_start = dt.timedelta(hours=9, minutes=30)
    self.sample_morning_end = dt.timedelta(hours=11, minutes=15)
    self.sample_afternoon_start = dt.timedelta(hours=13)
    self.sample_afternoon_end = dt.timedelta(hours=14, minutes=45)
  
  def get_input_shape(self):
    return self.input_len

  def get_state_shape(self):
    return self.state_len
  
  # return a (randomly sampled new) single state - numpy array (self.state_len, )
  def sample_state(self):
    file_name = 'NaN'
    while(not os.path.exists(IH_input_dir + file_name)):
      trade_date = pd.to_datetime(np.random.choice(
          pd.date_range(self.sample_start_str, self.sample_end_str)))
      file_name = 'input_' + trade_date.strftime('%Y%m%d') + '.csv.gz' 

    df = pd.read_csv(IH_input_dir + file_name)

    # use AM is 0, otherwise use PM
    if np.random.randint(0, 2) == 0: # random from {0, 1}
      df = df[(df['datetime']>trade_date+self.sample_morning_start) &
           (df['datetime']<trade_date+self.sample_morning_end)]
    else:
      df = df[(df['datetime']>trade_date+self.sample_afternoon_start) &
           (df['datetime']<trade_date+self.sample_afternoon_end)]
    
    sample_slice = df.sample(n=1)
    # timestamp_item = pd.to_datetime(sample_slice['datetime'])

    return np.array(list(sample_slice[self.x_col_names]) + # actual features
            [0] +  # position holding
            list(sample_slice[self.ref_col_names]) + # reference data
            [pd.NaT, 0.0]) # reference data: time entered pos, cash

    #return np.random.normal(0, 1, self.state_len)
  
  # return next_state, immediate return, done or not, info dict
  def get_feedback(self, s, a):
    return s_, r, done
    return s_, r, done, {}

In [0]:
# Hyper Parameters
BATCH_SIZE = 32
LR = 0.01 # learning rate
EPSILON = 0.9  # greedy policy
GAMMA = 0.9 # reward discount
TARGET_REPLACE_ITER = 100   # target update frequency
MEMORY_CAPACITY = 2000
NUM_EPOCHS = 400
N_ACTIONS = 3 # buy/hold/sell (number of actions to choose from)

env = Input_Sampler()
# (state + ref data)'s dimension:
# (x features, position held, ref data, time entered, cash)
N_STATES_REF = env.get_input_shape()

N_STATES = env.get_state_shape()  # x features + pos held

In [0]:
# Estimates Q(s, a): input state, output Q(s, a) for each a in action space
class Net(nn.Module):
  def __init__(self, input_features_dimension=N_STATES, 
         num_actions=N_ACTIONS, hidden_size=10):
    super(Net, self).__init__()
    self.input_features_dimension = input_features_dimension
    self.num_actions = num_actions
    self.hidden_size = hidden_size

    self.fc1 = nn.Linear(self.input_features_dimension, self.hidden_size)
    self.fc1.weight.data.normal_(0, 0.1)   # initialization
    self.out = nn.Linear(self.hidden_size, self.num_actions)
    self.out.weight.data.normal_(0, 0.1)   # initialization

  def forward(self, x):
    x = self.fc1(x)
    x = F.relu(x)
    actions_value = self.out(x)
    return actions_value

In [0]:
class DQN(object):
  def __init__(self, input_features_dimension=N_STATES,
         input_dimension=N_STATES_REF, num_actions=N_ACTIONS, 
         memory_size=MEMORY_CAPACITY, target_update_period=TARGET_REPLACE_ITER, 
         batch_size=BATCH_SIZE, discount_gamma=GAMMA, hidden_size=10):

    self.eval_net = Net(input_features_dimension, num_actions, hidden_size)
    self.target_net = Net(input_features_dimension, num_actions, hidden_size)

    self.input_features_dimension = input_features_dimension
    self.input_dimension = input_dimension
    self.num_actions = num_actions
    self.memory_size = memory_size
    self.target_update_period = target_update_period
    self.batch_size = batch_size
    self.discount_gamma = discount_gamma
    
    self.learn_step_counter = 0  # for target updating
    self.memory_counter = 0    # for storing memory
    self.memory = np.zeros((self.memory_size, self.input_features_dimension * 2 + 2)) # (2000, len([s, a, r, s_next]))
    self.optimizer = torch.optim.Adam(self.eval_net.parameters(), lr=LR)
    self.loss_func = nn.MSELoss()

  def choose_action(self, x):
    # unsqueeze(): Returns a new tensor with a dimension of size one inserted at the specified position
    # x size: (input_dimension, ) (1d)
    # torch.unsqueeze(torch.FloatTensor(x), 0) size: torch.Size([1, input_dimension]) (2d)
    # tensor([[x_1, x_2, ..., x_input_dimension]])
    x = Variable(torch.unsqueeze(torch.FloatTensor(x), 0)) # only one sample
    # input only one sample
    if np.random.uniform() < EPSILON:   # greedy
      # actions_value is: tensor([[x_1, x_2, x_3, ..., x_input_dimension]], grad_fn=<AddmmBackward>)
      actions_value = self.eval_net.forward(x)
      # torch.max(actions_value, 1) returns a 2D structure (max taken across axis=1)
      # 1st is array of max values (each element is max value across column for a row)
      # 2nd is array of indices of max value (column index of the max col value for a row)
      # torch.max(actions_value, 1)[1] extracts the max indices
      # torch.max(actions_value, 1)[1].data.numpy() transforms it into numpy array
      action = torch.max(actions_value, 1)[1].data.numpy()[0, 0]
      # action = torch.max(actions_value, 1)[1].data.numpy()
      # action = action[0] if ENV_A_SHAPE == 0 else action.reshape(ENV_A_SHAPE)  # return the argmax index
    else:   # random
      action = np.random.randint(0, self.num_actions)
    return action

  def store_transition(self, s, a, r, s_):
    transition = np.hstack((s, [a, r], s_))
    index = self.memory_counter % self.memory_size
    self.memory[index, :] = transition
    self.memory_counter += 1

  def learn(self):
    # target parameter update
    if self.learn_step_counter % self.target_update_period == 0:
        self.target_net.load_state_dict(self.eval_net.state_dict())
    self.learn_step_counter += 1

    # sample batch transitions
    # extract data from memory
    # (32,) vector, choose 32 (==BATCH_SIZE) random indicies in memory
    # the random sample is generated from np.arange(self.memory_size)
    sample_index = np.random.choice(self.memory_size, self.batch_size)
    b_memory = self.memory[sample_index, :] # (32, self.input_features_dimension *2 +2)
    # torch.Size([32, self.input_features_dimension]):
    b_s = Variable(torch.FloatTensor(b_memory[:, :self.input_features_dimension]))
    # torch.Size([32, 1]):
    b_a = Variable(torch.LongTensor(b_memory[:, self.input_features_dimension:self.input_features_dimension+1].astype(int)))
    b_r = Variable(torch.FloatTensor(b_memory[:, self.input_features_dimension+1:self.input_features_dimension+2]))
    b_s_ = Variable(torch.FloatTensor(b_memory[:, -self.input_features_dimension:]))

    # q_eval w.r.t the action in experience
    # according to action taken b_a, choose q_eval (q_eval has value for all actions)
    q_eval = self.eval_net(b_s).gather(1, b_a)  # shape (batch, 1)
    # self.eval_net(b_s) shape: torch.Size([32, self.num_actions]) (2D, one row for each batch item)
    # self.eval_net(b_s).gather(1, b_a)  shape: torch.Size([32, 1]) (2D, one row for each batch, 1 column in total,
    #    choose the value for row i as the b_a[i]-th column value of eval_net(b_s)'s ith row)
    #    selects the index of b_s's axis=1 based on value of b_a
    q_next = self.target_net(b_s_).detach()     # q_next does not pass error in opposite direction
    # detach from graph, don't backpropagate
    q_target = b_r + self.discount_gamma * q_next.max(1)[0]   # shape (batch, 1)
    loss = self.loss_func(q_eval, q_target)

    # calculate, and update evel_net
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()

In [0]:
dqn = DQN(input_features_dimension=N_STATES,
      input_dimension=N_STATES_REF, num_actions=N_ACTIONS, 
      memory_size=MEMORY_CAPACITY, target_update_period=TARGET_REPLACE_ITER, 
      batch_size=BATCH_SIZE, discount_gamma=GAMMA, hidden_size=10)

print('\nCollecting experience...')
for i_episode in range(NUM_EPOCHS):
  s = env.sample_state()  # single observation, e.g. array([-0.01674417,  0.03288027,  0.00696402, -0.01111758])
  x_input = s[:dqn.input_features_dimension]
  while True:
    a = dqn.choose_action(x_input)  # 0 ,or 1, or 2
    # take action
    # s_: next state; r: immediate reward
    # done: if terminal state
    # info {}
    s_, r, done, info = env.get_feedback(s, a)
    x_input_next = s_[:dqn.input_features_dimension]

    dqn.store_transition(x_input, a, r, x_input_next)

    if dqn.memory_counter > dqn.memory_size:
      dqn.learn()

    if done:
      break
    s = s_

In [0]:
s

array([-0.01674417,  0.03288027,  0.00696402, -0.01111758])

In [0]:
s = env.reset()
ep_r = 0

In [0]:
env.x_threshold

2.4

In [0]:
x = s
x = torch.unsqueeze(torch.FloatTensor(x), 0)
x

tensor([[-0.0167,  0.0329,  0.0070, -0.0111]])

In [0]:
action = np.random.randint(0, N_ACTIONS)
action = action 
action

1

In [0]:
s_, r, done, info = env.step(action)

In [0]:
info

{}

In [0]:
IH_dir = '/content/drive/My Drive/DL_Project/IH/'
IF_dir = '/content/drive/My Drive/DL_Project/IF/'
IC_dir = '/content/drive/My Drive/DL_Project/IC/'

In [0]:
front_month_map = {1: '1802', 2: '1803', 3: '1804', 4: '1805', 5: '1806', 6: '1807', 
           7: '1808', 8: '1809', 9: '1810', 10: '1811', 11: '1812', 12: '1901'}

In [0]:
Morning_Start = dt.timedelta(hours=9, minutes=30)
Morning_End = dt.timedelta(hours=11, minutes=30)
Afternoon_Start = dt.timedelta(hours=13)
Afternoon_End = dt.timedelta(hours=15)

In [0]:
def regularize(df, fill_last, fill_zero, reg_col = 'datetime', reg_str='0.5S'):
  df = df.set_index(reg_col)
  df = df.resample(reg_str).last()
  # ffill: propagate last valid observation forward to next valid
  df[fill_last] = df[fill_last].fillna(method='ffill')
  df[fill_zero] = df[fill_zero].fillna(0)
  return df

In [0]:
def add_features(df):
  
  # helper start:
  df['mid'] = 0.5*(df['bid1'] + df['ask1'])
  df['wmid'] = (df['bid1']*df['askv1'] + df['ask1']*df['bidv1']) / (df['askv1'] + df['bidv1'])
  
  df['trade_dir'] = 0 # approximation
  df.loc[((df['mid'] > df['mid'].shift(1)) | 
      (df['last'] >= df['ask1'].shift(1)) ) & (df['volume'] > 0), 'trade_dir'] = 1
  df.loc[((df['mid'] < df['mid'].shift(1)) | 
      (df['last'] <= df['bid1'].shift(1)) ) & (df['volume'] > 0), 'trade_dir'] = -1
  df['signed_volume'] = df['trade_dir'] * df['volume']
  # df['price_volume'] = df['volume'] * df['last']
  # df['price_volume'] = df['price_volume'].replace(to_replace=0, method='ffill')
  
  df['bid_bsize'] = df['bid1'] * df['bidv1']
  df['ask_asize'] = df['ask1'] * df['askv1']
  
  df['tick_up'] = 0
  df.loc[df['mid'] > df['mid'].shift(1), 'tick_up'] = 1
  
  df['tick_down'] = 0
  df.loc[df['mid'] < df['mid'].shift(1), 'tick_down'] = 1
  
  # helper end ---------------------------------------------------------------
  
  df['wmid_mid'] = df['wmid'] - df['mid']
  df['wmid_last'] = df['wmid'] - df['last']
  
  df['mid_lag_1tick'] = df['mid'] - df['mid'].shift(1)
  df['mid_lag_01s'] = df['mid'] - df['mid'].shift(1*2)
  df['mid_lag_05s'] = df['mid'] - df['mid'].shift(5*2)
  df['mid_lag_10s'] = df['mid'] - df['mid'].shift(10*2)
  df['mid_lag_30s'] = df['mid'] - df['mid'].shift(30*2)
  df['mid_lag_01m'] = df['mid'] - df['mid'].shift(1*60*2)
  df['mid_lag_05m'] = df['mid'] - df['mid'].shift(5*60*2)
  df['mid_lag_10m'] = df['mid'] - df['mid'].shift(10*60*2)
  # df['mid_lag_01s_lag1'] = df['mid'].shift(1*2) - df['mid'].shift(2*1*2)
  
  df['wmid_ma_05m'] = df['wmid'] - df['wmid'].rolling(5*60*2).mean()
  df['wmid_max_05m'] = df['wmid'] - df['wmid'].rolling(5*60*2).max()
  df['wmid_min_05m'] = df['wmid'] - df['wmid'].rolling(5*60*2).min()
  df['wmid_ma_10m'] = df['wmid'] - df['wmid'].rolling(10*60*2).mean()
  df['wmid_max_10m'] = df['wmid'] - df['wmid'].rolling(10*60*2).max()
  df['wmid_min_10m'] = df['wmid'] - df['wmid'].rolling(10*60*2).min()
  
  df['wmid_bidask_10m'] = df['wmid'] - ((df['bid_bsize'].rolling(10*60*2).mean() * df['askv1'].rolling(10*60*2).mean() + 
                                           df['ask_asize'].rolling(10*60*2).mean() * df['bidv1'].rolling(10*60*2).mean()) /
                                          (df['askv1'].rolling(10*60*2).mean() + df['bidv1'].rolling(10*60*2).mean()))
  df['wmid_bidask_01m'] = df['wmid'] - ((df['bid_bsize'].rolling(1*60*2).mean() * df['askv1'].rolling(1*60*2).mean() + 
                                           df['ask_asize'].rolling(1*60*2).mean() * df['bidv1'].rolling(1*60*2).mean()) /
                                          (df['askv1'].rolling(1*60*2).mean() + df['bidv1'].rolling(1*60*2).mean())) 
  
  df['total_volume_10s'] = df['volume'].rolling(10*2).sum()
  df['signed_volume_10s'] = df['signed_volume'].rolling(10*2).sum()
  df['signed_tick_10s'] = df['tick_up'].rolling(10*2).sum() - df['tick_down'].rolling(10*2).sum()

  df['total_volume_01m'] = df['volume'].rolling(60*2).sum()
  df['signed_volume_01m'] = df['signed_volume'].rolling(60*2).sum()
  df['signed_tick_01m'] = df['tick_up'].rolling(60*2).sum() - df['tick_down'].rolling(60*2).sum()
  
  df['total_volume_10m'] = df['volume'].rolling(10*60*2).sum()
  df['signed_volume_10m'] = df['signed_volume'].rolling(10*60*2).sum()
  df['signed_tick_10m'] = df['tick_up'].rolling(10*60*2).sum() - df['tick_down'].rolling(10*60*2).sum()
  
  
  # clean up helper columns:
  del df['bid_bsize']
  del df['ask_asize']
  del df['signed_volume']
  del df['tick_up']
  del df['tick_down']

  return df

In [0]:
def add_features_other_assets(df, ticker):
  
  # helper start:
  df['mid'] = 0.5*(df['bid1'] + df['ask1'])
  # helper end ---------------------------------------------------------------
  
  df['mid_lag_01s'] = df['mid'] - df['mid'].shift(1*2) 
  df['mid_lag_05s'] = df['mid'] - df['mid'].shift(5*2)  
  df['mid_lag_30s'] = df['mid'] - df['mid'].shift(30*2)  
  df['mid_lag_01m'] = df['mid'] - df['mid'].shift(1*60*2)
  df['mid_lag_05m'] = df['mid'] - df['mid'].shift(5*60*2)
  df['mid_lag_10m'] = df['mid'] - df['mid'].shift(10*60*2)

  df['total_volume_10s'] = df['volume'].rolling(10*2).sum()
  df['total_volume_01m'] = df['volume'].rolling(60*2).sum()
  df['total_volume_10m'] = df['volume'].rolling(10*60*2).sum()
  
  return df

In [0]:
def add_y(df):
  df['mid_1tick'] = df['mid'].shift(-1) - df['mid']  # future - current
  df['mid_01s'] = df['mid'].shift(-1*2) - df['mid']
  df['mid_05s'] = df['mid'].shift(-5*2) - df['mid']
  df['mid_10s'] = df['mid'].shift(-10*2) - df['mid']
  df['mid_30s'] = df['mid'].shift(-30*2) - df['mid']
  df['mid_01m'] = df['mid'].shift(-1*60*2) - df['mid']
  df['mid_05m'] = df['mid'].shift(-5*60*2) - df['mid']
  df['mid_10m'] = df['mid'].shift(-10*60*2) - df['mid']
  df['mid_15m'] = df['mid'].shift(-15*60*2) - df['mid']
  
  df['wmid_1tick'] = df['wmid'].shift(-1) - df['wmid']
  df['wmid_01s'] = df['wmid'].shift(-1*2) - df['wmid']
  df['wmid_05s'] = df['wmid'].shift(-5*2) - df['wmid']
  df['wmid_10s'] = df['wmid'].shift(-10*2) - df['wmid']
  df['wmid_30s'] = df['wmid'].shift(-30*2) - df['wmid']
  df['wmid_01m'] = df['wmid'].shift(-1*60*2) - df['wmid']
  df['wmid_05m'] = df['wmid'].shift(-5*60*2) - df['wmid']
  df['wmid_10m'] = df['wmid'].shift(-10*60*2) - df['wmid']
  df['wmid_15m'] = df['wmid'].shift(-15*60*2) - df['wmid']
  
  return df

In [0]:
def build_one_day_IH(df, other_assets, morning_session_start, morning_session_end,
                     afternoon_session_start, afternoon_session_end):
  df.rename(columns = {' instrument': 'instrument',
                     ' datetime': 'datetime',
                     ' last': 'last',
                     ' opi': 'opi',
                     ' turnover': 'turnover',
                     ' volume': 'volume',
                     ' bid1': 'bid1',
                     ' ask1': 'ask1',
                     ' bidv1': 'bidv1',
                     ' askv1': 'askv1'}, inplace = True)
  
  df = df[['datetime', 'last', 'opi', 'turnover', 'volume', 'bid1', 'ask1', 'bidv1', 'askv1']]
  fill_last_cols = ['last', 'opi', 'bid1', 'ask1', 'bidv1', 'askv1']
  fill_zero_cols = ['turnover', 'volume']

  df['datetime'] = pd.to_datetime(df['datetime'])

  df_am = df[(df['datetime'] >= morning_session_start) & 
             (df['datetime'] <= morning_session_end)]
  df_pm = df[(df['datetime'] >= afternoon_session_start) & 
             (df['datetime'] <= afternoon_session_end)]
  
  df_am = regularize(df_am, fill_last_cols, fill_zero_cols)
  df_pm = regularize(df_pm, fill_last_cols, fill_zero_cols)
  
  df_am = add_features(df_am)
  df_pm = add_features(df_pm)
  df_am = add_y(df_am)
  df_pm = add_y(df_pm)
  
  # expand columns
  for other_asset in other_assets:
    df_am = pd.merge(df_am, other_asset['am'], how='left', left_index=True, right_index=True)
    df_pm = pd.merge(df_pm, other_asset['pm'], how='left', left_index=True, right_index=True)
  
  # merge rows
  df = pd.concat([df_am, df_pm])
  return df
  

In [0]:
def build_one_day_other_asset(df, ticker, morning_session_start, morning_session_end,
                                  afternoon_session_start, afternoon_session_end):
  df.rename(columns = {' instrument': 'instrument',
                     ' datetime': 'datetime',
                     ' last': 'last',
                     ' opi': 'opi',
                     ' turnover': 'turnover',
                     ' volume': 'volume',
                     ' bid1': 'bid1',
                     ' ask1': 'ask1',
                     ' bidv1': 'bidv1',
                     ' askv1': 'askv1'}, inplace = True)
  
  df = df[['datetime', 'last', 'opi', 'turnover', 'volume', 'bid1', 'ask1', 'bidv1', 'askv1']]
  fill_last_cols = ['last', 'opi', 'bid1', 'ask1', 'bidv1', 'askv1']
  fill_zero_cols = ['turnover', 'volume']

  df['datetime'] = pd.to_datetime(df['datetime'])

  df_am = df[(df['datetime'] >= morning_session_start) & 
             (df['datetime'] <= morning_session_end)]
  df_pm = df[(df['datetime'] >= afternoon_session_start) & 
             (df['datetime'] <= afternoon_session_end)]
  
  df_am = regularize(df_am, fill_last_cols, fill_zero_cols)
  df_pm = regularize(df_pm, fill_last_cols, fill_zero_cols)
  
  df_am = add_features_other_assets(df_am, ticker)
  df_pm = add_features_other_assets(df_pm, ticker)
  
  df_am.columns = [ticker + '_' + c for c in df_am.columns]
  df_pm.columns = [ticker + '_' + c for c in df_pm.columns]
  
  return {'am': df_am, 'pm': df_pm}  

In [0]:
start_time = timeit.default_timer()

for trade_date in pd.date_range('20180101', '20181231'):
  contract = 'IH' + front_month_map[trade_date.month] + '_' + trade_date.strftime('%Y%m%d')
  contract_if = 'IF' + front_month_map[trade_date.month] + '_' + trade_date.strftime('%Y%m%d')
  contract_ic = 'IC' + front_month_map[trade_date.month] + '_' + trade_date.strftime('%Y%m%d')
  
  if not os.path.exists(IH_dir + contract + '.csv'):
    # print(IH_dir + contract + '.csv', ' not found')
    continue
  
  print('Processing', trade_date.date(), ' Contract:', contract)
  
  IH = pd.read_csv(IH_dir + contract + '.csv')
  IF = pd.read_csv(IF_dir + contract_if + '.csv')
  IC = pd.read_csv(IC_dir + contract_ic + '.csv')
  
  morning_start = trade_date + Morning_Start
  morning_end = trade_date + Morning_End
  afternoon_start = trade_date + Afternoon_Start
  afternoon_end = trade_date + Afternoon_End
  
   
  IF = build_one_day_other_asset(IF, 'IF', morning_start, morning_end,
                  afternoon_start, afternoon_end)
  IC = build_one_day_other_asset(IC, 'IC', morning_start, morning_end,
                   afternoon_start, afternoon_end)
  
  
  IH = build_one_day_IH(IH, [IF, IC], morning_start, morning_end,
              afternoon_start, afternoon_end)
  
  IH_dropna = IH.dropna()
  
  # IH.to_csv('/content/drive/My Drive/DL_Project/input_df_cross_assets_v2/raw_input_' + 
  #           trade_date.strftime('%Y%m%d') + '.csv.gz', compression='gzip')
  IH_dropna.to_csv('/content/drive/My Drive/DL_Project/input_df_cross_assets_v2/input_' + 
           trade_date.strftime('%Y%m%d') + '.csv.gz', compression='gzip')

print('Time took: ', timeit.default_timer() - start_time)

Processing 2018-01-02  Contract: IH1802_20180102


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Processing 2018-01-03  Contract: IH1802_20180103


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Processing 2018-01-04  Contract: IH1802_20180104
Processing 2018-01-05  Contract: IH1802_20180105
Processing 2018-01-08  Contract: IH1802_20180108
Processing 2018-01-09  Contract: IH1802_20180109
Processing 2018-01-10  Contract: IH1802_20180110
Processing 2018-01-11  Contract: IH1802_20180111
Processing 2018-01-12  Contract: IH1802_20180112
Processing 2018-01-15  Contract: IH1802_20180115
Processing 2018-01-16  Contract: IH1802_20180116
Processing 2018-01-17  Contract: IH1802_20180117
Processing 2018-01-18  Contract: IH1802_20180118
Processing 2018-01-19  Contract: IH1802_20180119
Processing 2018-01-22  Contract: IH1802_20180122
Processing 2018-01-23  Contract: IH1802_20180123
Processing 2018-01-24  Contract: IH1802_20180124
Processing 2018-01-25  Contract: IH1802_20180125
Processing 2018-01-26  Contract: IH1802_20180126
Processing 2018-01-29  Contract: IH1802_20180129
Processing 2018-01-30  Contract: IH1802_20180130
Processing 2018-01-31  Contract: IH1802_20180131
Processing 2018-02-0