In [18]:
from grid_search_tools import GSTools
from ptstrategy_cointegration import CointStrategy
from custom_analyzer import Metrics
from pandas_datafeed import PandasData
from pair_selector import *

import backtrader as bt
import backtrader.feeds as btfeeds
import pandas as pd
import warnings
import glob
import os
import uuid
import itertools
import json
import datetime

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [31]:
# INPUT PARAMETERS
# Every tunable of the experiment lives in this cell; the later cells only read
# these constants.

# Directory handed to GSTools.load_csv_files() to load the price data.
DIR = "../ib-data/nyse-daily-tech/"

# Intended first date of the backtest; looked up in the trading-date index.
BT_START_DT = '2018-03-19'
# Number of rows in the test window that follows the MAX_LOOKBACK rows.
TEST_PERIOD = 200

# Number of leading rows skipped before the pair-formation window starts
# (the expression evaluates to 696).
PRE_PAIR_FORMATION = 252 + 252 + 252 + 52 - 60 - 52
# Number of rows of close prices passed to the cointegration test.
PAIR_FORMATION_LEN = 60

# Fraction (not a percentage) of all N*(N-1)/2 candidate pairs that caps how
# many selected pairs are backtested. Pairs are sorted ascending by the third
# element of each pair tuple before the cap is applied.
# NOTE(review): the earlier wording said "lowest distance"; the ranking key is
# whatever coint() returns in position 2 -- confirm against pair_selector.
PCT = 0.9

# STRATEGY PARAMETERS
# Each list below is one axis of the parameter grid; the grid-search cell runs
# the Cartesian product of all four lists.
LOOKBACK_VALUE = [20, 40, 52]
ENTER_THRESHOLD_SIZE = [2, 3]
EXIT_THRESHOLD_SIZE = [0.5]
LOSS_LIMIT = [-0.005]
# Largest lookback in the grid; used to size the data slices so that every
# lookback value is run on the same rows.
MAX_LOOKBACK = max(LOOKBACK_VALUE)
# Flags forwarded unchanged to the strategy.
CONSIDER_BORROW_COST = False
CONSIDER_COMMISSION = True

# ADDITIONAL INFO
# Free-text note stored alongside the other settings in config.json.
OTHER_INFO = ""

# Where to save the outputs.
# File names are appended with plain string concatenation, so the trailing
# slash is required.
DST_DIR = "../backtest-results/cointegration/experiment3-y3/"

In [32]:
# Snapshot of every input parameter of this run, saved next to the result
# files so that each experiment directory records the settings that produced it.
CONFIG = {
    'DIR': DIR,
    'BT_START_DT': BT_START_DT,
    'TEST_PERIOD': TEST_PERIOD,
    'PRE_PAIR_FORMATION': PRE_PAIR_FORMATION,
    'PAIR_FORMATION_LEN': PAIR_FORMATION_LEN,
    'PCT': PCT,
    'LOOKBACK_VALUE': LOOKBACK_VALUE,
    'ENTER_THRESHOLD_SIZE': ENTER_THRESHOLD_SIZE,
    'EXIT_THRESHOLD_SIZE': EXIT_THRESHOLD_SIZE,
    'LOSS_LIMIT': LOSS_LIMIT,
    'MAX_LOOKBACK': MAX_LOOKBACK,
    'CONSIDER_BORROW_COST': CONSIDER_BORROW_COST,
    'CONSIDER_COMMISSION': CONSIDER_COMMISSION,
    'DST_DIR': DST_DIR,
    'OTHER_INFO': OTHER_INFO,
}

# JSON text of the configuration (kept as a variable for interactive use).
CONFIG_JSON_STR = json.dumps(CONFIG)

# Create the output directory if necessary. exist_ok=True makes this a no-op
# when the directory already exists and avoids a separate existence check.
os.makedirs(DST_DIR, exist_ok=True)

# Save the configuration as a JSON object.
# The dict itself is dumped: dumping CONFIG_JSON_STR would encode the already
# serialised text a second time, leaving config.json holding one quoted,
# escaped string that has to be decoded twice to get the settings back.
with open(DST_DIR + 'config.json', 'w') as outfile:
    json.dump(CONFIG, outfile)

In [26]:
# Load the price data, trim it to the rows this experiment needs, and select
# the pairs to backtest with a cointegration test on the formation window.
print("---------------------------------------------------------------------")

###################################################################################################################
# Load data
data = GSTools.load_csv_files(DIR)
dt_idx = GSTools.get_trading_dates(data)

# NOTE(review): the value printed here is len(dt_idx), i.e. the length of the
# trading-date index, although the label says "datafeeds" -- confirm which
# quantity was intended.
print("Initial number of datafeeds: " + str(len(dt_idx)) + ".")

###################################################################################################################
# get position of intended start date of backtest
bt_start_idx = dt_idx.get_loc(BT_START_DT)
# rows required per datafeed: everything before the backtest start that the
# experiment consumes, plus every row from the start date to the end of data
size = PRE_PAIR_FORMATION + PAIR_FORMATION_LEN + MAX_LOOKBACK + (len(dt_idx) - bt_start_idx)

print("To fulfill BT_START_DT, PAIR_FORMATION_LEN and MAX_LOOKBACK, size = " + str(size) + ".")

# get datafeeds which fulfill size requirement
data = GSTools.cut_datafeeds(data, size=size)

print("After cutting datafeeds, " + str(len(data)) + " datafeeds remaining.")

###################################################################################################################
# just to be safe, sync the start end dates of the dataframes
data, start_dt, end_dt = GSTools.sync_start_end(data)
dt_idx = GSTools.get_trading_dates(data)

# positions of the first and last backtest rows in the synced date index
bt_first_pos = PRE_PAIR_FORMATION + PAIR_FORMATION_LEN + MAX_LOOKBACK
bt_last_pos = bt_first_pos + TEST_PERIOD - 1

# Fail with an explicit message instead of a bare IndexError when the
# requested test window runs past the end of the available data.
if bt_last_pos >= len(dt_idx):
    raise ValueError(
        "TEST_PERIOD = " + str(TEST_PERIOD) + " needs " + str(bt_last_pos + 1)
        + " rows but only " + str(len(dt_idx)) + " trading dates are available."
    )

print("Backtest start date: " + str(dt_idx[bt_first_pos]))
print("Backtest end date: " + str(dt_idx[bt_last_pos]))

###################################################################################################################
# get aggregated close prices
close_df = GSTools.get_aggregated(data, col='close')

if close_df.isnull().values.any():
    warnings.warn("There are null values in the aggregated close price df.")
else:
    print("No null values detected in aggregated close price df.")

###################################################################################################################
# total number of stocks remaining
N = len(data)

# total number of distinct pairs (exact integer arithmetic)
total_pairs = N * (N - 1) // 2

# number of pairs of interest
K = int(PCT * N * (N-1) / 2)

###################################################################################################################
# pair selection
good_pairs = coint(df=close_df[PRE_PAIR_FORMATION:PRE_PAIR_FORMATION + PAIR_FORMATION_LEN], intercept=True, sig_level=0.005)

# Count the pairs BEFORE truncating to K; counting afterwards would report at
# most K pairs as having passed the test, even when more did.
num_passed = len(good_pairs)

# rank ascending by the third element of each pair tuple and keep the first K
good_pairs.sort(key=lambda x: x[2])
good_pairs = good_pairs[0 : K]

print("From " + str(total_pairs) + " pairs, " + str(num_passed) + " pairs passed the cointegration test.")
print("Keeping " + str(len(good_pairs)) + " pairs for backtesting (cap K = " + str(K) + ").")

print("---------------------------------------------------------------------")

---------------------------------------------------------------------
Initial number of datafeeds: 2517.
To fulfill BT_START_DT, PAIR_FORMATION_LEN and MAX_LOOKBACK, size = 1010.
After cutting datafeeds, 116 datafeeds remaining.
Backtest start date: 2018-03-19 00:00:00
Backtest end date: 2019-01-02 00:00:00
No null values detected in aggregated close price df.
From 6670 pairs, 231 pairs passed the cointegration test.
---------------------------------------------------------------------


In [33]:
# Grid search: backtest every selected pair under every parameter combination.
# For each combination, one CSV of per-pair (MICRO) results is written to
# DST_DIR under a fresh UUID; the per-combination (MACRO) aggregates are
# collected and written to summary.csv at the end.

# combinations of parameters
param_combinations = list(itertools.product(LOOKBACK_VALUE, ENTER_THRESHOLD_SIZE, EXIT_THRESHOLD_SIZE, LOSS_LIMIT))

# Row positions of the slice handed to backtrader. They depend only on the
# notebook-level constants, so they are computed once rather than per pair.
# The slice starts right after the pair-formation window and covers
# MAX_LOOKBACK rows followed by TEST_PERIOD rows.
test_start = PRE_PAIR_FORMATION + PAIR_FORMATION_LEN
test_end = test_start + MAX_LOOKBACK + TEST_PERIOD

# Column layouts. Passing them to the DataFrame constructor (instead of
# assigning .columns afterwards) keeps the code working when there are no rows.
MICRO_COLUMNS = ['pair', 'sharpe_ratio', 'overall_return', 'returns_std']
MACRO_COLUMNS = ['lookback',
                 'enter_threshold_size',
                 'exit_threshold_size',
                 'loss_limit',
                 'avg_sharpe_ratio',
                 'median_sharpe_ratio',
                 'avg_overall_return',
                 'median_overall_return',
                 'overall_return_std',
                 'uuid']

# list to store MACRO results
macro_results = []

for i, params in enumerate(param_combinations, 1):
    # unpack in the same order the grid was built
    lookback, enter_threshold_size, exit_threshold_size, loss_limit = params

    print("Running " + str(i) + "/" + str(len(param_combinations)))
    print(str(datetime.datetime.now()))
    print("Backtesting all pairs using parameters " + str(params))

    # list to store MICRO results
    results = []

    # the third tuple element is not needed here; a named throwaway is used so
    # that IPython's `_` (last output) is not overwritten
    for stk0, stk1, _pair_stat in good_pairs:
        # get the test slice of both stocks
        stk0_df_test = data[stk0][test_start:test_end]
        stk1_df_test = data[stk1][test_start:test_end]

        # Create a cerebro
        cerebro = bt.Cerebro()

        # Create data feeds
        data0 = bt.feeds.PandasData(dataname=stk0_df_test, timeframe=(bt.TimeFrame.Days), datetime=0)
        data1 = bt.feeds.PandasData(dataname=stk1_df_test, timeframe=(bt.TimeFrame.Days), datetime=0)

        # add data feeds to cerebro
        cerebro.adddata(data0)
        cerebro.adddata(data1)

        # Add the strategy
        cerebro.addstrategy(CointStrategy,
                            lookback=lookback,
                            max_lookback=MAX_LOOKBACK,
                            enter_threshold_size=enter_threshold_size,
                            exit_threshold_size=exit_threshold_size,
                            loss_limit=loss_limit,
                            consider_borrow_cost=CONSIDER_BORROW_COST,
                            consider_commission=CONSIDER_COMMISSION
                           )

        # Add analyzers
        cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name='mysharpe')
        cerebro.addanalyzer(Metrics,
                            lookback=MAX_LOOKBACK,
                            _name='metrics')

        # Set the starting cash. No broker commission is configured in this
        # cell; the CONSIDER_COMMISSION flag is passed to the strategy instead.
        cerebro.broker.setcash(1000000)

        # And run it
        strat = cerebro.run()

        # get MICRO metrics
        sharperatio = strat[0].analyzers.mysharpe.get_analysis()['sharperatio']
        returnstd = strat[0].analyzers.metrics.returns_std()
        startcash = cerebro.getbroker().startingcash
        endvalue = cerebro.getbroker().getvalue()
        profit = (endvalue - startcash) / startcash

        results.append((stk0 + "-" + stk1, sharperatio, profit, returnstd))

    # convert to dataframe
    results_df = pd.DataFrame(results, columns=MICRO_COLUMNS)

    # The Sharpe analyzer may yield None for a pair. Coercing guarantees a
    # numeric column (None -> NaN) so mean/median below skip those pairs no
    # matter how pandas inferred the dtype.
    results_df['sharpe_ratio'] = pd.to_numeric(results_df['sharpe_ratio'], errors='coerce')

    # save as csv; the UUID ties this file to its row in summary.csv
    uuid_str = str(uuid.uuid4())
    path = DST_DIR + uuid_str + ".csv"
    results_df.to_csv(path_or_buf=path, index=False)

    # calculate MACRO attributes
    avg_sharpe_ratio = results_df['sharpe_ratio'].mean()
    median_sharpe_ratio = results_df['sharpe_ratio'].median()

    avg_overall_return = results_df['overall_return'].mean()
    median_overall_return = results_df['overall_return'].median()
    overall_return_std = results_df['overall_return'].std()

    macro_results.append((lookback,
                          enter_threshold_size,
                          exit_threshold_size,
                          loss_limit,
                          avg_sharpe_ratio,
                          median_sharpe_ratio,
                          avg_overall_return,
                          median_overall_return,
                          overall_return_std,
                          uuid_str
                         ))

    # nextline
    print("")

macro_results_df = pd.DataFrame(macro_results, columns=MACRO_COLUMNS)
macro_results_df.to_csv(DST_DIR + 'summary.csv', index=False)

Running 1/6
2019-02-13 06:28:33.671447
Backtesting all pairs using parameters (20, 2, 0.5, -0.005)
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Running 2/6
2019-02-13 06:31:48.847019
Backtesting all pairs using parameters (20, 3, 0.5, -0.005)
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Running 3/6
2019-02-13 06:35:00.392557
Backtesting all pairs using parameters (40, 2, 0.5, -0.005)
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Running

In [34]:
# Read the saved grid-search summary back from DST_DIR and render it as the
# cell output.
macro_results_df = pd.read_csv(
    filepath_or_buffer=DST_DIR + "summary.csv",
)
macro_results_df

Unnamed: 0,lookback,enter_threshold_size,exit_threshold_size,loss_limit,avg_sharpe_ratio,median_sharpe_ratio,avg_overall_return,median_overall_return,overall_return_std,uuid
0,20,2,0.5,-0.005,-0.52856,0.446749,0.045094,0.038953,0.134364,06db143f-d8f9-4ba5-b29b-00652280abc7
1,20,3,0.5,-0.005,-45.40575,0.521311,0.002021,0.0,0.014235,230de111-cf44-40c6-987b-d134a7b1c4b2
2,40,2,0.5,-0.005,-0.653909,-0.654533,0.029715,0.009244,0.167664,651812e0-66d5-413b-bb2d-2597d90c03f7
3,40,3,0.5,-0.005,-1.71067,-1.453839,0.002538,0.0,0.044709,a10626b8-5e1b-447f-b867-7b477e93276d
4,52,2,0.5,-0.005,-2.647379,-0.498075,0.026258,0.010508,0.147629,2da85d74-75b7-4501-856d-c309e0beec02
5,52,3,0.5,-0.005,-3.065099,-1.426912,0.008305,0.0,0.071121,b5c080f0-7ae6-46df-9299-835f6eb2d370


In [13]:
# Row(s) of the summary with the highest median overall return.
# Series.max() skips NaN entries. The builtin max() compares element by
# element and, depending on where a NaN sits in the column, can return NaN,
# which then matches no row at all.
median_returns = macro_results_df['median_overall_return']
macro_results_df[median_returns == median_returns.max()]

Unnamed: 0,lookback,enter_threshold_size,exit_threshold_size,loss_limit,avg_sharpe_ratio,median_sharpe_ratio,avg_overall_return,median_overall_return,overall_return_std,uuid
0,20,2,0.5,-0.005,,,0.040286,0.034262,0.11514,fdf5da80-f046-429c-b9e1-2b29bed0cab6


In [14]:
# Display the whole summary table so all parameter combinations can be
# compared side by side.
macro_results_df

Unnamed: 0,lookback,enter_threshold_size,exit_threshold_size,loss_limit,avg_sharpe_ratio,median_sharpe_ratio,avg_overall_return,median_overall_return,overall_return_std,uuid
0,20,2,0.5,-0.005,,,0.040286,0.034262,0.11514,fdf5da80-f046-429c-b9e1-2b29bed0cab6
1,20,3,0.5,-0.005,,,0.004781,0.0,0.022553,02c34cb3-deeb-4286-84bb-d06a3e7e4193
2,40,2,0.5,-0.005,,,0.025462,0.026327,0.126825,999ae1ba-e215-4e53-ac54-06488afcd170
3,40,3,0.5,-0.005,,,0.010998,0.0,0.045017,851dfd54-1ab2-4296-b260-9afe35b60192
4,52,2,0.5,-0.005,,,0.012924,0.006478,0.119521,db84021c-da75-411d-a775-205861438ec3
5,52,3,0.5,-0.005,,,0.005982,0.0,0.045221,c92e1c60-d87e-4319-8828-184cd1ab3a68
