In [1]:
from grid_search_tools import GSTools
from ptstrategy_cointegration import CointStrategy
from custom_analyzer import Metrics
from pandas_datafeed import PandasData
from pair_selector import *

import backtrader as bt
import backtrader.feeds as btfeeds
import pandas as pd
import warnings
import glob
import os
import uuid
import itertools
import json

%load_ext autoreload
%autoreload 2

In [2]:
# INPUT PARAMETERS

DIR = "../ib-data/nyse-daily-tech/"

BT_START_DT = '2016-03-18'
TEST_PERIOD = 500

PRE_PAIR_FORMATION = 252 + 52 - 60 - 45
PAIR_FORMATION_LEN = 60

# top PCT percentage of the pairs with lowest distance will be backtested
PCT = 0.01

# STRATEGY PARAMETERS
LOOKBACK_VALUE = [20, 30, 40, 45]
ENTER_THRESHOLD_SIZE = [2, 3]
EXIT_THRESHOLD_SIZE = [0.5]
LOSS_LIMIT = [-0.005, -0.01, -0.015]
MAX_LOOKBACK = max(LOOKBACK_VALUE)
CONSIDER_BORROW_COST = False
CONSIDER_COMMISSION = True

# ADDITIONAL INFO
OTHER_INFO = "Added alpha > 0 conditions"

# Where to save the ouputs
DST_DIR = "../backtest-results/cointegration/progress-report-v2/"

In [3]:
CONFIG = {
    'DIR': DIR,
    'BT_START_DT': BT_START_DT,
    'TEST_PERIOD': TEST_PERIOD,
    'PRE_PAIR_FORMATION': PRE_PAIR_FORMATION,
    'PAIR_FORMATION_LEN': PAIR_FORMATION_LEN,
    'PCT': PCT,
    'LOOKBACK_VALUE': LOOKBACK_VALUE,
    'ENTER_THRESHOLD_SIZE': ENTER_THRESHOLD_SIZE,
    'EXIT_THRESHOLD_SIZE': EXIT_THRESHOLD_SIZE,
    'LOSS_LIMIT': LOSS_LIMIT,
    'MAX_LOOKBACK': MAX_LOOKBACK,
    'CONSIDER_BORROW_COST': CONSIDER_BORROW_COST,
    'CONSIDER_COMMISSION': CONSIDER_COMMISSION,
    'DST_DIR': DST_DIR,
    'OTHER_INFO': OTHER_INFO,
}

# create json string
CONFIG_JSON_STR = json.dumps(CONFIG)

# create directory if neccessary
if not os.path.exists(DST_DIR):
    os.makedirs(DST_DIR)
    
# save json string to a file
with open(DST_DIR + 'config.json', 'w') as outfile:
    json.dump(CONFIG_JSON_STR, outfile)

In [None]:
print("---------------------------------------------------------------------")

###################################################################################################################
# Load data
data = GSTools.load_csv_files(DIR)
dt_idx = GSTools.get_trading_dates(data)

print("Initial number of datafeeds: " + str(len(dt_idx)) + ".")

###################################################################################################################
# get position of intended start date of backtest
bt_start_idx = dt_idx.get_loc(BT_START_DT)
size = PRE_PAIR_FORMATION + PAIR_FORMATION_LEN + MAX_LOOKBACK + (len(dt_idx) - bt_start_idx)

print("To fulfill BT_START_DT, PAIR_FORMATION_LEN and MAX_LOOKBACK, size = " + str(size) + ".")

# get datafeeds which fulfill size requirement
data = GSTools.cut_datafeeds(data, size=size)

print("After cutting datafeeds, " + str(len(data.keys())) + " datafeeds remaining.")

###################################################################################################################
# just to be safe, sync the start end dates of the dataframes
data, start_dt, end_dt = GSTools.sync_start_end(data)
dt_idx = GSTools.get_trading_dates(data)

print("Backtest start date: " + str(dt_idx[PRE_PAIR_FORMATION + PAIR_FORMATION_LEN + MAX_LOOKBACK]))
print("Backtest end date: " + str(dt_idx[PRE_PAIR_FORMATION + PAIR_FORMATION_LEN + MAX_LOOKBACK + TEST_PERIOD - 1]))

###################################################################################################################
# get aggregated close prices
close_df = GSTools.get_aggregated(data, col='close')

if close_df.isnull().values.any():
    warnings.warn("There are null values in the aggregated close price df.")
else:
    print("No null values detected in aggregated close price df.")

###################################################################################################################
# total number of stocks remaining
N = len(data.keys())

# number of pairs of interest
K = int(PCT * N * (N-1) / 2)

###################################################################################################################
# pair selection
good_pairs = coint(df=close_df[PRE_PAIR_FORMATION:PRE_PAIR_FORMATION + PAIR_FORMATION_LEN], intercept=True, sig_level=0.005)
good_pairs.sort(key=lambda x: x[2])
good_pairs = good_pairs[0 : K]

print("From " + str(int(N * (N-1) / 2)) + " pairs, " + str(len(good_pairs)) + " pairs passed the cointegration test.")

print("---------------------------------------------------------------------")

---------------------------------------------------------------------
Initial number of datafeeds: 2517.
To fulfill BT_START_DT, PAIR_FORMATION_LEN and MAX_LOOKBACK, size = 1009.
After cutting datafeeds, 116 datafeeds remaining.
Backtest start date: 2016-03-18 00:00:00
Backtest end date: 2018-03-13 00:00:00
No null values detected in aggregated close price df.
From 6670 pairs, 66 pairs passed the cointegration test.
---------------------------------------------------------------------


In [None]:
# combinations of parameters
param_combinations = list(itertools.product(LOOKBACK_VALUE, ENTER_THRESHOLD_SIZE, EXIT_THRESHOLD_SIZE, LOSS_LIMIT))

# list to store MACRO results
macro_results = []

for i, params in enumerate(param_combinations, 1):
    # set params
    print("Running " + str(i) + "/" + str(len(param_combinations)))
    print("Backtesting all pairs using parameters " + str(params))
    # list to store MICRO results
    results = []

    for pair in good_pairs:
        # get names of both stock
        stk0, stk1, _ = pair

        # get data of both stock
        stk0_df, stk1_df = data[stk0], data[stk1]
        stk0_df_test = stk0_df[PRE_PAIR_FORMATION + PAIR_FORMATION_LEN : PRE_PAIR_FORMATION + PAIR_FORMATION_LEN + MAX_LOOKBACK + TEST_PERIOD]
        stk1_df_test = stk1_df[PRE_PAIR_FORMATION + PAIR_FORMATION_LEN : PRE_PAIR_FORMATION + PAIR_FORMATION_LEN + MAX_LOOKBACK + TEST_PERIOD]
        
        # Create a cerebro
        cerebro = bt.Cerebro()

        # Create data feeds
        data0 = bt.feeds.PandasData(dataname=stk0_df_test, timeframe=(bt.TimeFrame.Days), datetime=0)
        data1 = bt.feeds.PandasData(dataname=stk1_df_test, timeframe=(bt.TimeFrame.Days), datetime=0)

        # add data feeds to cerebro
        cerebro.adddata(data0)
        cerebro.adddata(data1)

        # Add the strategy
        cerebro.addstrategy(CointStrategy, 
                            lookback=params[0],
                            max_lookback=MAX_LOOKBACK,
                            enter_threshold_size=params[1], 
                            exit_threshold_size=params[2], 
                            loss_limit=params[3],
                            consider_borrow_cost=CONSIDER_BORROW_COST,
                            consider_commission=CONSIDER_COMMISSION
                           )

        # Add analyzers
        cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name='mysharpe')
        cerebro.addanalyzer(Metrics, 
                            lookback=MAX_LOOKBACK, 
                            _name='metrics')

        # Add the commission - only stocks like a for each operation
        cerebro.broker.setcash(1000000)

        # And run it
        strat = cerebro.run()

        # get MICRO metrics
        sharperatio = strat[0].analyzers.mysharpe.get_analysis()['sharperatio']
        returnstd = strat[0].analyzers.metrics.returns_std()
        startcash = cerebro.getbroker().startingcash
        endcash = cerebro.getbroker().getvalue()
        profit = (endcash - startcash) / startcash
        
        results.append((stk0 + "-" + stk1, sharperatio, profit, returnstd))
    
    # convert to dataframe
    results_df = pd.DataFrame(results)
    results_df.columns = ['pair', 'sharpe_ratio', 'overall_return', 'returns_std']
    
    # save as csv
    uuid_str = str(uuid.uuid4())
    path = DST_DIR + str(uuid_str) + ".csv" 
    results_df.to_csv(path_or_buf=path, index=False)
    
    # calculate MACRO attributes
    avg_sharpe_ratio = results_df['sharpe_ratio'].mean()
    median_sharpe_ratio = results_df['sharpe_ratio'].median()
    
    avg_overall_return = results_df['overall_return'].mean()
    median_overall_return = results_df['overall_return'].median()
    overall_return_std = results_df['overall_return'].std()

    
    macro_results.append((params[0], 
                          params[1], 
                          params[2], 
                          params[3],
                          avg_sharpe_ratio,
                          median_sharpe_ratio,
                          avg_overall_return,
                          median_overall_return,
                          overall_return_std,
                          uuid_str
                         ))
    
    # nextline
    print("")

macro_results_df = pd.DataFrame(macro_results)
macro_results_df.columns = ['lookback', 
                            'enter_threshold_size', 
                            'exit_threshold_size',
                            'loss_limit', 
                            'avg_sharpe_ratio', 
                            'median_sharpe_ratio',
                            'avg_overall_return', 
                            'median_overall_return',
                            'overall_return_std',
                            'uuid']
macro_results_df.to_csv(DST_DIR + 'summary.csv', index=False)

Running 1/24
Backtesting all pairs using parameters (20, 2, 0.5, -0.005)
------------------------------------------------------------------
Running 2/24
Backtesting all pairs using parameters (20, 2, 0.5, -0.01)
------------------------------------------------------------------
Running 3/24
Backtesting all pairs using parameters (20, 2, 0.5, -0.015)
------------------------------------------------------------------
Running 4/24
Backtesting all pairs using parameters (20, 3, 0.5, -0.005)
------------------------------------------------------------------
Running 5/24
Backtesting all pairs using parameters (20, 3, 0.5, -0.01)
------------------------------------------------------------------
Running 6/24
Backtesting all pairs using parameters (20, 3, 0.5, -0.015)
------------------------------------------------------------------
Running 7/24
Backtesting all pairs using parameters (30, 2, 0.5, -0.005)
----------------------------

In [8]:
macro_results_df

Unnamed: 0,lookback,enter_threshold_size,exit_threshold_size,loss_limit,avg_sharpe_ratio,median_sharpe_ratio,avg_overall_return,median_overall_return,overall_return_std,uuid
0,30,2,0.5,-0.005,-0.641993,-0.775713,-0.019509,-0.029083,0.169579,dff5549a-12db-4ff3-afee-b11fdb033f8f
1,30,2,0.5,-0.01,-1.94594,-0.709625,-0.003963,-0.018254,0.202156,b5b752b8-00e3-4a69-b4f0-c39b230e228b
2,30,3,0.5,-0.005,-6.738797,-0.953346,-0.021354,-0.009595,0.096708,e18d4086-51d4-4f39-9fb0-9f1d57139e6a
3,30,3,0.5,-0.01,-1.614199,-1.000258,-0.020267,-0.017413,0.091602,cdf6cfdd-5ecd-4b03-ae46-45ad25dbe7e1
4,35,2,0.5,-0.005,-0.708938,-0.766938,-0.032067,-0.035309,0.182253,36634b1e-6260-44b1-bfe9-f9fa576ac862
5,35,2,0.5,-0.01,-1.082209,-0.753386,-0.035829,-0.044499,0.183808,b7e896b9-2c7c-45a2-b6a1-b000f3a6f0fd
6,35,3,0.5,-0.005,-5.380641,-0.872416,-0.014366,-0.003554,0.103843,d63a39f9-4449-46c7-8b04-cab05f8b8395
7,35,3,0.5,-0.01,-0.937925,-0.872416,-0.014284,-0.007504,0.107908,c3d84ec6-8ec4-4d6a-8741-8f9bf25af1a9
8,40,2,0.5,-0.005,-0.754175,-0.778012,-0.034271,-0.034185,0.172335,14dc6cc5-2b15-46bc-8d02-fb5c342bf964
9,40,2,0.5,-0.01,-0.525066,-0.768514,-0.040458,-0.0356,0.176153,7feda142-081b-4ff9-9ed7-b05d12a98213


In [8]:
macro_results_df[macro_results_df['median_overall_return'] == max(macro_results_df['median_overall_return'])]

Unnamed: 0,lookback,enter_threshold_size,exit_threshold_size,loss_limit,avg_sharpe_ratio,median_sharpe_ratio,avg_overall_return,median_overall_return,overall_return_std,uuid
0,30,2,0.5,-0.005,-0.32035,-0.469113,0.003924,0.0,0.141542,fb0ecd25-8379-4112-8893-d55791d0c275
2,30,3,0.5,-0.005,-6.054907,-1.267491,0.000297,0.0,0.055167,cdc67e7a-089c-4785-bec3-5ca907c47991
3,30,3,0.5,-0.01,-1.827137,-1.278244,0.001084,0.0,0.051753,a058a5b2-7dec-4d22-9d20-83ad31a23925
6,35,3,0.5,-0.005,-6.164747,-1.235997,0.001358,0.0,0.065158,f30ac3c7-914c-4e35-81df-9bb8521afcab
7,35,3,0.5,-0.01,-0.995015,-1.232798,0.000627,0.0,0.068717,1c6276bc-b11c-44ff-943b-3e62977ac8cb
14,45,3,0.5,-0.005,-2.317116,-1.236795,-0.002201,0.0,0.060148,37a79a7b-fc9b-45f3-b577-2054a77ff4d9
15,45,3,0.5,-0.01,-0.998292,-1.171443,-0.000449,0.0,0.06909,388a6684-8f5d-413b-8131-538db95aa73f


In [9]:
macro_results_df

Unnamed: 0,lookback,enter_threshold_size,exit_threshold_size,loss_limit,avg_sharpe_ratio,median_sharpe_ratio,avg_overall_return,median_overall_return,overall_return_std,uuid
0,30,2,0.5,-0.005,-0.156478,-1.095593,-0.023391,-0.020477,0.107185,fdf954e1-7ad5-47d1-b60e-ec6d50d8af2b
1,30,2,0.5,-0.01,-0.252219,-0.436165,0.00253,-0.009574,0.126433,6319d346-9d7e-4708-b9d5-80a906d2801f
2,30,3,0.5,-0.005,-5.818735,-1.410168,0.004309,0.0,0.050791,b7a871eb-8264-44f9-b0b6-1386430fd703
3,30,3,0.5,-0.01,-2.904534,-1.192084,0.006192,0.00326,0.052236,d16f4d80-80f0-4fa4-b103-877eec706e18
4,35,2,0.5,-0.005,-0.813441,-1.110579,-0.016751,-0.011906,0.112783,ea227ed2-3ca4-4c3e-bb97-464f0ab55b04
5,35,2,0.5,-0.01,-4.401413,-1.037338,-0.005072,0.001467,0.135263,abafc2c2-3411-43cc-a99f-ac5e9239cc0d
6,35,3,0.5,-0.005,-10.811344,-1.354049,0.004693,0.0,0.047964,1e7f1508-10f0-42b1-8d72-9e6e18b3401b
7,35,3,0.5,-0.01,-1.594834,-1.163554,0.007434,0.000959,0.059709,a4fa6e88-542f-47b2-a5a3-dd2e53ebecbc
8,40,2,0.5,-0.005,-0.787056,-1.119854,-0.030183,-0.051454,0.118467,24a94582-2c08-41e9-9ea8-5e3b6e8d2ef7
9,40,2,0.5,-0.01,-4.500145,-1.114121,-0.028709,-0.040864,0.118325,a1329734-8b4e-43cf-9afe-8e0ecc24c1e6
