In [1]:
from grid_search_tools import GSTools
from ptstrategy_cointegration import CointStrategy
from custom_analyzer import Metrics
from pandas_datafeed import PandasData
from pair_selector import *

import backtrader as bt
import backtrader.feeds as btfeeds
import pandas as pd
import warnings
import glob
import os
import uuid
import itertools
import json

%load_ext autoreload
%autoreload 2

In [2]:
# INPUT PARAMETERS

DIR = "../ib-data/nyse-daily-tech/"

BT_START_DT = '2016-01-04'
TEST_PERIOD = 252 + 3

PAIR_FORMATION_LEN = 60

# top PCT percentage of the pairs with lowest distance will be backtested
PCT = 0.01

# STRATEGY PARAMETERS
LOOKBACK_VALUE = [30, 35, 40, 45, 50]
ENTER_THRESHOLD_SIZE = [2, 3]
EXIT_THRESHOLD_SIZE = [0.5]
LOSS_LIMIT = [-0.005, -0.01]
MAX_LOOKBACK = max(LOOKBACK_VALUE)

# Where to save the ouputs
DST_DIR = "../backtest-results/cointegration/test1/"

In [3]:
CONFIG = {
    'DIR': DIR,
    'BT_START_DT': BT_START_DT,
    'TEST_PERIOD': TEST_PERIOD,
    'PAIR_FORMATION_LEN': PAIR_FORMATION_LEN,
    'PCT': PCT,
    'LOOKBACK_VALUE': LOOKBACK_VALUE,
    'ENTER_THRESHOLD_SIZE': ENTER_THRESHOLD_SIZE,
    'EXIT_THRESHOLD_SIZE': EXIT_THRESHOLD_SIZE,
    'LOSS_LIMIT': LOSS_LIMIT,
    'MAX_LOOKBACK': MAX_LOOKBACK,
    'DST_DIR': DST_DIR,
}

# create json string
CONFIG_JSON_STR = json.dumps(CONFIG)

# create directory if neccessary
if not os.path.exists(DST_DIR):
    os.makedirs(DST_DIR)
    
# save json string to a file
with open(DST_DIR + 'config.json', 'w') as outfile:
    json.dump(CONFIG_JSON_STR, outfile)

In [4]:
print("---------------------------------------------------------------------")

###################################################################################################################
# Load data
data = GSTools.load_csv_files(DIR)
dt_idx = GSTools.get_trading_dates(data)

print("Initial number of datafeeds: " + str(len(dt_idx)) + ".")

###################################################################################################################
# get position of intended start date of backtest
bt_start_idx = dt_idx.get_loc(BT_START_DT)
size = PAIR_FORMATION_LEN + MAX_LOOKBACK + (len(dt_idx) - bt_start_idx)

print("To fulfill BT_START_DT, PAIR_FORMATION_LEN and MAX_LOOKBACK, size = " + str(size) + ".")

# get datafeeds which fulfill size requirement
data = GSTools.cut_datafeeds(data, size=size)

print("After cutting datafeeds, " + str(len(data.keys())) + " datafeeds remaining.")

###################################################################################################################
# just to be safe, sync the start end dates of the dataframes
data, start_dt, end_dt = GSTools.sync_start_end(data)
dt_idx = GSTools.get_trading_dates(data)

print("Backtest start date: " + str(dt_idx[PAIR_FORMATION_LEN + MAX_LOOKBACK]))
print("Backtest end date: " + str(dt_idx[PAIR_FORMATION_LEN + MAX_LOOKBACK + TEST_PERIOD - 1]))

###################################################################################################################
# get aggregated close prices
close_df = GSTools.get_aggregated(data, col='close')

if close_df.isnull().values.any():
    warnings.warn("There are null values in the aggregated close price df.")
else:
    print("No null values detected in aggregated close price df.")

###################################################################################################################
# total number of stocks remaining
N = len(data.keys())

# number of pairs of interest
K = int(PCT * N * (N-1) / 2)

###################################################################################################################
# pair selection
good_pairs = coint(df=close_df[0:PAIR_FORMATION_LEN], intercept=True, sig_level=0.005)
good_pairs.sort(key=lambda x: x[2])
good_pairs = good_pairs[0 : K]

print("From " + str(int(N * (N-1) / 2)) + " pairs, " + str(len(good_pairs)) + " pairs passed the cointegration test.")

print("---------------------------------------------------------------------")

---------------------------------------------------------------------
Initial number of datafeeds: 2517.
To fulfill BT_START_DT, PAIR_FORMATION_LEN and MAX_LOOKBACK, size = 867.
After cutting datafeeds, 122 datafeeds remaining.
Backtest start date: 2016-01-04 00:00:00
Backtest end date: 2017-01-05 00:00:00
No null values detected in aggregated close price df.
From 7381 pairs, 73 pairs passed the cointegration test.
---------------------------------------------------------------------


In [None]:
# combinations of parameters
param_combinations = list(itertools.product(LOOKBACK_VALUE, ENTER_THRESHOLD_SIZE, EXIT_THRESHOLD_SIZE, LOSS_LIMIT))

# list to store MACRO results
macro_results = []

for i, params in enumerate(param_combinations, 1):
    # set params
    print("Running " + str(i) + "/" + str(len(param_combinations)))
    print("Backtesting all pairs using parameters " + str(params))
    # list to store MICRO results
    results = []

    for pair in good_pairs:
        # get names of both stock
        stk0, stk1, _ = pair

        # get data of both stock
        stk0_df, stk1_df = data[stk0], data[stk1]
        stk0_df_test = stk0_df[PAIR_FORMATION_LEN + MAX_LOOKBACK : PAIR_FORMATION_LEN + MAX_LOOKBACK + TEST_PERIOD]
        stk1_df_test = stk1_df[PAIR_FORMATION_LEN + MAX_LOOKBACK : PAIR_FORMATION_LEN + MAX_LOOKBACK + TEST_PERIOD]
        
        # Create a cerebro
        cerebro = bt.Cerebro()

        # Create data feeds
        data0 = bt.feeds.PandasData(dataname=stk0_df_test, timeframe=(bt.TimeFrame.Days), datetime=0)
        data1 = bt.feeds.PandasData(dataname=stk1_df_test, timeframe=(bt.TimeFrame.Days), datetime=0)

        # add data feeds to cerebro
        cerebro.adddata(data0)
        cerebro.adddata(data1)

        # Add the strategy
        cerebro.addstrategy(CointStrategy, 
                            lookback=params[0],
                            max_lookback=MAX_LOOKBACK,
                            enter_threshold_size=params[1], 
                            exit_threshold_size=params[2], 
                            loss_limit=params[3],
                            consider_borrow_cost=False
                           )

        # Add analyzers
        cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name='mysharpe')
        cerebro.addanalyzer(Metrics, 
                            lookback=MAX_LOOKBACK, 
                            _name='metrics')

        # Add the commission - only stocks like a for each operation
        cerebro.broker.setcash(1000000)

        # And run it
        strat = cerebro.run()

        # get MICRO metrics
        sharperatio = strat[0].analyzers.mysharpe.get_analysis()['sharperatio']
        returnstd = strat[0].analyzers.metrics.returns_std()
        startcash = cerebro.getbroker().startingcash
        endcash = cerebro.getbroker().getvalue()
        profit = (endcash - startcash) / startcash
        
        results.append((stk0 + "-" + stk1, sharperatio, profit, returnstd))
    
    # convert to dataframe
    results_df = pd.DataFrame(results)
    results_df.columns = ['pair', 'sharpe_ratio', 'overall_return', 'returns_std']
    
    # save as csv
    uuid_str = str(uuid.uuid4())
    path = DST_DIR + str(uuid_str) + ".csv" 
    results_df.to_csv(path_or_buf=path, index=False)
    
    # calculate MACRO attributes
    avg_sharpe_ratio = results_df['sharpe_ratio'].mean()
    median_sharpe_ratio = results_df['sharpe_ratio'].median()
    
    avg_overall_return = results_df['overall_return'].mean()
    median_overall_return = results_df['overall_return'].median()
    overall_return_std = results_df['overall_return'].std()

    
    macro_results.append((params[0], 
                          params[1], 
                          params[2], 
                          params[3],
                          avg_sharpe_ratio,
                          median_sharpe_ratio,
                          avg_overall_return,
                          median_overall_return,
                          overall_return_std,
                          uuid_str
                         ))
    
    # nextline
    print("")

macro_results_df = pd.DataFrame(macro_results)
macro_results_df.columns = ['lookback', 
                            'enter_threshold_size', 
                            'exit_threshold_size',
                            'loss_limit', 
                            'avg_sharpe_ratio', 
                            'median_sharpe_ratio',
                            'avg_overall_return', 
                            'median_overall_return',
                            'overall_return_std',
                            'uuid']
macro_results_df.to_csv(DST_DIR + 'summary.csv', index=False)

Running 1/20
Backtesting all pairs using parameters (30, 2, 0.5, -0.005)
-------------------------------------------------------------------------
Running 2/20
Backtesting all pairs using parameters (30, 2, 0.5, -0.01)
-------------------------------------------------------------------------
Running 3/20
Backtesting all pairs using parameters (30, 3, 0.5, -0.005)
-------------------------------------------------------------------------
Running 4/20
Backtesting all pairs using parameters (30, 3, 0.5, -0.01)
-------------------------------------------------------------------------
Running 5/20
Backtesting all pairs using parameters (35, 2, 0.5, -0.005)
-------------------------------------------------------------------------
Running 6/20
Backtesting all pairs using parameters (35, 2, 0.5, -0.01)
-------------------------------------------------------------------------
Running 7/20
Backtesting all pairs using parameters (35, 3, 0.5, -0.005)
------------------------------------------------

In [6]:
macro_results_df

Unnamed: 0,lookback,enter_threshold_size,exit_threshold_size,loss_limit,avg_sharpe_ratio,median_sharpe_ratio,avg_overall_return,median_overall_return,overall_return_std,uuid
0,30,2,0.5,-0.005,-0.671276,-1.047042,-0.012742,-0.01497,0.114639,4abfb0f9-afe1-47c4-839c-1fb9b0c6f0c1
1,30,2,0.5,-0.01,0.103887,-0.434407,-0.004089,-0.005355,0.124199,d6915e8e-b2e0-44b6-97e8-6441189d8e4a
2,30,3,0.5,-0.005,-6.24448,-1.410168,0.004151,0.0,0.050575,97e34a07-3fc2-498e-b5d5-f7434816ae60
3,30,3,0.5,-0.01,-3.026915,-1.031818,0.008372,0.003983,0.05343,3df8a358-c9a3-45e2-b4fd-823840c85cb5
4,35,2,0.5,-0.005,-0.602833,-1.102146,-0.00904,-0.011703,0.117999,0a0d838e-b949-4afa-b4c0-45166b97618a
5,35,2,0.5,-0.01,-4.57278,-1.050581,-0.013024,0.004262,0.127368,d077e551-512c-40e6-914d-0df6e2e5a7a2
6,35,3,0.5,-0.005,-7.505106,-1.348041,0.006111,0.0,0.057321,3434d235-1fbc-48b7-86bd-1dae6f31fe65
7,35,3,0.5,-0.01,-2.395021,-1.034954,0.011132,0.001946,0.061141,0c7741de-a07d-4aa7-94a3-fcf927f501d9
8,40,2,0.5,-0.005,-1.004916,-1.113311,-0.02721,-0.049995,0.114667,04d6adbe-6662-4963-9e98-c74872d9abf2
9,40,2,0.5,-0.01,-1.327229,-1.083111,-0.029143,-0.033886,0.123004,5acedfce-800c-4834-bfc8-bbf888add66d


In [7]:
macro_results_df[macro_results_df['median_overall_return'] == max(macro_results_df['median_overall_return'])]

Unnamed: 0,lookback,enter_threshold_size,exit_threshold_size,loss_limit,avg_sharpe_ratio,median_sharpe_ratio,avg_overall_return,median_overall_return,overall_return_std,uuid
5,35,2,0.5,-0.01,-4.57278,-1.050581,-0.013024,0.004262,0.127368,d077e551-512c-40e6-914d-0df6e2e5a7a2


In [9]:
macro_results_df

Unnamed: 0,lookback,enter_threshold_size,exit_threshold_size,loss_limit,avg_sharpe_ratio,median_sharpe_ratio,avg_overall_return,median_overall_return,overall_return_std,uuid
0,30,2,0.5,-0.005,-0.156478,-1.095593,-0.023391,-0.020477,0.107185,fdf954e1-7ad5-47d1-b60e-ec6d50d8af2b
1,30,2,0.5,-0.01,-0.252219,-0.436165,0.00253,-0.009574,0.126433,6319d346-9d7e-4708-b9d5-80a906d2801f
2,30,3,0.5,-0.005,-5.818735,-1.410168,0.004309,0.0,0.050791,b7a871eb-8264-44f9-b0b6-1386430fd703
3,30,3,0.5,-0.01,-2.904534,-1.192084,0.006192,0.00326,0.052236,d16f4d80-80f0-4fa4-b103-877eec706e18
4,35,2,0.5,-0.005,-0.813441,-1.110579,-0.016751,-0.011906,0.112783,ea227ed2-3ca4-4c3e-bb97-464f0ab55b04
5,35,2,0.5,-0.01,-4.401413,-1.037338,-0.005072,0.001467,0.135263,abafc2c2-3411-43cc-a99f-ac5e9239cc0d
6,35,3,0.5,-0.005,-10.811344,-1.354049,0.004693,0.0,0.047964,1e7f1508-10f0-42b1-8d72-9e6e18b3401b
7,35,3,0.5,-0.01,-1.594834,-1.163554,0.007434,0.000959,0.059709,a4fa6e88-542f-47b2-a5a3-dd2e53ebecbc
8,40,2,0.5,-0.005,-0.787056,-1.119854,-0.030183,-0.051454,0.118467,24a94582-2c08-41e9-9ea8-5e3b6e8d2ef7
9,40,2,0.5,-0.01,-4.500145,-1.114121,-0.028709,-0.040864,0.118325,a1329734-8b4e-43cf-9afe-8e0ecc24c1e6
