In [10]:
from grid_search_tools import GSTools
from ptstrategy_cointegration import CointStrategy
from custom_analyzer import Metrics
from pandas_datafeed import PandasData
from pair_selector import *

import backtrader as bt
import backtrader.feeds as btfeeds
import pandas as pd
import warnings
import glob
import os
import uuid
import itertools
import json

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
# INPUT PARAMETERS

DIR = "../ib-data/nyse-daily-tech/"

BT_START_DT = '2017-03-20'
TEST_PERIOD = 200

PRE_PAIR_FORMATION = 252 + 252 + 52 - 60 - 40
PAIR_FORMATION_LEN = 60

# top PCT percentage of the pairs with lowest distance will be backtested
PCT = 0.01

# STRATEGY PARAMETERS
LOOKBACK_VALUE = [20, 25, 30, 40]
ENTER_THRESHOLD_SIZE = [2, 3]
EXIT_THRESHOLD_SIZE = [0.5]
LOSS_LIMIT = [-0.005, -0.01, -0.015]
MAX_LOOKBACK = max(LOOKBACK_VALUE)
CONSIDER_BORROW_COST = False
CONSIDER_COMMISSION = True

# ADDITIONAL INFO
OTHER_INFO = ""

# Where to save the ouputs
DST_DIR = "../backtest-results/cointegration/fyp-progress-y2/"

In [20]:
CONFIG = {
    'DIR': DIR,
    'BT_START_DT': BT_START_DT,
    'TEST_PERIOD': TEST_PERIOD,
    'PRE_PAIR_FORMATION': PRE_PAIR_FORMATION,
    'PAIR_FORMATION_LEN': PAIR_FORMATION_LEN,
    'PCT': PCT,
    'LOOKBACK_VALUE': LOOKBACK_VALUE,
    'ENTER_THRESHOLD_SIZE': ENTER_THRESHOLD_SIZE,
    'EXIT_THRESHOLD_SIZE': EXIT_THRESHOLD_SIZE,
    'LOSS_LIMIT': LOSS_LIMIT,
    'MAX_LOOKBACK': MAX_LOOKBACK,
    'CONSIDER_BORROW_COST': CONSIDER_BORROW_COST,
    'CONSIDER_COMMISSION': CONSIDER_COMMISSION,
    'DST_DIR': DST_DIR,
    'OTHER_INFO': OTHER_INFO,
}

# create json string
CONFIG_JSON_STR = json.dumps(CONFIG)

# create directory if neccessary
if not os.path.exists(DST_DIR):
    os.makedirs(DST_DIR)
    
# save json string to a file
with open(DST_DIR + 'config.json', 'w') as outfile:
    json.dump(CONFIG_JSON_STR, outfile)

In [21]:
print("---------------------------------------------------------------------")

###################################################################################################################
# Load data
data = GSTools.load_csv_files(DIR)
dt_idx = GSTools.get_trading_dates(data)

print("Initial number of datafeeds: " + str(len(dt_idx)) + ".")

###################################################################################################################
# get position of intended start date of backtest
bt_start_idx = dt_idx.get_loc(BT_START_DT)
size = PRE_PAIR_FORMATION + PAIR_FORMATION_LEN + MAX_LOOKBACK + (len(dt_idx) - bt_start_idx)

print("To fulfill BT_START_DT, PAIR_FORMATION_LEN and MAX_LOOKBACK, size = " + str(size) + ".")

# get datafeeds which fulfill size requirement
data = GSTools.cut_datafeeds(data, size=size)

print("After cutting datafeeds, " + str(len(data.keys())) + " datafeeds remaining.")

###################################################################################################################
# just to be safe, sync the start end dates of the dataframes
data, start_dt, end_dt = GSTools.sync_start_end(data)
dt_idx = GSTools.get_trading_dates(data)

print("Backtest start date: " + str(dt_idx[PRE_PAIR_FORMATION + PAIR_FORMATION_LEN + MAX_LOOKBACK]))
print("Backtest end date: " + str(dt_idx[PRE_PAIR_FORMATION + PAIR_FORMATION_LEN + MAX_LOOKBACK + TEST_PERIOD - 1]))

###################################################################################################################
# get aggregated close prices
close_df = GSTools.get_aggregated(data, col='close')

if close_df.isnull().values.any():
    warnings.warn("There are null values in the aggregated close price df.")
else:
    print("No null values detected in aggregated close price df.")

###################################################################################################################
# total number of stocks remaining
N = len(data.keys())

# number of pairs of interest
K = int(PCT * N * (N-1) / 2)

###################################################################################################################
# pair selection
good_pairs = coint(df=close_df[PRE_PAIR_FORMATION:PRE_PAIR_FORMATION + PAIR_FORMATION_LEN], intercept=True, sig_level=0.005)
good_pairs.sort(key=lambda x: x[2])
good_pairs = good_pairs[0 : K]

print("From " + str(int(N * (N-1) / 2)) + " pairs, " + str(len(good_pairs)) + " pairs passed the cointegration test.")

print("---------------------------------------------------------------------")

---------------------------------------------------------------------
Initial number of datafeeds: 2517.
To fulfill BT_START_DT, PAIR_FORMATION_LEN and MAX_LOOKBACK, size = 1009.
After cutting datafeeds, 116 datafeeds remaining.
Backtest start date: 2017-03-20 00:00:00
Backtest end date: 2018-01-02 00:00:00
No null values detected in aggregated close price df.
From 6670 pairs, 66 pairs passed the cointegration test.
---------------------------------------------------------------------


In [None]:
# combinations of parameters
param_combinations = list(itertools.product(LOOKBACK_VALUE, ENTER_THRESHOLD_SIZE, EXIT_THRESHOLD_SIZE, LOSS_LIMIT))

# list to store MACRO results
macro_results = []

for i, params in enumerate(param_combinations, 1):
    # set params
    print("Running " + str(i) + "/" + str(len(param_combinations)))
    print("Backtesting all pairs using parameters " + str(params))
    # list to store MICRO results
    results = []

    for pair in good_pairs:
        # get names of both stock
        stk0, stk1, _ = pair

        # get data of both stock
        stk0_df, stk1_df = data[stk0], data[stk1]
        stk0_df_test = stk0_df[PRE_PAIR_FORMATION + PAIR_FORMATION_LEN : PRE_PAIR_FORMATION + PAIR_FORMATION_LEN + MAX_LOOKBACK + TEST_PERIOD]
        stk1_df_test = stk1_df[PRE_PAIR_FORMATION + PAIR_FORMATION_LEN : PRE_PAIR_FORMATION + PAIR_FORMATION_LEN + MAX_LOOKBACK + TEST_PERIOD]
        
        # Create a cerebro
        cerebro = bt.Cerebro()

        # Create data feeds
        data0 = bt.feeds.PandasData(dataname=stk0_df_test, timeframe=(bt.TimeFrame.Days), datetime=0)
        data1 = bt.feeds.PandasData(dataname=stk1_df_test, timeframe=(bt.TimeFrame.Days), datetime=0)

        # add data feeds to cerebro
        cerebro.adddata(data0)
        cerebro.adddata(data1)

        # Add the strategy
        cerebro.addstrategy(CointStrategy, 
                            lookback=params[0],
                            max_lookback=MAX_LOOKBACK,
                            enter_threshold_size=params[1], 
                            exit_threshold_size=params[2], 
                            loss_limit=params[3],
                            consider_borrow_cost=CONSIDER_BORROW_COST,
                            consider_commission=CONSIDER_COMMISSION
                           )

        # Add analyzers
        cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name='mysharpe')
        cerebro.addanalyzer(Metrics, 
                            lookback=MAX_LOOKBACK, 
                            _name='metrics')

        # Add the commission - only stocks like a for each operation
        cerebro.broker.setcash(1000000)

        # And run it
        strat = cerebro.run()

        # get MICRO metrics
        sharperatio = strat[0].analyzers.mysharpe.get_analysis()['sharperatio']
        returnstd = strat[0].analyzers.metrics.returns_std()
        startcash = cerebro.getbroker().startingcash
        endcash = cerebro.getbroker().getvalue()
        profit = (endcash - startcash) / startcash
        
        results.append((stk0 + "-" + stk1, sharperatio, profit, returnstd))
    
    # convert to dataframe
    results_df = pd.DataFrame(results)
    results_df.columns = ['pair', 'sharpe_ratio', 'overall_return', 'returns_std']
    
    # save as csv
    uuid_str = str(uuid.uuid4())
    path = DST_DIR + str(uuid_str) + ".csv" 
    results_df.to_csv(path_or_buf=path, index=False)
    
    # calculate MACRO attributes
    avg_sharpe_ratio = results_df['sharpe_ratio'].mean()
    median_sharpe_ratio = results_df['sharpe_ratio'].median()
    
    avg_overall_return = results_df['overall_return'].mean()
    median_overall_return = results_df['overall_return'].median()
    overall_return_std = results_df['overall_return'].std()

    
    macro_results.append((params[0], 
                          params[1], 
                          params[2], 
                          params[3],
                          avg_sharpe_ratio,
                          median_sharpe_ratio,
                          avg_overall_return,
                          median_overall_return,
                          overall_return_std,
                          uuid_str
                         ))
    
    # nextline
    print("")

macro_results_df = pd.DataFrame(macro_results)
macro_results_df.columns = ['lookback', 
                            'enter_threshold_size', 
                            'exit_threshold_size',
                            'loss_limit', 
                            'avg_sharpe_ratio', 
                            'median_sharpe_ratio',
                            'avg_overall_return', 
                            'median_overall_return',
                            'overall_return_std',
                            'uuid']
macro_results_df.to_csv(DST_DIR + 'summary.csv', index=False)

Running 1/24
Backtesting all pairs using parameters (20, 2, 0.5, -0.005)
-------------------------

In [17]:
macro_results_df = pd.read_csv(DST_DIR + "summary.csv")
macro_results_df

Unnamed: 0,lookback,enter_threshold_size,exit_threshold_size,loss_limit,avg_sharpe_ratio,median_sharpe_ratio,avg_overall_return,median_overall_return,overall_return_std,uuid
0,20,2,0.5,-0.005,-0.98722,-0.778108,0.003969,-0.000879,0.094294,c7f9e4c3-b495-4aad-8ab6-55bb17a2ef5b
1,20,2,0.5,-0.01,-0.051755,-0.144824,0.010456,0.011284,0.095314,003de819-ab43-44aa-af20-dd226cb8ac30
2,20,2,0.5,-0.015,-1.422985,-1.111917,-0.003062,-0.004318,0.092877,7a9f627d-e9cb-4b87-a58f-4e0579b630de
3,20,3,0.5,-0.005,-4.884123,-1.745963,-0.003444,0.0,0.030421,965ec1b8-b9d4-4608-acfd-5ad450fb6058
4,20,3,0.5,-0.01,-4.806193,-1.745963,-0.00405,0.0,0.028189,97019761-d1e4-41ce-b3bd-c8184b5a830f
5,20,3,0.5,-0.015,-3.992952,-1.367414,-0.004239,0.0,0.033994,2709365f-01eb-4110-b030-72bcfad5c56d
6,25,2,0.5,-0.005,-0.513964,-1.065977,-0.004956,0.000665,0.118156,0f2782ba-5bcc-4633-a8e4-f882a49e4d4e
7,25,2,0.5,-0.01,-4.639107,-0.410883,0.002565,-0.001001,0.139008,3395cbfc-4d78-4df2-a9dd-3fb234c076f4
8,25,2,0.5,-0.015,-0.543523,-0.824807,-0.015046,-0.008018,0.127999,a645bb26-7e91-4b8f-8a32-4a8d38ac5e12
9,25,3,0.5,-0.005,-3.167292,-1.432271,0.003866,0.0,0.044517,95017b59-0932-49bf-bb16-1316ec12f7b8


In [18]:
macro_results_df[macro_results_df['avg_overall_return'] == max(macro_results_df['avg_overall_return'])]

Unnamed: 0,lookback,enter_threshold_size,exit_threshold_size,loss_limit,avg_sharpe_ratio,median_sharpe_ratio,avg_overall_return,median_overall_return,overall_return_std,uuid
1,20,2,0.5,-0.01,-0.051755,-0.144824,0.010456,0.011284,0.095314,003de819-ab43-44aa-af20-dd226cb8ac30


In [9]:
macro_results_df

Unnamed: 0,lookback,enter_threshold_size,exit_threshold_size,loss_limit,avg_sharpe_ratio,median_sharpe_ratio,avg_overall_return,median_overall_return,overall_return_std,uuid
0,30,2,0.5,-0.005,-0.156478,-1.095593,-0.023391,-0.020477,0.107185,fdf954e1-7ad5-47d1-b60e-ec6d50d8af2b
1,30,2,0.5,-0.01,-0.252219,-0.436165,0.00253,-0.009574,0.126433,6319d346-9d7e-4708-b9d5-80a906d2801f
2,30,3,0.5,-0.005,-5.818735,-1.410168,0.004309,0.0,0.050791,b7a871eb-8264-44f9-b0b6-1386430fd703
3,30,3,0.5,-0.01,-2.904534,-1.192084,0.006192,0.00326,0.052236,d16f4d80-80f0-4fa4-b103-877eec706e18
4,35,2,0.5,-0.005,-0.813441,-1.110579,-0.016751,-0.011906,0.112783,ea227ed2-3ca4-4c3e-bb97-464f0ab55b04
5,35,2,0.5,-0.01,-4.401413,-1.037338,-0.005072,0.001467,0.135263,abafc2c2-3411-43cc-a99f-ac5e9239cc0d
6,35,3,0.5,-0.005,-10.811344,-1.354049,0.004693,0.0,0.047964,1e7f1508-10f0-42b1-8d72-9e6e18b3401b
7,35,3,0.5,-0.01,-1.594834,-1.163554,0.007434,0.000959,0.059709,a4fa6e88-542f-47b2-a5a3-dd2e53ebecbc
8,40,2,0.5,-0.005,-0.787056,-1.119854,-0.030183,-0.051454,0.118467,24a94582-2c08-41e9-9ea8-5e3b6e8d2ef7
9,40,2,0.5,-0.01,-4.500145,-1.114121,-0.028709,-0.040864,0.118325,a1329734-8b4e-43cf-9afe-8e0ecc24c1e6
