In [1]:
import math
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import yfinance as yf
from collections import namedtuple
from datetime import date, datetime, timedelta

from algo.sde.ornstein_uhlenbeck_optimisation import OptimiserOU
from algo.sde.ornstein_uhlenbeck_parameters import HedgeParamsOU, ModelParamsOU
from etl.yfinance_data import get_pairs_data
from execution.positions import compute_positions, compute_returns
from performance.sharpe import sharpe_ratio_log


# Use seaborn's "darkgrid" theme for every figure drawn later in the notebook.
sns.set_style("darkgrid")

In [6]:
## Get all data

# Bar size. Built for hours: the sizes below are counted in hourly steps.
interval = "1h"

# True: slide a fixed-length training window forward.
# False: keep the window start fixed and train on all data so far.
use_fixed_train_size = True

# Amount of history to request. 730 days is the max, includes today.
# num_data_full = 24 * 729
num_data_full = 24 * 365

# Size of initial training set. Note: 23 trading days per month.
# num_train_initial = 24 * 23 * 12
num_train_initial = 24 * 23 * 6

# Size of each test period. Note: 1 futures trading week = 6 days. TODO: start on Sunday?
num_test_window = 24 * 6

# At least one full train + test cycle has to fit in the requested history.
assert num_train_initial + num_test_window <= num_data_full

# Instruments
ticker1, ticker2 = "BZ=F", "CL=F"

# Dates: count back `num_data_full` hours from today.
end_date = date.today()
start_date = end_date - timedelta(hours=num_data_full)
date_format = "%Y-%m-%d"
df_full = get_pairs_data(
    ticker1,
    ticker2,
    start_date.strftime(date_format),
    end_date.strftime(date_format),
    interval=interval,
)

# Show requested vs. received range; the two can differ (see the cell output).
print(f"\nDates Requested: {start_date} to {end_date}")
print(f"Dates Received: {df_full.index[0]} to {df_full.index[-1]}\n")

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Dates Requested: 2021-11-27 to 2022-11-27
Dates Received: 2021-11-28 18:00:00-05:00 to 2022-11-25 13:00:00-05:00



In [7]:
## Fit model on initial training data

# Model inputs
dt = 1   # One unit of time is one step (row) in the data.
A = 1.0  # Dollar size of the asset1 leg: $A long/short in asset1 is paired with $B short/long in asset2.

# Z-score thresholds for entering and exiting a trade
z_entry, z_exit = 1.0, 0.25

In [8]:
# The download may hold fewer rows than requested; if so, shrink the budget to
# what actually arrived and re-check that one train + test cycle still fits.
num_rows_received = len(df_full)
if num_rows_received < num_data_full:
    print(f"Requested: {num_data_full}. Received: {num_rows_received}. Updating num_data_full...")
    num_data_full = num_rows_received
    assert num_data_full >= num_train_initial + num_test_window

print(f"\tnum_train_initial = {num_train_initial}")
print(f"\tnum_test_window = {num_test_window}")
print(f"\tnum_data_full = {num_data_full}")

# Quotient: number of whole test windows that fit after the initial training set.
# Remainder: leftover rows, skipped at the start of the data so that the last
# test window ends exactly on the last row.
num_iter, start_train_index = divmod(num_data_full - num_train_initial, num_test_window)
print(f"iterations = {num_iter}")

# Bounds of the first train/test split (end indices are exclusive).
end_train_index = start_train_index + num_train_initial
end_test_index = end_train_index + num_test_window

Requested: 8760. Received: 5169. Updating num_data_full...
	num_train_initial = 3312
	num_test_window = 1440
	num_data_full = 5169
iterations = 1


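Note: this backtest does not carry positions over from one test window to the next.
- I.e. if a position is opened in one week and would be closed in the next, the test neither captures that later exit nor closes the position at the end of the window.
- To model this properly, move to a dedicated backtesting framework, e.g. backtrader.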

In [9]:
# Walk-forward backtest: for each test window, calibrate (alpha, beta) on the
# preceding training rows, build z-score signals on the test rows, and collect
# the resulting "total" series for a single Sharpe ratio at the end.
positions = []

for i in range(num_iter):
    print(f"Iteration {i}")

    # Window bounds are derived from the loop counter rather than by advancing
    # shared indices at the bottom of the loop. This way a skipped iteration
    # (see `continue` below) still moves on to the next window, and re-running
    # this cell replays exactly the same windows.
    offset = i * num_test_window
    # Fixed-size training window slides forward; otherwise the start stays put
    # and the training set grows by one test window per iteration.
    train_start = start_train_index + offset if use_fixed_train_size else start_train_index
    test_end = start_train_index + num_train_initial + offset + num_test_window

    # Build datasets: the last `num_test_window` rows are the test set.
    df_train_test = df_full.iloc[train_start:test_end].copy()
    df_train = df_train_test.head(len(df_train_test) - num_test_window).copy()
    print(f"\tStart Train: {df_train.index[0]} | End Train: {df_train.index[-1]}")

    # Cointegration pre-trade checks
    # TODO

    # Train Model
    optimiser_train = OptimiserOU(A=A, dt=dt)
    try:
        hp_train, _ = optimiser_train.optimise(asset1=df_train["S1"].to_numpy(), asset2=df_train["S2"].to_numpy())
    except AssertionError as e:
        # Calibration rejected this window; move on to the next one.
        print(f"Training Yielded {e}. Skipping.")
        continue

    alpha = hp_train.alpha
    beta = hp_train.beta

    # Compute spreads using (alpha, beta) calibrated in training.
    df_train_test["spread"] = alpha*df_train_test["S1"] - beta*df_train_test["S2"]

    # Allow the expanding metrics to roll from train to test.
    spread_expanding = df_train_test["spread"].expanding()
    df_train_test["zscore_expanding"] = (df_train_test["spread"] - spread_expanding.mean()) / spread_expanding.std()

    df_test = df_train_test.tail(num_test_window).copy()
    print(f"\tStart Test:  {df_test.index[0]} | End Test:  {df_test.index[-1]}")

    # Plot expanding
    # NOTE(review): df_train is copied before "zscore_expanding" is added, so the
    # df_train line below needs the column from df_train_test before re-enabling.
    # xmin=df_train.index[0]
    # xmax=df_test.index[-1]
    # plt.figure(figsize=(12, 4))
    # TODO: train_test, hue=<label_to_be_added>
    # plt.plot(df_train.index, df_train["zscore_expanding"], color="blue", label="spread_train")
    # plt.plot(df_test.index, df_test["zscore_expanding"], color="orange", label="spread_test")
    # plt.hlines(z_entry, label="short", colors="red", linestyle="dotted", xmin=xmin, xmax=xmax)
    # plt.hlines(-z_entry, label="long", colors="green", linestyle="dotted", xmin=xmin, xmax=xmax)
    # plt.hlines(z_exit, label="exit", colors="blue", linestyle="dotted", xmin=xmin, xmax=xmax)
    # plt.hlines(-z_exit, label="exit", colors="blue", linestyle="dotted", xmin=xmin, xmax=xmax)
    # plt.legend()
    # plt.title("Expanding Metrics on Z-Score")
    # plt.show()

    # Save plots: name contains date of W/C.

    # Trade entry/exit signals (1.0 where the condition holds, else 0.0).
    threshold_col = "zscore_expanding"
    df_test["long"] = 1.0 * (df_test[threshold_col] <= -z_entry)
    df_test["short"] = 1.0 * (df_test[threshold_col] >= z_entry)
    df_test["exit"] = 1.0 * (np.abs(df_test[threshold_col]) <= z_exit)

    df_test = df_test.pipe(compute_positions)
    df_test = df_test.pipe(compute_returns)
    # df_test[["returns_cml", "returns_cml_S1", "returns_cml_S2"]].plot()
    # plt.title("Test Set")
    # plt.show()

    # Positions in price-space. Sharpe formula transforms to (log-)returns-space internally.
    positions.extend(df_test["total"].tolist())

    print(f"\t\treturns_cml    = {df_test['returns_cml'].iloc[-1]:.2f}")
    print(f"\t\treturns_cml_S1 = {df_test['returns_cml_S1'].iloc[-1]:.2f}")
    print(f"\t\treturns_cml_S2 = {df_test['returns_cml_S2'].iloc[-1]:.2f}")


# One Sharpe ratio over the concatenated test windows.
df_pos = pd.DataFrame({"total": positions})

sharpe_ratio = sharpe_ratio_log(df_pos, colname="total")
# NOTE(review): 23*252 is presumably hourly bars per trading day times trading
# days per year; confirm against the "23" used when sizing the training set.
sharpe_ratio_annual = sharpe_ratio*np.sqrt(23*252)
print(f"Sharpe Ratio Test Annual = {sharpe_ratio_annual}")

Iteration 0
	Start Train: 2021-12-28 03:00:00-05:00 | End Train: 2022-08-16 02:00:00-04:00
	Start Test:  2022-08-16 03:00:00-04:00 | End Test:  2022-11-25 13:00:00-05:00
		returns_cml    = 0.88
		returns_cml_S1 = 0.89
		returns_cml_S2 = 0.86
Sharpe Ratio Test Annual = -0.28828684183135295


  result = getattr(ufunc, method)(*inputs, **kwargs)
