<img src="https://certificate.tpq.io/taim_logo.png" width="350px" align="right">

# Artificial Intelligence in Finance

## Vectorized Backtesting

Dr Yves J Hilpisch | The AI Machine

http://aimachine.io | http://twitter.com/dyjh

In [None]:
import os
import math
import numpy as np
import pandas as pd
from pylab import plt, mpl
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8' and the
# plain 'seaborn' alias was removed in later releases; prefer the new name
# and fall back to the old one on older installations.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available
              else 'seaborn')
mpl.rcParams['savefig.dpi'] = 300
mpl.rcParams['font.family'] = 'serif'
# Silences pandas' chained-assignment warning for the whole session.
pd.set_option('mode.chained_assignment', None)
pd.set_option('display.float_format', '{:.4f}'.format)
np.set_printoptions(suppress=True, precision=4)
# NOTE(review): PYTHONHASHSEED is read when an interpreter starts, so this
# assignment affects child processes only, not hashing in the running kernel.
os.environ['PYTHONHASHSEED'] = '0'

## Backtesting an SMA-Based Strategy

In [None]:
url = 'http://hilpisch.com/aiif_eikon_eod_data.csv'

In [None]:
symbol = 'EUR='

In [None]:
data = pd.DataFrame(pd.read_csv(url, index_col=0,
                                parse_dates=True).dropna()[symbol])

In [None]:
data.info()

In [None]:
data['SMA1'] = data[symbol].rolling(42).mean()

In [None]:
data['SMA2'] = data[symbol].rolling(258).mean()

In [None]:
data.plot(figsize=(10, 6));

In [None]:
data.dropna(inplace=True)

In [None]:
# Position signal: +1 on rows where SMA1 is above SMA2, otherwise -1.
data['p'] = np.where(data['SMA1'] > data['SMA2'], 1, -1)

In [None]:
# Lag the signal by one row so that each row's position is derived from the
# previous row's SMA values only; the first row becomes NaN and is dropped
# by the following dropna().
data['p'] = data['p'].shift(1)

In [None]:
data.dropna(inplace=True)

In [None]:
data.plot(figsize=(10, 6), secondary_y='p');

In [None]:
# Row-over-row log return of the price series (first row is NaN).
data['r'] = np.log(data[symbol] / data[symbol].shift(1))

In [None]:
data.dropna(inplace=True)

In [None]:
# Strategy log return: the (already lagged) position times the market return.
data['s'] = data['p'] * data['r']

In [None]:
data[['r', 's']].sum().apply(np.exp)  # gross performance

In [None]:
data[['r', 's']].sum().apply(np.exp) - 1  # net performance

In [None]:
data[['r', 's']].cumsum().apply(np.exp).plot(figsize=(10, 6));

In [None]:
# Number of rows on which the position differs from the previous row (the
# first row always counts because its diff is NaN), plus one -- presumably
# for the closing trade at the end of the period.
# Series.sum() is vectorized; builtin sum() would iterate row by row.
(data['p'].diff() != 0).sum() + 1

In [None]:
pc = 0.005

In [None]:
# Strategy return after costs: subtract `pc` on every row where the position
# changed relative to the previous row (the first row qualifies as well,
# because its diff is NaN and NaN != 0 evaluates to True).
data['s_'] = np.where(data['p'].diff() != 0,
                      data['s'] - pc, data['s'])

In [None]:
# data['s_'].iloc[0] -= pc

In [None]:
# Charge the cost of closing out the position on the last row.
# A single positional write on the frame is used instead of the chained
# `data['s_'].iloc[-1] -= pc`, which only updates a temporary Series (and so
# leaves `data` unchanged) when pandas Copy-on-Write is active.
data.iloc[-1, data.columns.get_loc('s_')] -= pc

In [None]:
data[['r', 's', 's_']][data['p'].diff() != 0]

In [None]:
data[['r', 's', 's_']].sum().apply(np.exp)

In [None]:
data[['r', 's', 's_']].sum().apply(np.exp) - 1

In [None]:
data[['r', 's', 's_']].cumsum().apply(np.exp).plot(figsize=(10, 6));

In [None]:
data[['r', 's', 's_']].std()

In [None]:
data[['r', 's', 's_']].std() * math.sqrt(252)

## Backtesting a Daily DNN-Based Strategy

In [None]:
data = pd.DataFrame(pd.read_csv(url, index_col=0,
                                parse_dates=True).dropna()[symbol])

In [None]:
data.info()

In [None]:
lags = 5

In [None]:
def add_lags(data, symbol, lags, window=20):
    """Derive rolling features from a price column and add lagged copies.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the price column ``symbol``. It is not modified;
        the function works on a copy.
    symbol : str
        Name of the price column the features are computed from.
    lags : int
        Number of lags (1 .. ``lags``) created for every feature.
    window : int, default 20
        Rolling window length for the SMA, min, max, momentum and
        volatility features.

    Returns
    -------
    (pandas.DataFrame, list of str)
        The feature frame with all rows containing NaN removed, and the
        names of the lagged columns (``'<feature>_lag_<k>'``), ordered
        feature by feature and, within a feature, by increasing lag.
    """
    cols = []
    df = data.copy()
    df.dropna(inplace=True)
    # Compute everything from the symbol column explicitly. Dividing the
    # whole frame (df / df.shift(1)) only works when `data` has exactly one
    # column and raises as soon as additional columns are present.
    prices = df[symbol]
    df['r'] = np.log(prices / prices.shift(1))
    df['sma'] = prices.rolling(window).mean()
    df['min'] = prices.rolling(window).min()
    df['max'] = prices.rolling(window).max()
    df['mom'] = df['r'].rolling(window).mean()
    df['vol'] = df['r'].rolling(window).std()
    # Drop the warm-up rows for which the rolling statistics are undefined.
    df.dropna(inplace=True)
    # Direction label: 1 for a positive log return, 0 otherwise.
    df['d'] = np.where(df['r'] > 0, 1, 0)
    features = [symbol, 'r', 'd', 'sma', 'min', 'max', 'mom', 'vol']
    for f in features:
        for lag in range(1, lags + 1):
            col = f'{f}_lag_{lag}'
            df[col] = df[f].shift(lag)
            cols.append(col)
    # The first `lags` rows have incomplete lag columns.
    df.dropna(inplace=True)
    return df, cols

In [None]:
data, cols = add_lags(data, symbol, lags, window=20)

In [None]:
import random
import tensorflow as tf
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.regularizers import l1
from keras.optimizers import Adam
from sklearn.metrics import accuracy_score

In [None]:
def set_seeds(seed=100):
    """Seed Python's ``random``, NumPy and TensorFlow with the same value."""
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)
set_seeds()

In [None]:
optimizer = Adam(learning_rate=0.0001)

In [None]:
def create_model(hl=2, hu=128, dropout=False, rate=0.3,
                 regularize=False, reg=l1(0.0005),
                 optimizer=optimizer, input_dim=len(cols)):
    """Build and compile a dense binary classifier.

    Parameters
    ----------
    hl : int
        Number of hidden layers added after the input layer, so the network
        has ``hl + 1`` hidden ``Dense`` layers in total.
    hu : int
        Units per hidden layer.
    dropout : bool
        If true, a ``Dropout(rate, seed=100)`` layer follows every hidden
        layer.
    rate : float
        Dropout rate, used only when ``dropout`` is true.
    regularize : bool
        Switch that enables activity regularization. When false, ``reg`` is
        ignored.
    reg : keras regularizer
        Activity regularizer applied to every hidden layer when
        ``regularize`` is true.
    optimizer : keras optimizer or str
        Optimizer to compile with. Optimizer objects are cloned (see below),
        so the object passed in is never bound to the new model.
    input_dim : int
        Number of input features.

    Returns
    -------
    keras.models.Sequential
        Model compiled with binary cross-entropy loss and accuracy metric.

    Notes
    -----
    The defaults for ``optimizer`` and ``input_dim`` are evaluated once, when
    this function is defined; re-run the definition (or pass the arguments
    explicitly) after changing the module-level ``optimizer`` or ``cols``.
    """
    if not regularize:
        reg = None
    # A Keras optimizer object keeps state for the variables of the model it
    # is first used with; sharing the single default instance between models
    # fails on recent Keras versions. Give every model its own copy with an
    # identical configuration. String identifiers are passed through.
    if hasattr(optimizer, 'get_config'):
        optimizer = optimizer.__class__.from_config(optimizer.get_config())
    model = Sequential()
    model.add(Dense(hu, input_dim=input_dim,
                    activity_regularizer=reg,
                    activation='relu'))
    if dropout:
        model.add(Dropout(rate, seed=100))
    for _ in range(hl):
        model.add(Dense(hu, activation='relu',
                        activity_regularizer=reg))
        if dropout:
            model.add(Dropout(rate, seed=100))
    # Single sigmoid unit: output is read as a probability and later
    # thresholded at 0.5 by the notebook.
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    return model

In [None]:
split = '2018-01-01'

In [None]:
# Training set: all rows up to the split date. Label-based slicing is
# inclusive, so a row dated exactly `split` (if one exists) would also be
# part of the later `data.loc[split:]` test slice -- TODO confirm that no
# such row is present.
train = data.loc[:split].copy()

In [None]:
np.bincount(train['d'])

In [None]:
# Column-wise mean and standard deviation of the training data only; the
# same moments are reused later to normalize the test data.
mu, std = train.mean(), train.std()

In [None]:
train_ = (train - mu) / std

In [None]:
set_seeds()
model = create_model(hl=2, hu=64)

In [None]:
%%time 
model.fit(train_[cols], train['d'],
        epochs=20, verbose=False,
        validation_split=0.2, shuffle=False)

In [None]:
model.evaluate(train_[cols], train['d'])

In [None]:
train['p'] = np.where(model.predict(train_[cols]) > 0.5, 1, 0)

In [None]:
train['p'] = np.where(train['p'] == 1, 1, -1)

In [None]:
train['p'].value_counts()

In [None]:
train['s'] = train['p'] * train['r']

In [None]:
train[['r', 's']].sum().apply(np.exp)

In [None]:
train[['r', 's']].sum().apply(np.exp)  - 1

In [None]:
train[['r', 's']].cumsum().apply(np.exp).plot(figsize=(10, 6));

In [None]:
test = data.loc[split:].copy()

In [None]:
# Normalize the test data with the moments computed from the training data
# (mu, std), so no test-set statistics enter the features.
test_ = (test - mu) / std

In [None]:
model.evaluate(test_[cols], test['d'])

In [None]:
test['p'] = np.where(model.predict(test_[cols]) > 0.5, 1, -1)

In [None]:
test['p'].value_counts()

In [None]:
test['s'] = test['p'] * test['r']

In [None]:
test[['r', 's']].sum().apply(np.exp)

In [None]:
test[['r', 's']].sum().apply(np.exp) - 1

In [None]:
test[['r', 's']].cumsum().apply(np.exp).plot(figsize=(10, 6));

In [None]:
# Number of rows on which the position differs from the previous row (the
# first row always counts because its diff is NaN), plus one -- presumably
# for the closing trade at the end of the period.
# Series.sum() is vectorized; builtin sum() would iterate row by row.
(test['p'].diff() != 0).sum() + 1

In [None]:
spread = 0.00012
pc = spread / data[symbol].mean()
print(f'{pc:.6f}')

In [None]:
test['s_'] = np.where(test['p'].diff() != 0,
                      test['s'] - pc, test['s'])

In [None]:
# test['s_'].iloc[0] -= pc

In [None]:
# Charge the cost of closing out the position on the last row.
# A single positional write on the frame is used instead of the chained
# `test['s_'].iloc[-1] -= pc`, which only updates a temporary Series (and so
# leaves `test` unchanged) when pandas Copy-on-Write is active.
test.iloc[-1, test.columns.get_loc('s_')] -= pc

In [None]:
test[['r', 's', 's_']].sum().apply(np.exp)

In [None]:
test[['r', 's', 's_']].sum().apply(np.exp) - 1

In [None]:
test[['r', 's', 's_']].cumsum().apply(np.exp).plot(figsize=(10, 6));

## Backtesting an Intraday DNN-Based Strategy

In [None]:
url = 'http://hilpisch.com/aiif_eikon_id_eur_usd.csv'

In [None]:
symbol = 'EUR='

In [None]:
data = pd.DataFrame(pd.read_csv(url, index_col=0,
                    parse_dates=True).dropna()['CLOSE'])
data.columns = [symbol]

In [None]:
# Aggregate to 5-minute bars labelled with the right bin edge, keeping the
# last observation of each bin; bins without observations are forward-filled
# from the previous bar.
data = data.resample('5min', label='right').last().ffill()

In [None]:
data.info()

In [None]:
data.head()

In [None]:
data[symbol].plot(figsize=(10, 6));

In [None]:
lags = 5

In [None]:
data, cols = add_lags(data, symbol, lags, window=20)

In [None]:
split = int(len(data) * 0.85)

In [None]:
train = data.iloc[:split].copy()

In [None]:
np.bincount(train['d'])

In [None]:
def cw(df):
    """Return class weights for the binary label column ``df['d']``.

    Each class gets the weight ``(1 / count) * len(df) / 2``, so the rarer
    class receives the larger weight. The label column must contain exactly
    the two classes 0 and 1.
    """
    n_obs = len(df)
    n_zero, n_one = np.bincount(df['d'])
    return {label: (1 / count) * n_obs / 2
            for label, count in ((0, n_zero), (1, n_one))}

In [None]:
mu, std = train.mean(), train.std()

In [None]:
train_ = (train - mu) / std

In [None]:
set_seeds()
# `regularize` is the switch that turns regularization on in create_model().
# The previous `reg=True` had no effect: create_model() resets `reg` to None
# unless `regularize` is truthy, so the model was trained without any
# regularization. With `regularize=True` the default regularizer is applied.
model = create_model(hl=1, hu=128,
                     regularize=True, dropout=False)

In [None]:
%%time 
model.fit(train_[cols], train['d'],
          epochs=40, verbose=False,
          validation_split=0.2, shuffle=False,
          class_weight=cw(train))

In [None]:
model.evaluate(train_[cols], train['d'])

In [None]:
train['p'] = np.where(model.predict(train_[cols]) > 0.5, 1, -1)

In [None]:
train['p'].value_counts()

In [None]:
train['s'] = train['p'] * train['r']

In [None]:
train[['r', 's']].sum().apply(np.exp)

In [None]:
train[['r', 's']].sum().apply(np.exp) - 1

In [None]:
train[['r', 's']].cumsum().apply(np.exp).plot(figsize=(10, 6));

In [None]:
test = data.iloc[split:].copy()

In [None]:
test_ = (test - mu) / std

In [None]:
model.evaluate(test_[cols], test['d'])

In [None]:
test['p'] = np.where(model.predict(test_[cols]) > 0.5, 1, -1)

In [None]:
test['p'].value_counts()

In [None]:
test['s'] = test['p'] * test['r']

In [None]:
test[['r', 's']].sum().apply(np.exp)

In [None]:
test[['r', 's']].sum().apply(np.exp) - 1

In [None]:
test[['r', 's']].cumsum().apply(np.exp).plot(figsize=(10, 6));

In [None]:
# Number of rows on which the position differs from the previous row (the
# first row always counts because its diff is NaN), plus one -- presumably
# for the closing trade at the end of the period.
# Series.sum() is vectorized; builtin sum() would iterate row by row.
(test['p'].diff() != 0).sum() + 1

In [None]:
spread = 0.00012
# Proportional cost per row: the fixed spread divided by that row's price.
# Unlike the scalar `pc` used for the daily strategy, this is a Series
# aligned with `test`.
pc_1 = spread / test[symbol]

In [None]:
spread = 0.00006
pc_2 = spread / test[symbol]

In [None]:
test['s_1'] = np.where(test['p'].diff() != 0,
                       test['s'] - pc_1, test['s'])

In [None]:
# test['s_1'].iloc[0] -= pc_1.iloc[0]
# Charge the closing trade on the last bar, using the cost of that same bar
# (the original subtracted the first bar's cost, pc_1.iloc[0]).
# The write goes through the frame in one step; the chained form
# `test['s_1'].iloc[-1] -= ...` leaves `test` unchanged under pandas
# Copy-on-Write.
test.iloc[-1, test.columns.get_loc('s_1')] -= pc_1.iloc[-1]

In [None]:
test['s_2'] = np.where(test['p'].diff() != 0,
                       test['s'] - pc_2, test['s'])

In [None]:
# test['s_2'].iloc[0] -= pc_2.iloc[0]
# Charge the closing trade on the last bar, using the cost of that same bar
# (the original subtracted the first bar's cost, pc_2.iloc[0]).
# The write goes through the frame in one step; the chained form
# `test['s_2'].iloc[-1] -= ...` leaves `test` unchanged under pandas
# Copy-on-Write.
test.iloc[-1, test.columns.get_loc('s_2')] -= pc_2.iloc[-1]

In [None]:
test[['r', 's', 's_1', 's_2']].sum().apply(np.exp)

In [None]:
test[['r', 's', 's_1', 's_2']].sum().apply(np.exp) - 1

In [None]:
test[['r', 's', 's_1', 's_2']].cumsum().apply(
    np.exp).plot(figsize=(10, 6), style=['-', '-', '--', '--']);

<img src='http://hilpisch.com/taim_logo.png' width="350px" align="right">

<br><br><br><a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:ai@tpq.io">ai@tpq.io</a>