In [10]:
import pandas as pd
import numpy as np
import time
import os

from config.config import rebalance_window, validation_window
from preprocessing.preprocessors import *
from config import config
from model.models import *

In [11]:
# Read the preprocessed dataset from the working directory. The preview below
# shows per-date, per-ticker rows with prices, volume, technical indicators
# (macd, rsi, cci, adx) and a turbulence column.
df = pd.read_csv(filepath_or_buffer='done_data.csv')
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,datadate,tic,adjcp,open,high,low,volume,macd,rsi,cci,adx,turbulence
0,0,20090102,AAPL,12.964286,12.268571,13.005714,12.165714,26641980.0,0.0,100.0,66.666667,100.0,0.0
1,1,20090102,AXP,19.33,18.57,19.52,18.4,10955620.0,0.0,100.0,66.666667,100.0,0.0
2,2,20090102,BA,45.25,42.8,45.56,42.78,7010171.0,0.0,100.0,66.666667,100.0,0.0
3,3,20090102,CAT,46.91,44.91,46.98,44.71,7116726.0,0.0,0.0,66.666667,100.0,0.0
4,4,20090102,CSCO,16.96,16.41,17.0,16.25,40977480.0,0.0,100.0,66.666667,100.0,0.0


In [12]:
# Restrict to the in-sample period [start_date, end_date) taken from config and
# keep a single row per date (drop_duplicates keeps the first occurrence).
insample_turbulence = df.loc[
    (df.datadate >= config.init_turbulence_sample_start_date)
    & (df.datadate < config.init_turbulence_sample_end_date)
].drop_duplicates(subset=['datadate'])

# The 90% quantile of the in-sample turbulence values is the baseline threshold.
insample_turbulence_threshold = np.quantile(insample_turbulence.turbulence.values, .90)
print(insample_turbulence_threshold)

96.08032158358378


### For testing purposes, we run only the first iteration of the rebalancing window

In [13]:
# Pin both window lengths to 63 for this single test iteration; this rebinds the
# names imported from config.config at the top of the notebook.
rebalance_window = 63
validation_window = 63
i = rebalance_window + validation_window

# Distinct datadate values in the trading period (20151001, 20200707].
in_trading_period = (df.datadate > 20151001) & (df.datadate <= 20200707)
unique_trade_date = df.loc[in_trading_period, 'datadate'].unique()

# Date on which training ends. With i equal to the sum of both windows the
# position evaluates to 0, i.e. the first date of the trading period.
train_end_date = unique_trade_date[i - rebalance_window - validation_window]

# Index label of the last row of df that carries the training end date.
end_date_index = df.index[df["datadate"] == train_end_date].tolist()[-1]
# First row of the look-back span of validation_window * 30 rows ending at
# end_date_index (30 is presumably the number of tickers per date -- TODO confirm).
start_date_index = end_date_index + 1 - validation_window * 30

# Training frame: from the configured in-sample start up to the training end date.
train = data_split(df,
                   start=config.init_turbulence_sample_start_date,
                   end=train_end_date)
train.head()

Unnamed: 0.1,Unnamed: 0,datadate,tic,adjcp,open,high,low,volume,macd,rsi,cci,adx,turbulence
0,0,20090102,AAPL,12.964286,12.268571,13.005714,12.165714,26641980.0,0.0,100.0,66.666667,100.0,0.0
0,1,20090102,AXP,19.33,18.57,19.52,18.4,10955620.0,0.0,100.0,66.666667,100.0,0.0
0,2,20090102,BA,45.25,42.8,45.56,42.78,7010171.0,0.0,100.0,66.666667,100.0,0.0
0,3,20090102,CAT,46.91,44.91,46.98,44.71,7116726.0,0.0,0.0,66.666667,100.0,0.0
0,4,20090102,CSCO,16.96,16.41,17.0,16.25,40977480.0,0.0,100.0,66.666667,100.0,0.0


In [14]:
# Rows of the look-back span [start_date_index, end_date_index], reduced to one
# row per date (drop_duplicates keeps the first occurrence of each datadate).
historical_turbulence = (
    df.iloc[start_date_index:end_date_index + 1, :]
    .drop_duplicates(subset=['datadate'])
)
historical_turbulence_mean = np.mean(historical_turbulence.turbulence.values)

# Threshold selection:
#   * mean of recent turbulence above the in-sample 90% quantile -> the recent
#     market is treated as volatile and the 90% quantile itself is the threshold;
#   * otherwise -> the threshold is raised to quantile 1 (the maximum) of the
#     in-sample turbulence values.
turbulence_threshold = (
    insample_turbulence_threshold
    if historical_turbulence_mean > insample_turbulence_threshold
    else np.quantile(insample_turbulence.turbulence.values, 1)
)

print("historical_turbulence_mean: ", historical_turbulence_mean)
print("turbulence_threshold: ", turbulence_threshold)

historical_turbulence_mean:  86.50335037987186
turbulence_threshold:  171.09407156310584


In [15]:
def _make_train_env():
    """Build the training environment over the `train` frame."""
    return StockEnvTrain(train)


# DummyVecEnv takes a list of zero-argument environment factories.
env_train = DummyVecEnv([_make_train_env])

In [16]:
# Validation frame: starts at the training end date and spans the validation window.
validation_start = unique_trade_date[i - rebalance_window - validation_window]
validation_end = unique_trade_date[i - rebalance_window]
validation = data_split(df, start=validation_start, end=validation_end)


def _make_validation_env():
    """Build the validation environment with the selected turbulence threshold."""
    return StockEnvValidation(validation,
                              turbulence_threshold=turbulence_threshold,
                              iteration=i)


env_val = DummyVecEnv([_make_validation_env])
# Initial observation, passed to DRL_validation later on.
obs_val = env_val.reset()

In [17]:
# Report the training range. The start bound is read from config -- the same
# value passed as `start` to data_split when `train` was built -- instead of a
# hard-coded literal, so the log always matches the data actually used.
print("======Model training from: ", config.init_turbulence_sample_start_date, "to ",
      unique_trade_date[i - rebalance_window - validation_window])
print("======A2C Training========")
# Train A2C on the training environment for 30000 timesteps; the model name is
# tagged with the iteration number i.
model_a2c = train_A2C(env_train, model_name="A2C_30k_dow_{}".format(i), timesteps=30000)

Terminal Asset Value: 2192825.1043001395
Sharpe Ratio:  0.8003756350899986
Terminal Asset Value: 2536769.4516109326
Sharpe Ratio:  0.8887743514588239
Terminal Asset Value: 2002392.0343921601
Sharpe Ratio:  0.6377205892729272
Terminal Asset Value: 2209729.4645457105
Sharpe Ratio:  0.7918861218140326
Terminal Asset Value: 2024192.1328580913
Sharpe Ratio:  0.6789549663819399
Terminal Asset Value: 1809409.3556987317
Sharpe Ratio:  0.5368464930327658
Terminal Asset Value: 2032694.5754586123
Sharpe Ratio:  0.6125026221370751
Terminal Asset Value: 1803298.5639141698
Sharpe Ratio:  0.5393808827576971
Terminal Asset Value: 2016851.4790737156
Sharpe Ratio:  0.6509927288148067
Terminal Asset Value: 1803440.6554027998
Sharpe Ratio:  0.5433700316762237
Terminal Asset Value: 2511614.071808287
Sharpe Ratio:  0.7555621487452442
Terminal Asset Value: 2554102.608934414
Sharpe Ratio:  0.7998708019768572
Terminal Asset Value: 2561385.3895269167
Sharpe Ratio:  0.829474729739545
Terminal Asset Value: 212883

In [18]:
# Bounds of the validation period for this iteration.
val_from = unique_trade_date[i - rebalance_window - validation_window]
val_to = unique_trade_date[i - rebalance_window]
print("======A2C Validation from: ", val_from, "to ", val_to)

# Run the trained model on the validation environment, then read back the
# Sharpe ratio for iteration i via get_validation_sharpe.
DRL_validation(model=model_a2c,
               test_data=validation,
               test_env=env_val,
               test_obs=obs_val)
sharpe_a2c = get_validation_sharpe(i)
print("A2C Sharpe Ratio: ", sharpe_a2c)

Terminal Asset Value: 1003754.6111621729
Sharpe Ratio:  0.17691121239550509
A2C Sharpe Ratio:  0.022288717720828993
