In [None]:
# Adapted from https://github.com/AI4Finance-LLC/Deep-Reinforcement-Learning-for-Automated-Stock-Trading-Ensemble-Strategy-ICAIF-2020

In [11]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np

# common library
import pandas as pd
import numpy as np
import time
import gym

# RL models from stable-baselines
from stable_baselines import GAIL, SAC
from stable_baselines import ACER
from stable_baselines import PPO2
from stable_baselines import A2C
from stable_baselines import DDPG
from stable_baselines import TD3

from stable_baselines.ddpg.policies import DDPGPolicy
from stable_baselines.common.policies import MlpPolicy, MlpLstmPolicy, MlpLnLstmPolicy
from stable_baselines.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise, AdaptiveParamNoiseSpec
from stable_baselines.common.vec_env import DummyVecEnv
from preprocessing.preprocessors import *
from config import config

# customized env
from env.EnvMultipleStock_train import StockEnvTrain
from env.EnvMultipleStock_validation import StockEnvValidation
from env.EnvMultipleStock_trade import StockEnvTrade

In [15]:
def train_PPO(env_train, model_name, timesteps=50000):
    """Train a PPO2 agent, save it under the configured model directory, and return it.

    Parameters
    ----------
    env_train
        Training environment handed to ``PPO2``.
    model_name
        File name (without directory) used when saving the trained model
        under ``config.TRAINED_MODEL_DIR``.
    timesteps
        Value forwarded to ``model.learn`` as ``total_timesteps``.

    Returns
    -------
    The trained ``PPO2`` model.
    """
    began_at = time.time()

    # 'MlpPolicy' policy with ent_coef=0.005 and 8 minibatches per update.
    model = PPO2('MlpPolicy', env_train, ent_coef=0.005, nminibatches=8)
    model.learn(total_timesteps=timesteps)

    # Only the learning phase is timed; saving the model is excluded.
    elapsed_minutes = (time.time() - began_at) / 60

    model.save(f"{config.TRAINED_MODEL_DIR}/{model_name}")
    print('Training time (PPO): ', elapsed_minutes, ' minutes')
    return model

In [17]:
def DRL_prediction(df,
                   model,
                   name,
                   last_state,
                   iter_num,
                   start_date,
                   end_date,
                   turbulence_threshold,
                   initial):
    """Run a trained model through the trading environment for one date window.

    The window ``[start_date, end_date]`` is cut from ``df`` with
    ``data_split``; the model then takes one environment step per unique
    index label of that slice. The state rendered on the second-to-last step
    is written to ``results/last_state_<name>_<last step index>.csv`` and
    returned.

    Parameters
    ----------
    df
        Full dataset passed to ``data_split``.
    model
        Trained model exposing ``predict(obs)``.
    name
        Model name; forwarded to the environment as ``model_name`` and used
        in the output file name.
    last_state
        Forwarded to the environment as ``previous_state``. It is also the
        value returned unchanged when the window has only a single step, so
        no render is captured.
    iter_num
        Forwarded to the environment as ``iteration``.
    start_date, end_date
        Bounds passed to ``data_split``.
    turbulence_threshold
        Forwarded to the environment.
    initial
        Forwarded to the environment.

    Returns
    -------
    The value of ``env_trade.render()`` captured on the second-to-last step
    (presumably the environment's state vector -- confirm against
    ``StockEnvTrade.render``), or the incoming ``last_state`` if that step
    never occurs.

    Raises
    ------
    ValueError
        If the requested window contains no rows.
    """
    trade_data = data_split(df, start=start_date, end=end_date)

    # Computed once: the original re-evaluated ``index.unique()`` twice per
    # loop iteration, which made the loop quadratic in the window length.
    n_steps = len(trade_data.index.unique())
    if n_steps == 0:
        # Previously this case surfaced as an UnboundLocalError on the loop
        # variable when the output file name was built.
        raise ValueError(
            'No trading data between {} and {}'.format(start_date, end_date))

    env_trade = DummyVecEnv([lambda: StockEnvTrade(trade_data,
                                                   turbulence_threshold=turbulence_threshold,
                                                   initial=initial,
                                                   previous_state=last_state,
                                                   model_name=name,
                                                   iteration=iter_num)])
    obs_trade = env_trade.reset()

    # The state is captured one step before the end of the window.
    capture_step = n_steps - 2
    for step in range(n_steps):
        action, _states = model.predict(obs_trade)
        obs_trade, rewards, dones, info = env_trade.step(action)
        if step == capture_step:
            last_state = env_trade.render()

    df_last_state = pd.DataFrame({'last_state': last_state})
    # The file suffix is the final step index, exactly as before.
    df_last_state.to_csv('results/last_state_{}_{}.csv'.format(name, n_steps - 1), index=False)
    return last_state

In [19]:
def get_validation_sharpe(iteration,
                          account_value_path='results/account_value_train.csv',
                          periods_per_year=4):
    """Compute a scaled Sharpe ratio from a saved account-value CSV.

    The CSV is read with its first column as the index; the first remaining
    column is treated as the account value over time. Period-over-period
    percentage returns are computed and the result is
    ``sqrt(periods_per_year) * mean(returns) / std(returns)``.

    Parameters
    ----------
    iteration
        Currently unused; kept so existing callers keep working.
    account_value_path
        CSV file to read. The default is the *training* account-value file,
        despite the function's name.
    periods_per_year
        Scaling factor placed under the square root. The default of 4
        reproduces the original hard-coded ``4 ** 0.5``.

    Returns
    -------
    float
        The scaled Sharpe ratio.
    """
    account_value = pd.read_csv(account_value_path, index_col=0).iloc[:, 0]
    # Operate on the Series directly rather than assigning a one-column
    # DataFrame into a new column.
    period_return = account_value.pct_change(1)
    return (periods_per_year ** 0.5) * period_return.mean() / period_return.std()

In [20]:
# Reuse the cached preprocessed dataset when it exists; otherwise build it
# (preprocessing followed by the turbulence step) and cache it for later runs.
preprocessed_path = "done_data.csv"
if not os.path.exists(preprocessed_path):
    data = add_turbulence(preprocess_data())
    data.to_csv(preprocessed_path)
else:
    data = pd.read_csv(preprocessed_path, index_col=0)

# Quick sanity check: first rows and total element count (rows * columns).
print(data.head())
print(data.size)

   datadate   tic      adjcp       open       high        low      volume  \
0  20090102  AAPL  12.964286  12.268571  13.005714  12.165714  26641980.0   
1  20090102   AXP  19.330000  18.570000  19.520000  18.400000  10955620.0   
2  20090102    BA  45.250000  42.800000  45.560000  42.780000   7010171.0   
3  20090102   CAT  46.910000  44.910000  46.980000  44.710000   7116726.0   
4  20090102  CSCO  16.960000  16.410000  17.000000  16.250000  40977480.0   

   macd    rsi        cci    adx  turbulence  
0   0.0  100.0  66.666667  100.0         0.0  
1   0.0  100.0  66.666667  100.0         0.0  
2   0.0  100.0  66.666667  100.0         0.0  
3   0.0    0.0  66.666667  100.0         0.0  
4   0.0  100.0  66.666667  100.0         0.0  
1053360


In [21]:
# One row per date inside the in-sample window (both bounds inclusive).
insample_turbulence = (
    data[data['datadate'].between(20090102, 20101223)]
    .drop_duplicates(subset=['datadate'])
)

# 90th percentile of in-sample turbulence; not referenced again in the cells shown here.
insample_turbulence_threshold = np.quantile(insample_turbulence['turbulence'].values, .90)

# Quantile 1 is the maximum in-sample turbulence value.
turbulence_threshold = np.quantile(insample_turbulence['turbulence'].values, 1)
print("turbulence_threshold: ", turbulence_threshold)

turbulence_threshold:  158.23844086639005


In [22]:
# Date windows, as YYYYMMDD integers matching the `datadate` column.
start_date, end_date = 20090102, 20101223          # training
start_val, end_val = 20101224, 20111223            # validation
start_trading, end_trading = 20111224, 20121224    # trading

# NOTE(review): presumably forwarded as the `initial` flag of the trading
# environment -- confirm against the cells that call DRL_prediction.
initial = True

# Accumulators filled in by later cells; each one is its own empty list.
last_state_ensemble = []
ppo_sharpe_list, ddpg_sharpe_list, a2c_sharpe_list = [], [], []
model_use = []

In [37]:
# Wall-clock start of this run and the iteration index used in the model name.
start = time.time()
i = 1

# Single-environment vectorised wrapper around the training-window data.
train = data_split(data, start=start_date, end=end_date)
env_train = DummyVecEnv([lambda: StockEnvTrain(train)])

print("======PPO Training========")
model_ppo = train_PPO(
    env_train,
    model_name=f"PPO_100k_dow_{i}",
    timesteps=100000,
)

# The Sharpe ratio is computed from the account-value file on disk, not from
# the model object returned above.
sharpe_ppo = get_validation_sharpe(i)
print("PPO Sharpe Ratio: ", sharpe_ppo)

Training time (PPO):  3.361064012845357  minutes
PPO Sharpe Ratio:  0.18164089330037592
