# Part 0. Google Colab Setup

In [None]:
# Mount Google Drive into the Colab file system.
from google.colab import drive

ROOT = "/content/drive"  # Drive mount point
print(ROOT)

# Always remount (force_remount=True), even when a mount already exists.
drive.mount(ROOT, force_remount=True)


In [None]:
% cd /content/drive/MyDrive/rl_forex

## Anaconda Setup

In [None]:
% env PYTHONPATH=

In [None]:
%%bash

# Run the Miniconda installer found in the current working directory,
# passing -b -f and installing into the prefix given with -p.
installer=Miniconda3-py37_4.9.2-Linux-x86_64.sh
prefix=/usr/local
chmod +x "${installer}"
"./${installer}" -b -f -p "${prefix}"

In [None]:
%%bash

# Pin conda and Python from the defaults channel first ...
conda install --yes --channel defaults conda=4.9.2 python=3.7
# ... then add conda-forge and enforce strict channel priority.
conda config --add channels conda-forge
conda config --set channel_priority strict

In [None]:
import sys

# site-packages of the rl_fx conda environment, and Colab's preinstalled packages.
CONDA_SITE_PACKAGES = "/usr/local/envs/rl_fx/lib/python3.7/site-packages"
COLAB_DIST_PACKAGES = "/usr/local/lib/python3.7/dist-packages"

# Make the conda environment's packages importable. Skip when the entry is
# already present so re-running the cell does not pile up duplicates.
if CONDA_SITE_PACKAGES not in sys.path:
    sys.path.append(CONDA_SITE_PACKAGES)

# Remove Colab's preinstalled libraries from the import path.
# list.remove raises ValueError when the entry is absent (cell re-run, or a
# runtime with a different Python version), so only remove it when present.
if COLAB_DIST_PACKAGES in sys.path:
    sys.path.remove(COLAB_DIST_PACKAGES)

## Create the Conda Environment from environment.yml

In [None]:
%%bash

# Create the conda environment described by environment.yml in the working
# directory (presumably the rl_fx environment referenced elsewhere — confirm).
conda env create --file environment.yml

In [None]:
! source activate rl_fx && pip install yfinance==0.1.55


# Train

In [None]:
%load_ext autoreload
%autoreload

%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import datetime
import time
from statistics import mean
from itertools import accumulate
from empyrical import sortino_ratio

from finrl.config import config
from finrl.preprocessing.preprocessors import FeatureEngineer
from finrl.preprocessing import data
from finrl.preprocessing.data import data_split
from finrl.trade.backtest import evaluate_policy_rewards
from finrl.env.env_stocktrading import StockTradingEnv
from finrl.model.models import A2C
from finrl.trade.backtest import BackTestStats, BaselineStats, BackTestPlot

import sys

In [None]:
# Training month; the data files are named "<MM>_<year>.csv" per currency pair.
month = 1
year = 17

# Zero-pad the month to two digits (1 -> "01"); the year is used as given.
str_month = f"{month:02d}"
monthdata = f"{str_month}_{year}"
print(monthdata)

EURUSD_df = data.load_ohlc_dataset(f"15min/EURUSD/{monthdata}.csv")
GBPUSD_df = data.load_ohlc_dataset(f"15min/GBPUSD/{monthdata}.csv")
USDJPY_df = data.load_ohlc_dataset(f"15min/USDJPY/{monthdata}.csv")
# Fixed: this frame used to be read from the USDJPY folder, so the "USDCHF"
# series was a silent duplicate of USDJPY.
USDCHF_df = data.load_ohlc_dataset(f"15min/USDCHF/{monthdata}.csv")

In [None]:
# Technical indicators to compute, keyed by the name of the resulting feature.
# Every entry carries a 'talib_name' plus the parameters for that indicator.
# NOTE(review): the original comment described 'time_period' as seconds;
# presumably it is a look-back length in bars — confirm against FeatureEngineer.
param_map = {
    # Moving averages
    'sma_9': dict(talib_name='SMA', time_period=9),
    'ema_9': dict(talib_name='EMA', time_period=9),
    'sma_21': dict(talib_name='SMA', time_period=21),
    'ema_21': dict(talib_name='EMA', time_period=21),
    # Bollinger bands
    'bbands_9': dict(talib_name='BBANDS', time_period=9, nbdevup=2.0, nbdevdn=2.0),
    'bbands_12': dict(talib_name='BBANDS', time_period=12, nbdevup=2.0, nbdevdn=2.0),
    # MACD with separate entry / exit settings
    'macd_entry': dict(talib_name='MACD', fastperiod=12, slowperiod=26, signalperiod=9),
    'macd_exit': dict(talib_name='MACD', fastperiod=19, slowperiod=39, signalperiod=9),
    # Oscillators and momentum
    'stoch': dict(
        talib_name='STOCH',
        fastk_period=5,
        slowk_period=3,
        slowk_matype=0,
        slowd_period=3,
        slowd_matype=0,
    ),
    'rsi_14': dict(talib_name='RSI', time_period=14),
    'rsi_4': dict(talib_name='RSI', time_period=4),
    'mom_10': dict(talib_name='MOM', time_period=10),
    'stochrsi_14': dict(
        talib_name='STOCHRSI',
        time_period=14,
        fastk_period=5,
        fastd_period=3,
        fastd_matype=0,
    ),
    # Adaptive / smoothed averages
    'kama_30': dict(talib_name='KAMA', time_period=30),
    't3_5': dict(talib_name='T3', time_period=5, vfactor=0.7),
    # Range-based measures and time-series forecast
    'atr_14': dict(talib_name='ATR', time_period=14),
    'natr_14': dict(talib_name='NATR', time_period=14),
    'tsf_14': dict(talib_name='TSF', time_period=14),
}

In [None]:
from finrl.preprocessing import data, preprocessors


def _train_features(ohlc_df):
    """Run FeatureEngineer with the indicators in param_map over one OHLC frame.

    Returns whatever preprocess_data() returns, which the callers below unpack
    as (processed frame, list of technical-indicator names).
    """
    engineer = preprocessors.FeatureEngineer(
        ohlc_df,
        tech_indicator_params_map=param_map,
        use_technical_indicator=True,
        user_defined_feature=False,
    )
    return engineer.preprocess_data()


# One processed frame per currency pair. tech_indicator_list is rebound on
# every call, so it ends up holding the value from the last (USDCHF) call.
EURUSD_train, tech_indicator_list = _train_features(EURUSD_df)
GBPUSD_train, tech_indicator_list = _train_features(GBPUSD_df)
USDJPY_train, tech_indicator_list = _train_features(USDJPY_df)
USDCHF_train, tech_indicator_list = _train_features(USDCHF_df)

In [None]:
# Processed training frames keyed by currency pair; passed to the environment
# through env_kwargs["dfs_list"].
dfs_list = dict(
    EURUSD=EURUSD_train,
    GBPUSD=GBPUSD_train,
    USDJPY=USDJPY_train,
    USDCHF=USDCHF_train,
)

In [None]:
# Number of traded instruments (one per entry in dfs_list).
stock_dimension = len(dfs_list)

# Environment state size: 1 + 3 values per instrument + one value per
# technical indicator per instrument.
state_space = 1 + 3*stock_dimension + len(tech_indicator_list)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

# Size handed to evaluate_lstm_rewards: 2 + 4 values per instrument + indicators.
model_input_space = 2 + 4*stock_dimension + len(tech_indicator_list)*stock_dimension
# Fixed: this line was labelled "Input Dimension" but printed stock_dimension.
print(f"Input Dimension: {model_input_space}, State Space: {state_space}")


In [None]:
# Keyword arguments used to build StockTradingEnv(**env_kwargs).
env_kwargs = dict(
    hmax=10000,
    dfs_list=dfs_list,
    initial_amount=100000,
    # No transaction costs on either side.
    buy_cost_pct=0,
    sell_cost_pct=0,
    state_space=state_space,
    tech_indicator_list=tech_indicator_list,
    # One action component per instrument.
    action_space=stock_dimension,
    reward_scaling=1e-4,
)

In [None]:
bo_iter = 0  # counts A2C_train calls; used to number the saved plots and models


def A2C_train(learning_rate_val, epsilon):
    """Train an A2C agent with the given hyper-parameters and score it.

    Used as the objective function of the Bayesian optimisation, so the
    parameter names must match the keys of ``pbounds``.

    Args:
        learning_rate_val: value passed to the model as ``learning_rate``.
        epsilon: value passed to the model as ``epsilon``.

    Returns:
        The Sortino ratio of the per-step rewards of the first evaluation
        episode.

    Side effects:
        Increments the global ``bo_iter`` and writes, numbered by it, the
        accumulated-reward plot under ``plots/bo_results/<monthdata>/`` and the
        model (plus hidden state, when there is one) under
        ``saved_models/<monthdata>/``. Reads the globals ``env_kwargs``,
        ``EURUSD_train``, ``model_input_space`` and ``monthdata``.
    """
    global bo_iter

    import os  # local import: only used to create the output folders below

    start = time.time()

    bo_iter += 1

    e_train_gym = StockTradingEnv(**env_kwargs)
    print(e_train_gym.data)

    env_train, _ = e_train_gym.get_sb_env()

    # Train for a fixed number of passes over the training month.
    num_episodes = 3
    total_timesteps = num_episodes * len(EURUSD_train)

    model_name = "a2c"
    MODELS = {"a2c": A2C}
    MODEL_PARAMS = {"n_steps": 20, "ent_coef": 0.001, "learning_rate": learning_rate_val, 'epsilon': epsilon}
    model_a2c = MODELS[model_name](
            policy="MlpLstmPolicy",
            env=env_train,
            tensorboard_log=f"{config.TENSORBOARD_LOG_DIR}/{model_name}",
            verbose=1,
            policy_kwargs=None,
            seed = 1,
            **MODEL_PARAMS,
        )
    trained_a2c, hidden_states = model_a2c.learn(total_timesteps=total_timesteps, tb_log_name='a2c')

    # Get a new wrapped env from the same gym instance for the evaluation run.
    env_train, _ = e_train_gym.get_sb_env()
    # NOTE(review): evaluate_lstm_rewards is not among the imports visible in
    # this notebook (only evaluate_policy_rewards is) — confirm it is in scope.
    episodes_rewards, episode_lengths, rewards_memory_episodes = evaluate_lstm_rewards(trained_a2c, env_train, model_input_space, monthdata, deterministic=False)

    # Create the per-month output folders up front so saving cannot fail on a
    # month that has never been run before.
    plot_dir = f"plots/bo_results/{monthdata}"
    model_dir = f"saved_models/{monthdata}"
    os.makedirs(plot_dir, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    fig, axs = plt.subplots()

    axs.plot(list(accumulate(rewards_memory_episodes[0])))
    axs.set_title("Accumulated rewards (Gains in NOP) against timesteps")
    fig.tight_layout()

    # Fixed: these two paths referenced the undefined name `month_data`
    # (NameError); the global is called `monthdata`.
    fig.savefig(f'{plot_dir}/iteration_{bo_iter}')

    trained_a2c.save(f"{model_dir}/model-{bo_iter}")
    if hidden_states is not None:
        with open(f"{model_dir}/hidden_state-{bo_iter}.npy", 'wb') as f:
            np.save(f, hidden_states)

    mean_reward = mean(episodes_rewards)
    print("Mean Episodic Reward : ", mean_reward)

    # Only the first evaluation episode is scored.
    sortino = sortino_ratio(pd.Series(rewards_memory_episodes[0]))
    print("Sortino Ratio :", sortino)

    end = time.time()
    print("Elapsed time: ", end-start)

    return sortino

In [None]:
from bayes_opt import BayesianOptimization

# (lower, upper) search bounds; the keys match the A2C_train argument names.
pbounds = dict(
    learning_rate_val=(0.00001, 0.002),
    epsilon=(1e-06, 2e-05),
)

# verbose = 1 prints only when a maximum is observed, verbose = 0 is silent
optimizer = BayesianOptimization(
    f=A2C_train,
    pbounds=pbounds,
    random_state=1,
    verbose=2,
)

In [None]:
import os

from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events

# Optimisation steps are logged to a per-month JSON file. Make sure its folder
# exists first, so logging does not fail for a month that has no results yet.
bo_log_path = f"results/{monthdata}/BO_logs.json"
os.makedirs(os.path.dirname(bo_log_path), exist_ok=True)

logger = JSONLogger(path=bo_log_path)
optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)

# 1 initial point + 2 optimisation iterations; each one calls A2C_train once.
optimizer.maximize(init_points=1, n_iter=2)

# Trade

In [None]:
# Month (MM_YY) of the data used for the trade run.
TRADE_MONTHDATA = "01_18"

EURUSD_trade_df = data.load_ohlc_dataset(f"15min/EURUSD/{TRADE_MONTHDATA}.csv")
GBPUSD_trade_df = data.load_ohlc_dataset(f"15min/GBPUSD/{TRADE_MONTHDATA}.csv")
USDJPY_trade_df = data.load_ohlc_dataset(f"15min/USDJPY/{TRADE_MONTHDATA}.csv")
# Fixed: this frame used to be read from the USDJPY folder, so the "USDCHF"
# series was a silent duplicate of USDJPY.
USDCHF_trade_df = data.load_ohlc_dataset(f"15min/USDCHF/{TRADE_MONTHDATA}.csv")


def _trade_features(ohlc_df):
    """Run FeatureEngineer with the indicators in param_map over one OHLC frame.

    Returns whatever preprocess_data() returns, which the callers below unpack
    as (processed frame, list of technical-indicator names).
    """
    return preprocessors.FeatureEngineer(
        ohlc_df,
        tech_indicator_params_map=param_map,
        use_technical_indicator=True,
        user_defined_feature=False,
    ).preprocess_data()


EURUSD_trade, tech_indicator_list = _trade_features(EURUSD_trade_df)
GBPUSD_trade, tech_indicator_list = _trade_features(GBPUSD_trade_df)
USDJPY_trade, tech_indicator_list = _trade_features(USDJPY_trade_df)
USDCHF_trade, tech_indicator_list = _trade_features(USDCHF_trade_df)

# From here on dfs_list, stock_dimension, state_space and model_input_space
# describe the trade data; they overwrite the values set for training.
dfs_list = {
    "EURUSD" : EURUSD_trade,
    "GBPUSD" : GBPUSD_trade,
    "USDJPY" : USDJPY_trade,
    "USDCHF" : USDCHF_trade
}
stock_dimension = len(dfs_list)
state_space = 1 + 3*stock_dimension + len(tech_indicator_list)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")
model_input_space = 2 + 4*stock_dimension + len(tech_indicator_list)*stock_dimension
# Fixed: this line was labelled "Input Dimension" but printed stock_dimension.
print(f"Input Dimension: {model_input_space}, State Space: {state_space}")

In [None]:
# Keyword arguments used to build StockTradingEnv(**env_kwargs); this rebinds
# env_kwargs with whatever dfs_list / state_space currently hold.
env_kwargs = dict(
    hmax=10000,
    dfs_list=dfs_list,
    initial_amount=100000,
    # No transaction costs on either side.
    buy_cost_pct=0,
    sell_cost_pct=0,
    state_space=state_space,
    tech_indicator_list=tech_indicator_list,
    # One action component per instrument.
    action_space=stock_dimension,
    reward_scaling=1e-4,
)

In [None]:
# Build the trading environment from the current env_kwargs.
e_trade_gym = StockTradingEnv(**env_kwargs)
# get_sb_env() returns a pair; only its first element is kept as env_trade.
env_trade, _ = e_trade_gym.get_sb_env()

In [None]:
from stable_baselines3.a2c import A2C

# NOTE(review): this rebinds A2C to the stable_baselines3 class, while training
# used finrl.model.models.A2C — confirm that the saved models load with it.

if not optimizer.res:
    raise RuntimeError("No optimisation results available; run optimizer.maximize() first.")

# A2C_train numbers its saved models from 1 in call order, so the position of
# the best-scoring step (first one on ties) gives the model number.
# This assumes bo_iter was 0 when the optimisation started — otherwise the
# numbering is shifted.
best_iter = 1 + max(
    range(len(optimizer.res)),
    key=lambda idx: optimizer.res[idx]["target"],
)

# Fixed: models are saved under saved_models/<monthdata>/, but the load path
# omitted the month folder.
trained_a2c = A2C.load(f"saved_models/{monthdata}/model-{best_iter}")

In [None]:
# Run the loaded model through the trade environment (deterministic=False).
(
    episodes_rewards,
    episode_lengths,
    rewards_memory_episodes,
) = evaluate_lstm_rewards(
    trained_a2c,
    env_trade,
    model_input_space,
    monthdata,
    deterministic=False,
)

In [None]:
from matplotlib import gridspec

def plot_rewards_with_spread(rewards_memory_episodes : list):
    """Save one two-panel figure per episode: rewards above, EURUSD spread below.

    Args:
        rewards_memory_episodes: one sequence of rewards per episode.

    The spread (ask_close minus bid_close for EURUSD) is read from the
    module-level ``e_trade_gym.df``. Each figure is written to
    ``plots/performance_<episode index>``.
    """
    for episode_idx, episode_rewards in enumerate(rewards_memory_episodes):
        figure = plt.figure()
        layout = gridspec.GridSpec(2, 1, hspace=0)

        # Top panel: rewards of this episode.
        rewards_ax = plt.subplot(layout[0])
        figure.suptitle("Rewards (Gains in NOP)(Top) &\n Bid-Ask spreads(Bottom) against timesteps")
        rewards_line, = rewards_ax.plot(episode_rewards)

        # Bottom panel shares the x axis; hide its last y tick label, which
        # would otherwise touch the top panel because hspace is 0.
        spread_ax = plt.subplot(layout[1], sharex = rewards_ax)
        major_ticks = spread_ax.yaxis.get_major_ticks()
        major_ticks[-1].label1.set_visible(False)

        market = e_trade_gym.df
        spread = market[('EURUSD', 'ask_close')] - market[('EURUSD','bid_close')]
        spread_line, = spread_ax.plot(spread, color='b', linestyle='--')

        # Only the bottom panel keeps its x tick labels.
        plt.setp(rewards_ax.get_xticklabels(), visible=False)
        figure.savefig(f'plots/performance_{episode_idx}')

In [None]:
# Save (and show inline) one rewards/spread figure per evaluated episode.
plot_rewards_with_spread(rewards_memory_episodes)

In [None]:
# Display episodes_rewards as returned by evaluate_lstm_rewards.
episodes_rewards

In [None]:
# Backtest statistics for the first evaluated episode.
# NOTE(review): the argument is named account_value but receives the episode's
# rewards series — confirm this is the input BackTestStats expects.
perf_stats_all = BackTestStats(account_value=rewards_memory_episodes[0])