In [None]:
%load_ext autoreload
%autoreload

%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import datetime
from statistics import mean
from itertools import accumulate
from empyrical import sortino_ratio

from finrl.config import config
from finrl.preprocessing.preprocessors import FeatureEngineer
from finrl.preprocessing import data
from finrl.preprocessing.data import data_split
from finrl.trade.backtest import evaluate_policy_rewards, evaluate_lstm_rewards
from finrl.env.env_stocktrading import StockTradingEnv
from finrl.model.models import DRLAgent
from finrl.trade.backtest import BackTestStats, BaselineStats, BackTestPlot

import sys

In [None]:
# Raw 15-minute OHLC candles used for training, one CSV per currency pair.
# The files are read in the same order as the names on the left-hand side.
_TRAIN_CSV_PATHS = (
    "15min/EURUSD/01_17.csv",
    "15min/GBPUSD/01_17.csv",
    "15min/USDJPY/01_17.csv",
)
EURUSD_df, GBPUSD_df, USDJPY_df = map(data.load_ohlc_dataset, _TRAIN_CSV_PATHS)

In [None]:
# Technical-indicator configuration passed to FeatureEngineer.
# Each key is a label for one indicator; each value names the TA-Lib function
# ('talib_name') plus the keyword parameters for it.
# NOTE(review): 'time_period' is presumably a look-back length counted in bars
# (15-minute candles here) rather than seconds -- confirm against FeatureEngineer.
# Insertion order is kept exactly as before in case downstream code relies on it.
param_map = {
    # Moving averages
    'sma_9': dict(talib_name='SMA', time_period=9),
    'ema_9': dict(talib_name='EMA', time_period=9),
    'sma_21': dict(talib_name='SMA', time_period=21),
    'ema_21': dict(talib_name='EMA', time_period=21),
    # Bollinger bands
    'bbands_9': dict(talib_name='BBANDS', time_period=9, nbdevup=2.0, nbdevdn=2.0),
    'bbands_12': dict(talib_name='BBANDS', time_period=12, nbdevup=2.0, nbdevdn=2.0),
    # MACD with two parameter sets (labelled entry / exit)
    'macd_entry': dict(talib_name='MACD', fastperiod=12, slowperiod=26, signalperiod=9),
    'macd_exit': dict(talib_name='MACD', fastperiod=19, slowperiod=39, signalperiod=9),
    # Oscillators / momentum
    'stoch': dict(
        talib_name='STOCH',
        fastk_period=5,
        slowk_period=3,
        slowk_matype=0,
        slowd_period=3,
        slowd_matype=0,
    ),
    'rsi_14': dict(talib_name='RSI', time_period=14),
    'rsi_4': dict(talib_name='RSI', time_period=4),
    'mom_10': dict(talib_name='MOM', time_period=10),
    'stochrsi_14': dict(
        talib_name='STOCHRSI',
        time_period=14,
        fastk_period=5,
        fastd_period=3,
        fastd_matype=0,
    ),
    # Adaptive / smoothed averages
    'kama_30': dict(talib_name='KAMA', time_period=30),
    't3_5': dict(talib_name='T3', time_period=5, vfactor=0.7),
    # Volatility and forecast
    'atr_14': dict(talib_name='ATR', time_period=14),
    'natr_14': dict(talib_name='NATR', time_period=14),
    'tsf_14': dict(talib_name='TSF', time_period=14),
}

In [None]:
from finrl.preprocessing import data, preprocessors

def _engineer_features(ohlc_df):
    """Run FeatureEngineer on one OHLC frame using the indicators in `param_map`.

    Returns whatever `preprocess_data()` returns; the callers below unpack it
    as a `(processed_frame, tech_indicator_list)` pair.
    """
    engineer = preprocessors.FeatureEngineer(
        ohlc_df,
        tech_indicator_params_map=param_map,
        use_technical_indicator=True,
        user_defined_feature=False,
    )
    return engineer.preprocess_data()


# Same configuration for every pair; `tech_indicator_list` is rebound on each
# call, so the value that survives is the one produced for USDJPY.
EURUSD_train, tech_indicator_list = _engineer_features(EURUSD_df)
GBPUSD_train, tech_indicator_list = _engineer_features(GBPUSD_df)
USDJPY_train, tech_indicator_list = _engineer_features(USDJPY_df)

In [None]:
# Feature-engineered training frames keyed by currency-pair symbol
# (despite the name, this is a dict, not a list).
dfs_list = dict(
    EURUSD=EURUSD_train,
    GBPUSD=GBPUSD_train,
    USDJPY=USDJPY_train,
)

In [None]:
# Number of traded instruments: one entry per currency pair in dfs_list.
stock_dimension = len(dfs_list)

# Size of the environment observation: one scalar, three values per instrument,
# and every technical indicator once per instrument.
# NOTE(review): the meaning of the "1" and "3" terms is defined by
# StockTradingEnv, which is not visible here -- confirm against the env.
state_space = 1 + 3*stock_dimension + len(tech_indicator_list)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

# Size of the model input. Arithmetically this equals
# state_space + stock_dimension + 1; presumably the observation concatenated
# with the previous action and previous reward -- TODO confirm against the model.
model_input_space = 2 + 4*stock_dimension + len(tech_indicator_list)*stock_dimension
# Fix: report the input dimension that was just computed. The original line
# printed stock_dimension under the "Input Dimension" label, so
# model_input_space was never shown.
print(f"Input Dimension: {model_input_space}, State Space: {state_space}")


In [None]:
# Keyword arguments used to construct the training StockTradingEnv.
# Transaction costs are set to zero for both buys and sells.
env_kwargs = dict(
    hmax=10000,
    dfs_list=dfs_list,
    initial_amount=100000,
    buy_cost_pct=0,
    sell_cost_pct=0,
    state_space=state_space,
    tech_indicator_list=tech_indicator_list,
    action_space=stock_dimension,  # one action component per instrument
    reward_scaling=1e-4,
)

In [None]:
# Train an A2C agent on the training environment, then evaluate it on that same
# environment and report cumulative reward, mean episodic reward, Sortino ratio
# and wall-clock time for the whole cell.
import time
# NOTE(review): `a2c` is imported as a top-level module rather than from
# stable_baselines3; it appears to be a project-local A2C variant that accepts
# the extra `model_input_space` argument used below -- confirm.
from a2c import A2C

# Timer covers everything in this cell: env construction, training,
# evaluation and plotting.
start = time.time()

e_train_gym = StockTradingEnv(**env_kwargs)
# NOTE(review): this prints the environment's full `data` attribute, which may
# be large; consider showing only a summary.
print(e_train_gym.data)

# get_sb_env() returns a pair; only the first element (the env) is used.
env_train, _ = e_train_gym.get_sb_env()

agent = DRLAgent(env = env_train)

# Training budget: three passes' worth of steps, measured by the length of the
# EURUSD training frame.
# NOTE(review): assumes the three pair frames have the same length -- confirm.
num_episodes = 3
total_timesteps = num_episodes * len(EURUSD_train)

model_name = "a2c"
# Name -> model class lookup; only A2C is registered here.
MODELS = {"a2c": A2C}
# NOTE(review): learning_rate and epsilon look like values from a
# hyper-parameter search; their provenance is not recorded in this notebook.
A2C_PARAMS = {"n_steps": 20, "ent_coef": 0.001, "learning_rate": 0.0008398737893581223, 'epsilon': 1.4686165375401005e-05}
# Build the model with an LSTM policy and a fixed seed; TensorBoard logs go to
# a per-model sub-directory of config.TENSORBOARD_LOG_DIR.
model_a2c = MODELS[model_name](
            policy="MlpLstmPolicy",
            env=env_train,
            model_input_space=model_input_space,
            tensorboard_log=f"{config.TENSORBOARD_LOG_DIR}/{model_name}",
            verbose=1,
            policy_kwargs=None,
            seed = 1,
            **A2C_PARAMS,
        )
trained_a2c = agent.train_model(model=model_a2c, 
                            tb_log_name='a2c',
                            total_timesteps=total_timesteps)

# Obtain the wrapped env again from the same gym instance before evaluating.
# This is in-sample evaluation: the data is the training data.
env_train, _ = e_train_gym.get_sb_env()
# deterministic=False is forwarded to the evaluator.
# NOTE(review): presumably this samples actions stochastically, so results may
# vary between runs despite the model seed -- confirm.
episodes_rewards, episode_lengths, rewards_memory_episodes = evaluate_policy_rewards(trained_a2c, env_train, deterministic=False)

fig, axs = plt.subplots()

# Running sum of the first evaluated episode's rewards.
axs.plot(list(accumulate(rewards_memory_episodes[0])))
axs.set_title("Accumulated rewards (Gains in NOP) against timesteps")
fig.tight_layout()


mean_reward = mean(episodes_rewards)
print("Mean Episodic Reward : ", mean_reward)

# Sortino ratio computed over the first episode's reward sequence.
# NOTE(review): empyrical's sortino_ratio is designed for a series of returns;
# here it receives raw rewards -- confirm this is the intended metric.
sortino = sortino_ratio(pd.Series(rewards_memory_episodes[0]))
print("Sortino Ratio :", sortino)

end = time.time()
print("Elapsed time: ", end-start)

In [None]:
# --- Out-of-sample (trading) data -------------------------------------------
# Load the 01_18 candles for each pair, in the same order as before.
EURUSD_trade_df = data.load_ohlc_dataset("15min/EURUSD/01_18.csv")
GBPUSD_trade_df = data.load_ohlc_dataset("15min/GBPUSD/01_18.csv")
USDJPY_trade_df = data.load_ohlc_dataset("15min/USDJPY/01_18.csv")

# Identical FeatureEngineer settings for every pair, collected once.
_trade_fe_options = dict(
    tech_indicator_params_map=param_map,
    use_technical_indicator=True,
    user_defined_feature=False,
)

# `tech_indicator_list` is rebound by each call; the USDJPY result is the one
# that remains afterwards.
EURUSD_trade, tech_indicator_list = preprocessors.FeatureEngineer(
    EURUSD_trade_df, **_trade_fe_options
).preprocess_data()
GBPUSD_trade, tech_indicator_list = preprocessors.FeatureEngineer(
    GBPUSD_trade_df, **_trade_fe_options
).preprocess_data()
USDJPY_trade, tech_indicator_list = preprocessors.FeatureEngineer(
    USDJPY_trade_df, **_trade_fe_options
).preprocess_data()

# NOTE: from here on `dfs_list`, `stock_dimension` and `state_space` refer to
# the trading data; the training values defined in earlier cells are overwritten.
dfs_list = dict(
    EURUSD=EURUSD_trade,
    GBPUSD=GBPUSD_trade,
    USDJPY=USDJPY_trade,
)
stock_dimension = len(dfs_list)
# Same formula as the training cell, with the per-instrument terms factored.
state_space = 1 + stock_dimension * (3 + len(tech_indicator_list))
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")


In [None]:
# Constructor arguments for the trading StockTradingEnv. The settings match the
# training configuration; `dfs_list`, `state_space`, `tech_indicator_list` and
# `stock_dimension` take whatever values are current when this cell runs.
env_kwargs = dict(
    hmax=10000,
    dfs_list=dfs_list,
    initial_amount=100000,
    buy_cost_pct=0,
    sell_cost_pct=0,
    state_space=state_space,
    tech_indicator_list=tech_indicator_list,
    action_space=stock_dimension,
    reward_scaling=1e-4,
)

In [None]:
# Instantiate the trading environment from the current env_kwargs, then take
# the wrapped env from the pair returned by get_sb_env(); the second element
# is discarded.
e_trade_gym = StockTradingEnv(**env_kwargs)
_trade_env_pair = e_trade_gym.get_sb_env()
env_trade, _ = _trade_env_pair

In [None]:
# Evaluate the trained model on the trading environment.
# This rebinds the three result names produced by the training cell.
(
    episodes_rewards,
    episode_lengths,
    rewards_memory_episodes,
) = evaluate_lstm_rewards(
    trained_a2c,
    env_trade,
    deterministic=False,
)

In [None]:
from stable_baselines3.common.evaluation import evaluate_policy

def evaluateCallback(locals_, globals_):
    """Debug callback: dump the evaluation loop's namespaces.

    Prints `locals_` on every call. When `locals_["done"]` is truthy it also
    prints a "locals" header followed by `locals_`, then a "globals" header
    followed by `globals_`.

    Args:
        locals_: mapping of the caller's local variables; must contain "done".
        globals_: mapping of the caller's global variables.

    Returns:
        None.
    """
    print(locals_)
    if not locals_["done"]:
        return
    for header, namespace in (("locals", locals_), ("globals", globals_)):
        print(header)
        print(namespace)
        

# Run a single evaluation episode on the trading env with the debug callback
# attached; the call's return value is shown as the cell output.
evaluate_policy(
    trained_a2c,
    env_trade,
    n_eval_episodes=1,
    callback=evaluateCallback,
    deterministic=False,
)

In [None]:
import numpy as np
# Scratch example: three single-row 2-D arrays standing in for an observation,
# the previous action and the previous reward.
obs = np.array([1, 2, 3]).reshape(1, -1)
last_action = np.array([1, 2]).reshape(1, -1)
last_reward = np.array([3]).reshape(1, -1)

In [None]:
# Join the pieces column-wise into a single row; for the scratch arrays defined
# in the previous cell this has 3 + 2 + 1 = 6 columns.
np.concatenate(
    (obs, last_action, last_reward),
    axis=1,
)