In [None]:
%load_ext autoreload
%autoreload

%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import datetime

from finrl.config import config
from finrl.preprocessing.preprocessors import FeatureEngineer
from finrl.preprocessing.data import data_split
from finrl.env.env_stocktrading import StockTradingEnv
from finrl.model.models import DRLAgent
from finrl.trade.backtest import BackTestStats, BaselineStats, BackTestPlot

import sys

In [None]:
from finrl.preprocessing import data, preprocessors

# OHLC files for the frames that later become EURUSD_train / GBPUSD_train,
# listed in the same order as the names they are unpacked into.
_train_csv_paths = ("15min/EURUSD/01_17.csv", "15min/GBPUSD/01_17.csv")
EURUSD_df, GBPUSD_df = map(data.load_ohlc_dataset, _train_csv_paths)

In [None]:
# Technical-indicator configuration handed to FeatureEngineer.
# Each entry carries a 'talib_name' plus the settings for that indicator.
# NOTE(review): the original comment says time_period is "in seconds"; TA-Lib
# periods are usually counted in bars -- confirm how FeatureEngineer reads it.
param_map = {
    # Moving averages
    'sma_9': {'talib_name': 'SMA', 'time_period': 9},
    'ema_9': {'talib_name': 'EMA', 'time_period': 9},
    'sma_21': {'talib_name': 'SMA', 'time_period': 21},
    'ema_21': {'talib_name': 'EMA', 'time_period': 21},
    # Bollinger bands
    'bbands_9': {
        'talib_name': 'BBANDS',
        'time_period': 9,
        'nbdevup': 2.0,
        'nbdevdn': 2.0,
    },
    'bbands_12': {
        'talib_name': 'BBANDS',
        'time_period': 12,
        'nbdevup': 2.0,
        'nbdevdn': 2.0,
    },
    # MACD with two parameter sets (named for entry and exit)
    'macd_entry': {
        'talib_name': 'MACD',
        'fastperiod': 12,
        'slowperiod': 26,
        'signalperiod': 9,
    },
    'macd_exit': {
        'talib_name': 'MACD',
        'fastperiod': 19,
        'slowperiod': 39,
        'signalperiod': 9,
    },
    # Oscillators and momentum
    'stoch': {
        'talib_name': 'STOCH',
        'fastk_period': 5,
        'slowk_period': 3,
        'slowk_matype': 0,
        'slowd_period': 3,
        'slowd_matype': 0,
    },
    'rsi_14': {'talib_name': 'RSI', 'time_period': 14},
    'rsi_4': {'talib_name': 'RSI', 'time_period': 4},
    'mom_10': {'talib_name': 'MOM', 'time_period': 10},
    'stochrsi_14': {
        'talib_name': 'STOCHRSI',
        'time_period': 14,
        'fastk_period': 5,
        'fastd_period': 3,
        'fastd_matype': 0,
    },
    # Adaptive / smoothed averages
    'kama_30': {'talib_name': 'KAMA', 'time_period': 30},
    't3_5': {'talib_name': 'T3', 'time_period': 5, 'vfactor': 0.7},
    # Range-based measures and time-series forecast
    'atr_14': {'talib_name': 'ATR', 'time_period': 14},
    'natr_14': {'talib_name': 'NATR', 'time_period': 14},
    'tsf_14': {'talib_name': 'TSF', 'time_period': 14},
}

In [None]:
from finrl.preprocessing import data, preprocessors

# Both pairs are engineered with the same settings, so the keyword arguments
# are spelled out once and reused for each FeatureEngineer.
_train_fe_kwargs = dict(
    tech_indicator_params_map=param_map,
    use_technical_indicator=True,
    user_defined_feature=False,
)

EURUSD_train, tech_indicator_list = (
    preprocessors.FeatureEngineer(EURUSD_df, **_train_fe_kwargs).preprocess_data()
)

# tech_indicator_list is rebound here; the value from the GBPUSD run is kept.
GBPUSD_train, tech_indicator_list = (
    preprocessors.FeatureEngineer(GBPUSD_df, **_train_fe_kwargs).preprocess_data()
)

In [None]:
# Despite the name, this is a dict: pair symbol -> engineered training frame.
dfs_list = dict(EURUSD=EURUSD_train, GBPUSD=GBPUSD_train)

In [None]:
# One slot per traded instrument.
stock_dimension = len(dfs_list)

# Size of the state vector: one fixed slot, plus three slots and one slot per
# technical indicator for every instrument.
# NOTE(review): what the fixed slot and the three per-instrument slots hold is
# defined by StockTradingEnv, not visible here -- confirm.
_per_instrument_slots = 3 + len(tech_indicator_list)
state_space = 1 + stock_dimension * _per_instrument_slots
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")


In [None]:
# Keyword arguments for StockTradingEnv (training run). Costs are set to zero.
env_kwargs = dict(
    hmax=10000,
    dfs_list=dfs_list,
    initial_amount=100000,
    buy_cost_pct=0,
    sell_cost_pct=0,
    state_space=state_space,
    tech_indicator_list=tech_indicator_list,
    action_space=stock_dimension,
    reward_scaling=1e-4,
)

In [None]:
from stable_baselines3.common.evaluation import evaluate_policy

# Build the training environment from env_kwargs and show its `data` attribute.
e_train_gym = StockTradingEnv(**env_kwargs)
print(e_train_gym.data)

# get_sb_env() returns a pair; only the first element (the env handed to the
# agent) is used, the second is discarded.
env_train, _ = e_train_gym.get_sb_env()

agent = DRLAgent(env = env_train)

# Training budget: num_episodes times the number of rows in EURUSD_train.
# NOTE(review): assumes one environment step per row -- confirm against the env.
num_episodes = 5
total_timesteps = num_episodes * len(EURUSD_train)

# A2C hyper-parameters passed through to the model as model_kwargs.
A2C_PARAMS = {"n_steps": 20, "ent_coef": 0.001, "learning_rate": 0.002, 'rms_prop_eps': 1e-05}
model_a2c = agent.get_model(model_name="a2c",model_kwargs = A2C_PARAMS)
trained_a2c = agent.train_model(model=model_a2c, 
                            tb_log_name='a2c',
                            total_timesteps=total_timesteps)

# NOTE(review): sb3_episode_rewards is not referenced again in the visible
# cells; only the FinRL-side episode rewards are plotted below.
sb3_episode_rewards = trained_a2c.env.envs[0].get_episode_rewards()
finrl_episode_rewards = e_train_gym.episode_rewards
fig, axs = plt.subplots()

# Plot the per-episode rewards recorded by the training environment.
axs.plot(finrl_episode_rewards)
axs.title.set_text("Episode rewards (Gains in NOP) against episodes")
fig.tight_layout()

env_test, _ = e_train_gym.get_sb_env()  # NOTE(review): built from the *training* env, so the evaluation below is in-sample

# Print what evaluate_policy returns for 5 evaluation episodes.
print(evaluate_policy(trained_a2c, env_test, n_eval_episodes=5))

In [None]:
# Trading-period files, run through the same pipeline as the training frames.
_trade_csv_paths = ("15min/EURUSD/02_17.csv", "15min/GBPUSD/02_17.csv")
EURUSD_trade_df, GBPUSD_trade_df = map(data.load_ohlc_dataset, _trade_csv_paths)

_trade_fe_kwargs = dict(
    tech_indicator_params_map=param_map,
    use_technical_indicator=True,
    user_defined_feature=False,
)

EURUSD_trade, tech_indicator_list = (
    preprocessors.FeatureEngineer(EURUSD_trade_df, **_trade_fe_kwargs).preprocess_data()
)

# tech_indicator_list is rebound again; the GBPUSD result is the one kept.
GBPUSD_trade, tech_indicator_list = (
    preprocessors.FeatureEngineer(GBPUSD_trade_df, **_trade_fe_kwargs).preprocess_data()
)

# From here on dfs_list / stock_dimension / state_space describe the trading
# frames; the training values bound under the same names are replaced.
dfs_list = dict(EURUSD=EURUSD_trade, GBPUSD=GBPUSD_trade)
stock_dimension = len(dfs_list)
state_space = 1 + stock_dimension * (3 + len(tech_indicator_list))
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")


In [None]:
# Keyword arguments for StockTradingEnv, rebuilt from whatever dfs_list,
# state_space and tech_indicator_list are bound to when this cell runs.
env_kwargs = dict(
    hmax=10000,
    dfs_list=dfs_list,
    initial_amount=100000,
    buy_cost_pct=0,
    sell_cost_pct=0,
    state_space=state_space,
    tech_indicator_list=tech_indicator_list,
    action_space=stock_dimension,
    reward_scaling=1e-4,
)

In [None]:
# Build the trading environment from the current env_kwargs; only the first
# element of the pair returned by get_sb_env() is kept.
e_trade_gym = StockTradingEnv(**env_kwargs)
env_trade, _ = e_trade_gym.get_sb_env()

# NOTE(review): the next four statements recompute names from the same trade
# frames after the environment has already been constructed.
dfs_list = dict(EURUSD=EURUSD_trade, GBPUSD=GBPUSD_trade)
stock_dimension = len(dfs_list)
state_space = 1 + stock_dimension * (3 + len(tech_indicator_list))
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

# Rebind `agent` to an agent that wraps the trading environment.
agent = DRLAgent(env=env_trade)

In [None]:
class EvaluateCallbackInstance:
    """Collect per-step rewards from an ``evaluate_policy`` run, grouped by episode.

    Pass :meth:`evaluateCallback` as the ``callback`` argument of
    ``evaluate_policy``. After the run, ``rewards_arrays`` holds one list of
    step rewards per finished episode and ``local`` holds a copy of the first
    observation the callback saw.
    """

    def __init__(self):
        # Finished episodes; each item is the list of step rewards of one episode.
        self.rewards_arrays = []
        # Step rewards of the episode currently in progress.
        self.reward_array_run = []
        # Never written by this class; kept so existing readers keep working.
        self.result = None
        self.ccy_dims = 0
        # Copy of the observation present on the first callback invocation.
        self.local = None
        # Set to 1 once the first observation has been captured.
        self.count = 0

    def evaluateCallback(self, locals_, globals_):
        """Record one evaluation step.

        Args:
            locals_: Local variables of the evaluation loop. Must contain
                ``reward`` and ``done``, plus either ``obs`` or ``observations``.
                Both the legacy layout (``obs`` with length-1 array ``reward``
                and ``done``) and the layout with ``observations`` and scalar
                ``reward``/``done`` are accepted, so the callback does not
                depend on one Stable-Baselines3 release.
            globals_: Global variables of the evaluation loop; unused.

        Raises:
            KeyError: If an expected key is missing from ``locals_``.
        """
        if self.count == 0:
            # The observation key name differs between evaluation-loop variants.
            obs_key = "obs" if "obs" in locals_ else "observations"
            self.local = np.copy(locals_[obs_key])
            self.count += 1

        # reshape(-1)[0] takes the first environment's value for a length-1
        # array and passes a scalar through, where plain [0] would raise.
        step_reward = np.asarray(locals_["reward"]).reshape(-1)[0]
        self.reward_array_run.append(step_reward)

        episode_done = bool(np.asarray(locals_["done"]).reshape(-1)[0])
        if episode_done:
            # Close the current episode and start collecting the next one.
            self.rewards_arrays.append(self.reward_array_run)
            self.reward_array_run = []

In [None]:
# Display all rows and the first 15 columns of the env's EURUSD frame.
e_trade_gym.df["EURUSD"].iloc[:, 0:15]

In [None]:
from stable_baselines3.common.vec_env import VecEnv
from typing import List, Tuple

def evaluate_policy_rewards(
    model: "base_class.BaseAlgorithm",
    env: VecEnv,
    n_eval_episodes: int = 10,
    deterministic: bool = True,
    render: bool = False
) -> Tuple[List[float], List[int], List[List[float]]]:
    """Run ``model`` on ``env`` for several episodes, keeping every step reward.

    Unlike a plain mean/std evaluation, this also returns the full reward
    trace of each episode.

    Args:
        model: Object with ``predict(obs, state=..., deterministic=...)``
            returning ``(action, state)``.
        env: Environment to step. If it is a ``VecEnv`` it must wrap exactly
            one environment.
        n_eval_episodes: Number of episodes to run.
        deterministic: Forwarded to ``model.predict``.
        render: When true, ``env.render()`` is called after every step.

    Returns:
        ``(episode_rewards, episode_lengths, rewards_memory_episodes)``:
        the summed reward per episode, the step count per episode, and for
        each episode the list of rewards in step order. Rewards are stored
        exactly as ``env.step`` returns them.
        NOTE(review): for a ``VecEnv`` these are presumably length-1 arrays
        rather than plain floats -- confirm before treating them as scalars.

    Raises:
        AssertionError: If ``env`` is a ``VecEnv`` with more than one
            environment.
    """
    if isinstance(env, VecEnv):
        assert env.num_envs == 1, "You must pass only one environment when using this function"

    episode_rewards: List[float] = []
    episode_lengths: List[int] = []
    rewards_memory_episodes: List[List[float]] = []

    for episode_idx in range(n_eval_episodes):
        # A VecEnv is reset explicitly only before the first episode; for later
        # episodes the observation returned by the final step is reused.
        # NOTE(review): relies on the VecEnv resetting itself when an episode
        # ends -- confirm for the wrapper in use.
        if not isinstance(env, VecEnv) or episode_idx == 0:
            obs = env.reset()

        rewards_memory = []
        done, state = False, None
        episode_reward = 0.0
        episode_length = 0

        while not done:
            action, state = model.predict(obs, state=state, deterministic=deterministic)
            obs, reward, done, _info = env.step(action)
            rewards_memory.append(reward)
            episode_reward += reward
            episode_length += 1
            if render:
                env.render()

        rewards_memory_episodes.append(rewards_memory)
        episode_rewards.append(episode_reward)
        episode_lengths.append(episode_length)

    return episode_rewards, episode_lengths, rewards_memory_episodes


    

In [None]:
# Run the default 10 episodes with stochastic actions and display the returned
# (episode_rewards, episode_lengths, rewards_memory_episodes) tuple.
evaluate_policy_rewards(trained_a2c, env_trade, deterministic=False)

In [None]:
# Display the trading environment's `df` attribute in full.
e_trade_gym.df

In [None]:
# NOTE(review): `obs` is never assigned at notebook scope in the visible cells
# (it is only a local inside evaluate_policy_rewards), so this cell raises
# NameError on a fresh kernel unless it was bound by an out-of-order cell.
obs

In [None]:
from stable_baselines3.common.evaluation import evaluate_policy

# Evaluate a single episode on the trading env; the callback instance records
# the step rewards while evaluate_policy returns the mean and std.
instance = EvaluateCallbackInstance()
mean_episode_rewards, std_episode_rewards = evaluate_policy(
    model=trained_a2c,
    env=env_trade,
    n_eval_episodes=1,
    callback=instance.evaluateCallback,
)

In [None]:
# Display the mean episode reward returned by evaluate_policy.
mean_episode_rewards

In [None]:
from itertools import accumulate

# Running (cumulative) sum of the step rewards of the last recorded episode.
asd = list(accumulate(instance.rewards_arrays[-1]))

In [None]:
# Display the step rewards of the last recorded episode.
instance.rewards_arrays[-1]

In [None]:
# Final cumulative value, i.e. the sum of all step rewards in that episode.
asd[-1]

In [None]:
# Display every recorded episode's step rewards.
instance.rewards_arrays

In [None]:
# NOTE(review): leftover debugging -- lists every name in the notebook namespace.
locals().keys()