In [2]:
from finrl import config
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer

In [3]:
from utils import data_split, convert_daily_return_to_pyfolio_ts, extract_weights

In [4]:
import pandas as pd
import numpy as np

In [5]:
import os
from finrl import config
from finrl import config_tickers

if not os.path.exists("./" + config.DATA_SAVE_DIR):
    os.makedirs("./" + config.DATA_SAVE_DIR)
if not os.path.exists("./" + config.TRAINED_MODEL_DIR):
    os.makedirs("./" + config.TRAINED_MODEL_DIR)
if not os.path.exists("./" + config.TENSORBOARD_LOG_DIR):
    os.makedirs("./" + config.TENSORBOARD_LOG_DIR)
if not os.path.exists("./" + config.RESULTS_DIR):
    os.makedirs("./" + config.RESULTS_DIR)

In [6]:
from settings import DOW_30_TICKER, PPO_PARAMS

In [7]:
from data_loader import Yahoo_Downloader

df = Yahoo_Downloader(start_date = '2008-01-01',
                     end_date = '2021-09-02',
                     ticker_list = DOW_30_TICKER).fetch_data()

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

Shape of DataFrame:  (100385, 8)


In [8]:
df.head()

Unnamed: 0,date,open,high,low,close,volume,tic,day
0,2008-01-02,7.116786,7.152143,6.876786,5.89112,1079178800,AAPL,2
1,2008-01-02,46.599998,47.040001,46.259998,33.499519,7934400,AMGN,2
2,2008-01-02,52.09,52.32,50.790001,39.338642,8053700,AXP,2
3,2008-01-02,87.57,87.839996,86.0,63.481625,4303000,BA,2
4,2008-01-02,72.559998,72.669998,70.050003,45.395149,6337800,CAT,2


In [9]:
fe = FeatureEngineer(use_technical_indicator=True, use_turbulence=False, user_defined_feature=False)

df = fe.preprocess_data(df)

Successfully added technical indicators


In [10]:
df.head()

Unnamed: 0,date,open,high,low,close,volume,tic,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma
0,2008-01-02,7.116786,7.152143,6.876786,5.89112,1079178800,AAPL,2,0.0,5.896337,5.888629,100.0,-66.666667,100.0,5.89112,5.89112
3442,2008-01-02,46.599998,47.040001,46.259998,33.499519,7934400,AMGN,2,0.0,5.896337,5.888629,100.0,-66.666667,100.0,33.499519,33.499519
6884,2008-01-02,52.09,52.32,50.790001,39.338642,8053700,AXP,2,0.0,5.896337,5.888629,100.0,-66.666667,100.0,39.338642,39.338642
10326,2008-01-02,87.57,87.839996,86.0,63.481625,4303000,BA,2,0.0,5.896337,5.888629,100.0,-66.666667,100.0,63.481625,63.481625
13768,2008-01-02,72.559998,72.669998,70.050003,45.395149,6337800,CAT,2,0.0,5.896337,5.888629,100.0,-66.666667,100.0,45.395149,45.395149


In [11]:
# add covariance matrix as states
df = df.sort_values(["date", "tic"], ignore_index=True)
df.index = df.date.factorize()[0]

cov_list = []
return_list = []

# look back is one year
lookback = 252
for i in range(lookback, len(df.index.unique())):
    data_lookback = df.loc[i - lookback : i, :]
    price_lookback = data_lookback.pivot_table(index="date", columns="tic", values="close")
    return_lookback = price_lookback.pct_change().dropna()
    return_list.append(return_lookback)

    covs = return_lookback.cov().values
    cov_list.append(covs)


df_cov = pd.DataFrame(
    {"date": df.date.unique()[lookback:], "cov_list": cov_list, "return_list": return_list}
)
df = df.merge(df_cov, on="date")
df = df.sort_values(["date", "tic"]).reset_index(drop=True)

In [12]:
df.head()

Unnamed: 0,date,open,high,low,close,volume,tic,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,cov_list,return_list
0,2008-12-31,3.070357,3.133571,3.047857,2.580616,607541200,AAPL,2,-0.082498,3.089709,2.451164,42.254781,-80.459604,16.129793,2.746056,2.858024,"[[0.001348969289176713, 0.00042841286661066716...",tic AAPL AMGN AXP ...
1,2008-12-31,57.110001,58.220001,57.060001,41.51498,6287200,AMGN,2,0.15554,42.37576,40.536299,51.060615,51.513555,10.432018,40.739552,40.28882,"[[0.001348969289176713, 0.00042841286661066716...",tic AAPL AMGN AXP ...
2,2008-12-31,17.969999,18.75,17.91,14.488909,9625600,AXP,2,-0.92969,18.529416,12.580723,42.554855,-75.453278,25.776759,15.644893,17.505409,"[[0.001348969289176713, 0.00042841286661066716...",tic AAPL AMGN AXP ...
3,2008-12-31,41.59,43.049999,41.5,32.005878,5443100,BA,2,-0.2798,32.174379,28.867839,47.44022,156.9945,5.366299,30.327214,32.389916,"[[0.001348969289176713, 0.00042841286661066716...",tic AAPL AMGN AXP ...
4,2008-12-31,43.700001,45.099998,43.700001,29.472111,6277400,CAT,2,0.652586,30.208138,25.338258,51.205302,98.368728,26.331746,26.566471,26.301739,"[[0.001348969289176713, 0.00042841286661066716...",tic AAPL AMGN AXP ...


In [13]:
train = data_split(df, "2009-01-01", "2020-06-30")

In [14]:
stock_dimension = len(train.tic.unique())
state_space = stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")
tech_indicator_list = ["macd", "rsi_30", "cci_30", "dx_30"]
feature_dimension = len(tech_indicator_list)
print(f"Feature Dimension: {feature_dimension}")

Stock Dimension: 28, State Space: 28
Feature Dimension: 4


In [15]:
from env import StockPortfolioEnv

In [16]:
env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "transaction_cost_pct": 0,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": tech_indicator_list,
    "action_space": stock_dimension,
    "reward_scaling": 1e-1,
}

e_train_gym = StockPortfolioEnv(df=train, **env_kwargs)

In [17]:
env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>




In [18]:
from drl_agent import DRLAgent

In [19]:
agent = DRLAgent(env=env_train)

A2C_PARAMS = {"n_steps": 10, "ent_coef": 0.005, "learning_rate": 0.0004}
model_a2c = agent.get_model(model_name="a2c", model_kwargs=A2C_PARAMS)

{'n_steps': 10, 'ent_coef': 0.005, 'learning_rate': 0.0004}
Using cpu device


In [20]:
model_a2c

<stable_baselines3.a2c.a2c.A2C at 0x1682017e0>

In [21]:
trained_a2c = agent.train_model(model=model_a2c, tb_log_name="a2c", total_timesteps=40000)

-------------------------------------
| time/                 |           |
|    fps                | 1061      |
|    iterations         | 100       |
|    time_elapsed       | 0         |
|    total_timesteps    | 1000      |
| train/                |           |
|    entropy_loss       | -39.6     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0004    |
|    n_updates          | 99        |
|    policy_loss        | 4.27e+08  |
|    reward             | 2143720.2 |
|    std                | 0.996     |
|    value_loss         | 1.6e+14   |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 1091      |
|    iterations         | 200       |
|    time_elapsed       | 1         |
|    total_timesteps    | 2000      |
| train/                |           |
|    entropy_loss       | -39.6     |
|    explained_variance | 5.96e-08  |
|    learning_rate      | 0.0004    |
|    n_updat

In [22]:
agent = DRLAgent(env=env_train)

model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS)

{'n_steps': 2048, 'ent_coef': 0.005, 'learning_rate': 0.001, 'batch_size': 128}
Using cpu device


In [23]:
trained_ppo = agent.train_model(model=model_ppo, tb_log_name="ppo", total_timesteps=40000)

----------------------------------
| time/              |           |
|    fps             | 1485      |
|    iterations      | 1         |
|    time_elapsed    | 1         |
|    total_timesteps | 2048      |
| train/             |           |
|    reward          | 4084083.8 |
----------------------------------
begin_total_asset:1000000
end_total_asset:6059117.57936095
Sharpe:  0.9514065572742054
---------------------------------------
| time/                   |           |
|    fps                  | 1307      |
|    iterations           | 2         |
|    time_elapsed         | 3         |
|    total_timesteps      | 4096      |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -39.7     |
|    explained_variance   | 0         |
|    learning_rate        | 0.001     |
|    loss                 | 8.1e+14   |
|    n_updates            | 10        

In [24]:
trade = data_split(df, "2020-07-01", "2021-09-02")
e_trade_gym = StockPortfolioEnv(df=trade, **env_kwargs)

In [25]:
import torch
%matplotlib inline
import plotly.express as px

In [26]:
from pyfolio import timeseries

df_daily_return_a2c, df_actions_a2c = DRLAgent.DRL_prediction(
    model=trained_a2c, environment=e_trade_gym
)
df_daily_return_ppo, df_actions_ppo = DRLAgent.DRL_prediction(
    model=trained_ppo, environment=e_trade_gym
)


Module "zipline.assets" not found; mutltipliers will not be applied to position notionals.


You provided an OpenAI Gym environment. We strongly recommend transitioning to Gymnasium environments. Stable-Baselines3 is automatically wrapping your environments in a compatibility layer, which could potentially cause issues.



begin_total_asset:1000000
end_total_asset:1390478.3706971814
Sharpe:  2.128375461658797
hit end!
begin_total_asset:1000000
end_total_asset:1409504.518459843
Sharpe:  2.1808144476832703
hit end!



You provided an OpenAI Gym environment. We strongly recommend transitioning to Gymnasium environments. Stable-Baselines3 is automatically wrapping your environments in a compatibility layer, which could potentially cause issues.



In [39]:
trained_ppo.save("trained_ppo")

In [40]:
model = agent.get_model("ppo", model_kwargs=PPO_PARAMS)

{'n_steps': 2048, 'ent_coef': 0.005, 'learning_rate': 0.001, 'batch_size': 128}
Using cpu device


In [41]:
model.load("trained_ppo")

<stable_baselines3.ppo.ppo.PPO at 0x168460fd0>

In [42]:
df_daily_return_ppo, df_actions_ppo = DRLAgent.DRL_prediction(
    model=model, environment=e_trade_gym
)


You provided an OpenAI Gym environment. We strongly recommend transitioning to Gymnasium environments. Stable-Baselines3 is automatically wrapping your environments in a compatibility layer, which could potentially cause issues.



begin_total_asset:1000000
end_total_asset:1409325.1891910199
Sharpe:  2.180276058026452
hit end!


In [27]:
time_ind = pd.Series(df_daily_return_a2c.date)
a2c_cumpod = (df_daily_return_a2c.daily_return + 1).cumprod() - 1
ppo_cumpod = (df_daily_return_ppo.daily_return + 1).cumprod() - 1
DRL_strat_a2c = convert_daily_return_to_pyfolio_ts(df_daily_return_a2c)
DRL_strat_ppo = convert_daily_return_to_pyfolio_ts(df_daily_return_ppo)

In [28]:
perf_func = timeseries.perf_stats
perf_stats_all_a2c = perf_func(
    returns=DRL_strat_a2c,
    factor_returns=DRL_strat_a2c,
    positions=None,
    transactions=None,
    turnover_denom="AGB",
)
perf_stats_all_ppo = perf_func(
    returns=DRL_strat_ppo,
    factor_returns=DRL_strat_ppo,
    positions=None,
    transactions=None,
    turnover_denom="AGB",
)

In [29]:
from utils import extract_weights

In [30]:
a2c_weights = extract_weights(df_actions_a2c)
ppo_weights = extract_weights(df_actions_ppo)

In [31]:
baseline_df = Yahoo_Downloader(ticker_list=['^DJI'], start_date='2020-07-01', end_date='2021-09-02').fetch_data()

[*********************100%%**********************]  1 of 1 completed

Shape of DataFrame:  (296, 8)





In [32]:
from finrl.plot import backtest_stats, get_daily_return

In [33]:
baseline_df_stats = backtest_stats(baseline_df, value_col_name="close")
baseline_returns = get_daily_return(baseline_df, value_col_name="close")

Annual return          0.309122
Cumulative returns     0.372161
Annual volatility      0.140541
Sharpe ratio           1.994221
Calmar ratio           3.461310
Stability              0.950010
Max drawdown          -0.089308
Omega ratio            1.394724
Sortino ratio          2.971027
Skew                        NaN
Kurtosis                    NaN
Tail ratio             1.095028
Daily value at risk   -0.016594
dtype: float64


In [34]:
dji_cumpod = (baseline_returns + 1).cumprod() - 1

In [35]:
dji_cumpod

date
2020-07-01 00:00:00+00:00         NaN
2020-07-02 00:00:00+00:00    0.003590
2020-07-06 00:00:00+00:00    0.021452
2020-07-07 00:00:00+00:00    0.006031
2020-07-08 00:00:00+00:00    0.012913
                               ...   
2021-08-26 00:00:00+00:00    0.368298
2021-08-27 00:00:00+00:00    0.377728
2021-08-30 00:00:00+00:00    0.375554
2021-08-31 00:00:00+00:00    0.374034
2021-09-01 00:00:00+00:00    0.372161
Name: daily_return, Length: 296, dtype: float64

In [36]:
from datetime import datetime as dt

import matplotlib.pyplot as plt
import plotly
import plotly.graph_objs as go

trace1_portfolio = go.Scatter(x=time_ind, y=a2c_cumpod, mode="lines", name="A2C")
trace2_portfolio = go.Scatter(x=time_ind, y=ppo_cumpod, mode="lines", name="PPO")
trace3_portfolio = go.Scatter(x=time_ind, y=dji_cumpod, mode="lines", name="DJIA")

In [37]:
fig = go.Figure()
fig.add_trace(trace1_portfolio)
fig.add_trace(trace2_portfolio)

fig.add_trace(trace3_portfolio)

fig.update_layout(
    legend=dict(
        x=0,
        y=1,
        traceorder="normal",
        font=dict(family="sans-serif", size=15, color="black"),
        bgcolor="White",
        bordercolor="white",
        borderwidth=2,
    ),
)
fig.update_layout(
    title={
        #'text': "Cumulative Return using FinRL",
        "y": 0.85,
        "x": 0.5,
        "xanchor": "center",
        "yanchor": "top",
    }
)

fig.update_layout(
    paper_bgcolor="rgba(1,1,0,0)",
    plot_bgcolor="rgba(1, 1, 0, 0)",
    xaxis_title="Date",
    yaxis=dict(titlefont=dict(size=30), title="Cumulative Return"),
    font=dict(
        size=40,
    ),
)
fig.update_layout(font_size=20)
fig.update_traces(line=dict(width=2))

fig.update_xaxes(
    showline=True,
    linecolor="black",
    showgrid=True,
    gridwidth=1,
    gridcolor="LightSteelBlue",
    mirror=True,
)
fig.update_yaxes(
    showline=True,
    linecolor="black",
    showgrid=True,
    gridwidth=1,
    gridcolor="LightSteelBlue",
    mirror=True,
)
fig.update_yaxes(zeroline=True, zerolinewidth=1, zerolinecolor="LightSteelBlue")

fig.show()