In [1]:
from drl_agent import DRLAgent
from env import StockPortfolioEnv
from data_loader import Yahoo_Downloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer
import pandas as pd
from utils import data_split
from pyfolio import timeseries



In [4]:
from settings import MODEL_MAP

In [35]:
def get_data(tickers, start_date, end_date, lookback=252):
    """Download prices, add technical indicators and rolling covariance states.

    Args:
        tickers: Ticker symbols, forwarded to ``Yahoo_Downloader`` as ``ticker_list``.
        start_date: Start of the download window; also forwarded to ``data_split``.
        end_date: End of the download window; also forwarded to ``data_split``.
        lookback: Number of preceding trading days included in every rolling
            window. Defaults to 252, the value that used to be hard-coded.

    Returns:
        The result of ``data_split`` applied to the feature-engineered frame,
        which carries two extra per-date columns: ``cov_list`` (covariance
        matrix of the window's daily returns as a NumPy array) and
        ``return_list`` (the window's daily-return frame). The first
        ``lookback`` dates have no complete window and are dropped by the
        inner merge.

    Raises:
        ValueError: If ``lookback`` is smaller than 2 (a covariance needs at
            least two return observations).
    """
    if lookback < 2:
        raise ValueError(f"lookback must be at least 2, got {lookback!r}")

    raw = Yahoo_Downloader(ticker_list=tickers, start_date=start_date, end_date=end_date).fetch_data()
    fe = FeatureEngineer(use_technical_indicator=True, use_turbulence=False, user_defined_feature=False)
    df = fe.preprocess_data(raw)

    # add covariance matrix as states
    df = df.sort_values(["date", "tic"], ignore_index=True)
    # Index every row by the ordinal of its date so that .loc can slice whole days.
    df.index = df.date.factorize()[0]

    cov_list = []
    return_list = []

    n_days = len(df.index.unique())
    for i in range(lookback, n_days):
        # Label-based slice: both ends are inclusive, i.e. lookback + 1 days of
        # prices, which yields up to `lookback` rows of returns.
        data_lookback = df.loc[i - lookback : i, :]
        price_lookback = data_lookback.pivot_table(index="date", columns="tic", values="close")
        return_lookback = price_lookback.pct_change().dropna()
        return_list.append(return_lookback)
        cov_list.append(return_lookback.cov().values)

    df_cov = pd.DataFrame(
        {"date": df.date.unique()[lookback:], "cov_list": cov_list, "return_list": return_list}
    )
    # Default (inner) merge: dates without a full window are discarded here.
    df = df.merge(df_cov, on="date")
    df = df.sort_values(["date", "tic"]).reset_index(drop=True)
    return data_split(df, start_date, end_date)

In [73]:
def generate_environment(
    df,
    initial_amount,
    transaction_cost_pct=0,
    tech_indicator_list=None,
    hmax=100,
    reward_scaling=1e-1,
):
    """Build a ``StockPortfolioEnv`` over ``df``.

    Args:
        df: Frame with a ``tic`` column; the number of distinct tickers sets
            the state, action and stock dimensions of the environment.
        initial_amount: Forwarded to the environment as ``initial_amount``.
        transaction_cost_pct: Forwarded as ``transaction_cost_pct`` (default 0).
        tech_indicator_list: Indicator column names forwarded to the
            environment. ``None`` (the default) selects
            ``["macd", "rsi_30", "cci_30", "dx_30"]``.
        hmax: Forwarded as ``hmax``. Defaults to 100, the former hard-coded value.
        reward_scaling: Forwarded as ``reward_scaling``. Defaults to 1e-1, the
            former hard-coded value.

    Returns:
        The constructed ``StockPortfolioEnv`` instance.
    """
    if tech_indicator_list is None:
        tech_indicator_list = ("macd", "rsi_30", "cci_30", "dx_30")
    # Always hand the environment its own list: a list default in the signature
    # would be shared between calls, and a caller's list could be mutated.
    tech_indicator_list = list(tech_indicator_list)

    stock_dimension = len(df.tic.unique())
    env_kwargs = {
        "hmax": hmax,
        "initial_amount": initial_amount,
        "transaction_cost_pct": transaction_cost_pct,
        "state_space": stock_dimension,
        "stock_dim": stock_dimension,
        "tech_indicator_list": tech_indicator_list,
        "action_space": stock_dimension,
        "reward_scaling": reward_scaling,
    }
    return StockPortfolioEnv(df=df, **env_kwargs)

In [74]:
def get_agent(env):
    """Wrap ``env`` in a new ``DRLAgent`` and return it."""
    agent = DRLAgent(env)
    return agent

In [66]:
def get_model(model_name, agent):
    """Create the model registered under ``model_name`` via ``agent``.

    Args:
        model_name: Key into ``MODEL_MAP``; also passed to ``agent.get_model``.
        agent: Object exposing ``get_model(model_name, model_kwargs=...)``.

    Returns:
        Whatever ``agent.get_model`` returns.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODEL_MAP``; the message
            lists the available names.
    """
    try:
        model_params = MODEL_MAP[model_name]
    except KeyError:
        raise KeyError(
            f"Unknown model {model_name!r}; available models: {list(MODEL_MAP)}"
        ) from None
    # Pass a copy so the agent cannot mutate the shared settings entry.
    model_kwargs = None if model_params is None else dict(model_params)
    return agent.get_model(model_name, model_kwargs=model_kwargs)

In [45]:
from settings import DOW_30_TICKER

In [39]:
# Download and feature-engineer the DOW_30_TICKER universe for the
# 2004-01-01 .. 2021-01-01 window (performs network downloads via get_data).
df = get_data(tickers=DOW_30_TICKER, start_date="2004-01-01", end_date="2021-01-01")

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

Shape of DataFrame:  (123395, 8)
Successfully added technical indicators


In [75]:
# Portfolio environment over the full frame with 1,000,000 starting capital;
# transaction cost and indicator list are left at generate_environment's defaults.
env = generate_environment(df, initial_amount=1000000)

In [77]:
# get_sb_env() returns a pair; only its first element is used, as the
# environment handed to the agent.
env_train, _ = env.get_sb_env()
agent = get_agent(env_train)
# PPO model configured with the parameters stored under "ppo" in MODEL_MAP.
model = get_model("ppo", agent)

{'n_steps': 2048, 'ent_coef': 0.005, 'learning_rate': 0.001, 'batch_size': 128}
Using cpu device




In [61]:
# Train the PPO model for 40,000 timesteps, logged under the name "ppo".
trained_ppo = agent.train_model(model=model, tb_log_name="ppo", total_timesteps=40000)

----------------------------------
| time/              |           |
|    fps             | 1233      |
|    iterations      | 1         |
|    time_elapsed    | 1         |
|    total_timesteps | 2048      |
| train/             |           |
|    reward          | 2368749.2 |
----------------------------------
begin_total_asset:1000000
end_total_asset:7489155.539859982
Sharpe:  0.7521440833312614
---------------------------------------
| time/                   |           |
|    fps                  | 1076      |
|    iterations           | 2         |
|    time_elapsed         | 3         |
|    total_timesteps      | 4096      |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -38.3     |
|    explained_variance   | 0         |
|    learning_rate        | 0.001     |
|    loss                 | 3.28e+14  |
|    n_updates            | 10       

In [62]:
# Persist the trained PPO model under the name "ppo_dow30_2004_2021".
trained_ppo.save("ppo_dow30_2004_2021")

In [68]:
# NOTE: rebinds `model` (previously the PPO model) to a new A2C model created
# by the same agent, using the parameters stored under "a2c" in MODEL_MAP.
model = get_model("a2c", agent)

{'n_steps': 10, 'ent_coef': 0.005, 'learning_rate': 0.0004}
Using cpu device


In [69]:
# Train the A2C model for 40,000 timesteps, logged under the name "a2c".
trained_a2c = agent.train_model(model=model, tb_log_name="a2c", total_timesteps=40000)

-------------------------------------
| time/                 |           |
|    fps                | 864       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 1000      |
| train/                |           |
|    entropy_loss       | -38.3     |
|    explained_variance | 0         |
|    learning_rate      | 0.0004    |
|    n_updates          | 99        |
|    policy_loss        | 2.07e+08  |
|    reward             | 1051628.5 |
|    std                | 0.998     |
|    value_loss         | 3.82e+13  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 914       |
|    iterations         | 200       |
|    time_elapsed       | 2         |
|    total_timesteps    | 2000      |
| train/                |           |
|    entropy_loss       | -38.2     |
|    explained_variance | 0         |
|    learning_rate      | 0.0004    |
|    n_updat

In [70]:
# Persist the trained A2C model under the name "a2c_dow30_2004_2021".
trained_a2c.save("a2c_dow30_2004_2021")

In [78]:
# Run the trained A2C model through `env`. `env` is built from the same frame
# the training environment was derived from, so this is an in-sample evaluation.
df_daily_return_a2c, df_actions_a2c = DRLAgent.DRL_prediction(
    model=trained_a2c, environment=env
)

begin_total_asset:1000000
end_total_asset:8187789.915619479
Sharpe:  0.7830369430486934
hit end!


In [106]:
from utils import convert_daily_return_to_pyfolio_ts, extract_weights

In [108]:
import plotly.graph_objs as go

In [115]:
def prediction_plot(models, env):
    """Plot the cumulative return of each model when run through ``env``.

    Every model is evaluated with ``DRLAgent.DRL_prediction``; the resulting
    ``daily_return`` column is compounded into a cumulative return and drawn as
    one line, labelled with the model's class name.

    Args:
        models: Iterable of trained models accepted by ``DRLAgent.DRL_prediction``.
        env: Environment passed to ``DRLAgent.DRL_prediction`` for every model.

    Returns:
        A ``plotly.graph_objs.Figure`` with one ``Scatter`` trace per model.
    """
    fig = go.Figure()
    for model in models:
        df_daily_return, _ = DRLAgent.DRL_prediction(model=model, environment=env)
        # Compound daily returns: prod(1 + r_t) - 1.
        cumulative_return = (df_daily_return.daily_return + 1).cumprod() - 1
        fig.add_trace(
            go.Scatter(
                x=pd.Series(df_daily_return.date),
                y=cumulative_return,
                mode="lines",
                name=model.__class__.__name__,
                line=dict(width=2),
            )
        )

    fig.update_layout(
        legend=dict(
            x=0,
            y=1,
            traceorder="normal",
            font=dict(family="sans-serif", size=15, color="black"),
            bgcolor="White",
            bordercolor="white",
            borderwidth=2,
        ),
        # Only the title position is configured; no title text is set.
        title={"y": 0.85, "x": 0.5, "xanchor": "center", "yanchor": "top"},
        paper_bgcolor="rgba(1,1,0,0)",
        plot_bgcolor="rgba(1, 1, 0, 0)",
        xaxis_title="Date",
        # `title.font` replaces the deprecated `titlefont` axis attribute.
        yaxis=dict(title=dict(text="Cumulative Return", font=dict(size=30))),
        # Global font size (the earlier size=40 was immediately overridden by 20).
        font=dict(size=20),
    )

    # Frame and grid styling shared by both axes.
    axis_style = dict(
        showline=True,
        linecolor="black",
        showgrid=True,
        gridwidth=1,
        gridcolor="LightSteelBlue",
        mirror=True,
    )
    fig.update_xaxes(**axis_style)
    fig.update_yaxes(zeroline=True, zerolinewidth=1, zerolinecolor="LightSteelBlue", **axis_style)
    return fig

In [116]:
# Compare PPO and A2C on `env`; prediction_plot re-runs each model through the
# environment, which is why the prediction log lines appear once per model below.
prediction_plot([trained_ppo, trained_a2c], env)


You provided an OpenAI Gym environment. We strongly recommend transitioning to Gymnasium environments. Stable-Baselines3 is automatically wrapping your environments in a compatibility layer, which could potentially cause issues.



begin_total_asset:1000000
end_total_asset:7608292.692738084
Sharpe:  0.75865048405573
hit end!



You provided an OpenAI Gym environment. We strongly recommend transitioning to Gymnasium environments. Stable-Baselines3 is automatically wrapping your environments in a compatibility layer, which could potentially cause issues.



begin_total_asset:1000000
end_total_asset:8187789.915619479
Sharpe:  0.7830369430486934
hit end!
