In [1]:
# For vast.ai
# !git clone https://github.com/sobiodum/quantumai2.git
# !pip3 install --no-cache-dir -r requirements.txt

In [2]:
import gymnasium as gym
import pandas as pd
import time
import optuna
from optuna.visualization import plot_optimization_history, plot_param_importances
import joblib
import numpy as np
from stable_baselines3 import A2C
from env.flo_portfolio import FlorianPortfolioEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.callbacks import ProgressBarCallback,CallbackList, EvalCallback
from models.callbacks import CustomCallBack, HParamCallback
from models.models import DRLAgent
from gymnasium import spaces
from pyfolio import timeseries
import pyfolio
from plot.plot import convert_daily_return_to_pyfolio_ts, convert_account_value_to_pyfolio_ts, get_baseline, backtest_stats, get_daily_return
from hyperoptimizer.optunaoptimizer import optimize_optuna



In [3]:
df = pd.read_csv('SPX_2.csv', skiprows=6).dropna(axis=0)
df['date'] = pd.to_datetime(df['date']).dt.strftime('%Y-%m-%d')
df

Unnamed: 0,date,close,open,low,high,RSI14,RSI30,RSI3,MA200,MA50,MA20,tic
1,1990-01-03,358.76,359.66,357.89,360.59,64.92,65.48,88.64,332.20,344.28,349.39,SPX
2,1990-01-04,355.67,358.74,352.89,358.76,58.72,61.98,56.59,332.52,344.49,349.69,SPX
3,1990-01-05,352.20,355.65,351.35,355.67,52.64,58.36,35.17,332.83,344.66,349.88,SPX
4,1990-01-08,353.79,352.19,350.54,354.24,54.94,59.48,48.55,333.16,344.89,350.19,SPX
5,1990-01-09,349.62,353.83,349.61,354.17,48.31,55.43,26.79,333.45,345.12,350.23,SPX
...,...,...,...,...,...,...,...,...,...,...,...,...
8450,2023-07-19,4565.72,4563.87,4557.48,4578.43,75.00,67.61,94.86,4043.67,4311.72,4435.98,SPX
8451,2023-07-20,4534.87,4554.38,4527.56,4564.74,67.94,64.81,53.04,4048.41,4319.65,4444.44,SPX
8452,2023-07-21,4536.34,4550.16,4535.79,4555.00,68.09,64.88,54.47,4052.70,4327.99,4452.17,SPX
8453,2023-07-24,4554.64,4543.39,4541.29,4563.41,70.02,65.78,71.01,4056.52,4336.33,4462.48,SPX


In [4]:
train_start = "1990-01-03"
train_end = "2015-12-31"
validate_start = "2016-01-01"
validate_end = "2020-12-31"
test_start = "12021-01-01"
test_end = "2023-07-25"

In [5]:
def data_split(df, start, end, target_date_col="date"):
    data = df[(df[target_date_col] >= start) & (df[target_date_col] < end)]
    data = data.sort_values([target_date_col, "tic"], ignore_index=True)
    data.index = data[target_date_col].factorize()[0]
    return data

In [6]:
train = data_split(df, train_start, train_end)
validate = data_split(df, validate_start, validate_end)

In [7]:
# stock_dimension = len(train.tic.unique())
# state_space = stock_dimension

In [8]:
df.columns

Index(['date', 'close', 'open', 'low', 'high', 'RSI14', 'RSI30', 'RSI3',
       'MA200', 'MA50', 'MA20', 'tic'],
      dtype='object')

In [21]:
indicators = ['RSI14', 'RSI30', 'RSI3','MA200', 'MA50', 'MA20']
stock_dimension = len(train.tic.unique())
state_space = 1+2*stock_dimension + len(indicators)
additional_price_info= ['open', 'low', 'high']
env_kwargs = {
    "initial_amount": 1000000, 
    "trade_cost_pct": 0.001, 
    "state_space": state_space, 
    "stock_dim": stock_dimension,
    "indicators": indicators, 
    "action_space": stock_dimension,
    "short_selling_allowed": False,
    "take_leverage_allowed": True,
    "reward_scaling": 0.00022786244568524788, # the magnitude of rewards can significantly affect the learning process. If the rewards are too large, they can cause the learning algorithm to become unstable. On the other hand, if the rewards are too small, the agent might not learn effectively because the rewards don't provide a strong enough signal.
    "hmax": 100, #that should be increased when trading SPX
    # Not for optimization
    "make_plots": False,
    "num_stock_shares": [2700], #number of initioal shares
    "model_name": "A-2C",
    "mode": "training", #can be anything, just for plots
    "iteration": "1000"#can be anything, just for plots
    
}




In [22]:

best_kwargs = {
    'learning_rate': 0.00033286121728969784, 
    'n_steps': 4323, 
    'gamma': 0.9936290127775395, 
    'gae_lambda': 0.9278567210820339, 
    'ent_coef': 0.026456032771495535, 
    'vf_coef': 0.13422186269360714, 
    'max_grad_norm': 0.20075683668791203, 
    'reward_scaling': 6.994468092813741e-05, 
    'short_selling_allowed': False, 
    'take_leverage_allowed': False
    }

best_kwargs_2 = {
    "learning_rate": 8.129895404502722e-05,
    "n_steps": 1963,
    # "total_timesteps": 13332,
    "gamma": 0.9397975225298403,
    "gae_lambda": 0.9506776772576876,
    "ent_coef": 0.023630640628023298,
    "vf_coef": 0.17576363274940895,
    "max_grad_norm": 0.21362181690007082,
    # "reward_scaling": 0.00022786244568524788,
    # "short_selling_allowed": False,
    # "take_leverage_allowed": True,

}

#initate env
env = FlorianPortfolioEnv(df=train, **env_kwargs)
# check_env(env, warn=True)

In [11]:
#EvalCallback: Evaluate periodically the performance of an agent, using a separate test environment. It will save the best model if best_model_save_path folder is specified and save the evaluations results in a numpy archive (evaluations.npz) if log_path folder is specified.
eval_callback = EvalCallback(eval_env=env, best_model_save_path="./logs/", eval_freq=500, deterministic=True, render=False)
progress_callback = ProgressBarCallback()
params_callback = HParamCallback()


In [12]:
#Chaining Callbacks
callbacks = CallbackList([eval_callback,progress_callback, params_callback ])

In [13]:
env_train = FlorianPortfolioEnv(df=train, **env_kwargs)
env_validate = FlorianPortfolioEnv(df=validate, **env_kwargs)

# study = optimize_optuna(env_train, env_validate, callbacks=eval_callback, n_trial_runs=100)

In [14]:
# optuna.visualization.plot_optimization_history(study)

In [15]:
# optuna.visualization.plot_parallel_coordinate(study)

In [16]:

#Define and Train the agend
# model = A2C('MlpPolicy', env, verbose=0, tensorboard_log="./tensorboard/a2c_florian/")
# model.learn(total_timesteps=50000, callback=callbacks, tb_log_name="a2c_florian_any_name")
# Once the learn function is called, you can monitor the RL agent during or after the training, with the following bash command:
# tensorboard --logdir ./a2c_cartpole_tensorboard/

In [23]:
A2C_PARAMS = {"n_steps": 5, "ent_coef": 0.005, "learning_rate": 0.0002}
model = A2C('MlpPolicy', env, verbose=0, tensorboard_log="./tensorboard/a2c_florian/", **best_kwargs_2)
trained_model = model.learn(total_timesteps=13332, callback=None, tb_log_name="a2c_florian_any_name")
# trained_model = model.learn(callback=None, tb_log_name="a2c_florian_any_name")


In [24]:
trained_model.save("test_model_after_optuna")

In [None]:
# loaded_model = A2C.load("test_model_after_optuna")

Trading

In [26]:
test = data_split(df,test_start, test_end)
env_trade = FlorianPortfolioEnv(df=test, **env_kwargs)

In [27]:
df_account_memory, df_actions, df_daily_return = DRLAgent.DRL_prediction_flo(model=trained_model,
                        environment = env_trade)

hit end!


In [28]:
df_account_memory

Unnamed: 0,date,account_value
0,1990-01-03,1.968652e+06
1,1990-01-04,9.999931e+05
2,1990-01-05,9.999785e+05
3,1990-01-08,9.999873e+05
4,1990-01-09,9.999532e+05
...,...,...
8448,2023-07-18,1.119213e+07
8449,2023-07-19,1.121852e+07
8450,2023-07-20,1.114272e+07
8451,2023-07-21,1.114633e+07


In [29]:
df_daily_return

Unnamed: 0,date,daily_return
0,1990-01-03,0.000000
1,1990-01-04,-0.000007
2,1990-01-05,-0.000015
3,1990-01-08,0.000009
4,1990-01-09,-0.000034
...,...,...
8448,2023-07-18,0.007117
8449,2023-07-19,0.002358
8450,2023-07-20,-0.006757
8451,2023-07-21,0.000324


In [30]:
df_actions

Unnamed: 0,date,actions
0,1990-01-03,[2]
1,1990-01-04,[2]
2,1990-01-05,[2]
3,1990-01-08,[2]
4,1990-01-09,[2]
...,...,...
8447,2023-07-17,[0]
8448,2023-07-18,[0]
8449,2023-07-19,[0]
8450,2023-07-20,[0]


In [31]:
DRL_strat = convert_daily_return_to_pyfolio_ts(df_daily_return)
perf_func = timeseries.perf_stats 
perf_stats_all = perf_func( returns=DRL_strat, 
                              factor_returns=DRL_strat, 
                                positions=None, transactions=None, turnover_denom="AGB")

  stats = pd.Series()


In [32]:
df_daily_return

Unnamed: 0,date,daily_return
0,1990-01-03,0.000000
1,1990-01-04,-0.000007
2,1990-01-05,-0.000015
3,1990-01-08,0.000009
4,1990-01-09,-0.000034
...,...,...
8448,2023-07-18,0.007117
8449,2023-07-19,0.002358
8450,2023-07-20,-0.006757
8451,2023-07-21,0.000324


In [33]:
perf_stats_all

Annual return           0.074655
Cumulative returns     10.191294
Annual volatility       0.177635
Sharpe ratio            0.494362
Calmar ratio            0.131511
Stability               0.875382
Max drawdown           -0.567674
Omega ratio             1.101750
Sortino ratio           0.697864
Skew                   -0.189307
Kurtosis               11.655681
Tail ratio              0.948751
Daily value at risk    -0.022031
Alpha                   0.000000
Beta                    1.000000
dtype: float64

In [34]:
print("==============Get Baseline Stats===========")
baseline_df = get_baseline(
        ticker="^SPX", 
        start = df_daily_return.loc[0,'date'],
        end = df_daily_return.loc[len(df_daily_return)-1,'date'])

stats = backtest_stats(baseline_df, value_col_name = 'close')

[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (8452, 8)
Annual return           0.078583
Cumulative returns     11.644497
Annual volatility       0.182350
Sharpe ratio            0.506299
Calmar ratio            0.138411
Stability               0.873883
Max drawdown           -0.567754
Omega ratio             1.100300
Sortino ratio           0.715581
Skew                         NaN
Kurtosis                     NaN
Tail ratio              0.950741
Daily value at risk    -0.022608
dtype: float64


  stats = pd.Series()


In [35]:
%matplotlib inline
baseline_df = get_baseline(
        ticker='^SPX', start=df_daily_return.loc[0,'date'], end=test_end
    )

baseline_returns = get_daily_return(baseline_df, value_col_name="close")

with pyfolio.plotting.plotting_context(font_scale=1.1):
        pyfolio.create_full_tear_sheet(returns = DRL_strat,
        
                                       benchmark_rets=baseline_returns, set_context=False)


[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (8453, 8)


  stats = pd.Series()
  for stat, value in perf_stats[column].iteritems():


Start date,1990-01-03,1990-01-03
End date,2023-07-24,2023-07-24
Total months,402,402
Unnamed: 0_level_3,Backtest,Unnamed: 2_level_3
Annual return,7.5%,
Cumulative returns,1019.1%,
Annual volatility,17.8%,
Sharpe ratio,0.49,
Calmar ratio,0.13,
Stability,0.88,
Max drawdown,-56.8%,
Omega ratio,1.10,
Sortino ratio,0.70,
Skew,-0.19,


AttributeError: 'numpy.int64' object has no attribute 'to_pydatetime'