In [15]:
import gymnasium as gym
import pandas as pd
import numpy as np
from stable_baselines3 import A2C
from env.flo_portfolio import FlorianPortfolioEnv
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.callbacks import ProgressBarCallback,CallbackList, EvalCallback
from models.callbacks import CustomCallBack, HParamCallback
from gymnasium import spaces

In [2]:
df = pd.read_csv('/Users/floriankockler/My Drive/SPX_2.csv', skiprows=6).dropna(axis=0)
df['date'] = pd.to_datetime(df['date']).dt.strftime('%Y-%m-%d')
df

Unnamed: 0,date,close,open,low,high,RSI14,RSI30,RSI3,MA200,MA50,MA20,tic
1,1990-01-03,358.76,359.66,357.89,360.59,64.92,65.48,88.64,332.20,344.28,349.39,SPX
2,1990-01-04,355.67,358.74,352.89,358.76,58.72,61.98,56.59,332.52,344.49,349.69,SPX
3,1990-01-05,352.20,355.65,351.35,355.67,52.64,58.36,35.17,332.83,344.66,349.88,SPX
4,1990-01-08,353.79,352.19,350.54,354.24,54.94,59.48,48.55,333.16,344.89,350.19,SPX
5,1990-01-09,349.62,353.83,349.61,354.17,48.31,55.43,26.79,333.45,345.12,350.23,SPX
...,...,...,...,...,...,...,...,...,...,...,...,...
8450,2023-07-19,4565.72,4563.87,4557.48,4578.43,75.00,67.61,94.86,4043.67,4311.72,4435.98,SPX
8451,2023-07-20,4534.87,4554.38,4527.56,4564.74,67.94,64.81,53.04,4048.41,4319.65,4444.44,SPX
8452,2023-07-21,4536.34,4550.16,4535.79,4555.00,68.09,64.88,54.47,4052.70,4327.99,4452.17,SPX
8453,2023-07-24,4554.64,4543.39,4541.29,4563.41,70.02,65.78,71.01,4056.52,4336.33,4462.48,SPX


In [3]:
train_start = "1990-01-03"
train_end = "2015-12-31"

In [4]:
def data_split(df, start, end, target_date_col="date"):
    data = df[(df[target_date_col] >= start) & (df[target_date_col] < end)]
    data = data.sort_values([target_date_col, "tic"], ignore_index=True)
    data.index = data[target_date_col].factorize()[0]
    return data

In [5]:
train = data_split(df, train_start, train_end)

In [6]:
stock_dimension = len(train.tic.unique())
state_space = stock_dimension

In [7]:
df.columns

Index(['date', 'close', 'open', 'low', 'high', 'RSI14', 'RSI30', 'RSI3',
       'MA200', 'MA50', 'MA20', 'tic'],
      dtype='object')

In [12]:
stock_dimension

1

In [16]:
indicators = ['RSI14', 'RSI30', 'RSI3','MA200', 'MA50', 'MA20','open', 'low', 'high']
env_kwargs = {
    "initial_amount": 1000000, 
    "trade_cost_pct": 0.001, 
    "state_space": (state_space+len(indicators)), 
    "stock_dim": stock_dimension,
    # "eco_indicator_list": eco_indicator_list, 
    # "tech_indicator_list": new_tech_indics, 
    "indicators": indicators, 
    # "tech_indicator_list": config.INDICATORS, 
    "action_space": (stock_dimension+len(indicators)),
    "short_selling_allowed": False,
    "take_leverage_allowed": False,
    ## FINE TUNING NEEDED
    "reward_scaling": 1e-4, # the magnitude of rewards can significantly affect the learning process. If the rewards are too large, they can cause the learning algorithm to become unstable. On the other hand, if the rewards are too small, the agent might not learn effectively because the rewards don't provide a strong enough signal.
    "hmax": 100, #that should be increased when trading SPX

    # Not for optimization
    "make_plots": False,
    "num_stock_shares": [50], #number of initioal shares
    "model_name": "A-2C",
    "mode": "training", #can be anything, just for plots
    "iteration": "1000"#can be anything, just for plots
    
}

observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(state_space,))
observation_space



Box(-inf, inf, (1,), float32)

In [17]:

#initate env
env = FlorianPortfolioEnv(df=train, **env_kwargs)
check_env(env, warn=True)

AssertionError: The observation returned by the `reset()` method does not match the shape of the given observation space Box(-inf, inf, (10,), float32). Expected: (10,), actual shape: (12,)

In [None]:
#EvalCallback: Evaluate periodically the performance of an agent, using a separate test environment. It will save the best model if best_model_save_path folder is specified and save the evaluations results in a numpy archive (evaluations.npz) if log_path folder is specified.
eval_callback = EvalCallback(eval_env=env, best_model_save_path="./logs/", eval_freq=500, deterministic=True, render=False)
progress_callback = ProgressBarCallback()
params_callback = HParamCallback()


In [None]:
#Chaining Callbacks
callbacks = CallbackList([eval_callback,progress_callback, params_callback ])

In [None]:

#Define and Train the agend
model = A2C('MlpPolicy', env, verbose=0, tensorboard_log="./tensorboard/a2c_florian/")
model.learn(total_timesteps=500000, callback=callbacks, tb_log_name="a2c_florian_any_name")
# Once the learn function is called, you can monitor the RL agent during or after the training, with the following bash command:
# tensorboard --logdir ./a2c_cartpole_tensorboard/