In [None]:
import gymnasium as gym
import pandas as pd
import matplotlib.pyplot as plt
import time
import optuna
from optuna.visualization import plot_optimization_history, plot_param_importances
import joblib
import numpy as np
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import VecNormalize, DummyVecEnv
from env.flo_portfolio import FlorianPortfolioEnv
from env.portfolio_multi_reward import FlorianPortfolioEnvMultiReward
from env.portfolio_bbg import Portfolio_BBG
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.callbacks import ProgressBarCallback,CallbackList, EvalCallback
from models.callbacks import CustomCallBack, HParamCallback, TensorboardCallback
from models.models import DRLAgent
from gymnasium import spaces
from pyfolio import timeseries
import pyfolio
from plot.plot import convert_daily_return_to_pyfolio_ts, convert_account_value_to_pyfolio_ts, get_baseline, backtest_stats, get_daily_return
from hyperoptimizer.optunaoptimizer import optimize_optuna, optimize_optuna_FlorianPortfolioEnvMultiReward, optimize_optuna_BBG_Env
from config import tickers
from preprocessors.preprocessors import DataProcessor
from wandb_env import wandb_opt

In [None]:
# Load the raw price file in one chain: skip the first 6 rows of the CSV,
# drop every row that contains a missing value, then rewrite the date column.
df = (
    pd.read_csv('SPX_2.csv', skiprows=6)
    .dropna(axis=0)
    # Dates are stored as 'YYYY-MM-DD' strings; data_split compares this
    # column against string bounds, which orders correctly in that format.
    .assign(date=lambda frame: pd.to_datetime(frame['date']).dt.strftime('%Y-%m-%d'))
)
df

In [None]:
# Date windows (ISO 'YYYY-MM-DD' strings), one (start, end) pair per split.
# NOTE(review): data_split filters with `< end`, so each *_end day itself is
# excluded from its window.
# NOTE(review): test_start equals validate_start, so the test window overlaps
# the validation window — confirm this is intended.
train_start, train_end = "1990-01-03", "2015-12-31"
validate_start, validate_end = "2016-01-01", "2020-12-31"
test_start, test_end = "2016-01-01", "2023-07-25"

In [None]:
def data_split(df, start, end, target_date_col="date"):
    """Return the rows of ``df`` whose date lies in the half-open window [start, end).

    Parameters
    ----------
    df : DataFrame
        Must contain ``target_date_col`` and a ``"tic"`` column.
    start, end :
        Window bounds compared directly against ``df[target_date_col]``;
        ``start`` is inclusive, ``end`` is exclusive.
    target_date_col : str
        Name of the date column (default ``"date"``).

    Returns
    -------
    DataFrame
        A new frame sorted by date and then by ``"tic"``. Its index is an
        integer code per distinct date (0 for the first date, 1 for the
        next, ...), so rows sharing a date share an index value.
    """
    dates = df[target_date_col]
    in_window = (dates >= start) & (dates < end)
    window = df.loc[in_window].sort_values(
        by=[target_date_col, "tic"], ignore_index=True
    )
    # factorize() numbers the dates in order of first appearance; the frame
    # is already date-sorted, so the codes are 0, 1, 2, ... by day.
    day_codes, _ = window[target_date_col].factorize()
    window.index = day_codes
    return window

# Cut the training and validation windows out of the full frame.
train = data_split(df, start=train_start, end=train_end)
validate = data_split(df, start=validate_start, end=validate_end)

# `train_df` is a second name bound to the same object as `train` (no copy).
train_df = train
train_df

In [None]:
# Distinct tickers in the training data, as a plain Python list.
stock_list = train["tic"].unique().tolist()
stock_dimension = len(stock_list)

# Technical-indicator columns handed to the environment.
indicators = ['RSI14', 'RSI30', 'RSI3', 'MA200', 'MA50', 'MA20']
# Not referenced elsewhere in the visible cells.
additional_price_info = ['open', 'low', 'high']

# NOTE(review): presumably 1 cash slot + two slots per stock + one slot per
# indicator — confirm against the environment's observation layout.
state_space = 1 + 2 * stock_dimension + len(indicators)

env_kwargs = dict(
    initial_amount=1000000,
    trade_cost_pct=0.001,
    state_space=state_space,
    stock_dim=stock_dimension,
    stock_list=stock_list,
    indicators=indicators,
    action_space=stock_dimension,
    sharpe_ratio_weight=0.1,  # TODO: fine-tune
    loss_penalty_weight=0.1,  # TODO: fine-tune
    short_selling_allowed=True,
    take_leverage_allowed=True,
    # Reward magnitude strongly affects learning: rewards that are too large
    # can destabilise training, rewards that are too small give too weak a
    # signal for the agent to learn from.
    reward_scaling=0.00022786244568524788,
    hmax=100,  # TODO: fine-tune
    # --- Not for optimization ---
    make_plots=False,
    num_stock_shares=[0],  # number of initial shares
    model_name="A-2C",
    mode="training",  # can be anything, just for plots
    iteration="1000",  # can be anything, just for plots
)



In [None]:
# Hand the training data and environment description to the project's
# `wandb_opt` helper (presumably a Weights & Biases hyper-parameter search —
# confirm in wandb_env).
wandb_opt(
    environment=FlorianPortfolioEnvMultiReward,
    train_df=train_df,
    project_name="portfolio1",
    state_space=state_space,
    stock_list=stock_list,
    indicators=indicators,
    # Pass the dimension derived from the training data instead of a literal 1:
    # `state_space` above is computed from `stock_dimension`, so a hard-coded
    # value would disagree with it as soon as the data holds more than one ticker.
    stock_dimension=stock_dimension,
)