In [None]:
import sys
# Add the parent of the current working directory to the import path;
# presumably this is where the `training`, `envs` and `agents` packages
# imported by this notebook live (verify against the repository layout).
sys.path.append("..")

import os
# NOTE: "__file__" is a quoted string literal, not the module attribute, so
# abspath() resolves it against the current working directory. Taking dirname
# twice therefore yields the PARENT of the working directory, which means this
# value depends on where the kernel was started.
project_root = os.path.dirname(os.path.dirname(os.path.abspath("__file__")))

import pandas as pd
from training import TrainingManager, TrainingConfig
from envs.trading_env import MyTradingEnv

# --- Training schedule -------------------------------------------------------
# These values are forwarded to TrainingConfig through the `train_params` dict
# built later in this notebook.
N_EPISODES = 4_000
MAX_STEPS = 2_000

# --- Learning hyperparameters ------------------------------------------------
LEARNING_RATE = 0.05
DISCOUNT_FACTOR = 0.9

# --- Exploration schedule (epsilon start / end / decay) ----------------------
EPSILON_START = 1.0
EPSILON_END = 0.01
EPSILON_DECAY = 0.9998

# --- Evaluation / checkpoint cadence -----------------------------------------
# Spelling "FREQUANCY" is kept as-is because other cells reference these names.
# Units are presumably episodes -- confirm against TrainingConfig.
EVAL_FREQUANCY = 200
SAVE_FREQUANCY = 1_000

# Suffix used in agent names, experiment names and log directory names.
TRAIN_VERSION = "v1"

def _read_indexed_csv(path):
    """Read one data CSV, using its first column as a parsed datetime index.

    Args:
        path: Path to the CSV file.

    Returns:
        pandas.DataFrame indexed by the first column of the file.
    """
    # pandas documents the special value as upper-case "ISO8601". The previous
    # lower-case spelling is not the documented value and risks being treated
    # as a literal strptime pattern, in which case parsing fails and read_csv
    # silently leaves the index unparsed.
    return pd.read_csv(path, index_col=0, parse_dates=True, date_format="ISO8601")


# One CSV per year (2021, 2022, 2023). The individual frames and `data_path`
# are kept as module-level names exactly as before.
data_path = "../data/data_1h_2021.csv"
df1 = _read_indexed_csv(data_path)
data_path = "../data/data_1h_2022.csv"
df2 = _read_indexed_csv(data_path)
data_path = "../data/data_1h_2023.csv"
df3 = _read_indexed_csv(data_path)

# Only the 2021 frame is used for training here; df2 and df3 are loaded but
# not included. Add them to the list to train on more years.
df = pd.concat([df1])

# --- Environment settings ----------------------------------------------------
# Most of these are forwarded to MyTradingEnv through the `env_params` dict
# built later in this notebook.
INITIAL_BALANCE = 1000.0
WINDOW_SIZE = 10

# Trading frictions.
COMMISSION = 0.0001
SLIPPAGE = 0.0005

# Holding-time limits.
MAX_HOLDING_TIME = 24 * 60
# NOTE(review): HOLDING_THRESHOLD and MAX_DRAWDOWN_THRESHOLD are defined here
# but are not part of `env_params` in this notebook -- confirm whether the
# environment is meant to receive them.
HOLDING_THRESHOLD = 24
MAX_DRAWDOWN_THRESHOLD = 0.05

# Reward-shaping coefficients and scale.
LAMBDA_DRAWDOWN = 0.05
LAMBDA_HOLD = 0.005
REWARD_SCALING = 1000.0

In [None]:
def run_training(
    agent_type: str,
    df,
    project_root: str,
    env_params: dict,
    train_params: dict,
):
    """Build an agent, environment and config, then run one training experiment.

    Args:
        agent_type: One of "SARSA", "SARSA_Lambda", "QLearning", "Monte_Carlo".
        df: Data frame handed to ``MyTradingEnv`` as ``df``.
        project_root: Directory under which ``training_data/logs`` and
            ``training_data/checkpoints`` are resolved.
        env_params: Extra keyword arguments for ``MyTradingEnv``.
        train_params: Extra keyword arguments for ``TrainingConfig``.

    Returns:
        Whatever ``TrainingManager.train_agent`` returns.

    Raises:
        ValueError: If ``agent_type`` is not one of the supported names.
    """

    # Each factory imports its agent class lazily, so only the module of the
    # requested agent is imported.
    def _build_sarsa():
        from agents.classical.sarsa_agent import SarsaAgent

        return SarsaAgent()

    def _build_sarsa_lambda():
        from agents.classical.sarsa_lambda_agent import SarsaLambdaAgent

        return SarsaLambdaAgent()

    def _build_qlearning():
        from agents.classical.qlearning_agent import QLearningAgent

        return QLearningAgent()

    def _build_monte_carlo():
        from agents.classical.monte_carlo_agent import MonteCarloAgent

        return MonteCarloAgent()

    builders = {
        "SARSA": _build_sarsa,
        "SARSA_Lambda": _build_sarsa_lambda,
        "QLearning": _build_qlearning,
        "Monte_Carlo": _build_monte_carlo,
    }

    # Reject unsupported names before anything else is constructed.
    if agent_type not in builders:
        raise ValueError(f"Unknown agent type: {agent_type}")
    agent = builders[agent_type]()

    env = MyTradingEnv(df=df, **env_params)

    config = TrainingConfig(
        agent_name=f"{agent_type}_{TRAIN_VERSION}",
        agent_type=agent_type,
        **train_params,
    )

    logs_dir = os.path.join(project_root, "training_data/logs")
    checkpoints_dir = os.path.join(project_root, "training_data/checkpoints")
    manager = TrainingManager(
        base_log_dir=logs_dir,
        base_checkpoint_dir=checkpoints_dir,
    )

    # Lower-cased agent type plus the version tag names the experiment.
    experiment_name = f"exp_{agent_type.lower()}_{TRAIN_VERSION}"
    return manager.train_agent(agent, env, config, experiment_name)

In [None]:
# Keyword arguments forwarded to MyTradingEnv by run_training().
env_params = dict(
    initial_balance=INITIAL_BALANCE,
    window_size=WINDOW_SIZE,
    commission=COMMISSION,
    slippage=SLIPPAGE,
    max_holding_time=MAX_HOLDING_TIME,
    lambda_drawdown=LAMBDA_DRAWDOWN,
    lambda_hold=LAMBDA_HOLD,
    reward_scaling=REWARD_SCALING,
    max_steps=MAX_STEPS,
)

# Keyword arguments forwarded to TrainingConfig by run_training().
# Note that `max_steps` is supplied to both the environment and the config.
train_params = dict(
    n_episodes=N_EPISODES,
    max_steps=MAX_STEPS,
    learning_rate=LEARNING_RATE,
    discount_factor=DISCOUNT_FACTOR,
    epsilon_start=EPSILON_START,
    epsilon_end=EPSILON_END,
    epsilon_decay=EPSILON_DECAY,
    eval_frequency=EVAL_FREQUANCY,
    save_frequency=SAVE_FREQUANCY,
)

### Обучение QLearningAgent

In [4]:
# Train the Q-learning agent with the shared environment and training settings.
results = run_training("QLearning", df, project_root, env_params, train_params)

KeyboardInterrupt: 

### Обучение MonteCarloAgent

In [None]:
# Monte Carlo is trained with half the per-episode step budget.
# Use a derived copy rather than mutating the shared `train_params` and
# restoring it afterwards: if training is interrupted or raises, the restore
# line never runs and every later cell silently trains with the halved value.
train_params_mc = {
    **train_params,
    "max_steps": MAX_STEPS // 2,
}

results = run_training(
    agent_type="Monte_Carlo",
    df=df,
    project_root=project_root,
    env_params=env_params,
    train_params=train_params_mc,
)

### Обучение SarsaAgent

In [None]:
# Train the SARSA agent with the shared environment and training settings.
results = run_training("SARSA", df, project_root, env_params, train_params)

### Обучение SarsaLambdaAgent

In [None]:
# SARSA(lambda) takes the shared training settings plus one extra
# `lambda_param` entry (presumably the eligibility-trace decay -- confirm
# against TrainingConfig). The shared `train_params` dict is left untouched.
train_params_lambda = dict(train_params, lambda_param=0.8)

results = run_training(
    "SARSA_Lambda",
    df,
    project_root,
    env_params,
    train_params_lambda,
)

In [None]:

import pandas as pd
import matplotlib.pyplot as plt
import json
from pathlib import Path

def _load_experiment_episodes(exp_dir):
    """Load one experiment's per-episode log, labelled with its agent name.

    Args:
        exp_dir: Directory containing ``episodes.csv`` and
            ``training_summary.json``.

    Returns:
        pandas.DataFrame read from ``episodes.csv`` with an added ``agent``
        column taken from ``summary["config"]["agent_name"]``.
    """
    exp_dir = Path(exp_dir)
    episodes = pd.read_csv(exp_dir / "episodes.csv")
    with open(exp_dir / "training_summary.json", encoding="utf-8") as f:
        summary = json.load(f)
    episodes["agent"] = summary["config"]["agent_name"]
    return episodes


experiment_dirs = [
    f"../training_data/logs/exp_qlearning_{TRAIN_VERSION}",
    f"../training_data/logs/exp_monte_carlo_{TRAIN_VERSION}",
    f"../training_data/logs/exp_sarsa_{TRAIN_VERSION}",
    f"../training_data/logs/exp_sarsa_lambda_{TRAIN_VERSION}",
]

all_data = []
for exp_dir in experiment_dirs:
    exp_path = Path(exp_dir)
    # A run that was interrupted or never started has no logs; skip it with a
    # notice so the remaining agents can still be compared.
    has_logs = (exp_path / "episodes.csv").is_file() and (
        exp_path / "training_summary.json"
    ).is_file()
    if not has_logs:
        print(f"Skipping {exp_path}: episodes.csv or training_summary.json not found")
        continue
    all_data.append(_load_experiment_episodes(exp_path))

if not all_data:
    raise FileNotFoundError(
        "No experiment logs found; run at least one training cell first."
    )

# Deliberately NOT named `df`: the training cells pass `df` (market data) to
# the environment, and overwriting it here would break re-running them.
episodes_all = pd.concat(all_data, ignore_index=True)

# Smoothed reward per agent. sort=False keeps agents in first-appearance
# order, so the legend order matches `experiment_dirs`.
fig, ax = plt.subplots(figsize=(12, 6))
for agent, agent_data in episodes_all.groupby("agent", sort=False):
    smoothed = agent_data["reward"].rolling(window=50, min_periods=1).mean()
    ax.plot(agent_data["episode"], smoothed, label=agent)

ax.set_title("Сравнение средней награды (скользящее окно = 50)")
ax.set_xlabel("Эпизод")
ax.set_ylabel("Награда (сглаженная)")
ax.legend()
ax.grid(True)
plt.show()


# Raw (unsmoothed) portfolio value per episode for each agent.
fig, ax = plt.subplots(figsize=(12, 6))
for agent, agent_data in episodes_all.groupby("agent", sort=False):
    ax.plot(agent_data["episode"], agent_data["portfolio_value"], label=agent)

ax.set_title("Динамика стоимости портфеля")
ax.set_xlabel("Эпизод")
ax.set_ylabel("Portfolio Value ($)")
ax.legend()
ax.grid(True)
plt.show()


# Last logged episode of every agent, as a compact comparison table.
final_metrics = episodes_all.groupby("agent").tail(1)[
    ["agent", "reward", "portfolio_value", "win_rate", "n_trades", "max_drawdown"]
]
display(final_metrics.round(2))