In [None]:
import sys
sys.path.append("..")

import os
# "__file__" is deliberately a quoted string: it is resolved as a (possibly
# non-existent) file name relative to the current working directory, so the
# two dirname() calls below yield the parent of the working directory.
_cwd_anchor = os.path.abspath("__file__")
_working_dir = os.path.dirname(_cwd_anchor)
project_root = os.path.dirname(_working_dir)

import pandas as pd
from training import TrainingManager, TrainingConfig


# Tag embedded in agent names, experiment names and log directory names.
TRAIN_VERSION = "v2"

# One CSV per year; the first column becomes the index and is parsed as
# ISO-8601 dates.
_csv_by_year = {year: f"../data/data_1h_{year}.csv" for year in (2021, 2022, 2023)}
df1, df2, df3 = (
    pd.read_csv(csv_path, index_col=0, parse_dates=True, date_format="iso8601")
    for csv_path in _csv_by_year.values()
)

# Kept pointing at the last file that was read.
data_path = _csv_by_year[2023]

# Environment settings. Every key/value pair here also appears in
# base_train_params. run_training() accepts this dict as `env_params`
# but does not read it.
env_params = dict(
    initial_balance=1000.0,
    window_size=10,
    commission=0.0001,
    slippage=0.0001,
    max_holding_time=48,
    holding_threshold=12,
    max_drawdown_threshold=0.05,
    lambda_drawdown=0.5,
    lambda_hold=0.1,
    reward_scaling=1.0,
    max_steps=1000,
)

# Training set: all rows of df1 followed by the first half (by row count) of df2.
_df2_first_half = df2.iloc[: len(df2) // 2]
df_train = pd.concat([df1, _df2_first_half])


# Shared keyword arguments for TrainingConfig (agent-specific hyperparameters
# from agents_config are merged on top of these before each run).
#
# Only the keys that are absent from env_params are spelled out here; the rest
# are unpacked from env_params so the two dicts cannot drift apart when a
# value is edited in one place only. The resulting dict is equal to the
# previous hand-written literal.
base_train_params = {
    "n_episodes": 3000,
    "n_episodes_start": 0,
    "eval_frequency": 100,
    "save_frequency": 500,
    "patience": 200,
    # initial_balance, window_size, commission, slippage, max_holding_time,
    # holding_threshold, max_drawdown_threshold, lambda_drawdown, lambda_hold,
    # reward_scaling and max_steps come from env_params.
    **env_params,
}


# Hyperparameter values that are repeated across the agent entries below.
_common_hparams = {
    "discount_factor": 0.99,
    "epsilon_start": 1.0,
    "epsilon_end": 0.01,
    "epsilon_decay": 0.998,
}

# Per-agent hyperparameters, keyed by the agent_type strings that
# run_training() accepts. Entries listed after **_common_hparams override the
# shared value for that agent only.
agents_config = {
    "QLearning": {"learning_rate": 0.05, **_common_hparams},
    "SARSA": {"learning_rate": 0.05, **_common_hparams},
    "SARSA_Lambda": {"learning_rate": 0.02, **_common_hparams, "trace_decay": 0.9},
    "Monte_Carlo": {
        "learning_rate": 0.02,
        **_common_hparams,
        "epsilon_end": 0.05,
        "epsilon_decay": 0.999,
    },
}


In [None]:
def run_training(
    agent_type: str,
    df,
    project_root: str,
    env_params: dict,
    train_params: dict,
):
    """Instantiate the requested agent and train it through ``TrainingManager``.

    Args:
        agent_type: One of ``"SARSA"``, ``"SARSA_Lambda"``, ``"QLearning"``
            or ``"Monte_Carlo"``.
        df: Training data, handed unchanged to ``manager.train_agent``.
        project_root: Directory under which ``training_data/logs`` and
            ``training_data/checkpoints`` are located.
        env_params: Accepted for call-site compatibility; this function does
            not read it.
        train_params: Extra keyword arguments forwarded to ``TrainingConfig``.

    Returns:
        Whatever ``manager.train_agent`` returns.

    Raises:
        ValueError: If ``agent_type`` is not one of the supported names.
    """
    supported_types = ("SARSA", "SARSA_Lambda", "QLearning", "Monte_Carlo")
    if agent_type not in supported_types:
        raise ValueError(f"Unknown agent type: {agent_type}")

    # Imports are local so that only the selected agent's module gets loaded.
    if agent_type == "SARSA":
        from agents.classical.sarsa_agent import SarsaAgent as agent_cls
    elif agent_type == "SARSA_Lambda":
        from agents.classical.sarsa_lambda_agent import SarsaLambdaAgent as agent_cls
    elif agent_type == "QLearning":
        from agents.classical.qlearning_agent import QLearningAgent as agent_cls
    else:
        from agents.classical.monte_carlo_agent import MonteCarloAgent as agent_cls

    agent = agent_cls()

    run_label = f"{agent_type}_{TRAIN_VERSION}"
    config = TrainingConfig(agent_name=run_label, agent_type=agent_type, **train_params)

    logs_dir = os.path.join(project_root, "training_data/logs")
    checkpoints_dir = os.path.join(project_root, "training_data/checkpoints")
    manager = TrainingManager(base_log_dir=logs_dir, base_checkpoint_dir=checkpoints_dir)

    return manager.train_agent(
        agent,
        df,
        config,
        f"exp_{agent_type.lower()}_{TRAIN_VERSION}",
    )

### Обучение QLearningAgent

In [None]:
SELECTED_AGENT = "QLearning"

# Start from the shared defaults, then let the agent-specific hyperparameters
# win on any key collision.
final_train_params = dict(base_train_params)
final_train_params.update(agents_config[SELECTED_AGENT])

# Arguments in signature order: agent_type, df, project_root, env_params, train_params.
result = run_training(SELECTED_AGENT, df_train, project_root, env_params, final_train_params)

### Обучение MonteCarloAgent

In [None]:
SELECTED_AGENT = "Monte_Carlo"

# Start from the shared defaults, then let the agent-specific hyperparameters
# win on any key collision.
final_train_params = dict(base_train_params)
final_train_params.update(agents_config[SELECTED_AGENT])

# Arguments in signature order: agent_type, df, project_root, env_params, train_params.
result = run_training(SELECTED_AGENT, df_train, project_root, env_params, final_train_params)

### Обучение SarsaAgent

In [None]:
SELECTED_AGENT = "SARSA"

# Start from the shared defaults, then let the agent-specific hyperparameters
# win on any key collision.
final_train_params = dict(base_train_params)
final_train_params.update(agents_config[SELECTED_AGENT])

# Arguments in signature order: agent_type, df, project_root, env_params, train_params.
result = run_training(SELECTED_AGENT, df_train, project_root, env_params, final_train_params)

### Обучение SarsaLambdaAgent

In [None]:
SELECTED_AGENT = "SARSA_Lambda"

# Start from the shared defaults, then let the agent-specific hyperparameters
# win on any key collision.
final_train_params = dict(base_train_params)
final_train_params.update(agents_config[SELECTED_AGENT])

# Arguments in signature order: agent_type, df, project_root, env_params, train_params.
result = run_training(SELECTED_AGENT, df_train, project_root, env_params, final_train_params)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import json
from pathlib import Path

# Number of episodes used both as the rolling-mean window for the curves and
# as the length of the tail that the final summary table averages over.
WINDOW_SIZE = 100
# Height of the dashed reference line on the portfolio plot.
INITIAL_BALANCE = 1000.0

experiment_dirs = [
    f"../training_data/logs/exp_qlearning_{TRAIN_VERSION}",
    f"../training_data/logs/exp_monte_carlo_{TRAIN_VERSION}",
    f"../training_data/logs/exp_sarsa_{TRAIN_VERSION}",
    f"../training_data/logs/exp_sarsa_lambda_{TRAIN_VERSION}",
]

# run_training() builds agent_name as "<agent_type>_<TRAIN_VERSION>".
# NOTE(review): assumes training_summary.json stores that value unchanged
# under config.agent_name -- confirm against TrainingManager.
version_suffix = f"_{TRAIN_VERSION}"

all_data = []
for exp_dir_str in experiment_dirs:
    exp_dir = Path(exp_dir_str)

    episodes_df = pd.read_csv(exp_dir / "episodes.csv")

    with open(exp_dir / "training_summary.json") as f:
        summary = json.load(f)
    agent_raw_name = summary["config"]["agent_name"]

    # Strip only the trailing version tag. Splitting on the first underscore
    # would map both "SARSA_v2" and "SARSA_Lambda_v2" to "SARSA" (and
    # "Monte_Carlo_v2" to "Monte"), merging two agents into one curve and one
    # summary row.
    if agent_raw_name.endswith(version_suffix):
        agent_name = agent_raw_name[: -len(version_suffix)]
    else:
        agent_name = agent_raw_name

    episodes_df["agent"] = agent_name
    all_data.append(episodes_df)

df = pd.concat(all_data, ignore_index=True)

plt.style.use('seaborn-v0_8-darkgrid')
agents = df["agent"].unique()

fig, axes = plt.subplots(1, 2, figsize=(16, 6))
ax1, ax2 = axes[0], axes[1]

# One rolling-mean line per agent on each panel: reward on the left,
# portfolio value on the right.
for ax, column in ((ax1, "reward"), (ax2, "portfolio_value")):
    for agent in agents:
        agent_data = df[df["agent"] == agent]
        smoothed = agent_data[column].rolling(window=WINDOW_SIZE, min_periods=1).mean()
        ax.plot(agent_data["episode"], smoothed, label=agent, linewidth=2)

ax1.set_title(f"Сравнение средней награды (сглаживание {WINDOW_SIZE})", fontsize=14, fontweight='bold')
ax1.set_xlabel("Эпизод")
ax1.set_ylabel("Reward (сглаженная)")
ax1.legend(title="Агент")
ax1.grid(True, alpha=0.5)

# Reference line is added after the agent curves so it is listed last in the legend.
ax2.axhline(y=INITIAL_BALANCE, color='black', linestyle='--', alpha=0.7, label='Начальный баланс')

ax2.set_title(f"Динамика стоимости портфеля (сглаживание {WINDOW_SIZE})", fontsize=14, fontweight='bold')
ax2.set_xlabel("Эпизод")
ax2.set_ylabel("Portfolio Value ($)")
ax2.legend(title="Агент")
ax2.grid(True, alpha=0.5)

plt.tight_layout()
plt.show()

# Mean of each metric over the last WINDOW_SIZE rows of every agent.
final_metrics = (
    df.groupby("agent")
    .tail(WINDOW_SIZE)
    .groupby("agent")
    [["reward", "portfolio_value", "win_rate", "n_trades", "max_drawdown"]]
    .mean()
)

# Display names, in the same order as the columns selected above.
final_metrics.columns = [
    "Reward (Avg)",
    "Balance $ (Avg)",
    "Win Rate % (Avg)",
    "Trades (Avg)",
    "Max DD (Avg)"
]

final_metrics = final_metrics.sort_values("Reward (Avg)", ascending=False)
print(f" Финальные результаты (среднее за последние {WINDOW_SIZE} эпизодов)")

display(final_metrics.round(2))