In [1]:
import sys
import os
# Put the parent directory on the import path (presumably so the `training`
# and `agents` packages imported in this notebook can be resolved).
sys.path.append("..")
# Absolute path two levels above the current working directory.
# NOTE(review): imports and data files are addressed via the parent directory
# (".."), while this points at the grandparent — confirm that is intended.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))

import pandas as pd
from training import TrainingManager, TrainingConfig

# Version tag embedded in the agent name and the experiment name.
TRAIN_VERSION = "v2"
# NOTE(review): not referenced by any cell visible here; the training
# parameters hard-code n_episodes=5000 instead — confirm which is intended.
N_EPISODES_INITIAL = 3_000

# Input CSV files, addressed relative to the notebook's working directory.
data_path_2021 = "../data/data_1h_2021.csv"
data_path_2022 = "../data/data_1h_2022.csv"

# The first column becomes the index and is parsed as dates.
# "ISO8601" (upper case) is the spelling pandas documents for this special
# format value; a differently-cased string is not the documented keyword and
# may be treated as a literal strptime pattern, leaving the index unparsed.
df1 = pd.read_csv(data_path_2021, index_col=0, parse_dates=True, date_format="ISO8601")
df2 = pd.read_csv(data_path_2022, index_col=0, parse_dates=True, date_format="ISO8601")

# Training set: all rows of the 2021 file plus the first half (by row count)
# of the 2022 file.
df_train = pd.concat([df1, df2.iloc[: len(df2) // 2]])

# Environment settings. They are merged into base_train_params below
# (everything except max_steps) and also passed to run_training as
# `env_params`.
env_params = dict(
    initial_balance=1000.0,
    window_size=10,
    commission=0.0001,
    slippage=0.0001,
    max_holding_time=72,
    holding_threshold=24,
    max_drawdown_threshold=0.08,
    lambda_drawdown=0.3,
    lambda_hold=0.05,
    reward_scaling=1.0,
    max_steps=1000,
)

# Training-loop settings; run_training spreads this dict into TrainingConfig.
# NOTE(review): n_episodes is hard-coded here although the notebook also
# defines N_EPISODES_INITIAL = 3000 — confirm which value is intended.
base_train_params = dict(
    n_episodes=5000,
    n_episodes_start=0,
    max_steps=1000,
    eval_frequency=50,
    save_frequency=500,
    patience=50,
    seed=42,
)
# Append every environment setting except max_steps, which is already set
# above; initial_balance therefore always mirrors env_params.
base_train_params.update(
    (name, value) for name, value in env_params.items() if name != "max_steps"
)
# Hyperparameters shared by the temporal-difference agents below.
_td_defaults = {
    "learning_rate": 0.1,
    "discount_factor": 0.95,  # lower, for short-term trading
    "epsilon_start": 1.0,
    "epsilon_end": 0.05,      # higher, for more exploration
    "epsilon_decay": 0.9995,  # slower decay
    "lr_decay": 0.99995,      # very slow learning-rate decay
    "min_learning_rate": 0.01,
}

# Per-agent hyperparameters, keyed by the agent type name that run_training
# looks up. Each agent gets its own dict object.
agents_config = {
    "QLearning": dict(_td_defaults),
    "SARSA": dict(_td_defaults),
    "SARSA_Lambda": {
        **_td_defaults,
        "learning_rate": 0.05,    # lower for SARSA(lambda)
        "min_learning_rate": 0.005,
        "lambda_param": 0.7,      # trace decay
        "replace_traces": True,
    },
    # NOTE(review): learning_rate is None ("not used in MC") while
    # use_sample_average=False is annotated as "use a constant LR" — confirm
    # the Monte Carlo agent tolerates a None learning rate.
    "Monte_Carlo": {
        "learning_rate": None,    # not used in MC
        "discount_factor": 0.99,  # higher for MC
        "epsilon_start": 1.0,
        "epsilon_end": 0.1,       # more exploration
        "epsilon_decay": 0.9997,  # even slower decay
        "first_visit": True,
        "use_sample_average": False,  # use a constant learning rate
    },
}

def _get_agent_instance(agent_type: str, hyperparams: dict):
    """Create a classical RL agent and apply hyperparameter overrides.

    The agent class is imported lazily and constructed with no arguments.
    Every entry of ``hyperparams`` whose key names an existing attribute of
    the new agent is then assigned with ``setattr``. Entries that do not match
    any attribute cannot be applied; they are reported in a single warning
    instead of being dropped silently.

    Args:
        agent_type: One of "SARSA", "SARSA_Lambda", "QLearning" or
            "Monte_Carlo".
        hyperparams: Mapping of attribute name to the value to assign.

    Returns:
        The constructed agent with the matching overrides applied.

    Raises:
        ValueError: If ``agent_type`` is not one of the supported names.

    Warns:
        UserWarning: If at least one key of ``hyperparams`` is not an
            attribute of the agent.
    """
    import warnings

    if agent_type == "SARSA":
        from agents.classical.sarsa_agent import SarsaAgent
        agent = SarsaAgent()
    elif agent_type == "SARSA_Lambda":
        from agents.classical.sarsa_lambda_agent import SarsaLambdaAgent
        agent = SarsaLambdaAgent()
    elif agent_type == "QLearning":
        from agents.classical.qlearning_agent import QLearningAgent
        agent = QLearningAgent()
    elif agent_type == "Monte_Carlo":
        from agents.classical.monte_carlo_agent import MonteCarloAgent
        agent = MonteCarloAgent()
    else:
        raise ValueError(f"Unknown agent type: {agent_type}")

    # Only overwrite attributes the agent already exposes; remember the rest
    # so a mistyped or unsupported setting does not go unnoticed.
    ignored = []
    for key, value in hyperparams.items():
        if hasattr(agent, key):
            setattr(agent, key, value)
        else:
            ignored.append(str(key))

    if ignored:
        warnings.warn(
            f"{agent_type}: hyperparameters not applied because the agent has "
            f"no such attribute: {', '.join(ignored)}",
            stacklevel=2,
        )
    return agent


def run_training(
    agent_type: str,
    df,
    project_root: str,
    env_params: dict,
    train_params: dict,
):
    """Build an agent, a training config and a manager, then run training.

    Args:
        agent_type: Key into ``agents_config``; also passed to
            ``_get_agent_instance`` and used in the agent/experiment names.
        df: Training data handed to ``TrainingManager.train_agent`` unchanged.
        project_root: Base directory under which ``training_data/logs`` and
            ``training_data/checkpoints`` are addressed.
        env_params: Accepted for interface compatibility; not used in the
            body of this function.
        train_params: Keyword arguments spread into ``TrainingConfig``; must
            contain ``"n_episodes"`` (read for the start banner).

    Returns:
        Whatever ``TrainingManager.train_agent`` returns.
    """
    # Agents without an entry in agents_config are built with no overrides.
    agent = _get_agent_instance(agent_type, agents_config.get(agent_type, {}))

    run_label = f"{agent_type}_{TRAIN_VERSION}"
    config = TrainingConfig(agent_name=run_label, agent_type=agent_type, **train_params)

    log_dir = os.path.join(project_root, "training_data/logs")
    checkpoint_dir = os.path.join(project_root, "training_data/checkpoints")
    manager = TrainingManager(base_log_dir=log_dir, base_checkpoint_dir=checkpoint_dir)

    experiment_name = f"exp_{agent_type.lower()}_{TRAIN_VERSION}"
    banner = f" НАЧАЛО ПЕРВИЧНОГО ОБУЧЕНИЯ: {agent_type} (Ep: 0 -> {train_params['n_episodes']})"
    separator = "======================================================="
    print(banner, separator, sep="\n")
    return manager.train_agent(agent, df, config, experiment_name)

In [2]:
# --- Launch training: QLearning ---
run_training(
    "QLearning",
    df_train,
    project_root,
    env_params=env_params,
    train_params=base_train_params,
)

 НАЧАЛО ПЕРВИЧНОГО ОБУЧЕНИЯ: QLearning (Ep: 0 -> 5000)

Начало обучения: QLearning_v2
Эксперимент: exp_qlearning_v2
Эпизодов: 5000
Max steps: 1000
Learning rate: 0.1
Discount factor: 0.95
Epsilon: 1.0->0.01



TypeError: Cannot index by location index with a non-integer key

In [None]:
# --- Launch training: SARSA ---
run_training(
    "SARSA",
    df_train,
    project_root,
    env_params=env_params,
    train_params=base_train_params,
)

In [None]:
# --- Launch training: SARSA_Lambda ---
run_training(
    "SARSA_Lambda",
    df_train,
    project_root,
    env_params=env_params,
    train_params=base_train_params,
)

In [None]:
# --- Launch training: Monte_Carlo ---
run_training(
    "Monte_Carlo",
    df_train,
    project_root,
    env_params=env_params,
    train_params=base_train_params,
)