In [None]:
import sys
import os
from pathlib import Path

# Resolve the project root so that project packages can be imported:
# when the kernel starts inside a "notebooks" directory the root is its parent,
# otherwise the current working directory itself is used.
current_dir = Path.cwd()
project_root = current_dir.parent if current_dir.name == "notebooks" else Path(os.getcwd())

# Put the root first on sys.path so its packages take precedence on import.
sys.path.insert(0, str(project_root))

import pandas as pd
from training import TrainingManager, TrainingConfig

# Version tag appended to the agent name and the experiment name in run_training.
TRAIN_VERSION = "v2"
# Episode count for the initial training run; becomes base_train_params["n_episodes"].
N_EPISODES_INITIAL = 100

# The 2021 and 2022 CSVs are looked up in <project_root>/data first; if the 2021
# file is not there, both paths fall back to <project_root>/../data.
data_dir = project_root / "data"
if not (data_dir / "data_1h_2021.csv").exists():
    data_dir = project_root.parent / "data"

data_path_2021 = data_dir / "data_1h_2021.csv"
data_path_2022 = data_dir / "data_1h_2022.csv"

# Fail early and list every missing file before anything is parsed.
missing_paths = [str(path) for path in (data_path_2021, data_path_2022) if not path.exists()]
if missing_paths:
    raise FileNotFoundError("Training data not found: " + ", ".join(missing_paths))

# pandas documents the ISO 8601 shortcut as the upper-case string "ISO8601";
# any other spelling is handled as a literal strptime pattern, which can leave
# the index unparsed (plain strings) without raising.
df1 = pd.read_csv(data_path_2021, index_col=0, parse_dates=True, date_format="ISO8601")
df2 = pd.read_csv(data_path_2022, index_col=0, parse_dates=True, date_format="ISO8601")

# Only the 2021 frame is used for training at the moment. Alternative that also
# takes the first half of 2022:
# df_train = pd.concat([df1, df2.iloc[:len(df2)//2]])
df_train = pd.concat([df1])

# index[0] / index[-1] below would raise an opaque IndexError on an empty frame.
if df_train.empty:
    raise ValueError(f"Training data is empty: {data_path_2021}")

print(f"Загружено данных для обучения: {len(df_train)} строк")
print(f"Период: {df_train.index[0]} - {df_train.index[-1]}")

# Environment settings. Every entry except max_steps is also copied into
# base_train_params; the same mapping is passed to run_training as env_params.
env_params = dict(
    initial_balance=1000.0,
    window_size=10,
    commission=0.0001,
    slippage=0.0001,
    max_holding_time=72,
    max_drawdown_threshold=0.08,
    max_steps=1000,
)

# Keyword arguments forwarded to TrainingConfig by run_training.
# Training-loop settings come first; initial_balance is listed here and is then
# overwritten by the value taken from env_params below.
base_train_params = dict(
    n_episodes=N_EPISODES_INITIAL,
    n_episodes_start=0,
    max_steps=1000,
    eval_frequency=100,
    save_frequency=500,
    patience=30,
    seed=42,
    initial_balance=1000.0,
)
# Merge in every environment setting except max_steps, which is set explicitly above.
base_train_params.update(
    (name, setting) for name, setting in env_params.items() if name != "max_steps"
)

# Hyperparameters that every agent type below shares.
_SHARED_HYPERPARAMS = {
    "learning_rate": 0.1,
    "discount_factor": 0.95,
    "epsilon_start": 1.0,
    "epsilon_end": 0.01,
    "epsilon_decay": 0.9998,
}

# Per-agent hyperparameters, keyed by the agent_type string that run_training
# receives. Each entry is its own dict, so editing one agent's settings does not
# affect the others.
agents_config = {
    "QLearning": {**_SHARED_HYPERPARAMS},
    "SARSA": {**_SHARED_HYPERPARAMS},
    "SARSA_Lambda": {
        **_SHARED_HYPERPARAMS,
        "lambda_param": 0.8,
        "replace_traces": True,
    },
    "Monte_Carlo": {
        **_SHARED_HYPERPARAMS,
        "first_visit": True,
        "use_sample_average": False,
    },
}

def _get_agent_instance(agent_type: str, hyperparams: dict):
    """Create an agent of the requested type and apply hyperparameters to it.

    The agent class is imported lazily, so only the module of the requested
    agent is loaded. The agent is constructed without arguments; afterwards each
    hyperparameter is assigned with ``setattr`` when the agent already exposes an
    attribute of that name. Names the agent does not expose are reported with a
    printed warning and skipped.

    Args:
        agent_type: One of "QLearning", "SARSA", "SARSA_Lambda", "Monte_Carlo".
        hyperparams: Mapping of attribute name to the value to assign.

    Returns:
        The configured agent instance.

    Raises:
        ValueError: If ``agent_type`` is not one of the supported names.
    """
    if agent_type == "QLearning":
        from agents.classical.qlearning_agent import QLearningAgent as agent_cls
    elif agent_type == "SARSA":
        from agents.classical.sarsa_agent import SarsaAgent as agent_cls
    elif agent_type == "SARSA_Lambda":
        from agents.classical.sarsa_lambda_agent import SarsaLambdaAgent as agent_cls
    elif agent_type == "Monte_Carlo":
        from agents.classical.monte_carlo_agent import MonteCarloAgent as agent_cls
    else:
        raise ValueError(f"Unknown agent type: {agent_type}")

    agent = agent_cls()

    for name, setting in hyperparams.items():
        if not hasattr(agent, name):
            # Unknown names are only reported, never added to the agent.
            print(f"Предупреждение: агент {agent_type} не имеет атрибута {name}")
            continue
        setattr(agent, name, setting)

    return agent


def run_training(
    agent_type: str,
    df: pd.DataFrame,
    project_root: Path,
    env_params: dict,
    train_params: dict,
    verbose: bool = True,
):
    """Build one agent and train it through a ``TrainingManager``.

    Hyperparameters come from the module-level ``agents_config`` (an empty
    mapping when the agent type has no entry). Logs and checkpoints are written
    under ``<project_root>/training_data``. Agent and experiment names carry the
    ``TRAIN_VERSION`` tag.

    Args:
        agent_type: Agent name understood by ``_get_agent_instance``.
        df: Data passed to ``TrainingManager.train_agent``.
        project_root: Directory that contains (or will contain) ``training_data``.
        env_params: Accepted for interface compatibility; not read by this
            function. ``train_params`` is the mapping actually forwarded.
        train_params: Keyword arguments for ``TrainingConfig``; its optional
            "seed" entry (default 42) also seeds the manager, and "n_episodes"
            is shown in the banner when ``verbose`` is true.
        verbose: Print a start banner and forward the flag to ``train_agent``.

    Returns:
        Whatever ``TrainingManager.train_agent`` returns.
    """
    agent = _get_agent_instance(agent_type, agents_config.get(agent_type, {}))

    run_label = f"{agent_type}_{TRAIN_VERSION}"
    config = TrainingConfig(agent_name=run_label, agent_type=agent_type, **train_params)

    storage_root = project_root / "training_data"
    manager = TrainingManager(
        base_log_dir=str(storage_root / "logs"),
        base_checkpoint_dir=str(storage_root / "checkpoints"),
        seed=train_params.get("seed", 42),
    )

    experiment_name = f"exp_{agent_type.lower()}_{TRAIN_VERSION}"

    if verbose:
        rule = "=" * 80
        print(f"\n{rule}")
        print(f"НАЧАЛО ПЕРВИЧНОГО ОБУЧЕНИЯ: {agent_type}")
        print(rule)
        print(f"Эпизодов: {train_params['n_episodes']}")
        print(f"Эксперимент: {experiment_name}")
        print(f"{rule}\n")

    return manager.train_agent(agent, df, config, experiment_name, verbose=verbose)

In [None]:
# Train the "QLearning" agent on df_train with the shared base parameters.
# The call is the last expression of the cell, so its return value is shown as
# the cell output.
run_training(
    agent_type="QLearning",
    df=df_train,
    project_root=project_root,
    env_params=env_params,
    train_params=base_train_params,
)

In [None]:
# Train the "SARSA" agent on df_train with the shared base parameters.
# The call is the last expression of the cell, so its return value is shown as
# the cell output.
run_training(
    agent_type="SARSA",
    df=df_train,
    project_root=project_root,
    env_params=env_params,
    train_params=base_train_params,
)

In [None]:
# Train the "SARSA_Lambda" agent on df_train with the shared base parameters.
# The call is the last expression of the cell, so its return value is shown as
# the cell output.
run_training(
    agent_type="SARSA_Lambda",
    df=df_train,
    project_root=project_root,
    env_params=env_params,
    train_params=base_train_params,
)

In [None]:
# Train the "Monte_Carlo" agent on df_train with the shared base parameters.
# The call is the last expression of the cell, so its return value is shown as
# the cell output.
run_training(
    agent_type="Monte_Carlo",
    df=df_train,
    project_root=project_root,
    env_params=env_params,
    train_params=base_train_params,
)