In [None]:
import sys
import os
from pathlib import Path

# Locate the project root so the project-local packages imported by this
# notebook (``training``, ``agents``) resolve whether the kernel was started
# in the project root itself or in its ``notebooks/`` sub-directory.
current_dir = Path.cwd()
project_root = current_dir.parent if current_dir.name == "notebooks" else current_dir

# Guard the insert: re-running this cell must not keep prepending the same
# entry to sys.path.
_project_root_str = str(project_root)
if _project_root_str not in sys.path:
    sys.path.insert(0, _project_root_str)

import pandas as pd
from training import TrainingManager, TrainingConfig

# Suffix appended to agent and experiment names built in run_training().
TRAIN_VERSION = "v2"
# Number of episodes used for the initial training run.
N_EPISODES_INITIAL = 8_000


def _resolve_data_path(filename: str) -> Path:
    """Return the first existing location of a data file.

    Each file is looked up independently, first in ``<project_root>/data`` and
    then in ``<project_root>/../data``.

    Args:
        filename: Bare file name of the CSV to locate.

    Returns:
        Path of the first candidate that exists.

    Raises:
        FileNotFoundError: If the file is in neither location; the message
            lists every path that was tried.
    """
    candidates = [
        project_root / "data" / filename,
        project_root.parent / "data" / filename,
    ]
    for candidate in candidates:
        if candidate.exists():
            return candidate
    tried = ", ".join(str(candidate) for candidate in candidates)
    raise FileNotFoundError(f"Data file {filename!r} not found; tried: {tried}")


data_path_2021 = _resolve_data_path("data_1h_2021.csv")
data_path_2022 = _resolve_data_path("data_1h_2022.csv")

# NOTE(review): a previously saved run of this cell printed float values
# (28966.36 - 47558.35) for the period below, which suggests column 0 holds
# prices rather than timestamps - confirm which column should be the index.
df1 = pd.read_csv(data_path_2021, index_col=0, parse_dates=True, date_format="iso8601")
df2 = pd.read_csv(data_path_2022, index_col=0, parse_dates=True, date_format="iso8601")

# Alternative training set: all of 2021 plus the first half of 2022.
# df_train = pd.concat([df1, df2.iloc[:len(df2)//2]])
df_train = pd.concat([df1])

print(f"Загружено данных для обучения: {len(df_train)} строк")
print(f"Период: {df_train.index[0]} - {df_train.index[-1]}")

# Environment settings. Every entry except "max_steps" is merged into
# base_train_params below.
env_params = {
    "initial_balance": 1000.0,
    "window_size": 10,
    "commission": 0.0001,
    "slippage": 0.0001,
    "max_holding_time": 72,
    "max_drawdown_threshold": 0.08,
    "max_steps": 1000,
}

# Training settings. "max_steps" is set explicitly here, so it is excluded
# from the env_params merge. "initial_balance" comes only from env_params:
# a separate literal here would be silently overwritten by the merge.
base_train_params = {
    "n_episodes": N_EPISODES_INITIAL,
    "n_episodes_start": 0,
    "max_steps": 1000,
    "eval_frequency": 100,
    "save_frequency": 1000,
    "patience": 500,
    "seed": 42,
    **{key: value for key, value in env_params.items() if key != "max_steps"},
}

# Hyperparameters shared by every agent type configured below.
# NOTE(review): if epsilon is decayed multiplicatively once per episode,
# 0.9998 ** 8000 is roughly 0.20, so "epsilon_end" would not be reached in an
# 8,000-episode run - confirm this is intended.
_COMMON_HYPERPARAMS = {
    "learning_rate": 0.1,
    "discount_factor": 0.95,
    "epsilon_start": 1.0,
    "epsilon_end": 0.01,
    "epsilon_decay": 0.9998,
}

# Per-agent hyperparameters, keyed by the agent type names accepted by
# _get_agent_instance(). Each entry is its own dict, so editing one agent's
# settings never affects another.
agents_config = {
    "QLearning": {**_COMMON_HYPERPARAMS},
    "SARSA": {**_COMMON_HYPERPARAMS},
    "SARSA_Lambda": {
        **_COMMON_HYPERPARAMS,
        "lambda_param": 0.8,
        "replace_traces": True,
    },
    "Monte_Carlo": {
        **_COMMON_HYPERPARAMS,
        "first_visit": True,
        "use_sample_average": False,
    },
}

def _get_agent_instance(agent_type: str, hyperparams: dict):
    if agent_type == "SARSA":
        from agents.classical.sarsa_agent import SarsaAgent
        agent = SarsaAgent()
    elif agent_type == "SARSA_Lambda":
        from agents.classical.sarsa_lambda_agent import SarsaLambdaAgent
        agent = SarsaLambdaAgent()
    elif agent_type == "QLearning":
        from agents.classical.qlearning_agent import QLearningAgent
        agent = QLearningAgent()
    elif agent_type == "Monte_Carlo":
        from agents.classical.monte_carlo_agent import MonteCarloAgent
        agent = MonteCarloAgent()
    else:
        raise ValueError(f"Unknown agent type: {agent_type}")
        
    for key, value in hyperparams.items():
        if hasattr(agent, key):
            setattr(agent, key, value)
        else:
            print(f"Предупреждение: агент {agent_type} не имеет атрибута {key}")
    
    return agent


def run_training(
    agent_type: str,
    df: pd.DataFrame,
    project_root: Path,
    env_params: dict,
    train_params: dict,
    verbose: bool = True,
):
    """Run an initial training session for one agent type.

    Looks up the agent's hyperparameters in the module-level ``agents_config``,
    builds the agent with ``_get_agent_instance``, creates a ``TrainingConfig``
    and a ``TrainingManager``, and delegates to ``manager.train_agent``. Agent
    and experiment names are suffixed with the module-level ``TRAIN_VERSION``.

    Args:
        agent_type: Key into ``agents_config``; also forwarded to
            ``_get_agent_instance`` and ``TrainingConfig``.
        df: Data passed unchanged to ``manager.train_agent``.
        project_root: Base directory; logs go under ``training_data/logs`` and
            checkpoints under ``training_data/checkpoints`` inside it.
        env_params: Accepted for call-site compatibility; this function does
            not read it.
        train_params: Keyword arguments expanded into ``TrainingConfig``. Its
            "seed" entry (default 42) is also given to ``TrainingManager``, and
            "n_episodes" must be present when ``verbose`` is true.
        verbose: When true, print a start banner; also forwarded to
            ``manager.train_agent``.

    Returns:
        Whatever ``manager.train_agent`` returns.

    Raises:
        ValueError: Propagated from ``_get_agent_instance`` for an unknown
            ``agent_type``.
    """
    agent = _get_agent_instance(agent_type, agents_config.get(agent_type, {}))

    config = TrainingConfig(
        agent_type=agent_type,
        agent_name=f"{agent_type}_{TRAIN_VERSION}",
        **train_params,
    )

    artifacts_dir = project_root / "training_data"
    manager = TrainingManager(
        base_log_dir=str(artifacts_dir / "logs"),
        base_checkpoint_dir=str(artifacts_dir / "checkpoints"),
        seed=train_params.get("seed", 42),
    )

    experiment_name = f"exp_{agent_type.lower()}_{TRAIN_VERSION}"

    if verbose:
        rule = "=" * 80
        print(f"\n{rule}")
        print(f"НАЧАЛО ПЕРВИЧНОГО ОБУЧЕНИЯ: {agent_type}")
        print(rule)
        print(f"Эпизодов: {train_params['n_episodes']}")
        print(f"Эксперимент: {experiment_name}")
        print(f"{rule}\n")

    return manager.train_agent(agent, df, config, experiment_name, verbose=verbose)

Загружено данных для обучения: 8722 строк
Период: 28966.36 - 47558.35


In [2]:
# run_training(
#     agent_type="QLearning",
#     df=df_train,
#     project_root=project_root,
#     env_params=env_params,
#     train_params=base_train_params,
# )

In [None]:
# run_training(
#     agent_type="SARSA",
#     df=df_train,
#     project_root=project_root,
#     env_params=env_params,
#     train_params=base_train_params,
# )


НАЧАЛО ПЕРВИЧНОГО ОБУЧЕНИЯ: SARSA
Эпизодов: 8000
Эксперимент: exp_sarsa_v1


 НАЧАЛО ОБУЧЕНИЯ
Агент:          SARSA_v1
Эксперимент:    exp_sarsa_v1
Эпизодов:       8000
Max steps:       1000
Learning rate:  0.1
Discount:       0.95
Epsilon:        1.0 → 0.01
Eval frequency: 100
Patience:       500
Initial balance: $1,000.00


Эпизод   100/8000 [  1.2%] |░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░|
----------------------------------------------------------------------------------------------------
НАГРАДЫ:
   Текущая:         -51.71 | Средняя (100):     -32.73 | Eval:     -30.44 ± 15.25
ПОРТФЕЛЬ:
   Значение:       $879.69 | Изменение:        -12.03%
ТОРГОВЛЯ (на основе eval):
   Сделок:           123.4 | Win Rate:        60.7% | Profit Factor:   0.89
   Avg PnL:      $    -0.83 | Total PnL:     $   -95.54
ПАРАМЕТРЫ:
   Epsilon:          0.9802 | Learning Rate:     0.1000 | States:     35

Эпизод   200/8000 [  2.5%] |░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░|
--------------------------------------------------

{'experiment_name': 'exp_sarsa_v1',
 'log_dir': '/mnt/d/Study/python/TradingAgentClassicRL/training_data/logs/exp_sarsa_v1',
 'checkpoint_dir': '/mnt/d/Study/python/TradingAgentClassicRL/training_data/checkpoints/exp_sarsa_v1',
 'final_agent_path': '/mnt/d/Study/python/TradingAgentClassicRL/training_data/checkpoints/exp_sarsa_v1/final_agent.pkl',
 'training_time': 1513.2381529808044,
 'final_metrics': {'episode': 8000,
  'reward': -17.9004295992297,
  'steps': 1000,
  'epsilon': 0.20186421282886519,
  'portfolio_value': 963.356685039623,
  'n_trades': 74,
  'win_rate': 51.35135135135135,
  'avg_pnl': -0.3926038776301356,
  'max_drawdown': 0.09006251725372019,
  'total_pnl': -29.052686944630036,
  'timestamp': 1765819193.4608035},
 'final_evaluation': {'mean_reward': -0.7094290028646182,
  'std_reward': 1.4208418358896981,
  'min_reward': -1.828037206772096,
  'max_reward': 3.4795561515625573,
  'mean_portfolio': 1001.6599435063829,
  'mean_trades': 1.0,
  'mean_steps': 1000.0,
  'mean_

In [None]:
# run_training(
#     agent_type="SARSA_Lambda",
#     df=df_train,
#     project_root=project_root,
#     env_params=env_params,
#     train_params=base_train_params,
# )


НАЧАЛО ПЕРВИЧНОГО ОБУЧЕНИЯ: SARSA_Lambda
Эпизодов: 8000
Эксперимент: exp_sarsa_lambda_v1


 НАЧАЛО ОБУЧЕНИЯ
Агент:          SARSA_Lambda_v1
Эксперимент:    exp_sarsa_lambda_v1
Эпизодов:       8000
Max steps:       1000
Learning rate:  0.1
Discount:       0.95
Epsilon:        1.0 → 0.01
Eval frequency: 100
Patience:       500
Initial balance: $1,000.00


Эпизод   100/8000 [  1.2%] |░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░|
----------------------------------------------------------------------------------------------------
НАГРАДЫ:
   Текущая:         -51.71 | Средняя (100):     -32.83 | Eval:     -22.96 ±  8.02
ПОРТФЕЛЬ:
   Значение:       $903.79 | Изменение:         -9.62%
ТОРГОВЛЯ (на основе eval):
   Сделок:           108.0 | Win Rate:        54.3% | Profit Factor:   0.83
   Avg PnL:      $    -0.80 | Total PnL:     $   -71.90
ПАРАМЕТРЫ:
   Epsilon:          0.9802 | Learning Rate:     0.1000 | States:     35

Эпизод   200/8000 [  2.5%] |░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░|
----------------------

{'experiment_name': 'exp_sarsa_lambda_v1',
 'log_dir': '/mnt/d/Study/python/TradingAgentClassicRL/training_data/logs/exp_sarsa_lambda_v1',
 'checkpoint_dir': '/mnt/d/Study/python/TradingAgentClassicRL/training_data/checkpoints/exp_sarsa_lambda_v1',
 'final_agent_path': '/mnt/d/Study/python/TradingAgentClassicRL/training_data/checkpoints/exp_sarsa_lambda_v1/final_agent.pkl',
 'training_time': 2192.095310688019,
 'final_metrics': {'episode': 8000,
  'reward': 14.874943824950734,
  'steps': 1000,
  'epsilon': 0.20186421282886519,
  'portfolio_value': 1208.7323320983721,
  'n_trades': 158,
  'win_rate': 56.32911392405063,
  'avg_pnl': 1.4343264833979168,
  'max_drawdown': 0.08870158851020883,
  'total_pnl': 226.62358437687087,
  'timestamp': 1765821408.343343},
 'final_evaluation': {'mean_reward': -22.77469610609984,
  'std_reward': 30.867257506580493,
  'min_reward': -69.1426601188424,
  'max_reward': 21.276025479107382,
  'mean_portfolio': 880.8299315783319,
  'mean_trades': 111.6,
  'me

In [None]:
# run_training(
#     agent_type="Monte_Carlo",
#     df=df_train,
#     project_root=project_root,
#     env_params=env_params,
#     train_params=base_train_params,
# )


НАЧАЛО ПЕРВИЧНОГО ОБУЧЕНИЯ: Monte_Carlo
Эпизодов: 8000
Эксперимент: exp_monte_carlo_v1


 НАЧАЛО ОБУЧЕНИЯ
Агент:          Monte_Carlo_v1
Эксперимент:    exp_monte_carlo_v1
Эпизодов:       8000
Max steps:       1000
Learning rate:  0.1
Discount:       0.95
Epsilon:        1.0 → 0.01
Eval frequency: 100
Patience:       500
Initial balance: $1,000.00


Эпизод   100/8000 [  1.2%] |░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░|
----------------------------------------------------------------------------------------------------
НАГРАДЫ:
   Текущая:         -52.59 | Средняя (100):     -32.89 | Eval:     -21.15 ± 26.49
ПОРТФЕЛЬ:
   Значение:       $947.98 | Изменение:         -5.20%
ТОРГОВЛЯ (на основе eval):
   Сделок:            84.2 | Win Rate:        38.7% | Profit Factor:   0.97
   Avg PnL:      $    -0.58 | Total PnL:     $   -46.06
ПАРАМЕТРЫ:
   Epsilon:          0.9802 | Learning Rate:     0.1000 | States:     35

Эпизод   200/8000 [  2.5%] |░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░|
--------------------------

{'experiment_name': 'exp_monte_carlo_v1',
 'log_dir': '/mnt/d/Study/python/TradingAgentClassicRL/training_data/logs/exp_monte_carlo_v1',
 'checkpoint_dir': '/mnt/d/Study/python/TradingAgentClassicRL/training_data/checkpoints/exp_monte_carlo_v1',
 'final_agent_path': '/mnt/d/Study/python/TradingAgentClassicRL/training_data/checkpoints/exp_monte_carlo_v1/final_agent.pkl',
 'training_time': 1431.4304559230804,
 'final_metrics': {'episode': 8000,
  'reward': 2.0087186176789524,
  'steps': 1000,
  'epsilon': 0.20186421282886519,
  'portfolio_value': 1074.941971721125,
  'n_trades': 89,
  'win_rate': 53.93258426966292,
  'avg_pnl': 0.9518275900714708,
  'max_drawdown': 0.08314038468975696,
  'total_pnl': 84.7126555163609,
  'timestamp': 1765822880.751662},
 'final_evaluation': {'mean_reward': -0.8233171274735696,
  'std_reward': 1.3725938110890468,
  'min_reward': -2.311417009841917,
  'max_reward': 3.058516951483701,
  'mean_portfolio': 1001.2030637824864,
  'mean_trades': 0.4,
  'mean_step