In [1]:
import sys
import os
from pathlib import Path


# Locate the project root and put it at the front of sys.path so that project
# packages (e.g. `training`) can be imported: when the kernel is started inside a
# "notebooks" folder the root is its parent, otherwise it is the working directory.
current_dir = Path.cwd()
project_root = (
    current_dir.parent if current_dir.name == "notebooks" else Path(os.getcwd())
)
sys.path.insert(0, str(project_root))

import pandas as pd
from training import TrainingManager, TrainingConfig

# Version tag embedded in experiment / checkpoint names (e.g. "exp_qlearning_v1").
TRAIN_VERSION = "v1"

# Episode budget. Fine-tuning starts counting where the initial run stopped and
# ends after N_EPISODES_FINETUNE additional episodes.
N_EPISODES_INITIAL = 8_000
N_EPISODES_FINETUNE = 10_000
N_EPISODES_START_FINETUNE = N_EPISODES_INITIAL
N_EPISODES_END_FINETUNE = N_EPISODES_START_FINETUNE + N_EPISODES_FINETUNE


# --- Data loading ---------------------------------------------------------------
# Hourly CSV files, one per year. They are looked up in project_root/data first;
# if the 2021 file is not there, all three paths fall back to project_root/../data.
data_dir = project_root / "data"
if not (data_dir / "data_1h_2021.csv").exists():
    data_dir = project_root.parent / "data"

data_path_2021 = data_dir / "data_1h_2021.csv"
data_path_2022 = data_dir / "data_1h_2022.csv"
data_path_2023 = data_dir / "data_1h_2023.csv"


def _read_hourly_csv(path):
    """Read one CSV with its first column as the index, parsing it as ISO 8601 dates."""
    return pd.read_csv(path, index_col=0, parse_dates=True, date_format="iso8601")


df1 = _read_hourly_csv(data_path_2021)
df2 = _read_hourly_csv(data_path_2022)
# The 2023 file is optional; df3 is None when it is absent.
df3 = _read_hourly_csv(data_path_2023) if data_path_2023.exists() else None

# Fine-tuning currently uses the 2021 data only. An earlier variant (disabled) trained
# on df1 plus the first half of df2 and fine-tuned on the second half of df2, extended
# with df3 when available.
# NOTE(review): if the initial training also used 2021 data, this re-trains on seen
# data rather than on a later period - confirm this is intended.
# Single-element concat: extend the list to add more periods.
df_finetune = pd.concat([df1])

# The "period" line below is only meaningful for a datetime index. A previously
# recorded run printed float values here, i.e. the first CSV column was not a timestamp.
if not isinstance(df_finetune.index, pd.DatetimeIndex):
    print(
        "ВНИМАНИЕ: индекс данных не является DatetimeIndex "
        f"(dtype={df_finetune.index.dtype}); строка 'Период' ниже показывает "
        "значения первого столбца CSV, а не даты."
    )

print(f"Данные для дообучения: {len(df_finetune)} строк")
print(f"Период: {df_finetune.index[0]} - {df_finetune.index[-1]}")


# Trading-environment settings shared by both training phases.
env_params = dict(
    initial_balance=1000.0,
    window_size=10,
    commission=0.0001,
    slippage=0.0001,
    max_holding_time=72,
    max_drawdown_threshold=0.08,
    max_steps=1000,
)


def _make_train_params(n_episodes, n_episodes_start, save_frequency):
    """Build the training keyword arguments for one phase.

    The phase-specific values are the episode range and the save frequency; the
    remaining schedule values are fixed. Every env_params entry except "max_steps"
    is then merged on top (max_steps is set explicitly in the schedule part).
    """
    schedule = {
        "n_episodes": n_episodes,
        "n_episodes_start": n_episodes_start,
        "max_steps": 1000,
        "eval_frequency": 100,
        "save_frequency": save_frequency,
        "patience": 1000,
        "initial_balance": 1000.0,
    }
    env_overrides = {
        name: value for name, value in env_params.items() if name != "max_steps"
    }
    return {**schedule, **env_overrides}


# Parameters of the initial training phase (episodes counted from 0).
base_train_params = _make_train_params(
    n_episodes=N_EPISODES_INITIAL,
    n_episodes_start=0,
    save_frequency=500,
)

# Parameters of the fine-tuning phase; the episode counter continues from the
# end of the initial phase.
finetune_train_params = _make_train_params(
    n_episodes=N_EPISODES_END_FINETUNE,
    n_episodes_start=N_EPISODES_START_FINETUNE,
    save_frequency=1000,
)


def _agent_hyperparams(learning_rate, epsilon_end=0.01, epsilon_decay=0.998, **extra):
    """Return one agent's hyperparameter dict.

    discount_factor (0.99) and epsilon_start (1.0) are the same for every agent;
    agent-specific additions (e.g. trace_decay) are appended via ``extra``.
    """
    return {
        "learning_rate": learning_rate,
        "discount_factor": 0.99,
        "epsilon_start": 1.0,
        "epsilon_end": epsilon_end,
        "epsilon_decay": epsilon_decay,
        **extra,
    }


# Hyperparameters per agent type; the keys are the agent_type values accepted by
# _get_agent_instance / run_finetuning.
agents_config = {
    "QLearning": _agent_hyperparams(0.05),
    "SARSA": _agent_hyperparams(0.05),
    "SARSA_Lambda": _agent_hyperparams(0.02, trace_decay=0.9),
    "Monte_Carlo": _agent_hyperparams(0.02, epsilon_end=0.05, epsilon_decay=0.999),
}


def _get_agent_instance(agent_type: str, hyperparams: dict):
    if agent_type == "SARSA":
        from agents.classical.sarsa_agent import SarsaAgent

        agent = SarsaAgent()
    elif agent_type == "SARSA_Lambda":
        from agents.classical.sarsa_lambda_agent import SarsaLambdaAgent

        agent = SarsaLambdaAgent()
    elif agent_type == "QLearning":
        from agents.classical.qlearning_agent import QLearningAgent

        agent = QLearningAgent()
    elif agent_type == "Monte_Carlo":
        from agents.classical.monte_carlo_agent import MonteCarloAgent

        agent = MonteCarloAgent()
    else:
        raise ValueError(f"Unknown agent type: {agent_type}")

    for key, value in hyperparams.items():
        if hasattr(agent, key):
            setattr(agent, key, value)
    return agent


def run_finetuning(
    agent_type: str,
    df,
    project_root: str,
    env_params: dict,
    train_params: dict,
):
    """Continue training a previously trained agent from its final checkpoint.

    The base checkpoint is expected at
    ``<project_root>/training_data/checkpoints/exp_<agent>_<TRAIN_VERSION>/final_agent.pkl``;
    the continued run is stored under the same experiment name with a
    ``_finetune`` suffix.

    Args:
        agent_type: Agent name; used to look up hyperparameters in ``agents_config``
            and to build the experiment names.
        df: Data passed through to ``TrainingManager.continue_training``.
        project_root: Project root directory (the cells in this notebook pass a
            ``Path``; it is only used via ``os.path.join``).
        env_params: Accepted for call-site compatibility; not used by this function.
        train_params: Keyword arguments unpacked into ``TrainingConfig``; must contain
            "n_episodes_start" and "n_episodes" (printed in the banner).

    Returns:
        Whatever ``TrainingManager.continue_training`` returns.

    Raises:
        FileNotFoundError: If the base checkpoint file does not exist.

    NOTE(review): the hyperparameters from ``agents_config`` are applied to the agent
    object only and are not passed to ``TrainingConfig``. A recorded run printed
    "Learning rate: 0.1" in the config summary while the agent reported 0.0500 -
    confirm which values the training loop actually uses.
    """
    base_experiment = f"exp_{agent_type.lower()}_{TRAIN_VERSION}"
    finetune_experiment = f"{base_experiment}_finetune"

    checkpoint_path = os.path.join(
        project_root,
        "training_data",
        "checkpoints",
        base_experiment,
        "final_agent.pkl",
    )

    # Guard: fine-tuning is impossible without the result of the initial training.
    checkpoint_missing = not Path(checkpoint_path).exists()
    if checkpoint_missing:
        print(f"!!! ОШИБКА: Файл чекпоинта не найден: {checkpoint_path}")
        raise FileNotFoundError("Сначала запустите и завершите ПЕРВИЧНОЕ ОБУЧЕНИЕ.")

    agent = _get_agent_instance(agent_type, agents_config.get(agent_type, {}))

    continue_config = TrainingConfig(
        agent_name=f"{agent_type}_{TRAIN_VERSION}_finetune",
        agent_type=agent_type,
        **train_params,
    )

    logs_dir = os.path.join(project_root, "training_data/logs")
    checkpoints_dir = os.path.join(project_root, "training_data/checkpoints")
    manager = TrainingManager(
        base_log_dir=logs_dir,
        base_checkpoint_dir=checkpoints_dir,
    )

    first_episode = train_params["n_episodes_start"]
    last_episode = train_params["n_episodes"]
    print("\n=======================================================")
    print(f" НАЧАЛО ДООБУЧЕНИЯ: {agent_type} (Ep: {first_episode} -> {last_episode})")
    print("=======================================================")

    return manager.continue_training(
        agent=agent,
        df=df,
        config=continue_config,
        checkpoint_path=checkpoint_path,
        experiment_name=finetune_experiment,
        verbose=True,
    )

Данные для дообучения: 8722 строк
Период: 28966.36 - 47558.35


In [2]:
# Fine-tune the Q-learning agent, resuming from the exp_qlearning_v1 checkpoint.
run_finetuning(
    agent_type="QLearning",
    df=df_finetune,
    project_root=project_root,
    env_params=env_params,
    train_params=finetune_train_params,
)


 НАЧАЛО ДООБУЧЕНИЯ: QLearning (Ep: 8000 -> 18000)

Загрузка агента из /mnt/d/Study/python/TradingAgentClassicRL/training_data/checkpoints/exp_qlearning_v1/final_agent.pkl

 НАЧАЛО ОБУЧЕНИЯ
Агент:          QLearning_v1_finetune
Эксперимент:    exp_qlearning_v1_finetune
Эпизодов:       18000
Max steps:       1000
Learning rate:  0.1
Discount:       0.95
Epsilon:        1.0 → 0.01
Eval frequency: 100
Patience:       1000
Initial balance: $1,000.00


Эпизод  8100/18000 [ 45.0%] |█████████████░░░░░░░░░░░░░░░░░|
----------------------------------------------------------------------------------------------------
НАГРАДЫ:
   Текущая:          +0.68 | Средняя (100):     -28.25 | Eval:     -23.39 ± 19.57
ПОРТФЕЛЬ:
   Значение:       $885.48 | Изменение:        -11.45%
ТОРГОВЛЯ (на основе eval):
   Сделок:            68.8 | Win Rate:        80.4% | Profit Factor:   0.90
   Avg PnL:      $    -1.22 | Total PnL:     $   -94.92
ПАРАМЕТРЫ:
   Epsilon:          0.8186 | Learning Rate:     0.0500 | St

{'experiment_name': 'exp_qlearning_v1_finetune',
 'log_dir': '/mnt/d/Study/python/TradingAgentClassicRL/training_data/logs/exp_qlearning_v1_finetune',
 'checkpoint_dir': '/mnt/d/Study/python/TradingAgentClassicRL/training_data/checkpoints/exp_qlearning_v1_finetune',
 'final_agent_path': '/mnt/d/Study/python/TradingAgentClassicRL/training_data/checkpoints/exp_qlearning_v1_finetune/final_agent.pkl',
 'training_time': 2038.3617041110992,
 'final_metrics': {'episode': 18000,
  'reward': -33.58395029710397,
  'steps': 1000,
  'epsilon': 0.01,
  'portfolio_value': 812.1923393473586,
  'n_trades': 62,
  'win_rate': 67.74193548387096,
  'avg_pnl': -2.946202107289227,
  'max_drawdown': 0.10697146847287241,
  'total_pnl': -182.66453065193207,
  'timestamp': 1765825724.4320982},
 'final_evaluation': {'mean_reward': -1.042534023840186,
  'std_reward': 34.19190466008278,
  'min_reward': -54.85378317594199,
  'max_reward': 45.27639785914226,
  'mean_portfolio': 1033.4135972298345,
  'mean_trades': 6

In [3]:
# Fine-tune the SARSA agent, resuming from the exp_sarsa_v1 checkpoint.
run_finetuning(
    agent_type="SARSA",
    df=df_finetune,
    project_root=project_root,
    env_params=env_params,
    train_params=finetune_train_params,
)


 НАЧАЛО ДООБУЧЕНИЯ: SARSA (Ep: 8000 -> 18000)

Загрузка агента из /mnt/d/Study/python/TradingAgentClassicRL/training_data/checkpoints/exp_sarsa_v1/final_agent.pkl

 НАЧАЛО ОБУЧЕНИЯ
Агент:          SARSA_v1_finetune
Эксперимент:    exp_sarsa_v1_finetune
Эпизодов:       18000
Max steps:       1000
Learning rate:  0.1
Discount:       0.95
Epsilon:        1.0 → 0.01
Eval frequency: 100
Patience:       1000
Initial balance: $1,000.00


Эпизод  8100/18000 [ 45.0%] |█████████████░░░░░░░░░░░░░░░░░|
----------------------------------------------------------------------------------------------------
НАГРАДЫ:
   Текущая:          -5.07 | Средняя (100):     -26.15 | Eval:     -34.69 ± 11.55
ПОРТФЕЛЬ:
   Значение:       $827.14 | Изменение:        -17.29%
ТОРГОВЛЯ (на основе eval):
   Сделок:            50.6 | Win Rate:        83.1% | Profit Factor:   0.76
   Avg PnL:      $    -2.94 | Total PnL:     $  -155.96
ПАРАМЕТРЫ:
   Epsilon:          0.8186 | Learning Rate:     0.0500 | States:     35

Эп

{'experiment_name': 'exp_sarsa_v1_finetune',
 'log_dir': '/mnt/d/Study/python/TradingAgentClassicRL/training_data/logs/exp_sarsa_v1_finetune',
 'checkpoint_dir': '/mnt/d/Study/python/TradingAgentClassicRL/training_data/checkpoints/exp_sarsa_v1_finetune',
 'final_agent_path': '/mnt/d/Study/python/TradingAgentClassicRL/training_data/checkpoints/exp_sarsa_v1_finetune/final_agent.pkl',
 'training_time': 2008.1495373249054,
 'final_metrics': {'episode': 18000,
  'reward': 2.6643344762759007,
  'steps': 1000,
  'epsilon': 0.01,
  'portfolio_value': 1018.2707537518288,
  'n_trades': 1,
  'win_rate': 100.0,
  'avg_pnl': 18.37075375182884,
  'max_drawdown': 0.0,
  'total_pnl': 18.37075375182884,
  'timestamp': 1765827844.035006},
 'final_evaluation': {'mean_reward': -0.7094290028646182,
  'std_reward': 1.4208418358896981,
  'min_reward': -1.828037206772096,
  'max_reward': 3.4795561515625573,
  'mean_portfolio': 1001.6599435063829,
  'mean_trades': 1.0,
  'mean_steps': 1000.0,
  'mean_total_pnl

In [None]:
# Fine-tune the SARSA(lambda) agent, resuming from the exp_sarsa_lambda_v1 checkpoint.
run_finetuning(
    agent_type="SARSA_Lambda",
    df=df_finetune,
    project_root=project_root,
    env_params=env_params,
    train_params=finetune_train_params,
)

In [None]:
# Fine-tune the Monte Carlo agent, resuming from the exp_monte_carlo_v1 checkpoint.
run_finetuning(
    agent_type="Monte_Carlo",
    df=df_finetune,
    project_root=project_root,
    env_params=env_params,
    train_params=finetune_train_params,
)