In [1]:
import sys
import os
from pathlib import Path


# Resolve the project root: when the kernel was started inside a directory
# named "notebooks" the root is its parent, otherwise the working directory
# itself is used.
current_dir = Path.cwd()
launched_from_notebooks = current_dir.name == "notebooks"
project_root = current_dir.parent if launched_from_notebooks else Path(os.getcwd())

# Put the root at the front of sys.path before the `from training import ...`
# statement that follows (presumably a project-local package - confirm).
sys.path.insert(0, str(project_root))

import pandas as pd
from training import TrainingManager, TrainingConfig

# Version tag that is embedded in the experiment and agent names built below.
TRAIN_VERSION = "v2"

# Episode budget: length of the initial run and the number of additional
# fine-tuning episodes.
N_EPISODES_INITIAL = 100
N_EPISODES_FINETUNE = 500

# Episode range handed to the fine-tuning run: it starts at the count reached
# by the initial run and ends at the combined total.
N_EPISODES_START_FINETUNE = N_EPISODES_INITIAL
N_EPISODES_END_FINETUNE = N_EPISODES_START_FINETUNE + N_EPISODES_FINETUNE


# Locate the hourly CSV files: use <project_root>/data, and fall back to the
# "data" directory one level above the project root when the 2021 file is not
# found in the first location.
data_dir = project_root / "data"
if not (data_dir / "data_1h_2021.csv").exists():
    data_dir = project_root.parent / "data"

data_path_2021 = data_dir / "data_1h_2021.csv"
data_path_2022 = data_dir / "data_1h_2022.csv"
data_path_2023 = data_dir / "data_1h_2023.csv"

# Shared read options: column 0 becomes the index and is parsed as ISO 8601 dates.
# NOTE(review): the recorded output of this cell prints the period bounds as
# "28966.36 - 47558.35", i.e. the resulting index does not look like
# timestamps - confirm that column 0 of these files is the datetime column.
csv_options = {"index_col": 0, "parse_dates": True, "date_format": "iso8601"}

df1 = pd.read_csv(data_path_2021, **csv_options)
df2 = pd.read_csv(data_path_2022, **csv_options)

# The 2023 file is optional: df3 is None when it is absent.
if data_path_2023.exists():
    df3 = pd.read_csv(data_path_2023, **csv_options)
else:
    df3 = None


# Disabled alternative split, kept for reference: train on 2021 plus the first
# half of 2022, fine-tune on the second half of 2022 (plus 2023 when loaded).
#
# df_train = pd.concat([df1, df2.iloc[: len(df2) // 2]])
#
# if df3 is not None:
#     df_finetune = pd.concat([df2.iloc[len(df2)//2:], df3])
# else:
#     df_finetune = df2.iloc[len(df2) // 2 :]

# Active choice: the fine-tuning frame is built from the 2021 frame only.
df_finetune = pd.concat([df1])

row_count = len(df_finetune)
print(f"Данные для дообучения: {row_count} строк")

# First and last index labels of the selected frame.
first_label = df_finetune.index[0]
last_label = df_finetune.index[-1]
print(f"Период: {first_label} - {last_label}")


# Environment settings. Every entry except "max_steps" is also merged into the
# training parameter dictionaries that follow.
env_params = dict(
    initial_balance=1000.0,
    window_size=10,
    commission=0.0001,
    slippage=0.0001,
    max_holding_time=72,
    max_drawdown_threshold=0.08,
    max_steps=1000,
)


def _build_train_params(n_episodes, n_episodes_start, eval_frequency):
    """Assemble one training-parameter dictionary.

    The three arguments are the only values that differ between the initial
    and the fine-tuning parameter sets; all other entries are fixed here.
    Entries of ``env_params`` other than "max_steps" are overlaid at the end,
    so an ``env_params`` value replaces a same-named entry set above
    (currently "initial_balance").
    """
    params = {
        "n_episodes": n_episodes,
        "n_episodes_start": n_episodes_start,
        "max_steps": 1000,
        "eval_frequency": eval_frequency,
        "save_frequency": 500,
        "patience": 200,
        "initial_balance": 1000.0,
    }
    params.update(
        (name, value) for name, value in env_params.items() if name != "max_steps"
    )
    return params


# Parameters of the initial run: episodes 0 .. N_EPISODES_INITIAL.
base_train_params = _build_train_params(
    n_episodes=N_EPISODES_INITIAL,
    n_episodes_start=0,
    eval_frequency=100,
)


# Parameters of the fine-tuning run: episodes
# N_EPISODES_START_FINETUNE .. N_EPISODES_END_FINETUNE.
finetune_train_params = _build_train_params(
    n_episodes=N_EPISODES_END_FINETUNE,
    n_episodes_start=N_EPISODES_START_FINETUNE,
    eval_frequency=50,
)


def _agent_hyperparams(learning_rate, epsilon_end=0.01, epsilon_decay=0.998, **extra):
    """Return a fresh hyperparameter dict for one agent type.

    ``discount_factor`` (0.99) and ``epsilon_start`` (1.0) are the same for
    every agent configured here; agent-specific additions are passed through
    ``extra`` and appended after the common keys.
    """
    return {
        "learning_rate": learning_rate,
        "discount_factor": 0.99,
        "epsilon_start": 1.0,
        "epsilon_end": epsilon_end,
        "epsilon_decay": epsilon_decay,
        **extra,
    }


# Hyperparameters per agent type; the keys are the agent type names accepted
# by _get_agent_instance.
agents_config = {
    "QLearning": _agent_hyperparams(0.05),
    "SARSA": _agent_hyperparams(0.05),
    "SARSA_Lambda": _agent_hyperparams(0.02, trace_decay=0.9),
    "Monte_Carlo": _agent_hyperparams(0.02, epsilon_end=0.05, epsilon_decay=0.999),
}


def _get_agent_instance(agent_type: str, hyperparams: dict):
    """Create a classical RL agent and apply hyperparameter overrides.

    The agent class is imported lazily inside the matching branch, so only the
    module of the requested agent is loaded.

    Args:
        agent_type: One of "SARSA", "SARSA_Lambda", "QLearning", "Monte_Carlo".
        hyperparams: Mapping of attribute name to value. An entry is assigned
            with ``setattr`` only when the freshly constructed agent already
            has an attribute of that name.

    Returns:
        The agent instance, constructed without arguments, after the matching
        overrides have been applied.

    Raises:
        ValueError: If ``agent_type`` is not one of the supported names.

    Warns:
        UserWarning: If one or more entries of ``hyperparams`` have no matching
            attribute on the agent. Those entries are not applied; previously
            they were dropped without any notice.
    """
    import warnings

    if agent_type == "SARSA":
        from agents.classical.sarsa_agent import SarsaAgent

        agent = SarsaAgent()
    elif agent_type == "SARSA_Lambda":
        from agents.classical.sarsa_lambda_agent import SarsaLambdaAgent

        agent = SarsaLambdaAgent()
    elif agent_type == "QLearning":
        from agents.classical.qlearning_agent import QLearningAgent

        agent = QLearningAgent()
    elif agent_type == "Monte_Carlo":
        from agents.classical.monte_carlo_agent import MonteCarloAgent

        agent = MonteCarloAgent()
    else:
        raise ValueError(f"Unknown agent type: {agent_type}")

    # Only existing attributes are overwritten; names that do not match are
    # collected so that a misspelled or unsupported setting becomes visible.
    skipped = []
    for key, value in hyperparams.items():
        if hasattr(agent, key):
            setattr(agent, key, value)
        else:
            skipped.append(str(key))

    if skipped:
        warnings.warn(
            f"{agent_type}: hyperparameters not applied because the agent has "
            f"no such attribute: {', '.join(skipped)}",
            UserWarning,
            stacklevel=2,
        )
    return agent


def run_finetuning(
    agent_type: str,
    df,
    project_root: str,
    env_params: dict,
    train_params: dict,
):
    """Continue training an agent from the final checkpoint of its base run.

    The base experiment is named ``exp_<agent_type lowercased>_<TRAIN_VERSION>``
    and its checkpoint is expected at
    ``<project_root>/training_data/checkpoints/<base experiment>/final_agent.pkl``.
    The continuation is run under the same name with a ``_finetune`` suffix.

    Args:
        agent_type: Agent type name; used to look up hyperparameters in
            ``agents_config`` and to build the experiment names.
        df: Data passed unchanged to ``TrainingManager.continue_training``.
        project_root: Project root used to build the checkpoint and log paths
            with ``os.path.join``.
        env_params: Accepted for the call signature; not read by this function.
        train_params: Keyword arguments forwarded to ``TrainingConfig``; must
            contain "n_episodes_start" and "n_episodes" for the banner.

    Returns:
        Whatever ``TrainingManager.continue_training`` returns.

    Raises:
        FileNotFoundError: If the base run's ``final_agent.pkl`` does not exist.
    """
    hyperparams = agents_config.get(agent_type, {})

    base_experiment = f"exp_{agent_type.lower()}_{TRAIN_VERSION}"
    finetune_experiment = f"{base_experiment}_finetune"

    checkpoint_path = os.path.join(
        project_root, "training_data", "checkpoints", base_experiment, "final_agent.pkl"
    )

    # Guard: fine-tuning is only possible after the base run saved its agent.
    checkpoint_missing = not Path(checkpoint_path).exists()
    if checkpoint_missing:
        print(f"!!! ОШИБКА: Файл чекпоинта не найден: {checkpoint_path}")
        raise FileNotFoundError("Сначала запустите и завершите ПЕРВИЧНОЕ ОБУЧЕНИЕ.")

    agent = _get_agent_instance(agent_type, hyperparams)

    config_kwargs = dict(
        agent_name=f"{agent_type}_{TRAIN_VERSION}_finetune",
        agent_type=agent_type,
    )
    continue_config = TrainingConfig(**config_kwargs, **train_params)

    logs_root = os.path.join(project_root, "training_data/logs")
    checkpoints_root = os.path.join(project_root, "training_data/checkpoints")
    manager = TrainingManager(
        base_log_dir=logs_root,
        base_checkpoint_dir=checkpoints_root,
    )

    # Banner announcing the episode range of the continuation.
    print("\n=======================================================")
    episode_from = train_params["n_episodes_start"]
    episode_to = train_params["n_episodes"]
    print(f" НАЧАЛО ДООБУЧЕНИЯ: {agent_type} (Ep: {episode_from} -> {episode_to})")
    print("=======================================================")

    result = manager.continue_training(
        agent=agent,
        df=df,
        config=continue_config,
        checkpoint_path=checkpoint_path,
        experiment_name=finetune_experiment,
        verbose=True,
    )
    return result

Данные для дообучения: 8722 строк
Период: 28966.36 - 47558.35


In [2]:
# Fine-tune the Q-learning agent on the selected frame. The call stays the last
# expression of the cell so that its return value is shown as the cell result.
finetune_call_kwargs = dict(
    agent_type="QLearning",
    df=df_finetune,
    project_root=project_root,
    env_params=env_params,
    train_params=finetune_train_params,
)
run_finetuning(**finetune_call_kwargs)


 НАЧАЛО ДООБУЧЕНИЯ: QLearning (Ep: 100 -> 600)

Загрузка агента из /mnt/d/Study/Code/ml/TradingAgentClassicRL/training_data/checkpoints/exp_qlearning_v2/final_agent.pkl

 НАЧАЛО ОБУЧЕНИЯ
Агент:          QLearning_v2_finetune
Эксперимент:    exp_qlearning_v2_finetune
Эпизодов:       600
Max steps:       1000
Learning rate:  0.1
Discount:       0.95
Epsilon:        1.0 → 0.01
Eval frequency: 50
Patience:       200
Initial balance: $1,000.00


НОВАЯ ЛУЧШАЯ МОДЕЛЬ!
   Eval Reward: -22.19
   Portfolio: $892.89 (-10.71%)
   Total PnL: $-83.90

Эпизод   150/600 [ 25.0%] |███████░░░░░░░░░░░░░░░░░░░░░░░|
----------------------------------------------------------------------------------------------------
НАГРАДЫ:
   Текущая:         -43.97 | Средняя (100):     -43.97 | Eval:     -22.19 ± 22.49
ПОРТФЕЛЬ:
   Значение:       $892.89 | Изменение:        -10.71%
ТОРГОВЛЯ (на основе eval):
   Сделок:           108.4 | Win Rate:        64.5% | Profit Factor:   0.92
   Avg PnL:      $    -0.91 | Total 

{'experiment_name': 'exp_qlearning_v2_finetune',
 'log_dir': '/mnt/d/Study/Code/ml/TradingAgentClassicRL/training_data/logs/exp_qlearning_v2_finetune',
 'checkpoint_dir': '/mnt/d/Study/Code/ml/TradingAgentClassicRL/training_data/checkpoints/exp_qlearning_v2_finetune',
 'final_agent_path': '/mnt/d/Study/Code/ml/TradingAgentClassicRL/training_data/checkpoints/exp_qlearning_v2_finetune/final_agent.pkl',
 'best_agent_path': '/mnt/d/Study/Code/ml/TradingAgentClassicRL/training_data/checkpoints/exp_qlearning_v2_finetune/best_agent.pkl',
 'training_time': 288.41730260849,
 'best_val_reward': -12.27835568549915,
 'final_metrics': {'episode': 600,
  'reward': -13.002377151179433,
  'steps': 1000,
  'epsilon': 0.36751125485715885,
  'portfolio_value': 1004.9803137161223,
  'n_trades': 149,
  'win_rate': 46.97986577181208,
  'avg_pnl': 0.13847138681579718,
  'max_drawdown': 0.06046289094940942,
  'total_pnl': 20.63223663555378,
  'timestamp': 1765801935.4057178},
 'final_evaluation': {'mean_rewar