In [None]:
import qlib
from qlib.config import REG_CN
from qlib.contrib.model.gbdt import LGBModel
from qlib.contrib.data.handler import Alpha158
# 修正导入路径：从qlib.backtest导入回测相关功能
from qlib.backtest import backtest as normal_backtest
from qlib.contrib.evaluate import risk_analysis
from qlib.utils import exists_qlib_data, init_instance_by_config
from qlib.workflow import R
from qlib.workflow.record_temp import SignalRecord, PortAnaRecord
from qlib.utils import flatten_dict

# Initialise qlib for the China market region.
# provider_uri has to point at the local qlib data directory; edit the path
# below if the data is stored somewhere else.
qlib.init(
    provider_uri=r'C:\Users\Administrator\.qlib\qlib_data\cn_data',
    region=REG_CN,
)
    

In [None]:
# Instrument universe and benchmark code used by the rest of the notebook.
market = "csi300"
benchmark = "SH000300"

# Keyword arguments forwarded to the Alpha158 handler.
data_handler_config = dict(
    start_time="2008-01-01",
    end_time="2020-08-01",
    fit_start_time="2008-01-01",
    fit_end_time="2014-12-31",
    instruments=market,
)

# Full task description: which model class to build and which dataset to feed it.
task = {
    "model": {
        "class": "LGBModel",
        "module_path": "qlib.contrib.model.gbdt",
        # Hyper-parameters passed to the LGBModel constructor.
        "kwargs": dict(
            loss="mse",
            colsample_bytree=0.8879,
            learning_rate=0.0421,
            subsample=0.8789,
            lambda_l1=205.6999,
            lambda_l2=580.9768,
            max_depth=8,
            num_leaves=210,
            num_threads=20,
        ),
    },
    "dataset": {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": dict(
            # The handler shares the data_handler_config object defined above.
            handler={
                "class": "Alpha158",
                "module_path": "qlib.contrib.data.handler",
                "kwargs": data_handler_config,
            },
            # (start, end) date pairs for each split.
            segments=dict(
                train=("2008-01-01", "2014-12-31"),
                valid=("2015-01-01", "2016-12-31"),
                test=("2017-01-01", "2020-08-01"),
            ),
        ),
    },
}

# Model initiation: instantiate the model first, then the dataset, from the config.
model = init_instance_by_config(task["model"])
dataset = init_instance_by_config(task["dataset"])

In [None]:
import qlib
import mlflow  # 直接使用MLflow API
from mlflow.entities import Experiment  # MLflow实验类
from qlib.config import REG_CN
from qlib.contrib.model.gbdt import LGBModel
from qlib.contrib.data.handler import Alpha158
from qlib.utils import init_instance_by_config
from qlib.workflow import R

# Initialise qlib against the local China-market data directory.
qlib.init(provider_uri=r'C:\Users\Administrator\.qlib\qlib_data\cn_data', region=REG_CN)

# Configuration (same values as used before).
market = "csi300"
data_handler_config = {
    "start_time": "2008-01-01",
    "end_time": "2020-08-01",
    "fit_start_time": "2008-01-01",
    "fit_end_time": "2014-12-31",
    "instruments": market,
}

task = {
    # Model: class location plus constructor keyword arguments.
    "model": {
        "class": "LGBModel",
        "module_path": "qlib.contrib.model.gbdt",
        "kwargs": {
            "loss": "mse",
            "colsample_bytree": 0.8879,
            "learning_rate": 0.0421,
            "subsample": 0.8789,
            "lambda_l1": 205.6999,
            "lambda_l2": 580.9768,
            "max_depth": 8,
            "num_leaves": 210,
            "num_threads": 20,
        },
    },
    # Dataset: an Alpha158 handler and the date range of each split.
    "dataset": {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            "handler": {
                "class": "Alpha158",
                "module_path": "qlib.contrib.data.handler",
                "kwargs": data_handler_config,
            },
            "segments": {
                "train": ("2008-01-01", "2014-12-31"),
                "valid": ("2015-01-01", "2016-12-31"),
                "test": ("2017-01-01", "2020-08-01"),
            },
        },
    },
}

# Build the model and the dataset objects from the task description.
model = init_instance_by_config(task["model"])
dataset = init_instance_by_config(task["dataset"])

# --------------------------
# Train inside a qlib-managed experiment
# --------------------------
# R.start() looks the experiment up by name and only creates it when it is
# missing, so this cell can be re-run. A bare mlflow.create_experiment() raises
# as soon as an experiment called "train_model" already exists.
experiment_name = "train_model"

with R.start(experiment_name=experiment_name):
    # qlib opens the run itself; R offers no set_experiment_id/set_recorder_id
    # to attach it to a run started elsewhere, so read the ids from the
    # recorder that R.start() created.
    recorder = R.get_recorder()
    exp_id = recorder.experiment_id

    # Train the model.
    model.fit(dataset)

    # Persist the fitted qlib model as an artifact of this recorder. It is not
    # an mlflow.pyfunc.PythonModel, so mlflow.pyfunc.log_model() cannot take it.
    R.save_objects(trained_model=model)

    # Ask MLflow for the artifact URI instead of guessing the directory layout.
    # NOTE(review): relies on qlib's recorder being the active MLflow run;
    # confirm for the installed qlib version.
    artifact_location = mlflow.get_artifact_uri("trained_model")

    # Report the identifiers needed to find this run again.
    print("训练完成！")
    print(f"实验ID: {exp_id}")
    print(f"记录器ID: {recorder.id}")
    print(f"模型存储路径: {artifact_location}")

In [None]:
import qlib
from qlib.config import REG_CN
from qlib.utils import init_instance_by_config
from qlib.workflow import R
import mlflow

# Initialise qlib with the local data directory and the China region settings.
qlib.init(provider_uri=r'C:\Users\Administrator\.qlib\qlib_data\cn_data', region=REG_CN)

# Configuration parameters.
market = "csi300"
data_handler_config = dict(
    start_time="2008-01-01",
    end_time="2020-08-01",
    fit_start_time="2008-01-01",
    fit_end_time="2014-12-31",
    instruments=market,
)

task = {
    "model": {
        "class": "LGBModel",
        "module_path": "qlib.contrib.model.gbdt",
        # Constructor arguments for the gradient-boosting model.
        "kwargs": dict(
            loss="mse",
            colsample_bytree=0.8879,
            learning_rate=0.0421,
            subsample=0.8789,
            lambda_l1=205.6999,
            lambda_l2=580.9768,
            max_depth=8,
            num_leaves=210,
            num_threads=20,
        ),
    },
    "dataset": {
        "class": "DatasetH",
        "module_path": "qlib.data.dataset",
        "kwargs": {
            # Feature handler; reuses the data_handler_config object above.
            "handler": {
                "class": "Alpha158",
                "module_path": "qlib.contrib.data.handler",
                "kwargs": data_handler_config,
            },
            # Date range of each split as (start, end).
            "segments": {
                "train": ("2008-01-01", "2014-12-31"),
                "valid": ("2015-01-01", "2016-12-31"),
                "test": ("2017-01-01", "2020-08-01"),
            },
        },
    },
}

# Instantiate a fresh model and dataset from the task description.
model = init_instance_by_config(task["model"])
dataset = init_instance_by_config(task["dataset"])



In [None]:

import os  # 导入 os 模块

# --------------------------
# Make sure the mlruns directory exists
# --------------------------
# exist_ok=True makes this safe to re-run and removes the separate
# check-then-create step.
os.makedirs("mlruns", exist_ok=True)

# --------------------------
# Train inside a qlib-managed experiment
# --------------------------
experiment_name = "train_model"

# R.start() creates the experiment when it is missing and opens the run.
# R has no set_experiment_id/set_recorder_id methods, so the run is started
# through qlib rather than attached to one opened with mlflow.start_run().
with R.start(experiment_name=experiment_name):
    recorder = R.get_recorder()

    model.fit(dataset)

    # Save the fitted model with the LightGBM flavor rather than the sklearn one.
    # NOTE(review): presumably model.model is the lightgbm Booster produced by
    # LGBModel.fit, and the call logs to the run opened by R.start(); confirm.
    mlflow.lightgbm.log_model(model.model, "lgb_model")

    print("训练完成！")
    print(f"实验ID: {recorder.experiment_id}")
    print(f"记录器ID: {recorder.id}")


In [None]:
import os
import shutil

# Remove every MLflow-related directory under the current working directory.
# Everything stored inside them is deleted together with the directory.
mlflow_dirs = [
    'mlruns',
    '.trash',
]
for dir_name in mlflow_dirs:
    # Nothing to do for a path that is not there.
    if not os.path.exists(dir_name):
        continue
    shutil.rmtree(dir_name)

In [None]:
# Train inside the "train_model" experiment: log the flattened task config as
# parameters, fit the model, store it as an artifact and keep the recorder id.
with R.start(experiment_name="train_model"):
    flat_params = flatten_dict(task)
    R.log_params(**flat_params)

    model.fit(dataset)
    R.save_objects(trained_model=model)

    # Recorder id, kept so the run can be looked up later.
    rid = R.get_recorder().id