In [13]:
import json
import importlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from math import sqrt
import xgboost as xgb

mlflow.sklearn.autolog()

# 加载数据
data = pd.read_csv('./DATA/exported_data.csv')
X = data.drop('取引価格（総額）', axis=1)
y = data['取引価格（総額）']

# 标准化数据
X_scaler = StandardScaler()
X_scaled = X_scaler.fit_transform(X)

y_scaler = StandardScaler()
y_scaled = y_scaler.fit_transform(np.array(y).reshape(-1, 1))

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

params = {
    'objective': 'reg:squarederror',
    'n_estimators': 3000,
    'learning_rate': 0.05,
    'max_depth': 8,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'reg_alpha': 0.1,
    'reg_lambda': 0.1,
    'verbosity': 2  # 将日志详细程度设置为 1
}


with mlflow.start_run():
    xgb_model = xgb.XGBRegressor(**params)
    xgb_model.fit(X_train, y_train, 
                  eval_set=[(X_test, y_test)], 
                  verbose=True)
    
    y_pred = xgb_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = sqrt(mse)
    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    
    mlflow.log_metric("mse", mse)
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2", r2)
    mlflow.log_metric("mae", mae)
    

    mlflow.sklearn.log_model(xgb_model, "model")
    mlflow.log_params(params)

print(f"Test MSE: {mse}")
print(f"Test RMSE: {rmse}")
print(f"Test R²: {r2}")
print(f"Test MAE: {mae}")


[0]	validation_0-rmse:0.97205
[1]	validation_0-rmse:0.95488
[2]	validation_0-rmse:0.93900
[3]	validation_0-rmse:0.92418
[4]	validation_0-rmse:0.91088
[5]	validation_0-rmse:0.89962
[6]	validation_0-rmse:0.88982
[7]	validation_0-rmse:0.88193
[8]	validation_0-rmse:0.87633
[9]	validation_0-rmse:0.86644
[10]	validation_0-rmse:0.85690
[11]	validation_0-rmse:0.84948
[12]	validation_0-rmse:0.84150
[13]	validation_0-rmse:0.83373
[14]	validation_0-rmse:0.82695
[15]	validation_0-rmse:0.82042
[16]	validation_0-rmse:0.81429
[17]	validation_0-rmse:0.80924
[18]	validation_0-rmse:0.80373
[19]	validation_0-rmse:0.79885
[20]	validation_0-rmse:0.79414
[21]	validation_0-rmse:0.79105
[22]	validation_0-rmse:0.78696
[23]	validation_0-rmse:0.78397
[24]	validation_0-rmse:0.77995
[25]	validation_0-rmse:0.77642
[26]	validation_0-rmse:0.77347
[27]	validation_0-rmse:0.77017
[28]	validation_0-rmse:0.76810
[29]	validation_0-rmse:0.76573
[30]	validation_0-rmse:0.76346
[31]	validation_0-rmse:0.76079
[32]	validation_0-

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from math import sqrt
import xgboost as xgb
import mlflow
import mlflow.sklearn

mlflow.sklearn.autolog()

# 加载数据
data = pd.read_csv('./DATA/exported_data.csv')
X = data.drop('取引価格（総額）', axis=1)
y = data['取引価格（総額）']

# 标准化数据
X_scaler = StandardScaler()
X_scaled = X_scaler.fit_transform(X)

y_scaler = StandardScaler()
y_scaled = y_scaler.fit_transform(np.array(y).reshape(-1, 1))

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

params = {
    'objective': 'reg:squarederror',
    'n_estimators': 3000,
    'learning_rate': 0.04,
    'max_depth': 8,
    'subsample': 0.8,
    'colsample_bytree': 0.5,
    'reg_alpha': 0.1,
    'reg_lambda': 0.1,
    'verbosity': 2  # 将日志详细程度设置为 1
}

with mlflow.start_run():
    xgb_model = xgb.XGBRegressor(**params)
    xgb_model.fit(X_train, y_train, 
                  eval_set=[(X_test, y_test)], 
                  verbose=True)
    
    y_pred_scaled = xgb_model.predict(X_test)

    # 反标准化预测值和实际值
    y_pred = y_scaler.inverse_transform(y_pred_scaled.reshape(-1, 1))
    y_test_orig = y_scaler.inverse_transform(y_test)

    mse = mean_squared_error(y_test_orig, y_pred)
    rmse = sqrt(mse)
    r2 = r2_score(y_test_orig, y_pred)
    mae = mean_absolute_error(y_test_orig, y_pred)
    
    mlflow.log_metric("mse", mse)
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2", r2)
    mlflow.log_metric("mae", mae)
    
    mlflow.sklearn.log_model(xgb_model, "model")
    mlflow.log_params(params)

print(f"Test MSE: {mse}")
print(f"Test RMSE: {rmse}")
print(f"Test R²: {r2}")
print(f"Test MAE: {mae}")