In [1]:
import json
import importlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from math import sqrt

mlflow.sklearn.autolog()

# 加载数据
data = pd.read_csv('./DATA/exported_data.csv')
X = data.drop('取引価格（総額）', axis=1)
y = data['取引価格（総額）']

# 标准化数据
X_scaler = StandardScaler()
X_scaled = X_scaler.fit_transform(X)

y_scaler = StandardScaler()
y_scaled = y_scaler.fit_transform(np.array(y).reshape(-1, 1))

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

# 定义模型参数
params = {
    'n_estimators': 100,  
    'max_depth': 6,  
    'random_state': 42  
}

with mlflow.start_run():
    rf_model = RandomForestRegressor(**params)
    rf_model.fit(X_train, y_train.ravel()) 
    

    y_pred = rf_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = sqrt(mse)
    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    
    mlflow.log_metric("mse", mse)
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2", r2)
    mlflow.log_metric("mae", mae)

    mlflow.sklearn.log_model(rf_model, "model")

    mlflow.log_params(params)

print(f"Test MSE: {mse}")
print(f"Test RMSE: {rmse}")
print(f"Test R²: {r2}")
print(f"Test MAE: {mae}")

ValueError: Input X contains NaN.
RandomForestRegressor does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values