In [None]:
import numpy as np
from sklearn.linear_model import LogisticRegression
import mlflow
import mlflow.sklearn
import pandas as pd
import numpy as np
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

from tools import clean_data,load_data,visualize_data


In [None]:
# Create the experiment only when it does not exist yet.
# mlflow.create_experiment raises if an experiment with this name is already
# registered, which would break this cell on every re-run of the notebook.
if mlflow.get_experiment_by_name("Boston-HousePrice-predict") is None:
    mlflow.create_experiment("Boston-HousePrice-predict")

In [None]:
# Make this experiment the active one for the runs started below.
mlflow.set_experiment(experiment_name="Boston-HousePrice-predict")

数据预处理

In [None]:
# Load the raw data and separate the features from the 'medv' target column.
df = pd.read_csv("Boston Housing.csv")
X = df.drop('medv', axis=1)
y = df['medv']

# Clean the features with the project helper imported from `tools`.
x_cleaned = clean_data(X)

# Min-max normalise the full cleaned feature matrix.
scaler = MinMaxScaler()
X_normalized = scaler.fit_transform(x_cleaned)

# Hold out a test split; random_state pins the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, random_state=1)

# Fit the scaler on the training split only, then apply that same fitted
# scaler to the test split. Calling fit_transform on the test split would
# refit the scaler on test data, putting train and test features on
# different scales and making the test metrics meaningless.
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

运行MLflow

In [None]:
# Model evaluation metric: root mean squared error (RMSE).
def eval_metrics(actual, pred):
    """Return the root mean squared error between ``actual`` and ``pred``.

    The square root is taken explicitly instead of passing
    ``squared=False`` to ``mean_squared_error``: that keyword was deprecated
    in scikit-learn 1.4 and later removed, so using it raises ``TypeError``
    on current releases.

    Args:
        actual: Ground-truth target values.
        pred: Predicted target values, aligned with ``actual``.

    Returns:
        The RMSE as a NumPy float. For multi-output targets the per-output
        RMSE values are averaged uniformly, which is what ``squared=False``
        used to return.
    """
    # Ask for one MSE per output so the root is taken before averaging.
    per_output_mse = mean_squared_error(actual, pred, multioutput="raw_values")
    return np.sqrt(per_output_mse).mean()
    
# Train a model, evaluate it and log the results to MLflow.
def train_and_log_model(model, model_name, name):
    """Fit ``model``, score it and record everything in one MLflow run.

    The function reads the module-level ``X_train_scaled``, ``y_train``,
    ``X_test_scaled`` and ``y_test`` variables, so the preprocessing cell
    must have been executed first.

    Args:
        model: Estimator exposing ``fit``, ``predict`` and ``score``.
        model_name: Value logged as the ``model`` parameter; also passed as
            the second argument to ``mlflow.sklearn.log_model`` and used as
            the prefix of the printed summary lines.
        name: Run name passed to ``mlflow.start_run``.

    Returns:
        None. Results are logged to MLflow and printed.
    """
    try:
        with mlflow.start_run(run_name=name):
            model.fit(X_train_scaled, y_train)
            predictions = model.predict(X_test_scaled)
            rmse = eval_metrics(y_test, predictions)
            train_score = model.score(X_train_scaled, y_train)
            test_score = model.score(X_test_scaled, y_test)

            # Record the parameter and the metrics.
            mlflow.log_param("model", model_name)
            for metric_key, metric_value in (
                ("rmse", rmse),
                ("train_score", train_score),
                ("test_score", test_score),
            ):
                mlflow.log_metric(metric_key, metric_value)

            # Persist the fitted model alongside the run.
            mlflow.sklearn.log_model(model, model_name)

            print(f"{model_name} RMSE: {rmse}")
            print(f"{model_name} trainscore: {train_score}")
            print(f"{model_name} testscore: {test_score}")
    finally:
        # Always executed, whether or not the block above raised.
        mlflow.end_run()

In [None]:
# Linear regression has no random component, so it needs no seed.
lr = LinearRegression()
train_and_log_model(lr, "LinearRegression","LinearRegression")

# Decision tree: seeded so the logged metrics are the same on every run
# (same seed value as the train/test split).
dt = DecisionTreeRegressor(random_state=1)
train_and_log_model(dt, "DecisionTreeRegressor","DecisionTreeRegressor")

# Random forest: seeded for the same reason.
rf = RandomForestRegressor(random_state=1)
train_and_log_model(rf, "RandomForestRegressor","RandomForestRegressor")