In [1]:
from sklearn.ensemble import RandomForestRegressor as RF
from xgboost import XGBRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score,root_mean_squared_error
from sklearn.preprocessing import StandardScaler
from skopt import BayesSearchCV
import ast


In [2]:
# Registry of the regressor classes under comparison, keyed by the short label
# that is also used for the score-table columns and the search-space lookup.
model = {
    'RF': RF,
    'XGBoost': XGBRegressor,
    'Knn': KNeighborsRegressor,
    'ANN': MLPRegressor,
}
# Evaluation order of the models; derived from the registry (dicts keep
# insertion order) so the two can never get out of sync.
model_names = list(model)
def bayesian_optimization(model_name, feature, label, n_iter=50, cv=5, random_state=42):
    """Tune one regressor with Bayesian hyper-parameter search and return the best fit.

    Parameters
    ----------
    model_name : str
        Key into the module-level ``model`` registry and into the search
        spaces below ('RF', 'XGBoost', 'Knn' or 'ANN'). An unknown key raises
        ``KeyError``.
    feature, label
        Training features and targets, passed unchanged to ``BayesSearchCV.fit``.
    n_iter : int, default 50
        Number of parameter settings sampled by the search.
    cv : int, default 5
        Number of cross-validation folds used to score each setting.
    random_state : int, default 42
        Seed for the search itself and for every estimator that exposes a
        ``random_state`` parameter, so repeated runs give the same result.

    Returns
    -------
    The ``best_estimator_`` of the search: the estimator configured with the
    best parameters found (scored by cross-validated R^2) and already refitted
    on the full ``feature`` / ``label`` data by the search.
    """
    # skopt reads each entry as a search dimension: a 2-tuple of ints is an
    # integer range, a 2-tuple of floats a real range, a 3-tuple adds the prior,
    # and a list is a categorical choice.
    param_spaces = {
        'RF': {
            'n_estimators': (10, 1000),
            'max_depth': (1, 20),
            'min_samples_split': (2, 20),
            'min_samples_leaf': (1, 20),
        },
        'XGBoost': {
            'n_estimators': (10, 1000),
            'learning_rate': (0.01, 1.0),
            'max_depth': (1, 20),
            'min_child_weight': (1, 10),
            'subsample': (0.3, 1.0),
            'colsample_bytree': (0.3, 1.0),
            'reg_alpha': (0.0, 1.0),
            'reg_lambda': (0.0, 1.0)
        },
        'Knn': {
            # NOTE(review): every CV training fold must hold at least
            # n_neighbors samples, otherwise prediction fails — confirm the
            # datasets are large enough for the upper bound of 50.
            'n_neighbors': (10, 50),
            'weights': ['uniform', 'distance'],
            'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
            'p': (1, 5),
            'leaf_size': (10, 100)
        },
        'ANN': {
            # Integer range, not an architecture: the search tries a single
            # hidden layer of 20..100 units, not a fixed (20, 100) two-layer net.
            'hidden_layer_sizes': (20, 100),
            'max_iter': (100, 10000),
            'learning_rate_init': (1e-5, 1e-3, 'log-uniform'),
            'activation': ['relu', 'tanh', 'logistic'],
            'solver': ['adam', 'sgd'],
            'alpha': (1e-5, 1e-2, 'log-uniform'),
        }
    }

    estimator = model[model_name]()
    # The search's own random_state does not reach the estimator, so seed the
    # stochastic ones explicitly; KNeighborsRegressor has no such parameter
    # and is left untouched.
    if 'random_state' in estimator.get_params():
        estimator.set_params(random_state=random_state)

    bayes_search = BayesSearchCV(
        estimator=estimator,
        search_spaces=param_spaces[model_name],
        n_iter=n_iter,
        cv=cv,
        n_jobs=-1,
        scoring='r2',
        random_state=random_state,
        verbose=0
    )
    bayes_search.fit(feature, label)
    return bayes_search.best_estimator_


In [None]:

def caculate_score_ML(file_path, output_dir=r"E:\厌氧消化预测大模型"):
    """Tune and score every model in ``model_names`` on each Excel file under ``file_path``.

    For every file found by walking ``file_path``:

    1. the sheet is read, columns whose second value is a string are dropped,
       the rest are coerced to numbers and missing values are replaced by 0;
    2. the last remaining column is the target, all others are the features;
    3. the data is split 90 % / 10 % into train and test (``random_state=0``);
    4. each model is tuned with ``bayesian_optimization`` on the training part
       and R^2 / RMSE are computed on both parts.

    The scores are collected in four tables (rows ``data_bp_<n>``, columns
    ``model_names``) and written as CSV files into ``output_dir``.

    Parameters
    ----------
    file_path : str
        Directory that is walked recursively for input Excel files.
    output_dir : str, optional
        Directory receiving the four CSV files; created if it does not exist.
        Defaults to the location that was previously hard-coded.

    Raises
    ------
    ValueError
        If an input sheet has fewer than two rows.
    """
    import warnings

    # Fail fast: make sure the results can be written before spending the
    # time on hyper-parameter searches.
    os.makedirs(output_dir, exist_ok=True)

    index_name = [f"data_bp_{k}" for k in range(1, 19)]
    score_data_r2 = pd.DataFrame(index=index_name, columns=model_names)
    score_data_rmse = pd.DataFrame(index=index_name, columns=model_names)
    score_data_r2_train = pd.DataFrame(index=index_name, columns=model_names)
    score_data_rmse_train = pd.DataFrame(index=index_name, columns=model_names)

    # Rows are assigned in os.walk traversal order, not from the file names.
    # NOTE(review): confirm this order matches the intended data_bp numbering.
    i = 1
    for root, _dirs, files in os.walk(file_path):
        for file in files:
            if file.startswith("~$"):
                # Lock file that Excel creates next to an open workbook;
                # it is not a readable workbook.
                continue
            data_path = os.path.join(root, file)
            df = pd.read_excel(data_path)
            if len(df) < 2:
                raise ValueError(
                    f"{data_path}: at least 2 rows are needed to select the numeric columns"
                )

            # Keep the columns whose second value is not text, force them to
            # numeric (values that cannot be converted become NaN), then
            # replace NaN by 0.
            kept = [c for c in df.columns if not isinstance(df[c].values[1], str)]
            df_new = df[kept].apply(pd.to_numeric, errors='coerce').fillna(0)

            label = df_new.iloc[:, -1]
            feature = df_new.iloc[:, :-1]
            x_train, x_test, y_train, y_test = train_test_split(
                feature, label, test_size=0.1, random_state=0
            )

            # The scalers are used for scoring only: targets and predictions
            # of each split are standardised with that split's own target
            # mean/std before the metrics are computed. R^2 is unaffected by
            # this; RMSE becomes relative to the split's target std.
            # NOTE(review): train and test RMSE are therefore normalised by
            # different stds — confirm this is the intended metric.
            scaler = StandardScaler()
            y_test_ts = scaler.fit_transform(y_test.values.reshape(-1, 1))
            scaler2 = StandardScaler()
            y_train_ts = scaler2.fit_transform(y_train.values.reshape(-1, 1))

            row = f"data_bp_{i}"
            for model_name in model_names:
                # The returned best_estimator_ has already been refitted on
                # (x_train, y_train) by the search, so no second fit is needed.
                model_t = bayesian_optimization(model_name, x_train, y_train)
                y_predict_ts = scaler.transform(model_t.predict(x_test).reshape(-1, 1))
                y_train_predict_ts = scaler2.transform(
                    model_t.predict(x_train).reshape(-1, 1)
                )
                score_data_r2.loc[row, model_name] = r2_score(y_test_ts, y_predict_ts)
                score_data_rmse.loc[row, model_name] = root_mean_squared_error(
                    y_test_ts, y_predict_ts
                )
                score_data_r2_train.loc[row, model_name] = r2_score(
                    y_train_ts, y_train_predict_ts
                )
                score_data_rmse_train.loc[row, model_name] = root_mean_squared_error(
                    y_train_ts, y_train_predict_ts
                )
            i = i + 1
            print("finish once file")

    if i == 1:
        warnings.warn(
            f"no input files were processed under {file_path!r}; the score tables are empty"
        )

    outputs = {
        "score_data_r2_ML_after_bys.csv": score_data_r2,
        "score_data_rmse_ML_after_bys.csv": score_data_rmse,
        "score_data_r2_train_ML_after_bys.csv": score_data_r2_train,
        "score_data_rmse_train_ML_after_bys.csv": score_data_rmse_train,
    }
    for csv_name, table in outputs.items():
        table.to_csv(os.path.join(output_dir, csv_name), index=True)
# Run the full benchmark over the dataset folder.
# NOTE(review): " " (a single space) looks like a placeholder for the real
# dataset directory — confirm the path. os.walk yields nothing for a path
# that does not exist, so with this argument no file would be scored.
caculate_score_ML(" ")