In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from sklearn.metrics import r2_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from edRVFL import EnsembleDeepRVFL
import warnings
warnings.filterwarnings('ignore')
from sklearn.svm import SVR
from sklearn.linear_model import SGDRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor

In [None]:
# Experiment configuration shared by the cells below.
normal = True  # Passed to read_data(normal=...) to toggle feature scaling; also selects the output file prefix.
SCALER = StandardScaler  # Scaler class instantiated when scaling is enabled.
folds = 10  # n_splits for the KFold splitter.

# Hyper-parameters forwarded to EnsembleDeepRVFL.
num_nodes = 10  # Number of enhancement nodes.
regular_para = 1  # Regularization parameter.
weight_random_range = [-1, 1]  # Range of random weights.
bias_random_range = [0, 1]  # Range of random biases (passed as b_random_vec_range).
num_layer = 10  # Number of hidden layers


In [None]:
def dict2csv(ave_score, ave_mse, ave_mape, save_path):
    """Write the three averaged-metric tables to Excel workbooks.

    Despite the name, the output is ``.xlsx``, not CSV. Each argument is a
    nested dict ``{row_key: {column_key: value}}``; the outer keys become the
    rows of the sheet and the inner keys become its columns.

    Args:
        ave_score: Nested dict written to ``./{save_path}_score.xlsx``.
        ave_mse: Nested dict written to ``./{save_path}_mse.xlsx``.
        ave_mape: Nested dict written to ``./{save_path}_mape.xlsx``.
        save_path: Path prefix (relative to the working directory) shared by
            the three output files.
    """
    import os

    # Create the destination folder up front so that a missing directory does
    # not abort the export after the (long) evaluation has already finished.
    out_dir = os.path.dirname(os.path.normpath(f'./{save_path}'))
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    tables = (('score', ave_score), ('mse', ave_mse), ('mape', ave_mape))
    for suffix, table in tables:
        frame = pd.DataFrame.from_dict(table, orient='index')
        # Keep the index: with orient='index' it holds the outer dict keys,
        # and without it the rows of the saved sheet cannot be identified.
        frame.to_excel(f'./{save_path}_{suffix}.xlsx', index=True)

def read_data(file, target_name, all_targets, normal=False):
    """Load the Excel sheet and split it into a feature matrix and one target.

    Missing cells are replaced by 0 and every column is cast to float64.

    Args:
        file: Path of the Excel file to read.
        target_name: Column returned as the target values.
        all_targets: Columns removed from the sheet to obtain the features.
        normal: When true, the features are transformed with a new ``SCALER``
            instance fitted on every row of the file. Targets are never scaled.

    Returns:
        tuple: ``(features, targets)`` as NumPy arrays.
    """
    sheet = pd.read_excel(file).fillna(0).astype('float64')
    target_values = sheet.loc[:, target_name].values
    feature_matrix = sheet.drop(columns=all_targets).values

    if not normal:
        return feature_matrix, target_values

    feature_scaler = SCALER()
    return feature_scaler.fit_transform(feature_matrix), target_values


def modelHelper(model, x_train, x_test, y_train, y_test):
    """Train a regressor on one split and score its test predictions.

    An ``EnsembleDeepRVFL`` argument only selects the branch: a new instance is
    built from the module-level hyper-parameters and trained with
    ``train(x_train, y_train, 0)``; the instance that was passed in is not
    used. Any other model is fitted in place through ``fit``.

    Args:
        model: Estimator to evaluate.
        x_train: Training features.
        x_test: Test features.
        y_train: Training targets.
        y_test: Test targets.

    Returns:
        tuple: ``(r2, mse, mape)`` computed from ``y_test`` and the predictions.
    """
    if not isinstance(model, EnsembleDeepRVFL):
        model.fit(x_train, y_train)
        estimator = model
    else:
        estimator = EnsembleDeepRVFL(
            n_nodes=num_nodes,
            lam=regular_para,
            w_random_vec_range=weight_random_range,
            b_random_vec_range=bias_random_range,
            activation='relu',
            n_layer=num_layer,
            same_feature=False,
            task_type='regression',
        )
        estimator.train(x_train, y_train, 0)

    predictions = estimator.predict(x_test)
    return (
        r2_score(y_test, predictions),
        mean_squared_error(y_test, predictions),
        mean_absolute_percentage_error(y_test, predictions),
    )

# Run

In [None]:
file = './Data/data.xlsx'
data = pd.read_excel(file).fillna(0).astype('float64')
# The last nine columns of the sheet are treated as the regression targets.
all_targets = data.columns.to_list()[-9:]
result = {}      # result[target][model] -> per-fold 'scores' / 'MSEs' / 'MAPEs' lists
ave_score = {}   # ave_score[target][model] -> mean R2 over the folds
ave_mse = {}     # ave_mse[target][model] -> mean MSE over the folds
ave_mape = {}    # ave_mape[target][model] -> mean MAPE over the folds

kf = KFold(n_splits=folds, shuffle=True, random_state=17)
# For leave-one-out evaluation, swap in sklearn.model_selection.LeaveOneOut() here.

# One zero-argument factory per model name, so each fold gets a fresh estimator
# without instantiating the seven models that are not being evaluated.
model_factories = {
    'SVR': lambda: SVR(gamma='auto'),
    'Ridge': lambda: Ridge(alpha=1),
    'Lasso': lambda: Lasso(max_iter=5000, alpha=1),
    'SGD': lambda: SGDRegressor(max_iter=5000, alpha=1),
    'DT': lambda: DecisionTreeRegressor(),
    'RF': lambda: RandomForestRegressor(n_estimators=100),
    'XGboost': lambda: XGBRegressor(max_depth=50, learning_rate=0.1, n_estimators=100),
    'edRVFL': lambda: EnsembleDeepRVFL(
        n_nodes=num_nodes, lam=regular_para, w_random_vec_range=weight_random_range,
        b_random_vec_range=bias_random_range, activation='relu', n_layer=num_layer,
        same_feature=False, task_type='regression'),
}

for target_name in all_targets:
    print(f'#########################{target_name}#################################')
    result[target_name] = {}
    ave_score[target_name] = {}
    ave_mse[target_name] = {}
    ave_mape[target_name] = {}

    # Load the raw (unscaled) features: scaling is done inside each fold so the
    # scaler never sees the held-out rows.
    features, targets = read_data(file=file, target_name=target_name, all_targets=all_targets, normal=False)
    for model_name in model_factories:
        fold_metrics = {'scores': [], 'MSEs': [], 'MAPEs': []}
        result[target_name][model_name] = fold_metrics

        for fold, (train_index, test_index) in enumerate(kf.split(features)):
            model = model_factories[model_name]()
            x_train, y_train = features[train_index], targets[train_index]
            x_test, y_test = features[test_index], targets[test_index]
            if normal:
                # Fit on the training split only, then apply the same
                # transform to the test split (avoids train/test leakage).
                fold_scaler = SCALER()
                x_train = fold_scaler.fit_transform(x_train)
                x_test = fold_scaler.transform(x_test)
            r2, mse, mape = modelHelper(model, x_train, x_test, y_train, y_test)
            fold_metrics['scores'].append(r2)
            fold_metrics['MSEs'].append(mse)
            fold_metrics['MAPEs'].append(mape)

        ave_score[target_name][model_name] = np.mean(fold_metrics['scores'])
        ave_mse[target_name][model_name] = np.mean(fold_metrics['MSEs'])
        ave_mape[target_name][model_name] = np.mean(fold_metrics['MAPEs'])
        print(f'{model_name}: ', ave_mape[target_name][model_name])

# Record

In [None]:
# Choose the output prefix from the preprocessing configuration, then export.
# The three known configurations keep their established file names; any other
# scaler class falls back to its lower-cased class name so that the results of
# a finished run are never silently left unsaved.
if not normal:
    run_tag = f'nonor_{folds}fold'
elif SCALER is MinMaxScaler:
    run_tag = f'maxmin_{folds}fold'
elif SCALER is StandardScaler:
    run_tag = f'stand_{folds}fold_T'
else:
    run_tag = f'{SCALER.__name__.lower()}_{folds}fold'

dict2csv(ave_score=ave_score, ave_mse=ave_mse, ave_mape=ave_mape, save_path=f'./results/{run_tag}')