# Read Data

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
def read_data(normal_target=True):
    """Load ``./all_data.csv`` and min-max scale its columns to [0, 1].

    Parameters
    ----------
    normal_target : bool, default True
        When True every column is scaled. When False the column named
        '剥落面积百分比' is returned unchanged and only the other columns
        are scaled.

    Returns
    -------
    pandas.DataFrame
        A frame with the same columns as the CSV, scaled column by column.

    Notes
    -----
    A column whose maximum equals its minimum has a zero range; dividing by
    it would turn the whole column into NaN. Such columns are mapped to 0.0
    instead, so the result can still be fed to an estimator.
    """
    target_column = '剥落面积百分比'

    def _min_max(column):
        # Leave the target in its original units when normalisation is off.
        if not normal_target and column.name == target_column:
            return column
        low = np.min(column)
        span = np.max(column) - low
        if span == 0:
            # Constant column: avoid 0/0 and emit zeros (as floats).
            return (column - low).astype(float)
        return (column - low) / span

    raw = pd.read_csv('./all_data.csv')
    return raw.apply(_min_max)
# Load the table with the '剥落面积百分比' column left unscaled.
scaled_frame = read_data(normal_target=False)
# The target is taken positionally: the last column of the loaded table.
target = scaled_frame.values[:, -1]
# Feature matrix: everything except the columns listed here.
excluded_columns = ['剥落面积百分比', '喷涂距离(mm)', '载气', '氢气(SPLM)', '氩气(SPLM)']
original_data = scaled_frame.drop(columns=excluded_columns).values

In [3]:
# One train/test split (33% held out) per seed; each entry is the 4-item
# list returned by train_test_split: x_train, x_test, y_train, y_test.
random_seeds = [1, 7, 15, 36, 21, 2, 32, 42, 25, 18]
dataloader = {'剥落面积百分比': []}
for random_seed in random_seeds:
    split = train_test_split(
        original_data,
        target,
        test_size=0.33,
        random_state=random_seed,
        shuffle=True,
    )
    dataloader['剥落面积百分比'].append(split)
# Filled by the model cells below: result[model][target] -> scores.
result = {}

# SVR

In [16]:
from sklearn.svm import SVR
model_name = 'SVR'
result[model_name] = {}


def svrHelper(target_name, x_train, x_test, y_train, y_test):
    """Train an SVR (C=47) on one split and return its test-set score.

    ``target_name`` is accepted for call-site symmetry but is not used here.
    The return value is whatever ``SVR.score`` reports on the held-out data.
    """
    regressor = SVR(C=47)
    return regressor.fit(x_train, y_train).score(x_test, y_test)


# Score every pre-built split, then store the per-split scores and their mean.
for target_name, folds in dataloader.items():
    scores = []
    for fold in folds:
        scores.append(svrHelper(target_name, *fold))
    result[model_name][target_name] = {'kfolds': scores, 'ave': np.mean(scores)}
print(result[model_name])

{'剥落面积百分比': {'kfolds': [0.4721149215720778, 0.749089657613509, 0.8628648319958342, 0.8535086774465285, 0.8426200183839707, 0.7654022263952412, 0.81222148552838, 0.8732835178988049, 0.766123903301073, 0.785767882113517], 'ave': 0.7782997122248937}}


# RFR

In [5]:
from sklearn.ensemble import RandomForestRegressor
model_name = 'RF'
result[model_name] = {}


def rfrHelper(target_name, x_train, x_test, y_train, y_test, random_state=0):
    """Train a 160-tree random forest on one split and return its test score.

    Parameters
    ----------
    target_name : str
        Name of the target being modelled; not used by the model itself.
    x_train, x_test, y_train, y_test
        One split as produced by ``train_test_split``.
    random_state : int, default 0
        Seed forwarded to ``RandomForestRegressor`` so that re-running the
        cell reproduces the same scores (the forest is otherwise unseeded).

    Returns
    -------
    float
        ``RandomForestRegressor.score`` on the held-out split.
    """
    forest = RandomForestRegressor(n_estimators=160, random_state=random_state)
    forest.fit(x_train, y_train)
    return forest.score(x_test, y_test)


# Score every pre-built split, then store the per-split scores and their mean.
for target_name, folds in dataloader.items():
    scores = []
    for fold in folds:
        scores.append(rfrHelper(target_name, *fold))
    result[model_name][target_name] = {'kfolds': scores, 'ave': np.mean(scores)}
print(result[model_name])

{'剥落面积百分比': {'kfolds': [0.7550593464768459, 0.5540893833657008, 0.7023889979348472, 0.7023517221057836, 0.7273591486727636, 0.6176172010913391, 0.7374708564700554, 0.4812858375874167, 0.8133869973030967, 0.7224466488284393], 'ave': 0.6813456139836289}}


# Ridge

In [6]:
from sklearn.linear_model import Ridge
model_name = 'Ridge'
result[model_name] = {}


def ridgeHelper(target_name, x_train, x_test, y_train, y_test):
    """Train a Ridge model (alpha=0.5) on one split and return its test score.

    ``target_name`` is accepted for call-site symmetry but is not used here.
    The return value is whatever ``Ridge.score`` reports on the held-out data.
    """
    regressor = Ridge(alpha=0.5)
    return regressor.fit(x_train, y_train).score(x_test, y_test)


# Score every pre-built split, then store the per-split scores and their mean.
for target_name, folds in dataloader.items():
    scores = []
    for fold in folds:
        scores.append(ridgeHelper(target_name, *fold))
    result[model_name][target_name] = {'kfolds': scores, 'ave': np.mean(scores)}
print(result[model_name])

{'剥落面积百分比': {'kfolds': [0.4318229989448148, 0.5441238757708674, 0.42201925548338703, 0.5230971545746919, 0.4489328453199978, 0.4961212711923133, 0.5086664330071498, 0.4560712896369272, 0.4782295684785953, 0.48848005791257176], 'ave': 0.47975647503213165}}


# Lasso

In [7]:
from sklearn.linear_model import Lasso
model_name = 'Lasso'
result[model_name] = {}


def LassoHelper(target_name, x_train, x_test, y_train, y_test):
    """Train a Lasso model (alpha=0.1) on one split and return its test score.

    ``target_name`` is accepted for call-site symmetry but is not used here.
    The return value is whatever ``Lasso.score`` reports on the held-out data.
    """
    regressor = Lasso(alpha=0.1)
    return regressor.fit(x_train, y_train).score(x_test, y_test)


# Score every pre-built split, then store the per-split scores and their mean.
for target_name, folds in dataloader.items():
    scores = []
    for fold in folds:
        scores.append(LassoHelper(target_name, *fold))
    result[model_name][target_name] = {'kfolds': scores, 'ave': np.mean(scores)}
print(result[model_name])

{'剥落面积百分比': {'kfolds': [0.43633928477642714, 0.5312623584665136, 0.43386308503522275, 0.5187071979018043, 0.4525463831055061, 0.5018941220193447, 0.48904416029367215, 0.4583038231229878, 0.48323599985304244, 0.4484169277228287], 'ave': 0.4753613342297349}}


# SGD

In [8]:
from sklearn.linear_model import SGDRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
model_name = 'SGD'
result[model_name] = {}


def SGDHelper(target_name, x_train, x_test, y_train, y_test, random_state=0):
    """Train a StandardScaler + SGDRegressor pipeline and return its test score.

    Parameters
    ----------
    target_name : str
        Name of the target being modelled; not used by the model itself.
    x_train, x_test, y_train, y_test
        One split as produced by ``train_test_split``.
    random_state : int, default 0
        Seed forwarded to ``SGDRegressor`` so that re-running the cell
        reproduces the same scores (the regressor is otherwise unseeded).

    Returns
    -------
    float
        The pipeline's ``score`` on the held-out split.
    """
    pipeline = make_pipeline(
        StandardScaler(),
        SGDRegressor(max_iter=1000, tol=1e-3, random_state=random_state),
    )
    pipeline.fit(x_train, y_train)
    return pipeline.score(x_test, y_test)


# Score every pre-built split, then store the per-split scores and their mean.
for target_name, folds in dataloader.items():
    scores = []
    for fold in folds:
        scores.append(SGDHelper(target_name, *fold))
    result[model_name][target_name] = {'kfolds': scores, 'ave': np.mean(scores)}
print(result[model_name])


{'剥落面积百分比': {'kfolds': [0.4439485453818772, 0.5577208791784194, 0.42729028136799074, 0.5160268805180865, 0.45782965816668486, 0.4671228632797325, 0.5231517470154873, 0.41553761556676017, 0.4886300408410924, 0.5137463163757146], 'ave': 0.48110048276918455}}


# Decision Tree

In [9]:
from sklearn import tree
model_name = 'Decision Tree'
result[model_name] = {}


def DTHelper(target_name, x_train, x_test, y_train, y_test, random_state=0):
    """Train a decision-tree regressor on one split and return its test score.

    Parameters
    ----------
    target_name : str
        Name of the target being modelled; not used by the model itself.
    x_train, x_test, y_train, y_test
        One split as produced by ``train_test_split``.
    random_state : int, default 0
        Seed forwarded to ``DecisionTreeRegressor`` so that re-running the
        cell reproduces the same scores (the tree is otherwise unseeded).

    Returns
    -------
    float
        ``DecisionTreeRegressor.score`` on the held-out split.
    """
    regressor = tree.DecisionTreeRegressor(random_state=random_state)
    regressor.fit(x_train, y_train)
    return regressor.score(x_test, y_test)


# Score every pre-built split, then store the per-split scores and their mean.
for target_name, folds in dataloader.items():
    scores = []
    for fold in folds:
        scores.append(DTHelper(target_name, *fold))
    result[model_name][target_name] = {'kfolds': scores, 'ave': np.mean(scores)}
print(result[model_name])

{'剥落面积百分比': {'kfolds': [0.7756854325096938, 0.22956348086217182, 0.18993546225035296, 0.7072487699119134, 0.6562396701537565, 0.521359522015911, 0.6445529933816613, -0.1201706848600097, 0.7302161773783882, 0.5281937697061131], 'ave': 0.48628245933099523}}


# XGBoost

In [10]:
from xgboost import XGBRegressor
from sklearn.metrics import r2_score
model_name = 'XGboost'
result[model_name] = {}


def xgboostHelper(target_name, x_train, x_test, y_train, y_test):
    """Train an XGBRegressor on one split and return R^2 on the test split.

    ``target_name`` is accepted for call-site symmetry but is not used here.
    The score is computed with ``r2_score`` on the model's test predictions.
    """
    booster = XGBRegressor(
        max_depth=500,
        learning_rate=0.01,
        n_estimators=500,
        objective="reg:squarederror",
    )
    booster.fit(x_train, y_train)
    predictions = booster.predict(x_test)
    return r2_score(y_test, predictions)


# Score every pre-built split, then store the per-split scores and their mean.
for target_name, folds in dataloader.items():
    scores = []
    for fold in folds:
        scores.append(xgboostHelper(target_name, *fold))
    result[model_name][target_name] = {'kfolds': scores, 'ave': np.mean(scores)}
print(result[model_name])

{'剥落面积百分比': {'kfolds': [0.6513834179032868, 0.5810249623784707, 0.6471328015345708, 0.7081821978841345, 0.7535651467783457, 0.6046822886313392, 0.6752499568305588, 0.3358647914663877, 0.8455437900449065, 0.7045510922150481], 'ave': 0.650718044566705}}


In [11]:
import json
# Persist all scores. The context manager guarantees the file is flushed and
# closed, and the explicit UTF-8 encoding is needed because ensure_ascii=False
# writes the non-ASCII target name verbatim.
with open('./result.json', 'w', encoding='utf-8') as result_file:
    json.dump(result, result_file, ensure_ascii=False)