In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import GradientBoostingRegressor
%matplotlib inline
%config InlineBackend.figure_format = 'svg'

In [4]:
# Load the dataset and separate model inputs from the regression target.
# Features are taken positionally (the first 25 columns); the target is the
# column named 'PCE'.
data = pd.read_csv('data.csv')
X, y = data.iloc[:, :25], data['PCE']

# GBDT

In [5]:
# 1. Data split
# Features are the first 25 columns (selected by position); the target is 'PCE'.
X_pce = data.iloc[:, :25]  # X feature
y_pce = data['PCE']  # target feature
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_pce_train, X_pce_test, y_pce_train, y_pce_test = train_test_split(
    X_pce, y_pce, test_size=0.2, random_state=42
)

# 2. GBDT model
# Initialize the gradient-boosted decision trees regressor.
# NOTE(review): these hyperparameters presumably come from an earlier search
# (GridSearchCV is imported but not used in the visible cells) — confirm.
gbdt_reg_pce = GradientBoostingRegressor(
    learning_rate=0.05,
    max_depth=10,
    max_features='sqrt',
    n_estimators=150,
    subsample=0.95,
    random_state=42,
)

gbdt_reg_pce.fit(X_pce_train, y_pce_train)

# 3. Model evaluation on the held-out test set for PCE
y_pce_pred = gbdt_reg_pce.predict(X_pce_test)
mse_pce = mean_squared_error(y_pce_test, y_pce_pred)
rmse_pce = mse_pce**0.5  # RMSE is the square root of the MSE
mae_pce = mean_absolute_error(y_pce_test, y_pce_pred)
r2_pce = r2_score(y_pce_test, y_pce_pred)

# All metrics are reported with four decimals. The MAE spec was previously
# `:4f` (minimum width 4, default 6 decimals) instead of `:.4f`.
print(f"MSE: {mse_pce:.4f}, RMSE: {rmse_pce:.4f}, MAE:{mae_pce:.4f}, R²: {r2_pce:.4f} ")

MSE: 3.1629, RMSE: 1.7785, MAE:1.262585, R²: 0.9263 


In [6]:
# Score the application set with the fitted GBDT model.
# The inputs are the first 25 columns of app.csv, taken by position.
# NOTE(review): assumes app.csv has the same column order as the training
# features — confirm.
app = pd.read_csv('app.csv')
X_app = app.iloc[:, :25]
y_pred = gbdt_reg_pce.predict(X_app)

# Display the predicted PCE values as the cell output.
y_pred

array([13.94703734, 14.24904776, 14.19145493, 14.70372736,  9.84351978,
       10.44581167, 12.82594877])