In [1]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'svg'

In [2]:
# Gap model: fit a gradient-boosted regressor on sup.csv and report its error
# on a held-out 20 % test split.

# 1. Load the data and split it
data = pd.read_csv('sup.csv')
X = data.iloc[:, :24]  # features: the first 24 columns, selected by position
y = data['Gap']  # target column
# Positional slicing silently follows the CSV layout, so verify the target did
# not end up inside the feature matrix (that would leak the answer to the model).
assert 'Gap' not in X.columns, "target column 'Gap' is among the selected features"
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. Initialize the gradient-boosted decision tree regressor
gbdt_reg_gap = GradientBoostingRegressor(
    random_state=42,
    learning_rate=0.15,
    max_depth=10,
    max_features='sqrt',
    n_estimators=300,
    subsample=0.97,
)

# 3. Fit on the training split
gbdt_reg_gap.fit(X_train, y_train)

# 4. Evaluate on the held-out test split
y_pred = gbdt_reg_gap.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = mse**0.5
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Every metric uses the '.4f' spec (4 decimals). A spec of '4f' without the dot
# means "minimum width 4, default precision 6" and prints six decimals instead.
print(f"MSE: {mse:.4f}, RMSE: {rmse:.4f}, MAE: {mae:.4f}, R²: {r2:.4f} ")


MSE: 0.0003, RMSE: 0.0164, MAE:0.005790, R²: 0.9712 


In [5]:
# Load the rows that the Gap model scores in the next cell; the bare name on
# the last line renders the frame as this cell's output.
predict = pd.read_csv('predict.csv')
predict

Unnamed: 0,MA,FA,Cs,Pb,Sn,Other,I,Br,Po,ETL,...,Add_H,Step,Solvent,Coated1_v,Coated1_t,Coated2_v,Coated2_t,Antisolvent,Anneal_T,Anneal_t
0,0.04,0.92,0.04,0.5,0.5,0,0.93,0.07,0,0.333333,...,1,0.5,1,0.25,0.3,0,0,1,0.34,0.35
1,0.04,0.92,0.04,0.45,0.55,0,0.93,0.07,0,0.333333,...,1,0.5,1,0.25,0.3,0,0,1,0.34,0.35
2,0.04,0.92,0.04,0.35,0.65,0,0.93,0.07,0,0.333333,...,1,0.5,1,0.25,0.3,0,0,1,0.34,0.35
3,0.04,0.92,0.04,0.8,0.2,0,0.93,0.07,0,0.333333,...,1,0.5,1,0.25,0.3,0,0,1,0.34,0.35


In [6]:
# Predict 'Gap' for every row of `predict` with the fitted gbdt_reg_gap model
# and show the resulting array.
# NOTE(review): the model was fitted on data.iloc[:, :24]; predict.csv presumably
# holds the same 24 columns in the same order -- confirm. The displayed header of
# `predict` starts with 'Unnamed: 0', which looks like a saved index column
# being passed to the model as a feature -- verify this is intended.
pred = gbdt_reg_gap.predict(predict)
pred

array([0.53845837, 0.53193049, 0.53105133, 0.55549063])

In [15]:
# PCE model: fit a gradient-boosted regressor on data.csv and report its error
# on a held-out 20 % test split.
data2 = pd.read_csv('data.csv')

# 1. Select features/target and split
X_pce = data2.iloc[:, :25]  # features: the first 25 columns, selected by position
y_pce = data2['PCE']  # target column
# Positional slicing silently follows the CSV layout, so verify the target did
# not end up inside the feature matrix.
assert 'PCE' not in X_pce.columns, "target column 'PCE' is among the selected features"
# NOTE(review): the split seed is 40 here but 42 for the Gap model -- confirm
# the difference is intentional.
X_pce_train, X_pce_test, y_pce_train, y_pce_test = train_test_split(
    X_pce, y_pce, test_size=0.2, random_state=40
)

# 2. Initialize the gradient-boosted decision tree regressor
gbdt_reg_pce = GradientBoostingRegressor(
    random_state=42,
    learning_rate=0.1,
    max_depth=10,
    max_features='sqrt',
    n_estimators=300,
    subsample=0.95,
)

# 3. Fit on the training split
gbdt_reg_pce.fit(X_pce_train, y_pce_train)

# 4. Evaluate on the held-out test split. The metrics get a `_pce` suffix so
#    they do not overwrite the Gap model's mse/rmse/mae/r2 in the kernel.
y_pce_pred = gbdt_reg_pce.predict(X_pce_test)
mse_pce = mean_squared_error(y_pce_test, y_pce_pred)
rmse_pce = mse_pce**0.5
mae_pce = mean_absolute_error(y_pce_test, y_pce_pred)
r2_pce = r2_score(y_pce_test, y_pce_pred)

print(f"MSE: {mse_pce:.4f}, RMSE: {rmse_pce:.4f}, MAE: {mae_pce:.4f}, R²: {r2_pce:.4f}")

In [16]:
# Score the rows of a.csv with the fitted PCE model and show the predictions.
x_a = pd.read_csv('a.csv')
# Only the final expression of a cell is rendered, so a bare `x_a` placed here
# would be evaluated and thrown away; call display(x_a) if the input table
# should be shown alongside the predictions.
y_a = gbdt_reg_pce.predict(x_a)
y_a

array([22.11412866, 20.90310698,  9.28341419, 23.34729909])