In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [None]:
def Yin_Model(m1, m2, pH):
    """Evaluate the Yin baseline, a full second-order polynomial in m1, pH and m2.

    The expression uses only arithmetic operators, so the inputs may be
    scalars or NumPy arrays of matching shape (evaluated element-wise).

    Parameters
    ----------
    m1, m2 : number or array-like
        First and second model inputs.
    pH : number or array-like
        Third model input.

    Returns
    -------
    number or ndarray
        The polynomial value divided by 100 (presumably a percentage
        rescaled to a fraction -- TODO confirm against the source publication).
    """
    he_percent = (
        73.465
        + 53.467 * m1
        - 7.7315 * pH
        + 121.6289 * m2
        - 48.918 * m1**2
        + 1.8088 * pH**2
        - 88.0723 * m2**2
        - 9.8753 * m1 * pH
        + 75.2511 * m1 * m2
        - 19.6919 * pH * m2
    )
    return he_percent / 100

def Boli_Model(pH, β):
    """Evaluate the Boli baseline, a cubic polynomial in pH with pH*β and pH²*β terms.

    The expression uses only arithmetic operators, so the inputs may be
    scalars or NumPy arrays of matching shape (evaluated element-wise).

    Parameters
    ----------
    pH : number or array-like
        First model input.
    β : number or array-like
        Second model input; it only appears multiplied by pH or pH**2.

    Returns
    -------
    number or ndarray
        The polynomial value divided by 100 (presumably a percentage
        rescaled to a fraction -- TODO confirm against the source publication).
    """
    he_percent = (
        58.5099
        + 15.5573 * pH
        + 0.2956 * pH * β
        - 2.3956 * pH**2
        - 0.0281 * pH**2 * β
        + 0.124 * pH**3
    )
    return he_percent / 100

def Ajala_Model(m1, m2):
    """Evaluate the Ajala baseline, a full second-order polynomial in m1 and m2.

    The expression uses only arithmetic operators, so the inputs may be
    scalars or NumPy arrays of matching shape (evaluated element-wise).

    Parameters
    ----------
    m1, m2 : number or array-like
        First and second model inputs.

    Returns
    -------
    number or ndarray
        The polynomial value divided by 100 (presumably a percentage
        rescaled to a fraction -- TODO confirm against the source publication).
    """
    he_percent = (
        89.74
        + 12.76 * m1
        - 7.1 * m2
        + 4.2 * m1 * m2
        - 6.25 * m1**2
        + 0.39 * m2**2
    )
    return he_percent / 100

In [None]:
# Load the dataset from the 'MNPs_13' worksheet of the Excel workbook.
dataset_MNPs = pd.read_excel(
    './Data/MNPs_dataset.xlsx',
    sheet_name='MNPs_13',
)

# The first 13 columns form the feature matrix; the 14th column is the target.
X = dataset_MNPs.iloc[:, :13].values
y = dataset_MNPs.iloc[:, 13].values

In [None]:
#Test the reported models (baselines) on this dataset
#
# Each baseline is a fixed closed-form equation, so nothing is fitted here:
# for every random_state in `rs` the data is split 80/20 and only the 20 %
# test partition is scored. One R2 / RMSE / MAE value per model is appended
# to `metric` for every split.

rs = np.arange(50)
metric = {
    'r2_Yin':[],
    'r2_Boli':[],
    'r2_Ajala':[],
    'rmse_Yin':[],
    'rmse_Boli':[],
    'rmse_Ajala':[],
    'mae_Yin':[],
    'mae_Boli':[],
    'mae_Ajala':[]
}

for seed in rs:
    # The training partition is discarded on purpose (see note above).
    _, X_test, _, y_test = train_test_split(X, y, test_size=0.20, random_state=seed)

    # Feature columns fed to the baselines:
    #   column 0  -> m1 (divided by 1000 first)
    #   column 7  -> m2 (divided by 1000 first)
    #   column 10 -> pH
    #   column 12 -> β
    # NOTE(review): the /1000 looks like a unit conversion -- confirm against
    # the dataset's column definitions.
    HE_Yin = Yin_Model(X_test[:,0]/1000, X_test[:,7]/1000, X_test[:,10])
    HE_Boli = Boli_Model(X_test[:,10], X_test[:,12])
    HE_Ajala = Ajala_Model(X_test[:,0]/1000, X_test[:,7]/1000)

    # Score every baseline with the same three metrics instead of repeating
    # the metric/append code once per model.
    for model_name, HE_pred in (('Yin', HE_Yin), ('Boli', HE_Boli), ('Ajala', HE_Ajala)):
        metric['r2_' + model_name].append(r2_score(y_test, HE_pred))
        # RMSE is the square root of the mean squared error.
        metric['rmse_' + model_name].append(mean_squared_error(y_test, HE_pred)**0.5)
        metric['mae_' + model_name].append(mean_absolute_error(y_test, HE_pred))

In [None]:
# Tabulate the collected scores: one row per random split, one column per metric/model pair.
metric_df = pd.DataFrame(data=metric)
# Optional export of the table (left disabled):
#metric_df.to_excel('./Data_saved/baseline_metrics.xlsx')

In [None]:
# Display summary statistics (count, mean, std, min, quartiles, max) of every
# metric column, i.e. the spread of each baseline's scores across the splits.
metric_df.describe()