In [1]:
"""
Random Forest
Global Feature Selection 
    - X = 'Depth (m)','iso_classes_encoded', 'Oberhollenzer_classes', 
    'σ,v (kPa)', 'σ',v (kPa)'.
    
    - Y = 'qc (MPa)','fs (kPa)'.

Link: https://github.com/DowellChan/ResNetRegression
"""
#General Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, mean_squared_log_error
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler, CSVLogger
from sklearn.model_selection import train_test_split
import time

'''
PreProcessing
''' 
#Loading Database
df_cpt=pd.read_csv(r'/content/drive/MyDrive/Colab Notebooks/Target_Encoded_Database.csv')

#Transforming to DataFrame
df_cpt = pd.DataFrame(df_cpt)
df_cpt = df_cpt.drop(columns='Unnamed: 0')

#Dropping zeros from qc
df_cpt = df_cpt[~(df_cpt[['qc (MPa)']] == 0).any(axis = 1)]
df_cpt = df_cpt[~(df_cpt[['qc (MPa)']] < 0).any(axis = 1)]
df_cpt = df_cpt[~(df_cpt[['SBT (-)']] == 0).any(axis = 1)]
df_cpt = df_cpt[~(df_cpt[['SBTn (-)']] == 0).any(axis = 1)]
df_cpt = df_cpt[~(df_cpt[['Mod. SBTn (-)']] == 0).any(axis = 1)]

#Transforming to NumpyArray
df_cpt = df_cpt.to_numpy()

'''
Inputs
'''
X = df_cpt[:, [11, 17, 18, 19, 27]]

#1--> 'Depth (m)' --> index: 1
#2 --> 'σ,v (kPa)' --> index: 9
#3 --> "σ',v (kPa)" --> index: 11
#4 --> 'SBT (-)' --> index: 17
#5 --> 'SBTn (-)' --> index: 18
#6 --> 'Mod. SBTn (-)' --> index: 19 
#7 --> 'Ic (-)' --> index: 21
#8 --> 'Ic SBT (-)' --> index:22 
#9 --> 'Ib (-)' --> index: 23
#10 --> 'Oberhollenzer_classes' --> index: 24
#11 --> 'iso_classes_encoded' --> index: 27

#X=X.to(device) #To GPU

'''
Outputs
'''
y = df_cpt[:, [2, 3]] 

#1 --> 'qc (MPa)' --> index: 2
#2 --> 'fs (kPa)' --> index: 3

#y=y.to(device) #To GPU

#Scaling the and preparing the data
#scalerX = MinMaxScaler()
#scalerY = MinMaxScaler()

#Scaling the Data
#scaledX=scalerX.fit_transform(X)
#scaledY=scalerY.fit_transform(y)

#Train & Test split
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                   test_size = 0.20)

In [2]:
#CrossVal
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import cross_val_score
#Random forest Model
model = RandomForestRegressor(bootstrap = 'True', n_estimators = 1400, max_depth = 300,
                              min_samples_split = 15, min_samples_leaf = 2)

#K-Fold
cv = 6

In [3]:
from sklearn.model_selection import cross_validate
#Initial time
start_time = time.time()

#Evaluating the model
#scores = cross_val_score(model, X_train, y_train, cv = cv, scoring = ['neg_mean_squared_error', 'r2_score'])

scores = cross_validate(model, X_train, y_train, cv = cv, scoring = ('r2', 'neg_mean_squared_error'), return_train_score = True)

#Final time
end_time = (time.time() - start_time)
print('The running time is: %.2f minutes' %(end_time/60))

The running time is: 291.22 minutes


In [4]:
print(scores['train_r2'])

[0.91059721 0.91092307 0.91091807 0.91037217 0.91104733 0.9109318 ]


In [5]:
print(scores)

{'fit_time': array([2478.47093678, 2472.95064473, 2504.81663656, 2481.23757339,
       2431.30938339, 2619.39361835]), 'score_time': array([68.89406633, 71.87133193, 68.91921592, 70.96627712, 74.01795578,
       66.62108374]), 'test_r2': array([0.86761948, 0.8659593 , 0.86701887, 0.87143193, 0.86416175,
       0.86662921]), 'train_r2': array([0.91059721, 0.91092307, 0.91091807, 0.91037217, 0.91104733,
       0.9109318 ]), 'test_neg_mean_squared_error': array([-463.42810898, -458.82607286, -460.2122693 , -448.98302747,
       -466.91742592, -454.11109968]), 'train_neg_mean_squared_error': array([-299.4912655 , -299.54089983, -299.2289974 , -300.75419839,
       -298.76985862, -299.70723939])}
