In [1]:
"""
Predicting SBT for the test outside the model

Random Forest
Global Feature Selection 
    - X = 'Depth (m)', 'σ,v (kPa)', 'σ',v (kPa)', 'SBT (-)', 'SBTn (-)',
    'Mod. SBTn (-)', 'Oberhollenzer_classes', 'iso_classes_encoded'.

    
    - Y = 'SBT (-)', 'SBTn (-)',
    'Mod. SBTn (-)', 'Oberhollenzer_classes', 'iso_classes_encoded'.

Link: https://github.com/DowellChan/ResNetRegression
"""
#General Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, mean_squared_log_error
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler, CSVLogger
from sklearn.model_selection import train_test_split
import time

'''
PreProcessing
''' 
#Loading Database
df_cpt=pd.read_csv(r'/content/drive/MyDrive/Colab Notebooks/Target_Encoded_Database.csv')

#Transforming to DataFrame
df_cpt = pd.DataFrame(df_cpt)
df_cpt = df_cpt.drop(columns='Unnamed: 0')

#Dropping zeros from qc
#df_cpt = df_cpt[~(df_cpt[['qc (MPa)']] == 0).any(axis = 1)]
#df_cpt = df_cpt[~(df_cpt[['qc (MPa)']] < 0).any(axis = 1)]
#df_cpt = df_cpt[~(df_cpt[['SBT (-)']] == 0).any(axis = 1)]
#df_cpt = df_cpt[~(df_cpt[['SBTn (-)']] == 0).any(axis = 1)]
#df_cpt = df_cpt[~(df_cpt[['Mod. SBTn (-)']] == 0).any(axis = 1)]

#Transforming to NumpyArray
df_cpt = df_cpt.to_numpy()

'''
Inputs
'''
X = df_cpt[:, [1, 11, 17]]

#1 --> 'Depth (m)' --> index: 1
#2 --> 'σ,v (kPa)' --> index: 9
#3 --> "σ',v (kPa)" --> index: 11
#4 --> 'SBT (-)' --> index: 17


#X=X.to(device) #To GPU

'''
Outputs
'''
y = df_cpt[:, [18, 19]] 

#5 --> 'SBTn (-)' --> index: 18
#6 --> 'Mod. SBTn (-)' --> index: 19 
#7 --> 'Ic (-)' --> index: 21
#8 --> 'Ic SBT (-)' --> index:22 
#9 --> 'Ib (-)' --> index: 23
#10 --> 'Oberhollenzer_classes' --> index: 24
#11 --> 'iso_classes_encoded' --> index: 27

#y=y.to(device) #To GPU

#Scaling the and preparing the data
#scalerX = MinMaxScaler()
#scalerY = MinMaxScaler()

#Scaling the Data
#scaledX=scalerX.fit_transform(X)
#scaledY=scalerY.fit_transform(y)

#Train & Test split
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                   test_size = 0.20)

In [None]:
from sklearn.ensemble import RandomForestRegressor
#Random forest Model
model = RandomForestRegressor(bootstrap = 'True', n_estimators = 40, max_depth = 30,
                              min_samples_split = 15, min_samples_leaf = 2)


#Initial time
start_time = time.time()

#Fitting the model
model.fit(X_train, y_train)

#Final time
end_time = (time.time() - start_time)
print('The running time is: %.2f minutes' %(end_time/60))

The running time is: 1.09 minutes


In [None]:
#Making a prediction
y_pred = model.predict(X_test)

In [None]:
#Metrics Global
print('Metrics Global')
mae = mean_absolute_error(y_test, y_pred)
print('The mae is: %.4f' %mae)
mse = mean_squared_error(y_test, y_pred)
print('The mse is: %.4f' %mse)
rmse = np.sqrt(mse).mean()
print('The rmse is: %.4f' %rmse)
r2 = r2_score(y_test, y_pred)
print('The R2 is: %.4f' %r2)

print('____________________________________________')

#Metrics for the SBTn
print('Metrics for SBTn')
mae_SBTn = mean_absolute_error(y_test[:, 0], y_pred[:, 0])
print('The mae is: %.4f' %mae_SBTn)
mse_SBTn = mean_squared_error(y_test[:, 0], y_pred[:, 0])
print('The mse is: %.4f' %mse_SBTn)
rmse_SBTn = np.sqrt(mse).mean()
print('The rmse is: %.4f' %rmse_SBTn)
r2_SBTn = r2_score(y_test[:, 0], y_pred[:, 0])
print('The R2 is: %.4f' %r2_SBTn)

print('____________________________________________')

#Metrics for the Mod.SBTn
print('Metrics for Mod.SBTn')
mae_ModSBTn = mean_absolute_error(y_test[:, 1], y_pred[:, 1])
print('The mae is: %.4f' %mae_ModSBTn)
mse_ModSBTn = mean_squared_error(y_test[:, 1], y_pred[:, 1])
print('The mse is: %.4f' %mse_ModSBTn)
rmse_ModSBTn = np.sqrt(mse).mean()
print('The rmse is: %.4f' %rmse_ModSBTn)
r2_ModSBTn= r2_score(y_test[:, 1], y_pred[:, 1])
print('The R2 is: %.4f' %r2_ModSBTn)

print('____________________________________________')

'''
#Metrics for the Iso classes
print('Metrics for Iso classes')
mae_iso = mean_absolute_error(y_test[:, 2], y_pred[:, 2])
print('The mae is: %.4f' %mae_iso)
mse_iso = mean_squared_error(y_test[:, 2], y_pred[:, 2])
print('The mse is: %.4f' %mse_iso)
rmse_iso = np.sqrt(mse).mean()
print('The rmse is: %.4f' %rmse_iso)
r2_iso = r2_score(y_test[:, 2], y_pred[:, 2])
print('The R2 is: %.4f' %r2_iso)
'''

Metrics Global
The mae is: 0.4268
The mse is: 0.5522
The rmse is: 0.7431
The R2 is: 0.8657
____________________________________________
Metrics for SBTn
The mae is: 0.3653
The mse is: 0.4663
The rmse is: 0.7431
The R2 is: 0.8397
____________________________________________
Metrics for Mod.SBTn
The mae is: 0.4884
The mse is: 0.6380
The rmse is: 0.7431
The R2 is: 0.8918
____________________________________________


"\n#Metrics for the Iso classes\nprint('Metrics for Iso classes')\nmae_iso = mean_absolute_error(y_test[:, 2], y_pred[:, 2])\nprint('The mae is: %.4f' %mae_iso)\nmse_iso = mean_squared_error(y_test[:, 2], y_pred[:, 2])\nprint('The mse is: %.4f' %mse_iso)\nrmse_iso = np.sqrt(mse).mean()\nprint('The rmse is: %.4f' %rmse_iso)\nr2_iso = r2_score(y_test[:, 2], y_pred[:, 2])\nprint('The R2 is: %.4f' %r2_iso)\n"

In [None]:
'''
Saving the Model
'''
import joblib

#Saving model as joblib file
import os
joblib.dump(model, 'CPT_RF_02_88_compress.joblib', compress = 3)
print(f"Compressed Random Forest: {np.round(os.path.getsize('CPT_RF_02_88_compress.joblib') / 1024 / 1024, 2) } MB")

Compressed Random Forest: 43.79 MB


_________________________________________________________________
Predictions for YL_C2
_________________________________________________________________

In [None]:
#Predicting Kurup

#Inputs for CPT Sounding YL-C2 
X_YL_C2 = [[1.2, 19, 4],
           [2.8, 34, 4],
           [4.2, 47, 4],
           [5.8, 61,4],
           [7.2, 73, 4],
           [8.8, 87, 3],
           [10.2, 99, 3],
           [11.8, 113, 3],
           [13.2, 128, 3],
           [16.2, 160, 5],
           [17.2, 170, 5]]

#To DataFrame
df_X_YL_C2 = pd.DataFrame(X_YL_C2)
df_X_YL_C2 = df_X_YL_C2.rename(columns = {0: 'Depth (m)', 1: "σ',v (kPa)",
                                          2: 'SBT (-)'})
df_X_YL_C2

Unnamed: 0,Depth (m),"σ',v (kPa)",SBT (-)
0,1.2,19,4
1,2.8,34,4
2,4.2,47,4
3,5.8,61,4
4,7.2,73,4
5,8.8,87,3
6,10.2,99,3
7,11.8,113,3
8,13.2,128,3
9,16.2,160,5


In [None]:
#Model predicting
y_pred_YL_C2 = model.predict(X_YL_C2)

#To DataFrame
df_y_pred_YL_C2 = pd.DataFrame(y_pred_YL_C2)
df_y_pred_YL_C2 = df_y_pred_YL_C2.rename(columns = {0: 'SBTn', 1: 'ModSBTn'}, inplace = False)
df_y_pred_YL_C2

Unnamed: 0,SBTn,ModSBTn
0,5.077208,6.206045
1,4.90207,6.4201
2,4.483422,4.905709
3,4.046529,4.749074
4,4.163549,4.419901
5,2.998937,2.053315
6,2.995348,2.125949
7,2.852336,1.998636
8,3.0,1.111758
9,4.916889,5.878194


In [None]:
#Concat DataFrames
df_YL_C2 = pd.concat([df_X_YL_C2, df_y_pred_YL_C2], axis = 1)
df_YL_C2

Unnamed: 0,Depth (m),"σ',v (kPa)",SBT (-),SBTn,ModSBTn
0,1.2,19,4,5.077208,6.206045
1,2.8,34,4,4.90207,6.4201
2,4.2,47,4,4.483422,4.905709
3,5.8,61,4,4.046529,4.749074
4,7.2,73,4,4.163549,4.419901
5,8.8,87,3,2.998937,2.053315
6,10.2,99,3,2.995348,2.125949
7,11.8,113,3,2.852336,1.998636
8,13.2,128,3,3.0,1.111758
9,16.2,160,5,4.916889,5.878194


In [None]:
#DataFrame to csv
df_YL_C2.to_csv('df_YL_C2.csv')

_________________________________________________________________
Predictions for NT-C16
_________________________________________________________________

In [None]:
#Predicting Kurup

#Inputs for CPT Sounding YL-C2 
X_NT_C16 = [[1.8, 33, 3],
            [4.2, 78, 3],
            [5.8, 103, 3],
            [7.2, 116, 3],
            [8.8, 133, 3],
            [10.2, 146, 3],
            [11.8, 162, 3],
            [13.2, 176, 3],
            [14.8, 192, 4],
            [16.2, 206, 4]]

#To DataFrame
df_X_NT_C16 = pd.DataFrame(X_NT_C16)
df_X_NT_C16 = df_X_NT_C16.rename(columns = {0: 'Depth (m)', 1: "σ',v (kPa)",
                                          2: 'SBT (-)'})
df_X_NT_C16

Unnamed: 0,Depth (m),"σ',v (kPa)",SBT (-)
0,1.8,33,3
1,4.2,78,3
2,5.8,103,3
3,7.2,116,3
4,8.8,133,3
5,10.2,146,3
6,11.8,162,3
7,13.2,176,3
8,14.8,192,4
9,16.2,206,4


In [None]:
#Model predicting
y_pred_NT_C16 = model.predict(X_NT_C16)

#To DataFrame
df_y_pred_NT_C16 = pd.DataFrame(y_pred_NT_C16)
df_y_pred_NT_C16 = df_y_pred_NT_C16.rename(columns = {0: 'SBTn', 1: 'ModSBTn'}, inplace = False)
df_y_pred_NT_C16

Unnamed: 0,SBTn,ModSBTn
0,3.457014,3.34562
1,3.246068,2.718628
2,2.730116,2.0
3,2.931658,2.247134
4,2.565247,1.77196
5,2.085848,1.669545
6,2.184908,1.324731
7,3.0,2.139048
8,3.007805,1.092278
9,3.0,1.0


In [None]:
#Concat DataFrames
df_NT_C16 = pd.concat([df_X_NT_C16, df_y_pred_NT_C16], axis = 1)
df_NT_C16

Unnamed: 0,Depth (m),"σ',v (kPa)",SBT (-),SBTn,ModSBTn
0,1.8,33,3,3.457014,3.34562
1,4.2,78,3,3.246068,2.718628
2,5.8,103,3,2.730116,2.0
3,7.2,116,3,2.931658,2.247134
4,8.8,133,3,2.565247,1.77196
5,10.2,146,3,2.085848,1.669545
6,11.8,162,3,2.184908,1.324731
7,13.2,176,3,3.0,2.139048
8,14.8,192,4,3.007805,1.092278
9,16.2,206,4,3.0,1.0


In [None]:
#DataFrame to csv
df_NT_C16.to_csv('df_NT_C16.csv')

________________________________________________________________________________
Predictions for NT-C7
________________________________________________________________________________


In [None]:
#Predicting Kurup

#Inputs for CPT Sounding NT-C7
X_NT_C7 = [[4.2, 90, 4],
           [5.8, 115, 4],
           [7.2, 130, 6],
           [8.8, 147, 6],
           [10.2, 160, 6],
           [11.8, 175, 5],
           [13.2, 189, 5],
           [14.8, 204, 3],
           [16.2, 218, 3],
           [17.8, 234, 4],
           [19.2, 247, 5]]


#To DataFrame
df_X_NT_C7 = pd.DataFrame(X_NT_C7)
df_X_NT_C7 = df_X_NT_C7.rename(columns = {0: 'Depth (m)', 1: "σ',v (kPa)",
                                          2: 'SBT (-)'})
df_X_NT_C7

Unnamed: 0,Depth (m),"σ',v (kPa)",SBT (-)
0,4.2,90,4
1,5.8,115,4
2,7.2,130,6
3,8.8,147,6
4,10.2,160,6
5,11.8,175,5
6,13.2,189,5
7,14.8,204,3
8,16.2,218,3
9,17.8,234,4


In [None]:
#Model predicting
y_pred_NT_C7 = model.predict(X_NT_C7)

#To DataFrame
df_y_pred_NT_C7 = pd.DataFrame(y_pred_NT_C7)
df_y_pred_NT_C7 = df_y_pred_NT_C7.rename(columns = {0: 'SBTn', 1: 'ModSBTn'}, inplace = False)
df_y_pred_NT_C7

Unnamed: 0,SBTn,ModSBTn
0,3.910527,4.255308
1,3.976949,4.779057
2,5.838932,7.0
3,5.450831,6.986371
4,5.0,6.975
5,4.954928,6.054677
6,4.679524,5.84636
7,2.601321,1.935445
8,2.0,1.035046
9,3.0,1.297191


In [None]:
#Concat DataFrames
df_NT_C7 = pd.concat([df_X_NT_C7, df_y_pred_NT_C7], axis = 1)
df_NT_C7

Unnamed: 0,Depth (m),"σ',v (kPa)",SBT (-),SBTn,ModSBTn
0,4.2,90,4,3.910527,4.255308
1,5.8,115,4,3.976949,4.779057
2,7.2,130,6,5.838932,7.0
3,8.8,147,6,5.450831,6.986371
4,10.2,160,6,5.0,6.975
5,11.8,175,5,4.954928,6.054677
6,13.2,189,5,4.679524,5.84636
7,14.8,204,3,2.601321,1.935445
8,16.2,218,3,2.0,1.035046
9,17.8,234,4,3.0,1.297191


In [None]:
#DataFrame to csv
df_NT_C7.to_csv('df_NT_C7.csv')

________________________________________________________________________________
Predictions for NT-C15
________________________________________________________________________________

In [None]:
#Predicting Kurup

#Inputs for CPT Sounding NT-C15
X_NT_C15 = [[3.8, 49, 5],
            [5.8, 69, 5],
            [7.2, 81, 6],
            [8.8, 97, 6],
            [10.5, 115, 6],
            [11.8, 128, 4],
            [13.2, 142, 4],
            [14.8, 159, 4],
            [16.2, 172, 5],
            [17.8, 188, 5]]


#To DataFrame
df_X_NT_C15 = pd.DataFrame(X_NT_C15)
df_X_NT_C15 = df_X_NT_C15.rename(columns = {0: 'Depth (m)', 1: "σ',v (kPa)",
                                          2: 'SBT (-)'})
df_X_NT_C15

Unnamed: 0,Depth (m),"σ',v (kPa)",SBT (-)
0,3.8,49,5
1,5.8,69,5
2,7.2,81,6
3,8.8,97,6
4,10.5,115,6
5,11.8,128,4
6,13.2,142,4
7,14.8,159,4
8,16.2,172,5
9,17.8,188,5


In [None]:
#Model predicting
y_pred_NT_C15 = model.predict(X_NT_C15)

#To DataFrame
df_y_pred_NT_C15 = pd.DataFrame(y_pred_NT_C15)
df_y_pred_NT_C15 = df_y_pred_NT_C15.rename(columns = {0: 'SBTn', 1: 'ModSBTn'}, inplace = False)
df_y_pred_NT_C15

Unnamed: 0,SBTn,ModSBTn
0,5.749234,6.784192
1,5.25275,6.867806
2,6.003865,6.983929
3,5.998611,6.900074
4,5.816219,6.899379
5,3.366563,3.067335
6,3.352108,2.118033
7,3.130545,1.961115
8,4.404329,5.049367
9,4.449748,4.974203


In [None]:
#Concat DataFrames
df_NT_C15 = pd.concat([df_X_NT_C15, df_y_pred_NT_C15], axis = 1)
df_NT_C15

Unnamed: 0,Depth (m),"σ',v (kPa)",SBT (-),SBTn,ModSBTn
0,3.8,49,5,5.749234,6.784192
1,5.8,69,5,5.25275,6.867806
2,7.2,81,6,6.003865,6.983929
3,8.8,97,6,5.998611,6.900074
4,10.5,115,6,5.816219,6.899379
5,11.8,128,4,3.366563,3.067335
6,13.2,142,4,3.352108,2.118033
7,14.8,159,4,3.130545,1.961115
8,16.2,172,5,4.404329,5.049367
9,17.8,188,5,4.449748,4.974203


In [2]:
#DataFrame to csv
df_NT_C15.to_csv('df_NT_C15.csv')

NameError: ignored