In [None]:
# Importing Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.offsetbox import AnchoredText
import matplotlib.colors as colors
from mpl_toolkits import mplot3d
from math import sqrt
import warnings

from sklearn.metrics import mean_squared_error,r2_score,mean_absolute_error
from sklearn.model_selection import train_test_split,KFold,cross_val_score,GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder,LabelEncoder, MinMaxScaler


import tensorflow as tf
import keras
from keras import layers
from keras.models import Sequential
from keras.layers import Dense, LeakyReLU
from keras.models import load_model
from keras.wrappers.scikit_learn import KerasClassifier

import matplotlib.pyplot as plt
# Notebook-wide figure defaults: 10x10 inch figures, saved as 600-dpi TIFF.
plt.rcParams.update({
    "figure.figsize": (10,10),
    'savefig.dpi': 600,
    "savefig.format": 'tiff',
})
# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

In [None]:
# Seaborn theme for all figures: white grid background, "paper" context
# with fonts scaled by a factor of 2.
sns.set(style='whitegrid')
sns.set_context("paper", font_scale=2)


In [None]:
# Learning Rate Scheduler
def scheduler(epoch, lr):
    """Return the learning rate to use for ``epoch``.

    The incoming rate ``lr`` is returned unchanged while ``epoch`` is below
    160; from epoch 160 onward it is multiplied by ``exp(-0.1)`` on every
    call.
    """
    if epoch >= 160:
        return lr * np.exp(-0.1)
    return lr

# Keras callback wrapping `scheduler`; it is passed to `model.fit` in
# `training_model`.
callback = keras.callbacks.LearningRateScheduler(scheduler)



# Neural Network
def Neural_network(input_dim=None):
    """Build and compile the feed-forward regression network.

    Layers: Dense(64) -> Dense(128) -> Dense(128) -> Dense(1). The hidden
    layers use ReLU and are each followed by Dropout(0.1); the output layer
    is linear. The model is compiled with Adam (learning rate 0.001) and
    mean-squared-error loss.

    Parameters
    ----------
    input_dim : int, optional
        Number of input features. When omitted, the column count of the
        notebook-level ``scaled_DF`` is used, so that frame must already
        exist when the function is called without an argument.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    if input_dim is None:
        input_dim = scaled_DF.shape[1]

    init = keras.initializers.random_normal()
    model=Sequential()
    model.add(layers.Dense(64,activation='relu',kernel_initializer=init, input_dim=input_dim))
    model.add(layers.Dropout(0.1))

    model.add(layers.Dense(128,activation='relu',kernel_initializer=init))
    model.add(layers.Dropout(0.1))

    model.add(layers.Dense(128,activation='relu',kernel_initializer=init))
    model.add(layers.Dropout(0.1))

    model.add(layers.Dense(1,kernel_initializer=init,activation = 'linear'))

    # `learning_rate` is the supported argument name; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    optimize=tf.keras.optimizers.Adam(learning_rate = 0.001)

    model.compile(optimizer=optimize,
                    loss='mse',
                    )
    return model
# layers.LeakyReLU(alpha = 0.05)


In [None]:
# Function to train the model
def training_model(X_train,Y_train,model):
    """Fit ``model`` on ``X_train``/``Y_train`` and return the result of ``fit``.

    Training runs silently for 200 epochs with a batch size of 64 and uses
    the notebook-level ``callback``.
    """
    fit_options = dict(epochs=200, batch_size=64, verbose=0, callbacks=[callback])
    return model.fit(X_train, Y_train, **fit_options)

In [None]:
# Plot predicted vs. actual values and print the test-set error metrics
def plots():
    """Plot predicted vs. actual target values for the train and test sets.

    Reads the notebook-level globals ``model``, ``scaler``, ``trainX``,
    ``trainY``, ``testX`` and ``testY``. Targets and predictions are passed
    through ``scaler.inverse_transform`` before they are plotted and scored.
    MAE, MSE, RMSE and R^2 of the test set are printed, and the train/test
    R^2 values are shown in a text box on the figure. Returns ``None``.
    """
    # Apply the plot settings before the figure is created so that they
    # affect this figure rather than only the next one.
    plt.rcParams["figure.figsize"] = (10,10)
    plt.rcParams['savefig.dpi'] = 400
    plt.rcParams["savefig.format"] = 'tiff'

    sns.set(style='whitegrid')
    sns.set_context("paper", font_scale=2)

    f, ax = plt.subplots(1,1)

    actual_test=np.array(scaler.inverse_transform(testY).reshape(-1,1))
    predicted_test=np.array(scaler.inverse_transform(model.predict(testX).reshape(-1,1)))

    actual=np.array(scaler.inverse_transform(np.array(trainY).reshape(-1,1)))
    predicted=np.array(scaler.inverse_transform(model.predict(np.array(trainX)).reshape(-1,1)))

    ax.scatter(actual_test,predicted_test, color='seagreen', label='Test', linewidths=1, edgecolors='yellowgreen', s=75)
    # x and y must be keyword arguments: recent seaborn releases no longer
    # accept them positionally. The column vectors are flattened because
    # regplot works on 1-D inputs.
    sns.regplot(x=actual.ravel(), y=predicted.ravel(), ax=ax, color='crimson', label='Train',
                scatter_kws={'s':40, 'alpha':0.5, 'edgecolor':'black'})

    r2_train = r2_score(actual,predicted)
    r2_test = r2_score(actual_test,predicted_test)
    mse_test = mean_squared_error(actual_test,predicted_test)

    print("Mean absolute error (MAE):      %f" % mean_absolute_error(actual_test,predicted_test))
    print("Mean squared error (MSE):       %f" % mse_test)
    print("Root mean squared error (RMSE): %f" % sqrt(mse_test))
    print("R square (R^2):                 %f" % r2_test)

    ax.set_xlabel('Actual')
    ax.set_ylabel('Predicted')
    anchored_text = AnchoredText("R\u00b2 Score_train  "+str(round(r2_train,3))+'\n'"R\u00b2 Score_test  "+str(round(r2_test,3)), loc=2,prop=dict(size=15))
    ax.add_artist(anchored_text)

    ax.legend(loc = 9, prop={'size': 15})

In [None]:
# Load the train and test tables; the relative paths are resolved against
# the current working directory.
train_1 = pd.read_excel('train.xlsx')
test_1 = pd.read_excel('test.xlsx')

In [None]:
# Combine Train and test for feature engineering
# Train rows come first and ignore_index=True renumbers the rows from 0, so
# the first len(train_1) rows of the combined frame are the training rows.
DF_raw = pd.concat([train_1,test_1],ignore_index=True)
# Work on a copy so DF_raw stays untouched.
DF_data = DF_raw.copy()

In [None]:
# Preview the first rows of the combined table.
DF_data.head()

In [None]:
# Remove the identifier/text columns. Deriving DF_data from DF_raw (instead
# of dropping in place) keeps this cell safe to re-run: an in-place drop
# raises KeyError the second time because the columns are already gone.
DF_data = DF_raw.drop(columns=['Name of the Molecule','Smiles', 'Solvent type'])

In [None]:

# Scaling the features and the target
#
# Both scalers are fitted on the training rows only (the first len(train_1)
# rows of DF_data) and then applied to every row, so that test-set
# statistics do not leak into the preprocessing.

n_train = len(train_1)

# The columns at positions 1..7 get their own scaler.
feature_scaler = StandardScaler().fit(DF_data.iloc[:n_train,1:8])
scaled_DF = pd.DataFrame(feature_scaler.transform(DF_data.iloc[:,1:8]))
scaled_DF.columns = DF_data.iloc[:,1:8].columns

# `scaler` is the target scaler: later cells call scaler.inverse_transform
# on targets and predictions.
target_values = np.array(DF_data['Et(30)']).reshape(-1,1)
scaler = StandardScaler().fit(target_values[:n_train])
scaled_DF['Et(30)'] = scaler.transform(target_values).ravel()

display(scaled_DF)

In [None]:
# Append the columns from position 8 onward as they are (not scaled).
# NOTE(review): this cell rebinds scaled_DF from itself, so running it a
# second time appends the same columns again.
scaled_DF = pd.concat([scaled_DF, DF_data.iloc[:,8:]], axis=1)

In [None]:
# Preview the first rows of the scaled table.
scaled_DF.head()

In [None]:
# Move the target column out of the feature table: DF_target keeps it as a
# one-column frame and scaled_DF is left with the features only.
DF_target = scaled_DF.loc[:, ['Et(30)']]
scaled_DF.drop(columns=['Et(30)'], inplace=True)

display(scaled_DF, DF_target)

In [None]:
# Undo the earlier concatenation by position: the first len(train_1) rows
# are the training set, the remaining rows are the test set.
trainX, testX = scaled_DF.iloc[:len(train_1)], scaled_DF.iloc[len(train_1):]

trainY, testY = DF_target.iloc[:len(train_1)], DF_target.iloc[len(train_1):]

In [None]:
# (rows, columns) of the training feature table.
trainX.shape

In [None]:
# 5-fold cross-validation on the training set, followed by a final fit on
# all training rows.
kfold = KFold(n_splits=5, shuffle=True, random_state=None)

best_score = 0.1

scores = []
rmse = []
for train,valid in kfold.split(trainX,trainY):
    # Build a fresh network for every fold. Reusing a single model would
    # carry weights (and the decayed learning rate) from one fold to the
    # next, so later folds would be validated on rows the model had already
    # been trained on.
    fold_model = Neural_network()
    training_model(trainX.iloc[train],trainY.iloc[train],fold_model)

    actual = trainY.iloc[valid]
    predicted = fold_model.predict(trainX.iloc[valid])

    # R^2 is computed on the scaled values, RMSE after inverse-transforming
    # with `scaler`.
    scores.append(r2_score(actual,predicted))
    rmse.append(sqrt(mean_squared_error(scaler.inverse_transform(actual),scaler.inverse_transform(predicted))))

print("Average validation R2 score after crossvalidation : ", np.mean(scores))
print("Average validation rmse score after crossvalidation : ", np.mean(rmse))

#Train model on whole train data

model = Neural_network()
training_model(trainX,trainY,model)


actual=np.array(scaler.inverse_transform(np.array(trainY)))
predicted=np.array(scaler.inverse_transform(model.predict(np.array(trainX)).reshape(-1,1)))
model.save("nn.h5")
# The value reported below is the R^2 score on the training rows.
score = r2_score(actual,predicted)
print("\n\nTraining Accuracy : ",score) # Training Accuracy
plots()


In [None]:
# Replace the in-memory `model` with one loaded from disk.
# NOTE(review): the absolute path '/content/nn_0.97.h5' is not the "nn.h5"
# file written by the training cell; confirm which file is intended and
# that it exists in the current environment.
model = load_model('/content/nn_0.97.h5')

In [None]:
# Redraw the figure; plots() reads the global `model`, so this call uses
# whatever model is currently bound to that name.
plots()

In [None]:
# Print the summary of the current model.
model.summary()

In [None]:
# Draw the layer graph of the current model. The public
# `keras.utils.plot_model` entry point is used (through the `keras` module
# imported at the top of the notebook) instead of the internal
# `keras.utils.vis_utils` path, which is not present in newer Keras releases.
plot_model = keras.utils.plot_model
plot_model(model, show_shapes=True, show_layer_names=True)

In [None]:
# Write actual vs. predicted values for the test rows to an Excel file.
actual_test=np.array(scaler.inverse_transform(testY).reshape(-1,1))
predicted_test=np.array(scaler.inverse_transform(model.predict(testX).reshape(-1,1)))

# Take an explicit copy of the identifier columns: adding columns to a
# column-subset of test_1 is a chained assignment and triggers pandas'
# SettingWithCopyWarning (hidden here only by the global warning filter).
predictions_DF = test_1[['Name of the Molecule','Solvent type', 'Smiles']].copy()
predictions_DF['Actual'] = actual_test.ravel()
predictions_DF['Predicted'] = predicted_test.ravel()

predictions_DF.to_excel('predictions_test_nn.xlsx',index = False)

In [None]:
# Write actual vs. predicted values for the training rows to an Excel file.
actual=np.array(scaler.inverse_transform(np.array(trainY).reshape(-1,1)))
predicted=np.array(scaler.inverse_transform(model.predict(np.array(trainX)).reshape(-1,1)))

# Take an explicit copy of the identifier columns: adding columns to a
# column-subset of train_1 is a chained assignment and triggers pandas'
# SettingWithCopyWarning (hidden here only by the global warning filter).
predictions_DF = train_1[['Name of the Molecule','Solvent type', 'Smiles']].copy()
predictions_DF['Actual'] = actual.ravel()
predictions_DF['Predicted'] = predicted.ravel()

predictions_DF.to_excel('predictions_train_nn.xlsx',index = False)