In [None]:
!pip install keras-tuner --upgrade

In [None]:
# Importing Libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.offsetbox import AnchoredText
import matplotlib.colors as colors
from mpl_toolkits import mplot3d
from math import sqrt
import warnings
import time

from sklearn.metrics import mean_squared_error,r2_score,mean_absolute_error
from sklearn.model_selection import train_test_split,KFold,cross_val_score,GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder,LabelEncoder, MinMaxScaler


from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor,AdaBoostRegressor,ExtraTreesRegressor,BaggingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.cross_decomposition import PLSRegression
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV

import tensorflow as tf
import keras
from keras import layers
from keras.models import Sequential
from keras.layers import Dense
from keras.models import load_model
from tensorflow.keras.callbacks import EarlyStopping

import pickle
import os
import random


In [None]:
# Seed every random-number source used in this notebook from one value.
seed = 115
# NOTE(review): Python reads PYTHONHASHSEED when the interpreter starts, so
# setting it here most likely only affects child processes, not this kernel.
os.environ['PYTHONHASHSEED'] = str(seed)
# Python's built-in `random` module.
random.seed(seed)
# NumPy's global (legacy) random state.
np.random.seed(seed)
# TensorFlow's global seed.
tf.random.set_seed(seed)

# import torch
# torch.manual_seed(seed)

In [None]:
# Actual-vs-predicted plot and regression metrics for the current `model`
def plots():
    """Plot actual vs. predicted targets, print metrics, and save the figure.

    Reads the notebook globals ``model``, ``scaler``, ``trainX``, ``trainY``,
    ``testX`` and ``testY``. Targets and predictions are passed through
    ``scaler.inverse_transform`` before they are plotted or scored.

    Side effects:
        * updates global matplotlib/seaborn settings (figure size, save DPI
          and format, seaborn style and context);
        * prints the train R^2 and the test MAE, MSE, RMSE and R^2;
        * saves the figure to a file named ``str(model)[1:6]``.
    """
    # Undo the target scaling for both splits (test first, then train).
    actual_test = np.array(scaler.inverse_transform(testY).reshape(-1,1))
    predicted_test = np.array(scaler.inverse_transform(model.predict(testX).reshape(-1,1)))

    actual_train = np.array(scaler.inverse_transform(np.array(trainY).reshape(-1,1)))
    predicted_train = np.array(scaler.inverse_transform(model.predict(np.array(trainX)).reshape(-1,1)))

    # Global plot settings must be applied BEFORE the figure is created;
    # otherwise the first call is drawn with the previous defaults and only
    # later calls pick up the size and style.
    plt.rcParams["figure.figsize"] = (6,6)
    plt.rcParams['savefig.dpi'] = 300
    plt.rcParams["savefig.format"] = 'tiff'

    sns.set(style='whitegrid')
    sns.set_context("paper", font_scale=1.2)

    # Pass the size explicitly as well, so it does not depend on rcParams.
    fig, ax = plt.subplots(1, 1, figsize=(6, 6))

    ax.scatter(actual_test, predicted_test, color='purple', label='Test', linewidths=1, edgecolors='black', s=75)
    sns.regplot(x=actual_train, y=predicted_train, color='orangered', label='Train',
                scatter_kws={'s':40, 'alpha':0.5, 'edgecolor':'black'}, ax=ax)

    # Compute each metric once and reuse it for printing and the annotation.
    r2_train = r2_score(actual_train, predicted_train)
    mae_test = mean_absolute_error(actual_test, predicted_test)
    mse_test = mean_squared_error(actual_test, predicted_test)
    r2_test = r2_score(actual_test, predicted_test)

    print("R square (R^2) for train:                 %f" % r2_train)
    print("Mean absolute error (MAE):      %f" % mae_test)
    print("Mean squared error (MSE):       %f" % mse_test)
    print("Root mean squared error (RMSE): %f" % sqrt(mse_test))
    print("R square (R^2):                 %f" % r2_test)

    ax.set_xlabel('Actual')
    ax.set_ylabel('Predicted')

    # Text box in the upper-left corner (loc=2) with both R^2 scores.
    anchored_text = AnchoredText(
        "R\u00b2 Score_train  " + str(round(r2_train, 3)) + '\n'
        "R\u00b2 Score_test  " + str(round(r2_test, 3)),
        loc=2, prop=dict(size=8))
    ax.add_artist(anchored_text)

    ax.legend(loc=9, prop={'size': 8})
    fig.tight_layout()
    # NOTE(review): the file name is characters 1..5 of the model's repr, so it
    # depends on how the installed Keras version prints a model — confirm this
    # is the intended name. Kept unchanged so existing output paths still match.
    fig.savefig(str(model)[1:6], bbox_inches='tight')

In [None]:
# Read the raw dataset from the Excel workbook in the working directory.
df = pd.read_excel('final_data.xlsx')

In [None]:
# Load the stored training IDs that define the train/test split.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only use a `train_ids` file from a trusted source.
with open('train_ids', 'rb') as ids_file:
    train_ids = pickle.load(ids_file)

# Rows whose ID is listed in train_ids go to Train, all remaining rows to Test.
in_train = df.ID.isin(train_ids)
Train = df[in_train]
Test = df[~in_train]

In [None]:
# Combine Train and test for feature engineering.
# Train rows come first and the index is rebuilt (ignore_index=True), so the
# first len(Train) rows of the combined frame are the training samples.
DF_raw = pd.concat([Train,Test],ignore_index=True)
# Work on a copy so DF_raw keeps the unmodified combined data.
DF_data = DF_raw.copy()

In [None]:
# Remove the unwanted columns.
# The result is derived from DF_raw rather than from DF_data itself, so this
# cell can be re-run without raising KeyError for already-dropped columns.
DF_data = DF_raw.drop(columns=['SMILES', 'ID', 'Type'])
DF_data

In [None]:

# Standardise every column except the first, then standardise 'Sigma' separately.
# NOTE(review): the scaler is fitted on the combined train+test rows, so test
# statistics take part in the scaling — confirm this is intended.
scaler = StandardScaler()
feature_block = DF_data.iloc[:, 1:]
scaled_DF = pd.DataFrame(
    scaler.fit_transform(feature_block),
    columns=feature_block.columns,
)

# Refit the same scaler object on 'Sigma' alone. From here on `scaler` holds
# only the target's statistics, which is what plots() relies on when it calls
# scaler.inverse_transform on targets and predictions.
sigma_column = np.array(DF_data['Sigma']).reshape(-1, 1)
scaled_DF['Sigma'] = scaler.fit_transform(sigma_column)

display(scaled_DF)

In [None]:
# Move the scaled target out of scaled_DF into its own one-column DataFrame;
# scaled_DF is modified in place and keeps only the remaining columns.
DF_target = scaled_DF.pop('Sigma').to_frame()

# display(scaled_DF)
# display(DF_target)

In [None]:
# Train was concatenated ahead of Test, so the first len(Train) rows are the
# training samples and everything after them is the test set.
n_train = len(Train)

trainX, testX = scaled_DF.iloc[:n_train], scaled_DF.iloc[n_train:]
trainY, testY = DF_target.iloc[:n_train], DF_target.iloc[n_train:]

In [None]:
# Hold out 10% of the training rows as a validation set.
# NOTE(review): random_state=5 is independent of the notebook-wide `seed`
# defined earlier — confirm that a separate value is intended.
X_train, X_val, Y_train, Y_val = train_test_split(
    trainX, trainY,
    test_size=0.1,
    random_state=5
)

# Shapes of the resulting splits, as a quick sanity check.
print("Training Data Shape:", X_train.shape, Y_train.shape)
print("Validation Data Shape:", X_val.shape, Y_val.shape)

In [None]:
import keras_tuner as kt
from tensorflow import keras
from tensorflow.keras import layers, Sequential
import numpy as np

# Define the learning rate scheduler
def scheduler(epoch, lr):
    """Keep the learning rate constant at first, then decay it exponentially.

    Args:
        epoch: Epoch index supplied by the caller.
        lr: Learning rate currently in use.

    Returns:
        ``lr`` unchanged while ``epoch < 160``; otherwise ``lr`` multiplied
        by ``exp(-0.1)``.
    """
    hold_constant = epoch < 160
    return lr if hold_constant else lr * np.exp(-0.1)

# Wrap the schedule in a Keras callback so it can be passed to search()/fit().
callback = keras.callbacks.LearningRateScheduler(scheduler)

# Define the model builder for Keras Tuner
def model_builder(hp):
    """Build and compile a dense regression network from tuner hyperparameters.

    Hyperparameters (names unchanged):
        units_input:        32..128, step 32   width of the first Dense layer
        dropout_input:      0.1..0.5, step 0.1 dropout after the first layer
        num_hidden_layers:  1..3               number of additional Dense blocks
        units_hidden_{i}:   64..256, step 64   width of hidden block i
        dropout_hidden_{i}: 0.1..0.5, step 0.1 dropout after hidden block i
        learning_rate:      1e-2, 1e-3 or 1e-4 Adam learning rate

    The input width is taken from the notebook global ``scaled_DF``.

    Args:
        hp: Hyperparameter container supplied by the tuner.

    Returns:
        A compiled ``Sequential`` model with one linear output and MSE loss.
    """
    def fresh_init():
        # One initializer object per layer. Keras documents that an unseeded
        # (or integer-seeded) initializer instance may return the same values
        # on every call, so sharing a single instance across layers can give
        # equally shaped layers identical starting weights.
        return keras.initializers.RandomNormal()

    model = Sequential()

    # Explicit input specification (replaces the deprecated `input_dim`
    # argument on the first Dense layer).
    model.add(layers.Input(shape=(scaled_DF.shape[1],)))

    # First Dense block
    model.add(layers.Dense(
        units=hp.Int('units_input', min_value=32, max_value=128, step=32),
        activation='relu',
        kernel_initializer=fresh_init()
    ))
    model.add(layers.Dropout(hp.Float('dropout_input', min_value=0.1, max_value=0.5, step=0.1)))

    # Hidden layers
    for i in range(hp.Int('num_hidden_layers', 1, 3)):
        model.add(layers.Dense(
            units=hp.Int(f'units_hidden_{i}', min_value=64, max_value=256, step=64),
            activation='relu',
            kernel_initializer=fresh_init()
        ))
        model.add(layers.Dropout(hp.Float(f'dropout_hidden_{i}', min_value=0.1, max_value=0.5, step=0.1)))

    # Output layer
    model.add(layers.Dense(1, kernel_initializer=fresh_init(), activation='linear'))

    # Optimizer and learning rate
    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

    model.compile(optimizer=optimizer, loss='mse')
    return model

# Initialize the tuner: Hyperband search over model_builder's hyperparameters,
# ranking trials by validation loss. directory/project_name tell the tuner
# where to keep its files.
tuner = kt.Hyperband(
    model_builder,
    objective='val_loss',
    max_epochs=50,
    factor=3,
    directory='my_dir',
    project_name='tuning_neural_network'
)

# Search for the best hyperparameters
# NOTE(review): `scheduler` only changes the learning rate from epoch 160 on,
# so with at most 50 epochs it has no effect during the search.
tuner.search(
    X_train, Y_train,
    epochs=50,
    validation_data=(X_val, Y_val),
    callbacks=[callback]
)

# Get the best hyperparameters (the top-ranked set)
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Build and train the best model from scratch for 200 epochs.
# NOTE: the later training cell assigns `history` again, overwriting this one.
best_model = tuner.hypermodel.build(best_hps)
history = best_model.fit(
    X_train, Y_train,
    epochs=200,
    batch_size=32,
    validation_data=(X_val, Y_val),
    callbacks=[callback],
    verbose=1
)

# Print the optimal hyperparameters, one "name: value" pair per line
print("Optimal Hyperparameters:")
for param, value in best_hps.values.items():
    print(f"{param}: {value}")

In [None]:
# Learning Rate Scheduler
# (re-declares the same schedule that the hyperparameter-search cell defines)
def scheduler(epoch, lr):
    """Keep the learning rate constant at first, then decay it exponentially.

    Args:
        epoch: Epoch index supplied by the caller.
        lr: Learning rate currently in use.

    Returns:
        ``lr`` unchanged while ``epoch < 160``; otherwise ``lr`` multiplied
        by ``exp(-0.1)``.
    """
    hold_constant = epoch < 160
    return lr if hold_constant else lr * np.exp(-0.1)

# Callback object that applies `scheduler`; training_model() reads this global.
callback = keras.callbacks.LearningRateScheduler(scheduler)

# Neural Network
def Neural_network():
    """Build and compile the fixed-architecture regression network.

    Layout: input -> Dense(128, relu) -> Dropout(0.1) -> Dense(64, relu)
    -> Dropout(0.1) -> Dense(64, relu) -> Dropout(0.3) -> Dense(1, linear),
    compiled with Adam (learning rate 0.001) and MSE loss. The input width is
    taken from the notebook global ``scaled_DF``.

    Returns:
        The compiled, untrained ``Sequential`` model.
    """
    def fresh_init():
        # One initializer object per layer. Keras documents that an unseeded
        # (or integer-seeded) initializer instance may return the same values
        # on every call, so a single shared instance should not be reused
        # across layers.
        return keras.initializers.RandomNormal()

    model = Sequential()

    # Explicit input specification (replaces the deprecated `input_dim`
    # argument on the first Dense layer).
    model.add(layers.Input(shape=(scaled_DF.shape[1],)))

    # NOTE(review): earlier comments labelled the first two dropout rates as
    # 40% and 30%, but the code values are 0.1 and 0.1. The code values are
    # kept unchanged here — confirm them against the tuner output.

    # First Dense block
    model.add(layers.Dense(
        128,
        activation='relu',
        kernel_initializer=fresh_init()
    ))
    model.add(layers.Dropout(0.1))  # 10% dropout

    # Hidden block 1
    model.add(layers.Dense(
        64,
        activation='relu',
        kernel_initializer=fresh_init()
    ))
    model.add(layers.Dropout(0.1))  # 10% dropout

    # Hidden block 2
    model.add(layers.Dense(
        64,
        activation='relu',
        kernel_initializer=fresh_init()
    ))
    model.add(layers.Dropout(0.3))  # 30% dropout

    # Output Layer
    model.add(layers.Dense(
        1,
        kernel_initializer=fresh_init(),
        activation='linear'  # Linear activation for regression
    ))

    # Optimizer
    optimize = tf.keras.optimizers.Adam(learning_rate=0.001)

    # Compile the model
    model.compile(
        optimizer=optimize,
        loss='mse'  # Mean Squared Error for regression tasks
    )
    return model

# Training Function
def training_model(X_train, Y_train, model, X_val, Y_val):
    """Fit ``model`` with learning-rate scheduling and early stopping.

    Trains for up to 200 epochs with batch size 32. Two callbacks are used:
    the notebook-global ``callback`` (learning-rate scheduler) and an
    ``EarlyStopping`` that watches ``val_loss`` (patience 30, min_delta 0.001)
    and restores the best weights.

    Args:
        X_train: Training inputs.
        Y_train: Training targets.
        model: Compiled model exposing ``fit``.
        X_val: Validation inputs.
        Y_val: Validation targets.

    Returns:
        Whatever ``model.fit`` returns (the training history).
    """
    fit_callbacks = [
        callback,
        tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=30,
            min_delta=0.001,
            restore_best_weights=True,
        ),
    ]
    return model.fit(
        X_train,
        Y_train,
        validation_data=(X_val, Y_val),
        epochs=200,
        batch_size=32,
        verbose=1,
        callbacks=fit_callbacks,
    )


In [None]:
# Build a fresh, untrained network with the fixed architecture.
model = Neural_network()
# Train it; the callbacks (LR scheduler + early stopping) are attached inside
# training_model(), so none need to be defined here.

history = training_model(X_train, Y_train, model, X_val, Y_val)

In [None]:
# Metrics and actual-vs-predicted plot for the model trained in the previous cell.
plots()

In [None]:
# model.save('nn_tuned.keras')

In [None]:
# Replace the in-memory `model` with the one stored on disk.
# NOTE(review): the save call in the preceding cell is commented out, so this
# requires an existing 'nn_tuned.keras' file in the working directory.
model = keras.models.load_model('nn_tuned.keras')

In [None]:
# Same report again, now for the model loaded from 'nn_tuned.keras'.
plots()

In [None]:
# Print the layer-by-layer summary of the current `model`.
model.summary()

In [None]:
pip install pydot graphviz

In [None]:
# Write a diagram of `model` to model_architecture.png, with tensor shapes and
# layer names shown.
from tensorflow.keras.utils import plot_model
plot_model(model, to_file="model_architecture.png", show_shapes=True, show_layer_names=True)