In [None]:
import itertools

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
from sklearn.metrics import precision_score, recall_score, confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense,Dropout
from tensorflow.keras.utils import to_categorical
# Read the dataset CSV into the DataFrame `db` used by the rest of the notebook.
# NOTE(review): this is an absolute, machine-specific Windows path; the notebook
# cannot run on another machine until it points at a local copy of amf.csv.
file=r"C:\Users\VISWAM\Downloads\amf.csv"
db=pd.read_csv(file)
#Converting SMILES to ECFP values 
def get_fingerprint(smiles, size=8192):
    """Convert a SMILES string into a Morgan bit-vector fingerprint.

    Parameters
    ----------
    smiles : str or None
        SMILES representation of the molecule. ``None`` and NaN are accepted
        and treated as "no molecule".
    size : int, default 8192
        Number of bits requested for the fingerprint.

    Returns
    -------
    numpy.ndarray
        The fingerprint bits as a NumPy array. When ``smiles`` is missing or
        RDKit cannot parse it, an all-zero array of shape ``(size,)`` is
        returned instead.
    """
    # Missing cells (None / NaN) are encoded as an all-zero vector.
    if smiles is None or pd.isnull(smiles):
        return np.zeros((size,))
    molecule = Chem.MolFromSmiles(smiles)
    # MolFromSmiles yields None for SMILES it cannot parse; encode those as
    # zeros too rather than failing the whole featurisation run.
    if molecule is None:
        return np.zeros((size,))
    # Radius-2 Morgan fingerprint folded to `size` bits.
    fingerprint = AllChem.GetMorganFingerprintAsBitVect(molecule, 2, nBits=size)
    # ConvertToNumpyArray fills (and resizes) the destination array in place.
    arr = np.zeros((1,))
    DataStructs.ConvertToNumpyArray(fingerprint, arr)
    return arr
# Columns, selected by position, whose values are passed to get_fingerprint.
cols = db.columns[[0, 2, 4, 6, 8, 12]]
# A second positional selection and an empty list; neither is referenced by
# the code visible in this notebook.
col = db.columns[[1, 3, 5, 7]]
array = []
# Replace each selected column with a column of integer fingerprint vectors.
for a in cols:
    j = [get_fingerprint(smiles).astype(int) for smiles in db[a]]
    db[a] = j
# Fill whatever missing values remain in the frame with 0.
db.fillna(0, inplace=True)
# Assemble the model input: every fingerprint column is expanded into one
# column per bit and concatenated with the scalar columns 'c1c', 'c2c', 't', 'T'.
feature_parts = [
    pd.DataFrame(db["R"].values.tolist()),
    pd.DataFrame(db["r1"].values.tolist()),
    pd.DataFrame(db["r2"].values.tolist()),
    pd.DataFrame(db["c1"].values.tolist()),
    db['c1c'],
    pd.DataFrame(db["c2"].values.tolist()),
    db['c2c'],
    db['t'],
    db['T'],
]
db1 = pd.concat(feature_parts, axis=1)

# Display names for the classes, used as tick labels on the confusion matrices.
# NOTE(review): pd.factorize numbers classes in order of first appearance, so
# these names only line up with the integer codes if the data file introduces
# the classes in exactly this order - confirm against the data.
cn = ['Small Group', 'Large Group', 'Anti Markovnikov', 'Markovnikov']

# Integer-encode the class column, then one-hot encode it. num_classes pins the
# width to the number of named classes so it always matches the 4-unit softmax
# output, even if the highest code is absent from the data.
y = pd.factorize(db["class"])[0]
y = to_categorical(y, num_classes=len(cn))

# Split the dataset into train and test data (10% held out, stratified by class).
xtr, xte, ytr, yte = train_test_split(
    db1, y, test_size=0.10, stratify=y, shuffle=True, random_state=42)

In [None]:
# Classification model using Keras: three regularised 512-unit ReLU layers,
# each followed by 50% dropout, feeding a 4-way softmax output.
reg = tf.keras.regularizers.l1_l2(l1=0.00005, l2=0.004)
# Settings shared by every hidden layer.
hidden_options = dict(activation='relu',
                      kernel_initializer='random_normal',
                      kernel_regularizer=reg)
classifier = Sequential([
    Dense(512, input_dim=xtr.shape[1], **hidden_options),
    Dropout(0.5),
    Dense(512, **hidden_options),
    Dropout(0.5),
    Dense(512, **hidden_options),
    Dropout(0.5),
    Dense(4, activation='softmax', kernel_initializer='random_normal'),
])
classifier.compile(optimizer='adam',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])
# The held-out test split doubles as the validation data during training.
history = classifier.fit(xtr, ytr,
                         validation_data=(xte, yte),
                         epochs=100,
                         verbose=2)
# Evaluation of the model: plot the loss curve, then the accuracy curve,
# each showing the training series against the validation ("test") series.
# Every entry is (train key, validation key, y-axis label, output file name).
curve_specs = [
    ('loss', 'val_loss', 'loss', 'classloss'),
    ('accuracy', 'val_accuracy', 'Accuracy', 'classacc'),
]
for train_key, val_key, y_label, out_name in curve_specs:
    plt.plot(history.history[train_key], label='train')
    plt.plot(history.history[val_key], label='test')
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()
    # Save before show(), while the figure is still the current one.
    plt.savefig(out_name)
    plt.show()

In [None]:
#Confusion matrix plotting
def plot_confusion_matrix(cm,
                          target_names,pr,re,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=False,
                          filename='classcm4'):
    """Draw, save and show a confusion matrix as an annotated heat map.

    Parameters
    ----------
    cm : numpy.ndarray
        Square confusion matrix of counts (rows: true class, columns:
        predicted class).
    target_names : sequence of str or None
        Tick labels for the classes; ticks are left untouched when ``None``.
    pr, re : float
        Precision and recall shown in the caption. The F1 score displayed
        next to them is derived from these two values.
    title : str
        Figure title.
    cmap : matplotlib colormap, optional
        Colormap for the heat map; defaults to 'Blues'.
    normalize : bool
        When True, each row is divided by its total so cells show per-class
        fractions instead of raw counts.
    filename : str
        Path the figure is saved to.
    """
    # Accuracy is always computed from the raw counts, before any normalisation.
    accuracy = np.trace(cm) / np.sum(cm).astype('float')
    misclass = 1 - accuracy

    # Harmonic mean of precision and recall; defined as 0 when both are 0 so
    # the caption never triggers a division by zero.
    f1 = (2 * (pr * re) / (re + pr)) if (re + pr) else 0.0

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    # Normalise BEFORE drawing so the cell colours, the printed numbers and the
    # text-colour threshold all refer to the same values.
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis].astype('float')
        # Rows without any samples stay all-zero instead of becoming NaN.
        row_totals[row_totals == 0] = 1.0
        cm = cm.astype('float') / row_totals

    plt.figure(figsize=(8,7))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # Cells darker than the threshold get white text, the rest black.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    cell_format = "{:0.4f}" if normalize else "{:,}"
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cell_format.format(cm[i, j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.3f}; misclass={:0.3f}; precision={:0.3f}; recall={:0.3f}\nF1-score={:0.3f}'.format(accuracy, misclass,pr,re,f1))
    # Lay out after the labels exist so they are taken into account.
    plt.tight_layout()
    plt.savefig(filename,bbox_inches='tight')
    plt.show()
#plotting confusion matrix of test data
# Predicted class probabilities for the test set.
a = classifier.predict(xte)
n_values = a.shape[1]
# One-hot encode the most probable class of every sample.
c = np.eye(n_values, dtype=int)[np.argmax(a, axis=1)]
# Model.predict_classes no longer exists in current Keras/TensorFlow, and
# to_categorical without num_classes could return fewer columns than yte when
# the last class is never predicted; the fixed-width one-hot matrix is reused.
yp = c
pr = precision_score(yte, yp, average='macro')
re = recall_score(yte, yp, average='macro')
# labels= keeps the matrix one row/column per class even if a class is absent
# from this split, so it always matches the tick labels in cn.
results = confusion_matrix(yte.argmax(axis=1), c.argmax(axis=1),
                           labels=np.arange(n_values))
plot_confusion_matrix(results, cn, pr, re)

In [None]:
#plotting confusion matrix of train data
# Predicted class probabilities for the training set.
a = classifier.predict(xtr)
n_values = a.shape[1]
# One-hot encode the most probable class of every sample.
c = np.eye(n_values, dtype=int)[np.argmax(a, axis=1)]
# Model.predict_classes no longer exists in current Keras/TensorFlow, and
# to_categorical without num_classes could return fewer columns than ytr when
# the last class is never predicted; the fixed-width one-hot matrix is reused.
yp = c
pr = precision_score(ytr, yp, average='macro')
re = recall_score(ytr, yp, average='macro')
# labels= keeps the matrix one row/column per class so it always matches the
# tick labels in cn.
results = confusion_matrix(ytr.argmax(axis=1), c.argmax(axis=1),
                           labels=np.arange(n_values))
# NOTE(review): plot_confusion_matrix saves its figure as 'classcm4', so this
# call replaces the file written for the test-set matrix.
plot_confusion_matrix(results, cn, pr, re)