In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense,Dropout
from tensorflow.keras.utils import to_categorical
# Location of the input CSV. The original absolute path is kept as the default so the
# notebook behaves exactly as before on the author's machine; set the AMF_CSV_PATH
# environment variable to point at the file on any other machine.
file=os.environ.get("AMF_CSV_PATH", r"C:\Users\VISWAM\Downloads\amf.csv")
db=pd.read_csv(file)
#Converting SMILES to ECFP values
def get_fingerprint(smiles, size=8192):
  """Convert one SMILES string into a Morgan (radius 2) bit-vector fingerprint array.

  Parameters
  ----------
  smiles : str or None
      SMILES string to encode. ``None`` and pandas-null values (e.g. NaN) are
      treated as "no molecule".
  size : int, default 8192
      Number of bits requested from the fingerprint, and the length of the
      all-zero fallback vector.

  Returns
  -------
  numpy.ndarray
      Float array holding the fingerprint bits. An all-zero vector of length
      ``size`` is returned when ``smiles`` is missing or RDKit cannot parse it.
  """
  is_missing = smiles is None or pd.isnull(smiles)
  mol = None if is_missing else Chem.MolFromSmiles(smiles)
  if mol is None:
    # Missing or unparsable input: fall back to an empty (all-zero) fingerprint.
    return np.zeros((size,))
  bit_vect = AllChem.GetMorganFingerprintAsBitVect(mol, 2, size)
  # The placeholder array is filled in place by RDKit's converter.
  bits = np.zeros((1,))
  DataStructs.ConvertToNumpyArray(bit_vect, bits)
  return bits
# Replace each selected column (picked by position) with one fingerprint array per row.
# NOTE(review): these positions are presumably the SMILES columns "R", "r1", "r2",
# "c1", "c2" and "p" that are expanded below -- confirm against the CSV header.
cols=db.columns[[0,2,4,6,8,12]]
col=db.columns[[1,3,5,7]]  # not referenced again in the visible cells
array=[]                   # not referenced again in the visible cells
for a in cols:
    db[a]=[get_fingerprint(smiles).astype(int) for smiles in db[a]]
# Any value that is still missing becomes 0.
db=db.fillna(0)

# Build the feature matrix: fingerprint columns are expanded to one column per bit,
# the remaining columns are taken as they are. The order below is the column order
# of the resulting frame.
fingerprint_columns={"R","r1","r2","c1","c2","p"}
feature_order=["R","r1","r2","c1","c1c","c2","c2c","p","t","T","m","am","l","s"]
db2=pd.concat(
    [pd.DataFrame(db[name].values.tolist()) if name in fingerprint_columns else db[name]
     for name in feature_order],
    axis=1)
db3=db['y']

#splitting Dataset into train and test data
# The row positions are split first so that the standardisation statistics can be
# computed from the training rows only; using the mean/std of the whole table would
# leak information about the held-out rows into the features.
co=["c1c","c2c","t","T"]
train_idx, test_idx = train_test_split(np.arange(len(db2)), test_size=0.1, random_state=35)
train_part=db2[co].iloc[train_idx]
db2[co]=(db2[co]-train_part.mean())/train_part.std()
xtr, xte = db2.iloc[train_idx], db2.iloc[test_idx]
ytr, yte = db3.iloc[train_idx], db3.iloc[test_idx]


In [None]:
#Regression model
def r2_keras(y_true, y_pred):
    """Coefficient of determination (R^2) of one batch, usable as a Keras metric.

    Both tensors are flattened first so that a 1-D target and a (batch, 1)
    prediction cannot broadcast against each other. A small epsilon keeps the
    division finite when every target in the batch is identical.
    """
    y_pred = tf.reshape(y_pred, [-1])
    y_true = tf.reshape(tf.cast(y_true, y_pred.dtype), [-1])
    ss_res = tf.reduce_sum(tf.square(y_true - y_pred))
    ss_tot = tf.reduce_sum(tf.square(y_true - tf.reduce_mean(y_true)))
    return 1.0 - ss_res / (ss_tot + tf.keras.backend.epsilon())

# Fully connected network: four ReLU hidden layers (256, 384, 384, 512 units), each
# followed by 40% dropout, and a single linear output unit.
model = Sequential()
reg = tf.keras.regularizers.l1_l2(l1=0.0001, l2=0.008)
model.add(Dense(256 ,input_dim=xtr.shape[1], kernel_initializer='normal', kernel_regularizer=reg, activation='relu'))
model.add(Dropout(0.4))
for units in (384, 384, 512):
    model.add(Dense(units, kernel_initializer='normal', kernel_regularizer=reg, activation='relu'))
    model.add(Dropout(0.4))
model.add(Dense(1,kernel_initializer='normal'))
# r2_keras is defined at the top of this cell: compile() needs it and no other
# visible cell defines or imports it.
model.compile(loss='mean_squared_error', optimizer='adam', metrics=[r2_keras])
model.summary()
# NOTE(review): the held-out test split doubles as the validation set here, so the
# 'val_*' curves are test-set curves; nothing is tuned on them in this cell.
history= model.fit(xtr, ytr, validation_data=(xte, yte), epochs=100,verbose=2)
# evaluation of the model
# plotting loss during training ('test' is the validation split passed to fit)
plt.title('Loss')
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.savefig('regloss.png')
plt.show()
yp=model.predict(xte)
ya=model.predict(xtr)

def _plot_predictions(y_true, y_pred, out_file, line_color=None, lims=(0, 100)):
    """Scatter predictions against true values with an identity line, save and show.

    y_true / y_pred: values for the x / y axis.
    out_file: file name handed to plt.savefig.
    line_color: colour of the identity line; None keeps matplotlib's default.
    lims: shared (low, high) limits of both axes and end points of the identity line.
    """
    lims = list(lims)
    plt.axes(aspect='equal')
    plt.scatter(y_true, y_pred)
    plt.xlabel('True Values')
    plt.ylabel('Predictions')
    plt.xlim(lims)
    plt.ylim(lims)
    # Only pass a colour when one was requested so the default colour cycle is untouched.
    line_kwargs = {} if line_color is None else {'color': line_color}
    plt.plot(lims, lims, **line_kwargs)
    plt.savefig(out_file)
    plt.show()

# plotting predictions against true values for the test data
_plot_predictions(yte, yp, 'testreg.png')
# plotting predictions against true values for the training data
_plot_predictions(ytr, ya, 'trainreg.png', line_color='red')

In [None]:
#MAE of testing dataset
# reshape((-1,1)) turns the targets into a column that lines up with the (n, 1)
# predictions, whatever the size of the split (no hard-coded row count).
y=yte.values.reshape((-1,1))
print("R2 Score=",r2_score(yte,yp))
er=np.abs(yp-y)
# mean absolute error, its standard deviation, and the individual absolute errors
print(er.mean(),er.std(),er.T)
#MAE of training dataset
ys=ytr.values.reshape((-1,1))
print("R2 Score=",r2_score(ytr,ya))
err=np.abs(ya-ys)
# report the training errors (err), not the test errors (er)
print(err.mean(),err.std(),err.T)