In [2]:
from rdkit import Chem 
from rdkit.Chem import Draw 
from ipywidgets import interact, interactive, fixed # For interactive display of conformers
from espsim import EmbedAlignConstrainedScore, EmbedAlignScore, ConstrainedEmbedMultipleConfs, GetEspSim, GetShapeSim 
import pandas as pd 
import matplotlib.pyplot as plt # For making scatter plots 
import numpy as np # editing plots and coefficient correlation 
import scipy.stats as ss # For ranking data 
from scipy.stats import spearmanr 
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
import os 

In [6]:
# Benchmark shape / ESP similarity on a small subset of each DUD target.
#
# For every target: read the active and decoy compound lists, keep the first
# few rows of each, use the first molecules of the combined list as probes,
# score every molecule of the subset against each probe with EmbedAlignScore,
# and write (a) one CSV of per-molecule scores per probe and (b) one
# ROCscores.csv of ROC AUC values per target.
targets = ['ace', 'ache', 'ar', 'cdk2']  # DUD targets to benchmark

# NOTE(review): absolute, machine-specific locations -- edit these two lines
# to run the notebook elsewhere.
dud_dir = '/Users/yada/Documents/UCL/Dissertation/benchmarking_platform-master/compounds/DUD'
results_root = '/Users/yada/Documents/UCL/Dissertation/Benchmarking notebooks/DUD Results'

n_per_class = 25   # rows kept from each of the active / decoy lists (temporary subset)
n_probes = 2       # number of leading molecules used as probe molecules
n_confs = 10       # conformers generated for the probe and for each reference molecule
read_cols = ['# Mol_Title', 'ID', 'SMILES']

for t in targets:

    # Read the tab-separated active and decoy compound lists for this target.
    refSmiles_ac = pd.read_csv(
        os.path.join(dud_dir, 'cmp_list_DUD_' + str(t) + '_actives.dat'),
        sep='\t', usecols=read_cols)
    refSmiles_dc = pd.read_csv(
        os.path.join(dud_dir, 'cmp_list_DUD_' + str(t) + '_decoys.dat'),
        sep='\t', usecols=read_cols)

    # Label each row: 1 = active, 0 = decoy.
    refSmiles_ac['Active_NotActive'] = 1
    refSmiles_dc['Active_NotActive'] = 0

    # Temporary subset: only the first n_per_class rows of each list are used.
    tmpac = refSmiles_ac[0:n_per_class]
    tmpdc = refSmiles_dc[0:n_per_class]

    # Combine actives and decoys into one table with a fresh 0..N-1 index, so
    # that the score lists built below line up with the rows by position.
    newlist = pd.concat([tmpac, tmpdc], ignore_index=True)

    # Parse every SMILES and add explicit hydrogens (needed for the alignment
    # and the calculations). MolFromSmiles returns None when it cannot parse
    # its input, so fail here with a message that names the offending string.
    refMols = []
    for smi in newlist['SMILES']:
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            raise ValueError('Could not parse SMILES for target ' + str(t) + ': ' + str(smi))
        refMols.append(Chem.AddHs(mol))

    # The combined table already holds at most 2 * n_per_class rows, so it is
    # used as the working subset without further slicing.
    tmplist = newlist
    tmpMols = refMols

    # Take the first n_probes molecules as probe molecules. They are built as
    # separate molecule objects rather than reusing the entries of tmpMols.
    # NOTE(review): each probe is also present in tmpMols, so it is scored
    # against itself -- confirm this is intended for the AUC calculation.
    prbSmile = tmplist['SMILES'][:n_probes]
    prbMol = [Chem.AddHs(Chem.MolFromSmiles(smi)) for smi in prbSmile]

    # Per-target output folder. makedirs also creates a missing parent folder
    # and does not fail when the folder already exists.
    out_dir = os.path.join(results_root, str(t))
    os.makedirs(out_dir, exist_ok=True)

    rocshapelist = []
    rocesplist = []
    rocavrlist = []

    for count, probe in enumerate(prbMol, start=1):
        simShape, simEsp = EmbedAlignScore(probe, tmpMols, getBestESP=False, prbNumConfs=n_confs, refNumConfs=n_confs, prbCharge=[], refCharges=[], metric="carbo", integrate="gauss", partialCharges="gasteiger")

        # Per-molecule scores for this probe.
        data = {'Mol_Title': tmplist['# Mol_Title'], 'ID': tmplist['ID'], 'Smiles': tmplist['SMILES'], 'Shape': simShape, 'ESP': simEsp, 'Active_NotActive': tmplist['Active_NotActive']}
        newfile = pd.DataFrame(data)

        # Row-wise mean of the shape and ESP similarity.
        newfile['Average'] = newfile[['Shape', 'ESP']].mean(axis=1)
        newfile.to_csv(os.path.join(out_dir, 'TestDUD_FalseESP' + str(count) + '.csv'))

        # ROC AUC of each score against the active / decoy label; a larger
        # value means the score separates actives from decoys better.
        auc = roc_auc_score(newfile['Active_NotActive'], newfile['Shape'])
        print(auc)  # only the shape AUC is echoed to the cell output
        rocshapelist.append(auc)

        auc = roc_auc_score(newfile['Active_NotActive'], newfile['ESP'])
        rocesplist.append(auc)

        auc = roc_auc_score(newfile['Active_NotActive'], newfile['Average'])
        rocavrlist.append(auc)

    # One row per probe: its SMILES and the three AUC values.
    data = {'Smiles': prbSmile, 'ROC Shape': rocshapelist, 'ROC ESP': rocesplist, 'ROC Average': rocavrlist}
    newfileroc = pd.DataFrame(data)
    newfileroc.to_csv(os.path.join(out_dir, 'ROCscores.csv'))

hello
0.9632000000000001
hello
0.9472
hello
0.6032
hello
0.8224
hello
0.9935999999999999
hello
0.9888
hello
0.7936000000000001
hello
0.8


In [4]:
# Sanity check: show the number of probe SMILES next to the lengths of the
# three per-probe AUC lists (they are combined into one table, so the four
# numbers are expected to agree).
print(*map(len, (prbSmile, rocshapelist, rocesplist, rocavrlist)))

2 1 1 1


In [9]:
# Pool the per-probe ROC AUC values of every target and summarise each
# similarity measure by its overall mean and standard deviation.
# NOTE(review): absolute, machine-specific location -- edit to relocate.
results_root = '/Users/yada/Documents/UCL/Dissertation/Benchmarking notebooks/DUD Results'

shapelist = []
esplist = []
avrlist = []

for t in targets:
    files = pd.read_csv(os.path.join(results_root, str(t), 'ROCscores.csv'))

    # Collect the individual scores in flat lists (one entry per probe and
    # target) rather than appending whole columns, so that the statistics
    # below do not depend on every target having the same number of rows.
    shapelist.extend(files['ROC Shape'])
    esplist.extend(files['ROC ESP'])
    avrlist.extend(files['ROC Average'])

# Mean over all pooled scores.
shapeavr = np.mean(shapelist)
espavr = np.mean(esplist)
avravr = np.mean(avrlist)

# np.std is called with its default settings (population standard deviation).
stdshape = np.std(shapelist)
stdesp = np.std(esplist)
stdavr = np.std(avrlist)

# Single-row summary table written next to the per-target result folders.
data = {'Avr ROC Shape': [shapeavr], 'Avr ROC ESP': [espavr], 'Avr ROC Average': [avravr], 'Std Shape': [stdshape], 'Std Esp': [stdesp], 'Std Avr': [stdavr]}
summary = pd.DataFrame(data)
summary.to_csv(os.path.join(results_root, 'AvrStdResults.csv'))
    