
Molecular Docking and Virtual Screening. Task:
Perform molecular docking simulations to predict the binding
affinity between a protein target and a small molecule ligand.
Additionally, conduct virtual screening to identify potential
drug candidates. Deliverable: A report summarizing the
docking results, including the binding poses and potential lead
compounds.

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [2]:
df = pd.read_csv(r"F:\BE\My Cl2\B4\polymerase_cluster.csv")

In [3]:
df.head()

Unnamed: 0,index,ABS,0,1,2,3,4,5,6,7,...,G1,G2,G3,G4,G5,G6,G7,G8,G9,G10
0,1,abstract astrocytes produce granulocytemacroph...,-0.050448,0.017385,-0.039777,-0.067159,-0.029633,0.074573,-0.050444,-0.010799,...,0,0,0,0,0,1,0,0,0,0
1,2,abstract replication of avian infectious bronc...,-0.128422,-0.084803,0.084813,-0.013748,0.006486,0.128668,0.032655,0.066775,...,0,0,0,0,0,1,0,0,0,0
2,3,abstract the infectivity of vesicular stomatit...,-0.095019,-0.032279,0.017571,-0.06586,0.001315,0.048199,-0.031072,0.010103,...,0,0,0,0,0,1,0,0,0,0
3,4,abstract two temporally and enzymatically dist...,-0.134657,-0.086097,0.026415,-0.027553,-0.02028,-0.044637,-0.013312,-0.033345,...,0,0,0,0,0,0,1,0,0,0
4,5,abstract rnadependent rna polymerase rdrp acti...,-0.222713,-0.100353,0.107456,-0.079828,0.030818,-0.09104,-0.014809,0.012756,...,0,0,0,0,0,0,1,0,0,0


In [4]:
df.describe()

Unnamed: 0,index,0,1,2,3,4,5,6,7,8,...,G1,G2,G3,G4,G5,G6,G7,G8,G9,G10
count,1941.0,1941.0,1941.0,1941.0,1941.0,1941.0,1941.0,1941.0,1941.0,1941.0,...,1941.0,1941.0,1941.0,1941.0,1941.0,1941.0,1941.0,1941.0,1941.0,1941.0
mean,971.0,2.369912e-11,-1.597115e-11,2.575992e-12,4.636786e-12,-1.700155e-11,-2.369912e-11,-1.545595e-12,9.788769e-12,-2.885111e-11,...,0.017002,0.136528,0.134982,0.084493,0.132406,0.217929,0.098403,0.022154,0.132406,0.023699
std,560.462755,0.1332632,0.09854583,0.08434641,0.08368993,0.07889669,0.07804782,0.07508877,0.07051177,0.06963762,...,0.12931,0.343436,0.341792,0.278197,0.339019,0.412945,0.297935,0.147221,0.339019,0.152149
min,1.0,-0.284363,-0.210271,-0.2596602,-0.1993367,-0.2029868,-0.2371054,-0.2551403,-0.3557127,-0.2134723,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,486.0,-0.101437,-0.06876699,-0.056674,-0.05945025,-0.04654148,-0.05341692,-0.04745877,-0.02966343,-0.04166662,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,971.0,-0.01601933,-0.02039921,0.000497486,-0.0125031,-0.01223933,-0.006558889,-0.008344065,0.000244633,-0.004359187,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,1456.0,0.09605404,0.05053176,0.05724056,0.04785435,0.03250968,0.04813849,0.0415155,0.03008328,0.03327347,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1941.0,0.3895761,0.3434733,0.2696224,0.3071795,0.4022183,0.3589496,0.3193979,0.4162633,0.4378696,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [5]:
df.isna().sum()

index    0
ABS      0
0        0
1        0
2        0
3        0
4        0
5        0
6        0
7        0
8        0
9        0
10       0
11       0
12       0
13       0
14       0
15       0
16       0
17       0
18       0
19       0
20       0
21       0
22       0
23       0
24       0
25       0
26       0
27       0
28       0
29       0
G1       0
G2       0
G3       0
G4       0
G5       0
G6       0
G7       0
G8       0
G9       0
G10      0
dtype: int64

In [6]:
df.columns

Index(['index', 'ABS', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10',
       '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22',
       '23', '24', '25', '26', '27', '28', '29', 'G1', 'G2', 'G3', 'G4', 'G5',
       'G6', 'G7', 'G8', 'G9', 'G10'],
      dtype='object')

In [7]:
x = df[['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29']]
y = df[['G1', 'G2', 'G3', 'G4', 'G5', 'G6', 'G7', 'G8', 'G9', 'G10']]

In [8]:
x.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,-0.050448,0.017385,-0.039777,-0.067159,-0.029633,0.074573,-0.050444,-0.010799,-0.016189,0.04083,...,0.002844,-0.001927,0.024008,0.056567,0.007435,-0.013699,-0.026742,-0.005136,0.02025,-0.006446
1,-0.128422,-0.084803,0.084813,-0.013748,0.006486,0.128668,0.032655,0.066775,0.038085,0.008261,...,0.011105,0.052557,-0.022118,0.000516,-0.057766,-0.056399,0.002794,0.006992,0.030501,-0.055875
2,-0.095019,-0.032279,0.017571,-0.06586,0.001315,0.048199,-0.031072,0.010103,-0.013713,0.00104,...,0.026309,0.031643,-0.024822,0.000128,-0.030027,-0.008914,0.017452,-0.005019,0.038202,-0.017136
3,-0.134657,-0.086097,0.026415,-0.027553,-0.02028,-0.044637,-0.013312,-0.033345,0.019789,-0.011843,...,0.027098,0.048639,0.027083,0.030807,-0.037008,-0.0121,0.005392,0.031274,0.015159,-0.002906
4,-0.222713,-0.100353,0.107456,-0.079828,0.030818,-0.09104,-0.014809,0.012756,0.103606,-0.037955,...,0.086049,-0.042678,-0.004993,-0.009535,0.008584,-0.041467,0.095225,-0.065667,0.06055,-0.006368


In [9]:
y.head()

Unnamed: 0,G1,G2,G3,G4,G5,G6,G7,G8,G9,G10
0,0,0,0,0,0,1,0,0,0,0
1,0,0,0,0,0,1,0,0,0,0
2,0,0,0,0,0,1,0,0,0,0
3,0,0,0,0,0,0,1,0,0,0
4,0,0,0,0,0,0,1,0,0,0


In [10]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [11]:
model = RandomForestClassifier(random_state=42)
model.fit(x_train, y_train)

In [12]:
y_pred = model.predict(x_test)

In [13]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy}")

Model Accuracy: 0.8508997429305912


In [14]:
class_report = classification_report(y_test, y_pred)
print("Classification Report:")
print(class_report)

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.71      0.83         7
           1       0.98      0.96      0.97        54
           2       0.96      0.96      0.96        52
           3       1.00      0.93      0.96        29
           4       0.98      0.86      0.91        50
           5       0.99      0.81      0.89        85
           6       1.00      0.81      0.90        37
           7       1.00      0.60      0.75        10
           8       0.98      0.75      0.85        59
           9       1.00      0.83      0.91         6

   micro avg       0.98      0.85      0.91       389
   macro avg       0.99      0.82      0.89       389
weighted avg       0.98      0.85      0.91       389
 samples avg       0.85      0.85      0.85       389



  _warn_prf(average, modifier, msg_start, len(result))
