## Histone deacetylase 1 - part 2 (MMP identification)

### Import libraries

In [1]:
import pandas as pd

from wizepair2.mmp import MMP

In [2]:
# pandarallel is set up here so that the `parallel_apply` call in the
# pair-identification cell further down is available on DataFrames.
from pandarallel import pandarallel
# progress_bar=True renders a progress widget while the workers run; the
# number of workers actually used is reported in this cell's output.
pandarallel.initialize(progress_bar=True)

INFO: Pandarallel will run on 10 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


### Read in the dataset

In [3]:
# Load the stripped HDAC1 inhibitor records produced upstream and show how
# many rows were read.
df = pd.read_csv('hdac1_inhibitors_stripped.csv')
df.shape[0]

108

### Create cartesian product of unique molecules tested in the same assay

In [4]:
# Self-join on the assay identifier: each molecule is paired with every
# molecule measured in the same assay (itself included, and in both orders).
# Only the two columns the join actually needs are carried into the merge, so
# the intermediate per-assay cartesian product stays narrow; the resulting
# pairs (and their index labels) are the same as merging the full frame.
assay_smiles = df[['assay_chembl_id', 'stripped_smiles']]
df_pairs = pd.merge(assay_smiles, assay_smiles, on='assay_chembl_id')
# A pair that co-occurs in several assays is kept only once.
df_pairs = df_pairs[['stripped_smiles_x', 'stripped_smiles_y']].drop_duplicates()
df_pairs

Unnamed: 0,stripped_smiles_x,stripped_smiles_y
0,CC(C)N1CCN(Cc2cnc(-c3ccc(C(=O)Nc4ccccc4N)cc3)c...,CC(C)N1CCN(Cc2cnc(-c3ccc(C(=O)Nc4ccccc4N)cc3)c...
1,CC(C)N1CCN(Cc2cnc(-c3ccc(C(=O)Nc4ccccc4N)cc3)c...,CCN1CCN(Cc2cnc(-c3ccc(C(=O)Nc4ccccc4N)cc3)c(C#...
2,CCN1CCN(Cc2cnc(-c3ccc(C(=O)Nc4ccccc4N)cc3)c(C#...,CC(C)N1CCN(Cc2cnc(-c3ccc(C(=O)Nc4ccccc4N)cc3)c...
3,CCN1CCN(Cc2cnc(-c3ccc(C(=O)Nc4ccccc4N)cc3)c(C#...,CCN1CCN(Cc2cnc(-c3ccc(C(=O)Nc4ccccc4N)cc3)c(C#...
4,CCN1CCN(Cc2cnc(-c3ccc(C(=O)Nc4ccccc4N)cc3)c(Cl...,CCN1CCN(Cc2cnc(-c3ccc(C(=O)Nc4ccccc4N)cc3)c(Cl...
...,...,...
120,CC(C)N1CCN(Cc2cnc(-c3ccc(C(=O)Nc4ccccc4N)cc3)c...,Cc1cc(CN2CCN(C(C)C)CC2)cnc1-c1ccc(C(=O)Nc2cccc...
121,CC(C)N1CCN(Cc2cnc(-c3ccc(C(=O)Nc4ccccc4N)cc3)c...,Cc1cc(CN2CCC2)cnc1-c1ccc(C(=O)Nc2ccccc2N)cc1
122,CC(C)N1CCN(Cc2cnc(-c3ccc(C(=O)Nc4ccccc4N)cc3)c...,CC(C)N1CCN(Cc2cnc(-c3ccc(C(=O)Nc4ccccc4N)cc3)c...
123,CC(C)N1CCN(Cc2cnc(-c3ccc(C(=O)Nc4ccccc4N)cc3)c...,Nc1ccccc1NC(=O)c1ccc(-c2ncc(CN3CCC3)cc2F)cc1


### Identify matched molecular pairs (MMPs) for each pair of molecules

In [5]:
# Run the MMP search for every (molecule x, molecule y) row in parallel.
#
# The rows are shuffled before being handed to the workers - presumably so
# that expensive pairs are spread across workers rather than clustered in one
# chunk (TODO confirm). The shuffle is seeded so that Restart & Run All yields
# the same row order every time.
#
# NOTE(review): strictness=7 is passed straight through to MMP; its meaning is
# not documented in this notebook - confirm against the wizepair2 docs.
#
# NOTE: this rebinds `df_pairs` from the DataFrame of SMILES pairs to the
# per-row results of `execute()`, so this cell cannot be re-run on its own
# without first re-running the cell that builds the pairs.
df_pairs = df_pairs.sample(frac=1, random_state=42).parallel_apply(
    lambda x: MMP(x.stripped_smiles_x, x.stripped_smiles_y, strictness=7).execute(),
    axis=1,
)

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=13), Label(value='0 / 13'))), HBox…

In [6]:
# Flatten the per-pair results into one row per result record.
# explode() turns each list element into its own row; an empty list (or a
# missing result) becomes NaN, which json_normalize cannot flatten, so those
# placeholders are dropped before normalising.
df_pairs = pd.json_normalize(df_pairs.explode().dropna())
# Preview three records, transposed so the wide columns read vertically.
# The sample is seeded so the preview is the same on every re-run.
df_pairs.sample(3, random_state=42).transpose()

Unnamed: 0,18,390,369
smiles1,Cc1cc(CN2CCN(C(C)C)CC2)cnc1-c1ccc(C(=O)Nc2cccc...,Nc1ccccc1NC(=O)c1ccc(-c2ncc(CN3CCC3)cc2F)cc1,CC(C)N1CCN(Cc2cnc(-c3ccc(C(=O)Nc4ccccc4N)cc3)c...
smiles2,CC(C)N1CCN(Cc2cnc(-c3ccc(C(=O)Nc4ccccc4N)cc3)c...,CCN1CCN(Cc2cnc(-c3ccc(C(=O)Nc4ccccc4N)cc3)c(C#...,CC(C)N1CCN(Cc2cnc(-c3ccc(C(=O)Nc4ccccc4N)cc3)c...
percentmcs,0.941176,0.787879,0.969697
radius,4.0,4.0,1.0
valid,True,True,True
solversecs,6.231694,3.474741,3.626344
embedding,"[0, 0, 0, 0, 0, 0, 15, 196, 276, 148, 97, 0, 0...","[0, 0, 0, 0, 0, 2, 66, 246, 159, 119, 25, 0, 0...","[0, 0, 0, 0, 0, 0, 54, 210, 238, 109, 82, 0, 0..."
predsolversecs,8.932,5.277,7.56
error,,,
smirks,[H]-[#6:1]:[#6:2](:[#6:3]-[H])-[#6:4]1:[#7:5]:...,[H]-[#6:1]:[#6:2](:[#6:3]-[H])-[#6:4]1:[#7:5]:...,[#6:1]-[#17:2]>>[#6:1]-[#9:2]


### Drop failures and write output to file

In [7]:
# Retain only the records whose `valid` flag is set, then persist them
# (without the index column) for the next stage.
is_valid = df_pairs['valid']
df_pairs = df_pairs.loc[is_valid]
df_pairs.to_csv('hdac1_inhibitors_pairs.csv', index=False)