## Beta-2 adrenergic receptor - part 2 (MMP identification)

### Import libraries

In [1]:
import pandas as pd

In [2]:
from tqdm import tqdm
tqdm.pandas()

import sys
sys.path.append('/home/daniel/wizepair2')
from mmpa.mmp import MMP

### Read in the dataset

In [3]:
df = pd.read_csv('beta2_agonists_stripped.csv')
len(df.index)

7603

### Create cartesian product of unique molecules tested in the same assay

In [4]:
df_pairs = pd.merge(df, df, on='assay_chembl_id')
df_pairs = df_pairs[['stripped_smiles_x', 'stripped_smiles_y']].drop_duplicates()
df_pairs

Unnamed: 0,stripped_smiles_x,stripped_smiles_y
0,COc1cc([C@H](O)CO)ccc1O,COc1cc([C@H](O)CO)ccc1O
1,COc1cc([C@H](O)CO)ccc1O,CC(C)(C)NC[C@H](O)c1ccc(O)c(CO)c1
2,COc1cc([C@H](O)CO)ccc1O,CC[C@H](NC(C)C)[C@H](O)c1ccc(O)c(O)c1
3,COc1cc([C@H](O)CO)ccc1O,CC(C)NC[C@H](O)c1ccc2ccccc2c1
5,COc1cc([C@H](O)CO)ccc1O,NC[C@H](O)c1ccc(O)c(O)c1
...,...,...
30329,COc1cccc(CC(C)NCC(O)c2ccc(O)c(O)c2)c1,CC(Cc1ccc(O)cc1)NCC(O)c1ccc(O)c(O)c1.O=C(O)c1c...
30330,CC(Cc1ccc(O)cc1)NCC(O)c1ccc(O)c(O)c1.O=C(O)c1c...,CNCC(SC)c1ccc(O)c(O)c1
30331,CC(Cc1ccc(O)cc1)NCC(O)c1ccc(O)c(O)c1.O=C(O)c1c...,COc1cccc(CC(C)NCC(O)c2ccc(O)c(O)c2)c1
30850,CC(C)NCC(O)c1ccc(O)c(NS(C)(=O)=O)c1,CC(C)NCC(O)c1ccc(Cl)c(Cl)c1


### Identify all pairs

In [5]:
df_pairs = df_pairs.progress_apply(lambda x: MMP(x.stripped_smiles_x, x.stripped_smiles_y, fuzziness=5).execute(), axis=1)

100%|██████████| 3565/3565 [26:56<00:00,  2.20it/s]  


In [6]:
df_pairs = pd.json_normalize(df_pairs.explode())
df_pairs.sample(3).transpose()

Unnamed: 0,12744,2384,6018
smiles1,CC(C)(C)NCC(O)c1ccc(O)c(C(=O)NN)c1,COc1cc(C(O)CN)ccc1O,CC(O)c1cc(C(O)CNC(C)(C)C)ccc1O
smiles2,CC(C)(C)NCC(O)c1ccc(O)c(C(=O)NN)c1,COc1cc(C(O)CN)ccc1O,CC(O)c1cc(C(O)CNC(C)(C)C)ccc1O
percentmcs,1,1,1
radius,4,4,2
valid,False,False,False
smirks,>>,>>,>>
fragment1,,,
fragment2,,,


### Drop failures and write output to file

In [7]:
df_pairs = df_pairs[df_pairs.valid]
df_pairs.to_csv('beta2_agonists_pairs.csv', index=False)