## Beta-2 adrenergic receptor - part 2 (MMP identification)

### Import libraries

In [1]:
import pandas as pd

In [2]:
from tqdm import tqdm
tqdm.pandas()

import sys
sys.path.append('/home/daniel/wizepair2')
from mmpa.mmp import MMP

### Read in the dataset

In [3]:
df = pd.read_csv('beta2_agonists_stripped.csv')
len(df.index)

7600

### Create cartesian product of unique molecules tested in the same assay

In [4]:
df_pairs = pd.merge(df, df, on=['target_pref_name', 'standard_type'])
df_pairs = df_pairs[['stripped_smiles_x', 'stripped_smiles_y']].drop_duplicates()
df_pairs

Unnamed: 0,stripped_smiles_x,stripped_smiles_y
0,COC(=O)c1cc(C(O)CN)ccc1O,COC(=O)c1cc(C(O)CN)ccc1O
1,COC(=O)c1cc(C(O)CN)ccc1O,NCC(O)c1ccc(O)c(Cl)c1
2,COC(=O)c1cc(C(O)CN)ccc1O,CCc1ccc(C(O)CN)cc1O
3,COC(=O)c1cc(C(O)CN)ccc1O,COc1ccc(C(O)CN)cc1O
4,COC(=O)c1cc(C(O)CN)ccc1O,NCC(O)c1ccc(Cl)c(O)c1
...,...,...
101939,COc1cccc(CC(C)NCC(O)c2ccc(O)c(O)c2)c1,COc1cccc(CC(C)NCC(O)c2ccc(O)c(O)c2)c1
102026,CNCC(SC)c1ccc(O)c(O)c1,CC(Cc1ccc(O)cc1)NCC(O)c1ccc(O)c(O)c1.O=C(O)c1c...
102029,COc1cccc(CC(C)NCC(O)c2ccc(O)c(O)c2)c1,CC(Cc1ccc(O)cc1)NCC(O)c1ccc(O)c(O)c1.O=C(O)c1c...
102030,CC(Cc1ccc(O)cc1)NCC(O)c1ccc(O)c(O)c1.O=C(O)c1c...,CNCC(SC)c1ccc(O)c(O)c1


### Identify all pairs

In [5]:
df_pairs = df_pairs.sample(frac=1).progress_apply(lambda x: MMP(x.stripped_smiles_x, x.stripped_smiles_y, strictness=4).execute(), axis=1)

100%|██████████| 4920/4920 [29:00<00:00,  2.83it/s]  


In [6]:
df_pairs = pd.json_normalize(df_pairs.explode())
df_pairs.sample(3).transpose()

Unnamed: 0,9345,15123,15641
smiles1,COC(=O)c1cc(C(O)CN)ccc1O,CC(CCCC(=O)O)NCC(O)c1ccc(O)c(O)c1,CNCC(O)c1ccc(OC(=O)C(C)(C)C)c(OC(=O)C(C)(C)C)c1
smiles2,NCC(O)c1ccc(O)c(I)c1,CC(CCCCC(=O)O)NCC(O)c1ccc(O)c(O)c1,CNCC(O)c1ccc(O)c(O)c1
percentmcs,0.733333,0.809524,0.44
radius,3,1,3
valid,True,True,True
smirks,[#6](-[#8]-[#6:5](=[#8])-[#6:10](:[#6:12](:[#6...,[#6:6](-[#6:4](=[#8:2])-[#8]-[H])(-[H])-[H]>>[...,[#6:7]1:[#6:6](:[#6:10](:[#6:8](-[#8:9]-[#6](=...
fragment1,[cH2][cH][c]([C](=[O])[O][CH3])[c]([cH2])[OH],[CH3][C](=[O])[OH],[CH3][C]([CH3])([CH3])[C](=[O])[O][c]1[cH][cH]...
fragment2,[cH2][cH][c]([I])[c]([cH2])[OH],[CH3][CH2][C](=[O])[OH],[OH][c]1[cH][cH][cH][cH][c]1[OH]


### Drop failures and write output to file

In [7]:
df_pairs = df_pairs[df_pairs.valid]
df_pairs.to_csv('beta2_agonists_pairs.csv', index=False)