## Beta-2 adrenergic receptor - part 2 (MMP identification)

### Import libraries

In [1]:
import pandas as pd

In [2]:
import os
import sys

from tqdm import tqdm

# Directory that is put on sys.path so that `mmpa` can be imported.
# Set the WIZEPAIR2_PATH environment variable to point at a different location;
# the default keeps the path this notebook was originally run with.
WIZEPAIR2_PATH = os.environ.get('WIZEPAIR2_PATH', '/home/daniel/wizepair2')
if WIZEPAIR2_PATH not in sys.path:  # avoid stacking duplicates when the cell is re-run
    sys.path.append(WIZEPAIR2_PATH)

from mmpa.mmp import MMP

# Register `progress_apply` on pandas objects so long-running applies show a progress bar.
tqdm.pandas()

### Read in the dataset

In [3]:
# Read the input CSV into a DataFrame and show how many rows it contains.
df = pd.read_csv('beta2_agonists_stripped.csv')
df.shape[0]

7599

### Create the Cartesian product of unique molecules measured against the same target with the same activity type

In [4]:
# Self-join the table on target name and activity type, so each row is paired with
# every row that shares both values (pandas suffixes the clashing columns _x / _y).
# Only the two SMILES columns are kept, and repeated (x, y) combinations are collapsed.
df_pairs = (
    df.merge(df, on=['target_pref_name', 'standard_type'])
    .loc[:, ['stripped_smiles_x', 'stripped_smiles_y']]
    .drop_duplicates()
)
df_pairs

Unnamed: 0,stripped_smiles_x,stripped_smiles_y
0,COc1cc([C@H](O)CO)ccc1O,COc1cc([C@H](O)CO)ccc1O
1,COc1cc([C@H](O)CO)ccc1O,CC(C)(C)NC[C@H](O)c1ccc(O)c(CO)c1
2,COc1cc([C@H](O)CO)ccc1O,CC[C@H](NC(C)C)[C@H](O)c1ccc(O)c(O)c1
3,COc1cc([C@H](O)CO)ccc1O,CC(C)NC[C@H](O)c1ccc2ccccc2c1
8,COc1cc([C@H](O)CO)ccc1O,NC[C@H](O)c1ccc(O)c(O)c1
...,...,...
102145,COc1cccc(CC(C)NCC(O)c2ccc(O)c(O)c2)c1,COc1cccc(CC(C)NCC(O)c2ccc(O)c(O)c2)c1
102180,CNCC(SC)c1ccc(O)c(O)c1,CC(Cc1ccc(O)cc1)NCC(O)c1ccc(O)c(O)c1.O=C(O)c1c...
102183,COc1cccc(CC(C)NCC(O)c2ccc(O)c(O)c2)c1,CC(Cc1ccc(O)cc1)NCC(O)c1ccc(O)c(O)c1.O=C(O)c1c...
102184,CC(Cc1ccc(O)cc1)NCC(O)c1ccc(O)c(O)c1.O=C(O)c1c...,CNCC(SC)c1ccc(O)c(O)c1


### Identify all pairs

In [5]:
# Evaluate every candidate pair row by row, with a tqdm progress bar.
# Each row's result is whatever MMP(...).execute() returns for that pair of SMILES.
df_pairs = df_pairs.progress_apply(
    lambda pair: MMP(
        pair.stripped_smiles_x,
        pair.stripped_smiles_y,
        fuzziness=5,
    ).execute(),
    axis=1,
)

100%|██████████| 4920/4920 [36:46<00:00,  2.23it/s]  


In [6]:
# Flatten the per-pair results into a table with one row per result record.
# - explode() emits a missing value for any pair whose result list is empty; those
#   placeholders are dropped so they cannot reach json_normalize or the later
#   boolean filter on `valid`.
# - The records are passed as a plain list so the resulting frame always gets a fresh
#   0..n-1 index, independent of how the installed pandas treats Series input.
# NOTE(review): assumes execute() returns a list of dicts per pair - confirm against mmpa.
df_pairs = pd.json_normalize(df_pairs.explode().dropna().tolist())

# Preview up to three records, transposed for readability. The fixed seed keeps the
# preview stable across re-runs; the min() keeps the cell working on tiny inputs.
df_pairs.sample(min(3, len(df_pairs)), random_state=0).transpose()

Unnamed: 0,18564,8830,8260
smiles1,CC(C)NCC(O)c1ccc(O)c(O)c1,CC(C)NCC(O)c1ccc(O)c(N)c1,CC(CCCNC(=O)OCc1ccccc1)NCC(O)c1ccc(O)c(O)c1
smiles2,CC(Cc1ccc2c(c1)OCO2)NCC(O)c1ccc(O)c(O)c1,NC[C@H](O)c1ccc(O)c(O)c1,CC(CCNC(=O)OCc1ccccc1)NCC(O)c1ccc(O)c(O)c1
percentmcs,0.583333,0.6,0.642857
radius,4,2,4
valid,True,True,True
smirks,[#6:9](-[#6:3](-[#6:10](-[H])(-[H])-[H])(-[#7:...,[#6](-[#6](-[#6](-[H])(-[H])-[H])(-[#7:11](-[#...,[#6:10](-[#6:4](-[#6:5](-[#6:1](-[#6:2](-[#7:3...
fragment1,[CH3][CH2][NH][CH]([CH3])[CH3],[cH2][c]([NH2])[cH][c]([cH2])[CH]([OH])[CH2][N...,[CH3][CH]([NH2])[CH2][CH2][CH2][NH][C](=[O])[O...
fragment2,[CH3][CH2][NH][CH]([CH3])[CH2][c]1[cH][cH][c]2...,[H][C@]([OH])([CH2][NH2])[c]([cH2])[cH][c]([cH...,[CH3][CH]([NH2])[CH2][CH2][NH][C](=[O])[O][CH2...


### Drop failures and write output to file

In [7]:
# Keep only the rows whose `valid` flag is true, then save them without the index column.
df_pairs = df_pairs.loc[df_pairs['valid']]
df_pairs.to_csv('beta2_agonists_pairs.csv', index=False)