## Beta-2 adrenergic receptor - part 2 (MMP identification)

### Import libraries

In [1]:
import pandas as pd

In [2]:
import os
import sys

from tqdm import tqdm

# Enable the pandas integration so DataFrame/Series gain progress_apply().
tqdm.pandas()

# The `mmpa` package is imported from a local checkout added to sys.path.
# Set the WIZEPAIR2_PATH environment variable to point at that checkout on
# other machines; the default keeps the original hard-coded location.
sys.path.append(os.environ.get('WIZEPAIR2_PATH', '/home/daniel/wizepair2'))
from mmpa.mmp import MMP

### Read in the dataset

In [3]:
# Load the stripped beta-2 agonist records and display how many rows were read.
df = pd.read_csv('beta2_agonists_stripped.csv')
df.shape[0]

7600

### Create cartesian product of unique molecules tested in the same assay (same target and standard type)

In [4]:
# Self-join the dataset on target and measurement type so every molecule is
# paired with every molecule sharing both values (pandas suffixes the two sides
# with _x / _y), then keep each distinct SMILES pair once.
# Note: self-pairs and both orderings (A, B) / (B, A) are retained.
df_pairs = (
    df.merge(df, on=['target_pref_name', 'standard_type'])
    .loc[:, ['stripped_smiles_x', 'stripped_smiles_y']]
    .drop_duplicates()
)
df_pairs

Unnamed: 0,stripped_smiles_x,stripped_smiles_y
0,COC(=O)c1cc(C(O)CN)ccc1O,COC(=O)c1cc(C(O)CN)ccc1O
1,COC(=O)c1cc(C(O)CN)ccc1O,NCC(O)c1ccc(O)c(Cl)c1
2,COC(=O)c1cc(C(O)CN)ccc1O,CCc1ccc(C(O)CN)cc1O
3,COC(=O)c1cc(C(O)CN)ccc1O,COc1ccc(C(O)CN)cc1O
4,COC(=O)c1cc(C(O)CN)ccc1O,NCC(O)c1ccc(Cl)c(O)c1
...,...,...
101939,COc1cccc(CC(C)NCC(O)c2ccc(O)c(O)c2)c1,COc1cccc(CC(C)NCC(O)c2ccc(O)c(O)c2)c1
102026,CNCC(SC)c1ccc(O)c(O)c1,CC(Cc1ccc(O)cc1)NCC(O)c1ccc(O)c(O)c1.O=C(O)c1c...
102029,COc1cccc(CC(C)NCC(O)c2ccc(O)c(O)c2)c1,CC(Cc1ccc(O)cc1)NCC(O)c1ccc(O)c(O)c1.O=C(O)c1c...
102030,CC(Cc1ccc(O)cc1)NCC(O)c1ccc(O)c(O)c1.O=C(O)c1c...,CNCC(SC)c1ccc(O)c(O)c1


### Identify all matched molecular pairs (MMPs)

In [5]:
# Run the MMP algorithm on every candidate pair, with a tqdm progress bar.
# The rows are shuffled before processing (presumably to even out the progress
# estimate — TODO confirm); the shuffle is seeded so that the row order of the
# results, and therefore of the file written later, is reproducible across runs.
# Expect this cell to be slow: the recorded run took roughly 42 minutes.
df_pairs = df_pairs.sample(frac=1, random_state=42).progress_apply(
    lambda x: MMP(
        x.stripped_smiles_x,
        x.stripped_smiles_y,
        strictness=4,
        correspondence=1,
    ).execute(),
    axis=1,
)

100%|██████████| 4920/4920 [42:07<00:00,  1.95it/s]  


In [6]:
# Flatten the per-pair results: explode() yields one element per row and
# json_normalize() expands those records into columns (assumes each MMP result
# is a list of dicts — TODO confirm).
df_pairs = pd.json_normalize(
    df_pairs.explode()
)
# Preview three random records, transposed so each field reads as a row.
df_pairs.sample(3).T

Unnamed: 0,12771,13889,12895
smiles1,CC(C)NCC(O)c1ccc(O)c(O)c1,COc1cc(C(O)CN)ccc1O,CC(C)NC[C@@H](O)c1ccc(O)c(O)c1
smiles2,CC(C)(C)NCC(O)c1ccc(O)c(NC(N)=O)c1,NCC(O)c1ccc(Br)c(O)c1,C1CNCCN1.COc1cc(C(O)CO)ccc1O.COc1cc(C(O)CO)ccc1O
percentmcs,0.684211,0.769231,0.28125
radius,1,3,1
valid,False,True,False
smirks,[#6:14](-[#6:12](-[#6:15](-[H])(-[H])-[H])(-[#...,[#6](-[#8:5]-[#6:6]1:[#6:1](:[#6:8]:[#6:4](:[#...,[#6](-[#6](-[#6](-[H])(-[H])-[H])(-[#7:1](-[#6...
fragment1,,[CH3][O][c]1[cH][cH][cH][cH][c]1[OH],
fragment2,,[OH][c]1[cH][cH][cH][cH][c]1[Br],


### Drop failures and write output to file

In [7]:
# Keep only the rows whose `valid` flag is set, then save them without the
# index column.
df_pairs = df_pairs.loc[df_pairs['valid']]
df_pairs.to_csv('beta2_agonists_pairs.csv', index=False)