## Beta-2 adrenergic receptor - part 2 (MMP identification)

### Import libraries

In [1]:
import pandas as pd

In [2]:
# tqdm.pandas() registers the `progress_apply` method on pandas objects;
# the MMP identification cell below relies on it to show a progress bar.
from tqdm import tqdm
tqdm.pandas()

# Make the local wizepair2 checkout importable so that `mmpa.mmp` resolves.
# NOTE(review): this is a hard-coded, machine-specific absolute path; the
# notebook will only run where the checkout lives at this exact location.
import sys
sys.path.append('/home/daniel/wizepair2')
from mmpa.mmp import MMP

### Read in the dataset

In [3]:
# Load the input table from CSV and show how many rows it contains.
df = pd.read_csv('beta2_agonists_stripped.csv')
df.shape[0]

7603

### Create Cartesian product of unique molecules tested in the same assay

In [4]:
# Self-join the dataset on the assay identifier so that every molecule is
# paired with every molecule measured in the same assay (this includes a
# molecule paired with itself, and both orderings of each pair). Only the two
# SMILES columns are kept, and repeated pairs are collapsed to a single row.
df_pairs = (
    df.merge(df, on='assay_chembl_id')
      .loc[:, ['stripped_smiles_x', 'stripped_smiles_y']]
      .drop_duplicates()
)
df_pairs

Unnamed: 0,stripped_smiles_x,stripped_smiles_y
0,COc1cc([C@H](O)CO)ccc1O,COc1cc([C@H](O)CO)ccc1O
1,COc1cc([C@H](O)CO)ccc1O,CC(C)(C)NC[C@H](O)c1ccc(O)c(CO)c1
2,COc1cc([C@H](O)CO)ccc1O,CC[C@H](NC(C)C)[C@H](O)c1ccc(O)c(O)c1
3,COc1cc([C@H](O)CO)ccc1O,CC(C)NC[C@H](O)c1ccc2ccccc2c1
5,COc1cc([C@H](O)CO)ccc1O,NC[C@H](O)c1ccc(O)c(O)c1
...,...,...
30329,COc1cccc(CC(C)NCC(O)c2ccc(O)c(O)c2)c1,CC(Cc1ccc(O)cc1)NCC(O)c1ccc(O)c(O)c1.O=C(O)c1c...
30330,CC(Cc1ccc(O)cc1)NCC(O)c1ccc(O)c(O)c1.O=C(O)c1c...,CNCC(SC)c1ccc(O)c(O)c1
30331,CC(Cc1ccc(O)cc1)NCC(O)c1ccc(O)c(O)c1.O=C(O)c1c...,COc1cccc(CC(C)NCC(O)c2ccc(O)c(O)c2)c1
30850,CC(C)NCC(O)c1ccc(O)c(NS(C)(=O)=O)c1,CC(C)NCC(O)c1ccc(Cl)c(Cl)c1


### Identify all pairs

In [5]:
def _run_mmp(pair):
    """Run the MMP search for one row of candidate pairs and return its result.

    `pair` is a row carrying `stripped_smiles_x` and `stripped_smiles_y`.
    NOTE(review): the meaning of `fuzziness=5` is defined by the wizepair2
    `MMP` class and is not visible in this notebook - confirm before tuning.
    """
    return MMP(pair.stripped_smiles_x, pair.stripped_smiles_y, fuzziness=5).execute()


# Apply the search row by row (with a tqdm progress bar), then flatten the
# per-pair results into one column per field.
df_pairs = pd.json_normalize(df_pairs.progress_apply(_run_mmp, axis=1))

# Show three randomly chosen results, transposed so each pair is a column.
df_pairs.sample(3).T

100%|██████████| 3565/3565 [20:00<00:00,  2.97it/s]  


Unnamed: 0,3527,2389,1813
smiles1,CNC[C@H](O)c1ccc(OC(=O)C(C)(C)C)c(OC(=O)C(C)(C...,CCC(NC(C)C)C(O)c1ccc(O)c(O)c1,CCNC[C@H](O)c1ccc(O)c(O)c1
smiles2,CNCC(O)c1ccc(O)c(O)c1,CC(C)(C)NC[C@H](O)c1ccc(O)c(CO)c1,CC[C@H](NC(C)C)[C@H](O)c1ccc(O)c(O)c1
fragment1,[H][C@]([OH])([CH2][NH][CH3])[c]1[cH][cH][c]([...,[CH3][CH2][CH]([NH][CH]([CH3])[CH3])[CH]([OH])...,[cH2][cH][c]([cH][cH2])[C]([OH])[CH2][NH][CH2]...
fragment2,[CH3][NH][CH2][CH]([OH])[c]1[cH][cH][c]([OH])[...,[H][C@]([OH])([CH2][NH][C]([CH3])([CH3])[CH3])...,[H][C@@]([CH2][CH3])([NH][CH]([CH3])[CH3])[C](...
percentmcs,0.384615,0.611111,0.684211
smirks,[#6:12](-[#7:11](-[#6:10](-[#6@:6](-[#8:13]-[H...,[#6](-[#6](-[#6:6](-[#7:7](-[#6:8](-[#6:10](-[...,[#6:15](-[#6:13](-[#7:14](-[#6:10](-[#6@:5](-[...


### Drop failures and write output to file

In [6]:
# Keep only the pairs whose `smirks` field is populated; a missing value is
# treated as a failed pair and dropped. The remainder is saved without the
# index column.
has_smirks = df_pairs['smirks'].notna()
df_pairs = df_pairs[has_smirks]
df_pairs.to_csv('beta2_agonists_pairs.csv', index=False)