## Beta-2 adrenergic receptor - part 2 (MMP identification)

### Import libraries

In [1]:
import pandas as pd

In [2]:
# Start pandarallel (with a progress bar) so that `parallel_apply` can be used
# on DataFrames later in the notebook.
from pandarallel import pandarallel
pandarallel.initialize(progress_bar=True)

# Put the wizepair2 checkout on the module search path *before* importing MMP
# from it; the import below fails if this line is moved after it.
# NOTE(review): this is a hardcoded absolute path into one user's home
# directory, so the MMP import will fail elsewhere unless it is adjusted.
import sys
sys.path.append('/home/daniel/wizepair2')
from classes.mmp import MMP

INFO: Pandarallel will run on 8 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


### Read in the dataset

In [3]:
# Load the input CSV and display how many rows it contains.
df = pd.read_csv('beta2_agonists_stripped.csv')
df.shape[0]

1223

### Create cartesian product of unique molecules tested against the same target with the same standard type

In [4]:
# Self-join the dataset on target and measurement type so that every record is
# paired with every record sharing both keys. This includes a molecule paired
# with itself and each pair in both orders. Only the two structure columns are
# kept, and repeated (x, y) combinations are collapsed to a single row.
df_pairs = (
    df.merge(df, on=['target_pref_name', 'standard_type'])
      .loc[:, ['stripped_smiles_x', 'stripped_smiles_y']]
      .drop_duplicates()
)
df_pairs

Unnamed: 0,stripped_smiles_x,stripped_smiles_y
0,COc1ccccc1OCCNC[C@@H](O)c1ccc(O)c(O)c1,COc1ccccc1OCCNC[C@@H](O)c1ccc(O)c(O)c1
1,COc1ccccc1OCCNC[C@@H](O)c1ccc(O)c(O)c1,CNC[C@H](O)c1ccc(O)c(O)c1F
2,COc1ccccc1OCCNC[C@@H](O)c1ccc(O)c(O)c1,CNCC(O)c1ccc(O)c2nc(O)sc12
3,COc1ccccc1OCCNC[C@@H](O)c1ccc(O)c(O)c1,CNCC(O)c1cc(O)c(O)cc1F
4,COc1ccccc1OCCNC[C@@H](O)c1ccc(O)c(O)c1,CC(C)NCC(O)c1ccc(Cl)c(Cl)c1
...,...,...
31395,CCC(c1ccc(O)c(O)c1)C(CC)c1ccc(O)c(O)c1,CCC(c1ccc(O)c(O)c1)C(CC)c1ccc(O)c(O)c1
31434,CC(C)NC[C@@H](O)c1ccc(O)c(O)c1,CC(C)NC[C@H](O)c1ccc2ccccc2c1
31439,CC(C)NC[C@H](O)c1ccc2ccccc2c1,CC(C)NC[C@@H](O)c1ccc(O)c(O)c1
31442,CC(C)NC[C@H](O)c1ccc2ccccc2c1,CNCC(O)c1ccc(O)c(O)c1


### Identify all matched molecular pairs (MMPs)

In [5]:
# Run the MMP search on every candidate pair using the pandarallel workers.
# Each row's result is whatever MMP(...).execute() returns for that SMILES pair.
#
# The rows are shuffled before being dispatched — presumably so that slow
# pairs are spread across the worker chunks (TODO confirm). The shuffle is
# seeded so that the order of the results, and therefore the row order of the
# file written at the end of the notebook, is identical on every run.
df_pairs = df_pairs.sample(frac=1, random_state=42).parallel_apply(
    lambda x: MMP(x.stripped_smiles_x, x.stripped_smiles_y, strictness=6).execute(),
    axis=1,
)

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=201), Label(value='0 / 201'))), HB…

In [6]:
# Flatten the per-pair results: explode so each returned element gets its own
# entry, then expand those records into a flat table of columns.
df_pairs = df_pairs.explode().pipe(pd.json_normalize)
# Preview three random rows, transposed so the fields read top to bottom.
df_pairs.sample(n=3).T

Unnamed: 0,5716,631,5836
smiles1,CC(C)NCC(c1ccc(O)c(O)c1)S(=O)(=O)O,CNCC(O)c1ccc(OC(=O)C(C)(C)C)c(OC(=O)C(C)(C)C)c1,CNCC(O)c1cc(O)c(O)cc1F
smiles2,CNC[C@H](O)c1ccc(O)c(O)c1,CNCC(O)c1ccc(O)c(O)c1,NC(O)c1ccc(O)c(O)c1
percentmcs,0.611111,0.44,0.642857
radius,2.0,1.0,3.0
valid,True,True,True
solversecs,0.017439,0.035065,0.007426
embedding,"[9, 18, 41, 12, 11, 17, 0, 0, 0, 0, 0, 0, 0, 0...","[16, 18, 38, 43, 9, 18, 11, 0, 0, 0, 0, 0, 0, ...","[24, 18, 10, 19, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
predsolversecs,0.125,0.182,0.076
error,,,
smirks,[H]-[#6:1]:[#6:2](:[#6:3]-[H])-[#6](-[H])(-[#6...,[H]-[#6](-[H])(-[H])-[#6](-[#6](=[#8])-[#8:1]-...,[H]-[#8:1]-[#6:2]1:[#6:3]:[#6:4](-[H]):[#6:5](...


### Drop failures and write output to file

In [7]:
# Keep only the rows whose `valid` flag is true, then save them without the
# index column.
df_pairs = df_pairs.loc[df_pairs['valid']]
df_pairs.to_csv('beta2_agonists_pairs.csv', index=False)