## Beta-2 adrenergic receptor - part 2 (MMP identification)

### Import libraries

In [1]:
import pandas as pd

from wizepair2.mmp import MMP

In [2]:
from pandarallel import pandarallel
# Initialise pandarallel with its progress bar switched on. This must run
# before the `parallel_apply` call used later in the notebook.
pandarallel.initialize(progress_bar=True)

INFO: Pandarallel will run on 10 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


### Read in the dataset

In [3]:
# Load the input CSV into a DataFrame and show how many rows were read.
df = pd.read_csv(filepath_or_buffer='beta2_agonists_stripped.csv')
df.shape[0]

1302

### Create cartesian product of unique molecules measured against the same target with the same standard type

In [4]:
# Self-join the dataset on target name and standard type so that every row is
# paired with every row sharing those two keys, then keep each distinct
# (SMILES, SMILES) combination exactly once.
join_keys = ['target_pref_name', 'standard_type']
smiles_cols = ['stripped_smiles_x', 'stripped_smiles_y']
df_pairs = (
    df.merge(df, on=join_keys)
      .loc[:, smiles_cols]
      .drop_duplicates()
)
df_pairs

Unnamed: 0,stripped_smiles_x,stripped_smiles_y
0,CNCC(O)c1ccc(O)c(O)c1F,CNCC(O)c1ccc(O)c(O)c1F
1,CNCC(O)c1ccc(O)c(O)c1F,CNC[C@H](O)c1cc(O)c(O)cc1F
2,CNCC(O)c1ccc(O)c(O)c1F,CN[C@H](C)[C@@H](O)c1ccc(O)c(O)c1
3,CNCC(O)c1ccc(O)c(O)c1F,CNC(C)C(O)c1ccc(O)c(O)c1
4,CNCC(O)c1ccc(O)c(O)c1F,NC(O)c1ccc(O)c(O)c1
...,...,...
30681,CCC(c1ccc(O)c(O)c1)C(CC)c1ccc(O)c(O)c1,CCC(c1ccc(O)c(O)c1)C(CC)c1ccc(O)c(O)c1
30720,CC(C)NC[C@@H](O)c1ccc(O)c(O)c1,CC(C)NC[C@H](O)c1ccc2ccccc2c1
30725,CC(C)NC[C@H](O)c1ccc2ccccc2c1,CC(C)NC[C@@H](O)c1ccc(O)c(O)c1
30728,CC(C)NC[C@H](O)c1ccc2ccccc2c1,CNCC(O)c1ccc(O)c(O)c1


### Identify all pairs

In [5]:
# Run the MMP search on every candidate pair (one pair per row) in parallel.
#
# The rows are shuffled before being handed to the workers (presumably so that
# slow pairs are spread across workers rather than clustered in one chunk --
# TODO confirm). The shuffle is seeded so that the order of the results, and
# therefore of the rows eventually written to disk, is identical on every
# Restart & Run All.
#
# NOTE: this cell rebinds `df_pairs` from the two-column pair table to the
# per-row results of `MMP(...).execute()`, so the cell that builds the pair
# table must be re-run before this one can be re-run.
SHUFFLE_SEED = 42

df_pairs = (
    df_pairs
    .sample(frac=1, random_state=SHUFFLE_SEED)
    .parallel_apply(
        lambda x: MMP(x.stripped_smiles_x, x.stripped_smiles_y, strictness=7).execute(),
        axis=1,
    )
)

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=161), Label(value='0 / 161'))), HB…

In [6]:
# Flatten the per-pair results into a table with one row per result record.
#
# NOTE: this cell rebinds `df_pairs`. `.explode()` is called without a column
# argument, so the input has to be the Series produced by the MMP cell; re-run
# that cell before re-running this one.
records = df_pairs.explode()

# explode() emits NaN for an empty list-like. Those placeholders are not
# result records: drop them so they can neither break json_normalize nor turn
# into all-NaN rows that would break the later boolean filter on `valid`.
records = records.dropna()

df_pairs = pd.json_normalize(records)

# Preview three random records, transposed so each record reads top to bottom.
df_pairs.sample(3).transpose()

Unnamed: 0,6039,5505,1410
smiles1,COc1ccccc1OCCNC[C@@H](O)c1ccc(O)c(O)c1,CNC[C@@H](O)c1ccc(O)c(O)c1F,C[C@H](N)[C@H](O)c1ccc(O)c(O)c1
smiles2,COc1ccccc1OCCNC[C@H](O)c1ccc(O)c(O)c1,CNCC(O)c1cc(F)c(O)c(O)c1F,CNCC(O)c1cc(O)c(O)cc1F
percentmcs,0.958333,0.8,0.533333
radius,3.0,4.0,4.0
valid,True,True,True
solversecs,0.190893,0.021462,0.018631
embedding,"[0, 0, 2, 36, 26, 114, 31, 68, 7, 0, 0, 0, 0, ...","[1, 22, 26, 20, 24, 9, 0, 0, 0, 0, 0, 0, 0, 0,...","[2, 37, 16, 18, 18, 5, 0, 0, 0, 0, 0, 0, 0, 0,..."
predsolversecs,0.635,0.111,0.101
error,,,
smirks,[H]-[#6:1]:[#6:2](-[H]):[#6:3](-[#6@:4](-[#8:5...,[H]-[#8:1]-[#6:2]1:[#6:3](-[H]):[#6:4](-[H]):[...,[H]-[#8:1]-[#6:2]1:[#6:3](-[H]):[#6:4](-[H]):[...


### Drop failures and write output to file

In [7]:
# Keep only the rows whose `valid` flag is set, then write them to CSV
# without the index column.
df_pairs = df_pairs.loc[df_pairs['valid']]
df_pairs.to_csv(path_or_buf='beta2_agonists_pairs.csv', index=False)