## Beta-2 adrenergic receptor - part 2 (MMP identification)

### Import libraries

In [1]:
import pandas as pd

In [2]:
from pandarallel import pandarallel
pandarallel.initialize(progress_bar=True)

import sys
sys.path.append('/home/daniel/wizepair2')
from classes.mmp import MMP

INFO: Pandarallel will run on 6 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


### Read in the dataset

In [3]:
df = pd.read_csv('beta2_agonists_stripped.csv')
len(df.index)

1223

### Create cartesian product of unique molecules tested in the same assay

In [4]:
df_pairs = pd.merge(df, df, on=['target_pref_name', 'standard_type'])
df_pairs = df_pairs[['stripped_smiles_x', 'stripped_smiles_y']].drop_duplicates()
df_pairs

Unnamed: 0,stripped_smiles_x,stripped_smiles_y
0,COc1ccccc1OCCNC[C@@H](O)c1ccc(O)c(O)c1,COc1ccccc1OCCNC[C@@H](O)c1ccc(O)c(O)c1
1,COc1ccccc1OCCNC[C@@H](O)c1ccc(O)c(O)c1,CNC[C@H](O)c1ccc(O)c(O)c1F
2,COc1ccccc1OCCNC[C@@H](O)c1ccc(O)c(O)c1,CNCC(O)c1ccc(O)c2nc(O)sc12
3,COc1ccccc1OCCNC[C@@H](O)c1ccc(O)c(O)c1,CNCC(O)c1cc(O)c(O)cc1F
4,COc1ccccc1OCCNC[C@@H](O)c1ccc(O)c(O)c1,CC(C)NCC(O)c1ccc(Cl)c(Cl)c1
...,...,...
31395,CCC(c1ccc(O)c(O)c1)C(CC)c1ccc(O)c(O)c1,CCC(c1ccc(O)c(O)c1)C(CC)c1ccc(O)c(O)c1
31434,CC(C)NC[C@@H](O)c1ccc(O)c(O)c1,CC(C)NC[C@H](O)c1ccc2ccccc2c1
31439,CC(C)NC[C@H](O)c1ccc2ccccc2c1,CC(C)NC[C@@H](O)c1ccc(O)c(O)c1
31442,CC(C)NC[C@H](O)c1ccc2ccccc2c1,CNCC(O)c1ccc(O)c(O)c1


### Identify all pairs

In [5]:
df_pairs = df_pairs.sample(frac=1).parallel_apply(lambda x: MMP(x.stripped_smiles_x, x.stripped_smiles_y, strictness=5).execute(), axis=1)

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=268), Label(value='0 / 268'))), HB…

In [6]:
df_pairs = pd.json_normalize(df_pairs.explode())
df_pairs.sample(3).transpose()

Unnamed: 0,1558,4190,258
smiles1,CCC(N)C(O)c1ccc(O)c(O)c1,C[C@H](N)[C@H](O)c1ccc(O)c(O)c1,CC(C)(C)NCC(O)c1ccc(O)c(C(N)=O)c1
smiles2,CNC[C@H](O)c1ccc(O)cc1,COc1cc([C@H](O)CO)ccc1O,CC(C)NCC(O)c1ccc(O)c(O)c1
percentmcs,0.571429,0.6,0.722222
radius,2.0,4.0,3.0
valid,True,True,True
solversecs,0.015005,0.016144,0.04146
embedding,"[31, 17, 11, 6, 18, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[11, 19, 21, 12, 12, 5, 0, 0, 0, 0, 0, 0, 0, 0...","[15, 16, 55, 14, 7, 25, 5, 0, 0, 0, 0, 0, 0, 0..."
predsolversecs,0.153484,0.161966,0.30672
error,,,
smirks,[H]-[#8:1]-[#6:2]1:[#6:3](-[H]):[#6:4](-[H]):[...,[H]-[#8:1]-[#6@:2](-[#6:3]1:[#6:4](-[H]):[#6:5...,[H]-[#6:1]:[#6:2](-[#8:3]-[H]):[#6:4](-[#6](=[...


### Drop failures and write output to file

In [7]:
df_pairs = df_pairs[df_pairs.valid]
df_pairs.to_csv('beta2_agonists_pairs.csv', index=False)