## Beta-2 adrenergic receptor - part 2 (MMP identification)

### Import libraries

In [1]:
import pandas as pd

In [2]:
import os
import sys

from pandarallel import pandarallel

# Patch pandas with pandarallel's `parallel_apply` and show a progress bar while it runs.
pandarallel.initialize(progress_bar=True)

# Make the local wizepair2 checkout importable. The location can be overridden with the
# WIZEPAIR2_DIR environment variable; the default is the original hard-coded path, so
# existing setups behave exactly as before.
WIZEPAIR2_DIR = os.environ.get('WIZEPAIR2_DIR', '/home/daniel/wizepair2')
if WIZEPAIR2_DIR not in sys.path:
    # Guarded so that re-running this cell does not append duplicate entries.
    sys.path.append(WIZEPAIR2_DIR)

# Must come after the sys.path tweak above, since `mmpa` is imported from that checkout.
from mmpa.mmp import MMP

INFO: Pandarallel will run on 12 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


### Read in the dataset

In [3]:
# Load the stripped beta-2 agonist records from the working directory and report the row count.
df = pd.read_csv(filepath_or_buffer='beta2_agonists_stripped.csv')
df.shape[0]

1223

### Create cartesian product of unique molecules tested in the same assay

In [4]:
# Self-join the dataset on the assay keys so that every molecule is paired with every
# molecule measured under the same (target_pref_name, standard_type), then keep only the
# two SMILES columns and collapse repeated pairs.
# Note: the result contains identity pairs (a molecule paired with itself) as well as
# both orderings (A, B) and (B, A) of each pair.
df_pairs = (
    df.merge(df, how='inner', on=['target_pref_name', 'standard_type'], suffixes=('_x', '_y'))
    .loc[:, ['stripped_smiles_x', 'stripped_smiles_y']]
    .drop_duplicates()
)
df_pairs

Unnamed: 0,stripped_smiles_x,stripped_smiles_y
0,COc1ccccc1OCCNC[C@@H](O)c1ccc(O)c(O)c1,COc1ccccc1OCCNC[C@@H](O)c1ccc(O)c(O)c1
1,COc1ccccc1OCCNC[C@@H](O)c1ccc(O)c(O)c1,CNC[C@H](O)c1ccc(O)c(O)c1F
2,COc1ccccc1OCCNC[C@@H](O)c1ccc(O)c(O)c1,CNCC(O)c1ccc(O)c2nc(O)sc12
3,COc1ccccc1OCCNC[C@@H](O)c1ccc(O)c(O)c1,CNCC(O)c1cc(O)c(O)cc1F
4,COc1ccccc1OCCNC[C@@H](O)c1ccc(O)c(O)c1,CC(C)NCC(O)c1ccc(Cl)c(Cl)c1
...,...,...
31395,CCC(c1ccc(O)c(O)c1)C(CC)c1ccc(O)c(O)c1,CCC(c1ccc(O)c(O)c1)C(CC)c1ccc(O)c(O)c1
31434,CC(C)NC[C@@H](O)c1ccc(O)c(O)c1,CC(C)NC[C@H](O)c1ccc2ccccc2c1
31439,CC(C)NC[C@H](O)c1ccc2ccccc2c1,CC(C)NC[C@@H](O)c1ccc(O)c(O)c1
31442,CC(C)NC[C@H](O)c1ccc2ccccc2c1,CNCC(O)c1ccc(O)c(O)c1


### Identify all matched molecular pairs (MMPs)

In [5]:
# Run the MMP analysis on every candidate pair, in parallel via pandarallel.
# The rows are shuffled first (`sample(frac=1)`) — presumably to spread slow pairs evenly
# across the worker chunks. The shuffle is seeded with a fixed `random_state` so that the
# row order of the result is the same on every run.
# NOTE(review): strictness=5 and correspondence=2 are passed straight through to MMP;
# their meaning is defined in mmpa.mmp — confirm there before tuning.
# NOTE: this rebinds `df_pairs` from the frame of SMILES pairs to the per-row results of
# `execute()`, so re-running this cell requires re-running the pair-construction cell first.
df_pairs = df_pairs.sample(frac=1, random_state=42).parallel_apply(
    lambda x: MMP(x.stripped_smiles_x, x.stripped_smiles_y, strictness=5, correspondence=2).execute(),
    axis=1,
)

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=134), Label(value='0 / 134'))), HB…

In [6]:
# Give every element of the per-pair results its own row, then expand the records into
# columns (one column per key).
df_pairs = df_pairs.explode().pipe(pd.json_normalize)
# Preview three random records, transposed so each record reads top to bottom.
df_pairs.sample(n=3).T

Unnamed: 0,502,1711,3611
smiles1,CNC[C@@H](O)c1ccc(O)c(O)c1F,CC(C)NCC(O)c1ccc(O)c(NS(C)(=O)=O)c1,C[C@@H](N)[C@@H](O)c1ccc(O)c(O)c1
smiles2,CNCC(O)c1c(F)ccc(O)c1F,CCC(C)(C)NCC(O)c1ccc(O)c(NS(C)(=O)=O)c1,C1CNCCN1.COc1cc(C(O)CO)ccc1O
percentmcs,0.733333,0.857143,0.421053
radius,2,1,1
valid,True,True,False
smirks,[#7:10](-[#6:6](-[#6@@](-[#8:9]-[H])(-[#6:5](:...,[#6:7](-[#6:2](-[#6:6](-[H])(-[H])-[H])(-[#7:5...,[#6](-[#6@@](-[#7:3](-[H])-[H])(-[#6@@](-[#8:4...
fragment1,[H][C@@]([OH])([CH2][NH2])[c]([cH2])[c]([F])[c...,[CH3][CH]([CH3])[NH2],
fragment2,[cH2][c]([F])[c]([c]([cH2])[F])[CH]([OH])[CH2]...,[CH3][CH2][C]([CH3])([CH3])[NH2],


### Drop failures and write output to file

In [7]:
# Keep only the rows whose `valid` flag is True, then save them without the index column.
df_pairs = df_pairs.loc[df_pairs['valid']]
df_pairs.to_csv(path_or_buf='beta2_agonists_pairs.csv', index=False)