In [1]:
################################################################################
# find structural similarity for antidepressants present in LINCS data using
# the Tanimoto algorithm

# author: Ximena Fernandez
# mail:   xfdzciencias@gmail.com

################################################################################

In [23]:
################################################################################
# import modules 
################################################################################

import pandas as pd
import numpy as np
import pubchempy as pcp
from rdkit import Chem
from rdkit.Chem import AllChem, DataStructs
import altair as alt

In [13]:
################################################################################
# data extraction
################################################################################

## read network construction file with antidepressants (ad) 'Target' column and convert to df
df = pd.read_csv('/Users/ximenafernandezsanchez/Documents/Neurociencias/9no_semestre/PP III/Shared/filtered_long_format_with_symbols.csv')

## extract unique ad values and create a new df
ad = pd.DataFrame(df['Target'].unique(), columns=['ad'])

print(ad)

                 ad
0       alaproclate
1     amitriptyline
2         amoxapine
3        bifemelane
4         bupropion
5      clomipramine
6       desipramine
7        dibenzepin
8         dosulepin
9           doxepin
10       duloxetine
11     escitalopram
12       fluoxetine
13      fluvoxamine
14    isocarboxazid
15      maprotiline
16        mianserin
17      mirtazapine
18       nefazodone
19    nortriptyline
20       paroxetine
21    protriptyline
22       reboxetine
23       sertraline
24  tranylcypromine
25        trazodone
26     trimipramine
27      venlafaxine


In [14]:
################################################################################
# data processing
################################################################################

## define a function to obtain the canonical SMILES structure for each ad

def smiles(ad):
    ### finds the compound in the PubChem database using the name as key
    drugs = pcp.get_compounds(ad, 'name')
    ### extracts the canonical SMILES for such compound
    smiles = drugs[0].canonical_smiles
    ### converts the canonical SMILES to a mol object with rdkit
    mol = Chem.MolFromSmiles(smiles)
    ### returns the canonical SMILES considering stereochemistry
    return Chem.MolToSmiles(mol, isomericSmiles=True)

## apply function to add canonical SMILES to ads
ad['canonical_SMILES_stereo'] = ad['ad'].apply(smiles)

print(ad)

                 ad                            canonical_SMILES_stereo
0       alaproclate                    CC(N)C(=O)OC(C)(C)Cc1ccc(Cl)cc1
1     amitriptyline                     CN(C)CCC=C1c2ccccc2CCc2ccccc21
2         amoxapine               Clc1ccc2c(c1)C(N1CCNCC1)=Nc1ccccc1O2
3        bifemelane                           CNCCCCOc1ccccc1Cc1ccccc1
4         bupropion                     CC(NC(C)(C)C)C(=O)c1cccc(Cl)c1
5      clomipramine                  CN(C)CCCN1c2ccccc2CCc2ccc(Cl)cc21
6       desipramine                         CNCCCN1c2ccccc2CCc2ccccc21
7        dibenzepin                CN(C)CCN1C(=O)c2ccccc2N(C)c2ccccc21
8         dosulepin                     CN(C)CCC=C1c2ccccc2CSc2ccccc21
9           doxepin                     CN(C)CCC=C1c2ccccc2COc2ccccc21
10       duloxetine                      CNCCC(Oc1cccc2ccccc12)c1cccs1
11     escitalopram            CN(C)CCCC1(c2ccc(F)cc2)OCc2cc(C#N)ccc21
12       fluoxetine                 CNCCC(Oc1ccc(C(F)(F)F)cc1)c1ccccc1
13    

In [22]:
################################################################################
# Tanimoto similarity calculation
################################################################################

## create fingerprints (binary values) for each molecular structure 
fingerprints = ad['canonical_SMILES_stereo'].apply(lambda x: AllChem.GetMorganFingerprintAsBitVect(Chem.MolFromSmiles(x), 2))

## creates a Tanimoto similarity matrix
tanimoto_similarities = np.array([DataStructs.BulkTanimotoSimilarity(fp, fingerprints) for fp in fingerprints])

## converts np matrix to a pd df
tanimoto_df = pd.DataFrame(tanimoto_similarities, index=ad['ad'], columns=ad['ad'])
tanimoto_df.index.name = None

print(tanimoto_df)

ad               alaproclate  amitriptyline  amoxapine  bifemelane  bupropion  \
alaproclate         1.000000       0.087719   0.106061    0.122807   0.274510   
amitriptyline       0.087719       1.000000   0.112903    0.132075   0.089286   
amoxapine           0.106061       0.112903   1.000000    0.075758   0.142857   
bifemelane          0.122807       0.132075   0.075758    1.000000   0.125000   
bupropion           0.274510       0.089286   0.142857    0.125000   1.000000   
clomipramine        0.145161       0.340000   0.218750    0.131148   0.166667   
desipramine         0.087719       0.260870   0.131148    0.304348   0.109091   
dibenzepin          0.101695       0.297872   0.125000    0.125000   0.103448   
dosulepin           0.078125       0.666667   0.101449    0.116667   0.079365   
doxepin             0.079365       0.684211   0.153846    0.118644   0.080645   
duloxetine          0.106061       0.112903   0.081081    0.267857   0.125000   
escitalopram        0.066667

In [27]:
################################################################################
# data visualization
################################################################################

## converts Tanimoto df to long format to plot heatmap
tanimoto_long_df = tanimoto_df.reset_index().melt(id_vars='index')
tanimoto_long_df.columns = ['node_1', 'node_2', 'Tan']

## limits Tanimoto similarity values to 3 decimals for visualization comprehensive purposes
tanimoto_long_df['Tan'] = tanimoto_long_df['Tan'].round(3)

## creates and displays a heatmap object
base = alt.Chart(tanimoto_long_df).encode(
    alt.X('node_1:O', title=None),
    alt.Y('node_2:O', title=None)
)

heatmap = base.mark_rect().encode(
    alt.Color('Tan:Q', scale=alt.Scale(scheme='viridis'), legend=alt.Legend(title=None)),
    tooltip=['node_1:N', 'node_2:N', 'Tan:Q']
).properties(
    title='Tanimoto similarity of antidepressants based on canonical SMILES with stereochemistry',
    width=1000,
    height=900
)

text = base.mark_text(baseline='middle').encode(
    text='Tan:Q',
    color=alt.condition(
        alt.datum.Tan > 0.5,
        alt.value('black'),
        alt.value('white')
    )
)

heatmap + text