In [34]:
################################################################################
# compute the functional similarity (based on shared adverse effects) between the antidepressants present in the
# LINCS data, using the Jaccard similarity index

# author: Ximena Fernandez
# mail:   xfdzciencias@gmail.com

################################################################################

In [35]:
################################################################################
# import modules 
################################################################################

import pandas as pd
import numpy as np
import altair as alt

In [36]:
################################################################################
# data extraction
################################################################################

## load the network construction file into a df; its 'Target' column holds the antidepressant (ad) names
df = pd.read_csv('/Users/ximenafernandezsanchez/Documents/Neurociencias/9no_semestre/PP III/Shared/filtered_long_format_with_symbols.csv')

## keep each antidepressant once (in order of first appearance) and store the result as a single-column df named 'ad'
ad = (
    df['Target']
    .drop_duplicates()
    .reset_index(drop=True)
    .to_frame(name='ad')
)

print(ad)

                 ad
0       alaproclate
1     amitriptyline
2         amoxapine
3        bifemelane
4         bupropion
5      clomipramine
6       desipramine
7        dibenzepin
8         dosulepin
9           doxepin
10       duloxetine
11     escitalopram
12       fluoxetine
13      fluvoxamine
14    isocarboxazid
15      maprotiline
16        mianserin
17      mirtazapine
18       nefazodone
19    nortriptyline
20       paroxetine
21    protriptyline
22       reboxetine
23       sertraline
24  tranylcypromine
25        trazodone
26     trimipramine
27      venlafaxine


In [37]:
################################################################################
# data mapping
################################################################################

## load the adverse effects file into a df; its 'Drug' column holds the antidepressant (ad) names
df = pd.read_csv('/Users/ximenafernandezsanchez/Documents/Neurociencias/9no_semestre/PP III/Shared/AD-ADR FAERS PRR CORREGIDOS  - Hoja 1-2.csv')

## restrict 'df' to the rows whose 'Drug' value is listed in 'ad'
###### (not needed for this particular dataset, but the filtering step is kept for compatibility reasons)
is_known_ad = df['Drug'].isin(ad['ad'])
df_exp = df[is_known_ad]

## from 'df_exp', keep only the rows whose adverse effect is treated as associated with the ad (criterion: PRR > 1),
## then renumber the remaining rows from 0
filtered_df_exp = df_exp[df_exp['PRR sin infinitos'] > 1].reset_index(drop=True)

print(filtered_df_exp)

                Drug                                   pt  PRR sin infinitos
0      amitriptyline  5-hydroxyindolacetic acid increased          22.923896
1      amitriptyline                               Abasia           1.023523
2      amitriptyline                  Abdominal adhesions           1.821472
3      amitriptyline       Abdominal compartment syndrome           1.910325
4      amitriptyline                     Abdominal hernia           1.509392
...              ...                                  ...                ...
33321    venlafaxine                           Xanthopsia           5.877377
33322    venlafaxine                        Xerophthalmia           3.526426
33323    venlafaxine                              Xerosis       12029.689500
33324    venlafaxine                       X-ray abnormal           2.938688
33325    venlafaxine                 Yellow nail syndrome           4.408033

[33326 rows x 3 columns]


In [38]:
################################################################################
# Jaccard similarity calculation
################################################################################

## create a binary representation of adverse effects for each antidepressant (rows: ad, columns: adverse effects)
### pd.crosstab returns occurrence counts, so they are capped at 1: a (Drug, pt) pair that is listed more than once
### must still count as a single adverse effect, otherwise the dot product below is no longer an intersection size
adjacency_matrix = pd.crosstab(filtered_df_exp['Drug'], filtered_df_exp['pt']).clip(upper=1)

## creates a Jaccard similarity matrix, J(A, B) = |A and B| / |A or B| (vectorized calculation)
### number of adverse effects shared by each antidepressant pair (rows: ad, columns: ad)
intersection = adjacency_matrix.dot(adjacency_matrix.T)

### number of distinct adverse effects per antidepressant pair, computed as |A| + |B| - |A and B|
### (rows: ad, columns: ad); the [:, None] turns the per-ad totals into a column so the sum broadcasts to a square matrix
effects_per_ad = adjacency_matrix.sum(axis=1).values
union = effects_per_ad[:, None] + effects_per_ad - intersection

### Jaccard similarity index per antidepressant pair (rows: ad, columns: ad)
jaccard_matrix_values = intersection / union

## fills the main diagonal with value = 1 (Jaccard similarity for each ad compared to itself)
### the fill is applied to an explicit writable copy: writing through `.values` fails when pandas Copy-on-Write is
### active, because the array it exposes is then read-only
jaccard_array = jaccard_matrix_values.to_numpy(dtype=float, copy=True)
np.fill_diagonal(jaccard_array, 1)

## creates a df object with the Jaccard similarity matrix; the two axes get distinct names ('node_2' for rows,
## 'node_1' for columns) so they can be told apart after reshaping
### Index.rename returns new Index objects, so the index of 'adjacency_matrix' itself is left unchanged
jaccard_effects_matrix = pd.DataFrame(
    jaccard_array,
    index=adjacency_matrix.index.rename('node_2'),
    columns=adjacency_matrix.index.rename('node_1'),
)

## export Jaccard similarity matrix to a csv file for further analysis
jaccard_effects_matrix.to_csv('jaccard_effects_matrix.csv')

print(jaccard_effects_matrix)

node_1           amitriptyline  amoxapine  bupropion  clomipramine  \
node_2                                                               
amitriptyline         1.000000   0.021021   0.150391      0.082727   
amoxapine             0.021021   1.000000   0.017924      0.070529   
bupropion             0.150391   0.017924   1.000000      0.065670   
clomipramine          0.082727   0.070529   0.065670      1.000000   
desipramine           0.046259   0.089862   0.056831      0.084656   
dibenzepin            0.001553   0.019355   0.000774      0.007092   
doxepin               0.122164   0.044855   0.136392      0.104762   
duloxetine            0.175381   0.020948   0.183651      0.053812   
escitalopram          0.186292   0.020952   0.208152      0.084862   
fluoxetine            0.177193   0.023628   0.194753      0.095178   
fluvoxamine           0.076157   0.075406   0.090702      0.171949   
isocarboxazid         0.002314   0.041176   0.005015      0.012500   
maprotiline         

In [39]:
################################################################################
# data visualization
################################################################################

## converts Jaccard df to long format (one row per antidepressant pair) to plot the heatmap
### the melted columns are named explicitly so each label stays attached to the axis it came from: row labels remain
### 'node_2' and column labels remain 'node_1' (assigning a positional list of names would swap the two)
jaccard_long_df = jaccard_effects_matrix.reset_index().melt(
    id_vars='node_2',
    var_name='node_1',
    value_name='Jaccard',
)

## rounds Jaccard similarity values to 3 decimals so the cell labels and tooltips stay readable
jaccard_long_df['Jaccard'] = jaccard_long_df['Jaccard'].round(3)

## creates and displays a heatmap object
### shared encoding: both axes are ordinal and list the antidepressants
base = alt.Chart(jaccard_long_df).encode(
    alt.X('node_1:O', title=None),
    alt.Y('node_2:O', title=None)
)

### coloured cells: one rectangle per antidepressant pair, coloured by its Jaccard value
heatmap = base.mark_rect().encode(
    alt.Color('Jaccard:Q', scale=alt.Scale(scheme='viridis'), legend=alt.Legend(title=None)),
    tooltip=['node_1:N', 'node_2:N', 'Jaccard:Q']
).properties(
    title='Jaccard similarity of antidepressants based on adverse effects',
    width=1000,
    height=900
)

### numeric labels drawn over the cells: black where Jaccard > 0.5, white otherwise
text = base.mark_text(baseline='middle').encode(
    text='Jaccard:Q',
    color=alt.condition(
        alt.datum.Jaccard > 0.5,
        alt.value('black'),
        alt.value('white')
    )
)

### layer the labels on top of the coloured cells; as the last expression of the cell, the chart is displayed
heatmap + text