In [1]:
!pip install plotly



In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn import metrics
import plotly.graph_objects as go

In [3]:
metrics_df = pd.read_csv('OrigFiles/ProteinAndPathologyQuantifications.csv')
donor_df = pd.read_csv('OrigFiles/DonorInformation.csv')  
metrics_df.head()

Unnamed: 0,donor_id,donor_name,structure_id,structure_acronym,ihc_a_syn,ihc_tau2_ffpe,ihc_at8_ffpe,ihc_at8,ihc_ptdp_43_ffpe,ihc_a_beta_ffpe,...,il_4_pg_per_mg,rantes_pg_per_mg,ab40_pg_per_mg,a_syn_pg_per_mg,ifn_g_pg_per_mg,mcp_1_pg_per_mg,bdnf_pg_per_mg,mip_1a_pg_per_mg,il_7_pg_per_mg,ab42_pg_per_mg
0,309335467,H14.09.030,10557,FWM,7.8e-05,0.002358,0.001137,0.00011,0.001259,0.008335,...,2.68,15.99,0.7361,0.122288,1.47,20.78,5.03736,9.38,11.78,523.292251
1,309335480,H14.09.043,10208,PCx,6.3e-05,0.002762,0.001272,0.000164,0.002354,0.005047,...,3.02,24.04,0.669094,0.111962,1.44,40.4,4.95462,8.1,45.02,81.493875
2,309335493,H14.09.056,10557,FWM,6.4e-05,0.003468,0.013787,0.016023,0.001708,0.007365,...,0.8,129.8,0.7361,0.091084,0.54,46.88,5.88409,27.0,15.82,470.734514
3,326765668,H14.09.081,10557,FWM,4.9e-05,0.003035,0.001707,0.000137,0.001729,0.004046,...,1.3,15.16,8.995575,0.054076,0.76,10.34,16.13524,8.06,24.22,568.368571
4,326765668,H14.09.081,10235,TCx,8e-05,0.002088,0.004489,6.2e-05,0.001513,0.015809,...,0.0,17.9,95.931,0.03,1.22,21.38,0.03,10.52,23.72,438.863263


In [5]:
metrics_donor_df = pd.merge(metrics_df, donor_df,  how='left', left_on='donor_name', right_on = 'name')
metrics_donor_df.head()

Unnamed: 0,donor_id_x,donor_name,structure_id,structure_acronym,ihc_a_syn,ihc_tau2_ffpe,ihc_at8_ffpe,ihc_at8,ihc_ptdp_43_ffpe,ihc_a_beta_ffpe,...,num_tbi_w_loc,dsm_iv_clinical_diagnosis,control_set,nincds_arda_diagnosis,ever_tbi_w_loc,race,hispanic,act_demented,braak,nia_reagan
0,309335467,H14.09.030,10557,FWM,7.8e-05,0.002358,0.001137,0.00011,0.001259,0.008335,...,0,No Dementia,3,No Dementia,N,White,Not Hispanic,No Dementia,3,2
1,309335480,H14.09.043,10208,PCx,6.3e-05,0.002762,0.001272,0.000164,0.002354,0.005047,...,1,Multiple Etiologies,9,Possible Alzheimer'S Disease,Y,White,Not Hispanic,Dementia,0,0
2,309335493,H14.09.056,10557,FWM,6.4e-05,0.003468,0.013787,0.016023,0.001708,0.007365,...,0,Alzheimer's Disease Type,18,Possible Alzheimer'S Disease,N,White,Not Hispanic,Dementia,5,3
3,326765668,H14.09.081,10557,FWM,4.9e-05,0.003035,0.001707,0.000137,0.001729,0.004046,...,1,No Dementia,35,No Dementia,Y,White,Not Hispanic,No Dementia,3,2
4,326765668,H14.09.081,10235,TCx,8e-05,0.002088,0.004489,6.2e-05,0.001513,0.015809,...,1,No Dementia,35,No Dementia,Y,White,Not Hispanic,No Dementia,3,2


In [6]:
scaler = MinMaxScaler()
scaled_at8= scaler.fit_transform(metrics_donor_df['ihc_at8_ffpe'].to_frame())
result_at8 = pd.cut(scaled_at8.ravel(), bins=[0.0, 0.2, 0.4, 0.6, 0.8, 1.00], right=True, labels=False).astype(int)+1
scaled_abeta= scaler.fit_transform(metrics_donor_df['ihc_a_beta_ffpe'].to_frame())
result_abeta = pd.cut(scaled_abeta.ravel(), bins=[0.0, 0.2, 0.4, 0.6, 0.8, 1.00], right=True, labels=False).astype(int)+1

In [9]:
source_df = metrics_donor_df[['cerad',  'act_demented']]
source_df['a_beta'] = result_abeta
source_df['at8'] = result_at8
source_df['act_demented']=  source_df['act_demented'].map({'Dementia': 1, 'No Dementia': 0})
source_df = source_df[(source_df['a_beta']>0) & (source_df['at8']>0)]
source_df.head()
#print(source_df.shape)

Unnamed: 0,cerad,act_demented,a_beta,at8
0,3,0,1,1
1,0,1,1,1
2,3,1,1,2
3,2,0,1,1
4,2,0,2,1


In [10]:
cerad_abeta = source_df.groupby(['cerad', 'a_beta'])['act_demented'].aggregate(np.ma.count)
cerad_abeta_df =pd.DataFrame(cerad_abeta)
cerad_abeta_df.reset_index(inplace=True)
cerad_abeta_df['a_beta'] = cerad_abeta_df['a_beta']+3

In [11]:
source_list = list(cerad_abeta_df['cerad'])
print(source_list)
target_list = list(cerad_abeta_df['a_beta'])
print(target_list)
value_list =list(cerad_abeta_df['act_demented'])
print(value_list)

[0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3]
[4, 5, 4, 5, 6, 8, 4, 5, 6, 4, 5, 6, 7]
[68, 2, 85, 13, 2, 1, 59, 24, 6, 45, 32, 12, 3]


In [13]:
abeta_dementia = source_df.groupby(['a_beta', 'act_demented'])['cerad'].aggregate(np.ma.count)
abeta_dementia_df =pd.DataFrame(abeta_dementia)
abeta_dementia_df.reset_index(inplace=True)
abeta_dementia_df['a_beta'] = abeta_dementia_df['a_beta'] + 3 
abeta_dementia_df['act_demented'] = abeta_dementia_df['act_demented'] + 9
abeta_dementia_df.head()

Unnamed: 0,a_beta,act_demented,cerad
0,4,9,142
1,4,10,115
2,5,9,32
3,5,10,39
4,6,9,7


In [14]:
source_list =source_list + list(abeta_dementia_df['a_beta'])
print(source_list)
target_list = target_list+ list(abeta_dementia_df['act_demented'])
print(target_list)
value_list = value_list+ list(abeta_dementia_df['cerad'])
print(value_list)

[0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8]
[4, 5, 4, 5, 6, 8, 4, 5, 6, 4, 5, 6, 7, 9, 10, 9, 10, 9, 10, 10, 9]
[68, 2, 85, 13, 2, 1, 59, 24, 6, 45, 32, 12, 3, 142, 115, 32, 39, 7, 13, 3, 1]


In [15]:
nodecolor_list= ['rgba(31, 119, 180, 0.8)', 'rgba(255, 127, 14, 0.8)', 'rgba(44, 160, 44, 0.8)', 'rgba(214, 39, 40, 0.8)', 
                 'rgba(148, 103, 189, 0.8)', 'rgba(140, 86, 75, 0.8)', 'rgba(227, 119, 194, 0.8)', 'rgba(127, 127, 127, 0.8)',
                 'rgba(188, 189, 34, 0.8)', 'rgba(23, 190, 207, 0.8)', 'rgba(31, 119, 180, 0.8)', 'rgba(255, 127, 14, 0.8)', 
                 'rgba(44, 160, 44, 0.8)', 'rgba(214, 39, 40, 0.8)', 'rgba(148, 103, 189, 0.8)']
opacity = 0.4
linkcolor_list = [nodecolor_list[src].replace("0.8", str(opacity)) for src in source_list]

In [18]:
fig = go.Figure(data=[go.Sankey(
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "black", width = 0.5),
      label = ['0', '1','2','3', '1','2','3', '4', '5', 'No Dementia', 'Dementia'],
      x = [0.001, 0.001,0.001, 0.001, 0.5, 0.5, 0.5, 0.5, 0.5, 0.999, 0.999 ],
      y=[0.99, 0.75, 0.45, 0.05, 0.90, 0.45, 0.25, 0.1, 0.05,  0.85, 0.2],
      color = nodecolor_list
    ),
    link = dict(
      source = source_list, 
      target = target_list,
      value = value_list,
      color = linkcolor_list  
  ))])

fig.update_layout(title_text="Sankey Diagram: CERAD SCore, Aβ protein and Dementia Status", font_size=10)
fig.show()
