In [None]:
# Import modules

import sys
import pandas as pd
import numpy as np
import random
import pickle
import datetime
import matplotlib.pyplot as plt
%matplotlib inline
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

# Standard plotly imports
import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools
from plotly.offline import iplot, init_notebook_mode

# Using plotly + cufflinks in offline mode
import cufflinks
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)

# Execute the companion notebook inside this kernel's namespace.
# NOTE(review): plotIndividuals, called further down, is not defined in this
# notebook -- presumably it comes from MyFunctions.ipynb; confirm.
%run MyFunctions.ipynb

In [None]:
# Donation records (relative time); keep only rows with Donatiesoortcode 'V'
df = pd.read_pickle('X:/201902 Hb Trajectories/df_rel_full.pkl')
df = df[df['Donatiesoortcode'] == 'V']

# Donor records, indexed by the same IDs that appear in df['KeyID']
df_donors = pd.read_pickle('X:/201902 Hb Trajectories/donors.pkl')

# Proxy for "new donors": drop every donor that has a record dated before 2010,
# then restrict the donor table to the donors that are left in df
old_donors = np.unique(list(df.loc[df['DateTime'].dt.year < 2010, 'KeyID']))
df = df[~df['KeyID'].isin(old_donors)]
df_donors = df_donors[df_donors.index.isin(np.unique(df['KeyID']))]

# KeyIDs split by sex ('Geslacht' is 'V' or 'M')
id_f = list(df_donors[df_donors['Geslacht'] == 'V'].index)
id_m = list(df_donors[df_donors['Geslacht'] == 'M'].index)

In [None]:
# Rows of df whose KeyID is in the male ID list
df_m = df[df['KeyID'].isin(id_m)]

In [None]:
# Histogram of the Hb column for the male subset, using 200 bins
df_m['Hb'].hist(bins=200)

In [None]:
# Rows of df whose KeyID is in the female ID list
df_f = df[df['KeyID'].isin(id_f)]
print(df_f.shape)

# Non-null count of every column within each HbLowHigh group
df_f.groupby('HbLowHigh').count()

In [None]:
# Rows of df whose KeyID is in the male ID list
df_m = df[df['KeyID'].isin(id_m)]
print(df_m.shape)

# Non-null count of every column within each HbLowHigh group
df_m.groupby('HbLowHigh').count()

In [None]:
# Plot the n donors that have the most rows in df
n = 9
df_donor_count = df.groupby('KeyID').size()
topn = df_donor_count.sort_values(ascending=False).head(n).index.tolist()

plotIndividuals(df, topn)

In [None]:
# `fixed` hands the DataFrame to the function unchanged instead of letting
# interact try to build a widget from it.
@interact_manual(data=widgets.fixed(df))
def plotAllTimeseries(data=df,
                     figsize_x=20,
                     figsize_y=16,
                     alpha=(0, 1, 0.01)):
    """Plot the Hb trajectory of every donor in `data` on two stacked panels.

    Donors whose KeyID is in `id_f` are drawn on the top panel; all other
    donors are drawn on the bottom panel. A red dashed reference line is
    drawn at 7.8 (top) and 8.4 (bottom).

    Parameters
    ----------
    data : DataFrame
        Must contain the columns 'KeyID', 'TimeSinceFirst' and 'Hb'.
    figsize_x, figsize_y : number
        Figure width and height passed to `plt.subplots`.
    alpha : float
        Line opacity for the trajectories. The tuple default is only an
        ipywidgets abbreviation for a slider; pass a float when calling
        the function directly.
    """
    fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(figsize_x, figsize_y), sharex=True)

    # Set lookup instead of scanning the id_f list once per donor
    female_ids = set(id_f)

    # One pass over the frame; groups are yielded in sorted KeyID order
    for d_id, df_sub in data.groupby('KeyID'):
        target_ax = ax1 if d_id in female_ids else ax2
        target_ax.plot(df_sub['TimeSinceFirst'], df_sub['Hb'], color='black', alpha=alpha)

    # Drawn once, after the loop: these do not depend on the donor
    ax1.axhline(y=7.8, color='red', linestyle='--')
    ax2.axhline(y=8.4, color='red', linestyle='--')

    fig.text(0.5, 0.04, 'Time since first measurement', ha='center', fontsize=12)
    fig.text(0.04, 0.5, 'Hb value', va='center', rotation='vertical', fontsize=12)

    plt.show()

In [None]:
# Peek at the first rows of the filtered donation frame
df.head()

In [None]:
print(datetime.datetime.now())  # wall-clock timestamp before plotting

fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(12, 8))

# Flat [x, y, x, y, ...] argument lists so each panel needs a single plot() call
drawlines1 = []
drawlines2 = []

# Set lookup instead of scanning the id_f list once per donor
female_ids = set(id_f)

# First 1000 KeyIDs in sorted order; groupby yields them in that same order
first_ids = np.unique(df['KeyID'])[:1000]
df_first = df.loc[df['KeyID'].isin(first_ids), :]

for d_id, df_sub in df_first.groupby('KeyID'):
    # The x-limits and the axis label below are in years, so convert here.
    # Assumes TimeSinceFirst is a number of days, as in the per-donor panels
    # later in this notebook that divide it by 365 -- TODO confirm
    years = df_sub['TimeSinceFirst'] / 365
    target = drawlines1 if d_id in female_ids else drawlines2
    target.append(years)
    target.append(df_sub['Hb'])

ax1.plot(*drawlines1, color='black', alpha=0.1)
ax2.plot(*drawlines2, color='black', alpha=0.1)

# Red dotted reference lines: 7.8 on the women's panel, 8.4 on the men's panel
ax1.axhline(y=7.8, color='red', linestyle=':', linewidth=1)
ax2.axhline(y=8.4, color='red', linestyle=':', linewidth=1)

ax1.set_title('Hb values over time for women')
ax2.set_title('Hb values over time for men')

ax1.set_xlim((0, 9))
ax2.set_xlim((0, 9))
ax1.set_ylim((5, 15))
ax2.set_ylim((5, 15))

fig.text(0.5, 0.04, 'Years since first measurement', ha='center', fontsize=12)
fig.text(0.04, 0.5, 'Hb value', va='center', rotation='vertical', fontsize=12)

plt.show()

print(datetime.datetime.now())  # wall-clock timestamp after plotting

In [None]:
import os
import plotly

# Plotly credentials are read from the environment so that no secret is stored
# in the notebook. Set PLOTLY_USERNAME and PLOTLY_API_KEY before running this
# cell; a missing variable raises KeyError.
# NOTE(review): an API key was previously hardcoded in this cell. Treat that key
# as compromised and regenerate it in the Plotly account settings.
plotly.tools.set_credentials_file(username=os.environ['PLOTLY_USERNAME'],
                                  api_key=os.environ['PLOTLY_API_KEY'])

In [None]:
# Number of KeyIDs in the female ID list
len(id_f)

In [None]:
# KeyIDs to highlight. Defined in this cell because the loop below needs the
# list; without it a fresh kernel run from top to bottom raises NameError here.
sel_f = ['DK.01401342', 'DK.00392993', 'DK.00987577']

# Faint thin black lines for the background donors, default-coloured thicker
# lines for the highlighted ones
background_line = dict(color=('rgba(0, 0, 0, 0.1)'), width=1)
highlight_line = dict(width=2)

# Trace order matters for drawing: first 97 IDs of id_f, then the sel_f IDs
data_f = []
for donor_ids, line_style in ((id_f[:97], background_line), (sel_f, highlight_line)):
    for d_id in donor_ids:
        df_sub = df.loc[df['KeyID'] == d_id, :]
        trace = go.Scattergl(x=df_sub['TimeSinceFirst'],
                             y=df_sub['Hb'],
                             mode='lines',
                             line=dict(line_style))
        data_f.append(trace)

In [None]:
# KeyIDs to highlight. Defined in this cell because the loop below needs the
# list; without it a fresh kernel run from top to bottom raises NameError here.
sel_m = ['DK.01216473', 'DK.00043278', 'DK.00451868']

# Faint thin black lines for the background donors, default-coloured thicker
# lines for the highlighted ones
background_line = dict(color=('rgba(0, 0, 0, 0.1)'), width=1)
highlight_line = dict(width=2)

# Trace order matters for drawing: first 97 IDs of id_m, then the sel_m IDs
data_m = []
for donor_ids, line_style in ((id_m[:97], background_line), (sel_m, highlight_line)):
    for d_id in donor_ids:
        df_sub = df.loc[df['KeyID'] == d_id, :]
        trace = go.Scattergl(x=df_sub['TimeSinceFirst'],
                             y=df_sub['Hb'],
                             mode='lines',
                             line=dict(line_style))
        data_m.append(trace)

In [None]:
# One row, two columns: data_f traces go in column 1, data_m traces in column 2
fig = tools.make_subplots(rows=1, cols=2)

for col, traces in enumerate((data_f, data_m), start=1):
    for trace in traces:
        fig.append_trace(trace, 1, col)

fig['layout'].update(height=600, width=800)

# NOTE(review): py is plotly.plotly, so this call goes through the online Plotly
# service rather than the offline iplot imported at the top of the notebook --
# confirm that is intended for this data.
py.iplot(fig)

In [None]:
# Window of n donors starting at rank s (ranked by number of rows, descending),
# computed separately for the female and the male ID lists
n = 9
s = 3500
df_donor_count = df.groupby('KeyID').size()

topn_f = (df_donor_count[df_donor_count.index.isin(id_f)]
          .sort_values(ascending=False)
          .iloc[s:s + n]
          .index
          .tolist())
topn_m = (df_donor_count[df_donor_count.index.isin(id_m)]
          .sort_values(ascending=False)
          .iloc[s:s + n]
          .index
          .tolist())

# Currently plotting two hand-picked donors; swap in topn_m to plot the window
plotIndividuals(df, ['DK.00419306', 'DK.00772555'])
# plotIndividuals(df, topn_m)

In [None]:
# Hand-picked KeyIDs that are drawn as highlighted traces
# (sel_f is appended to data_f, sel_m to data_m).
# NOTE(review): both names are referenced by cells that appear earlier in the
# notebook; consider moving this definition above them.
sel_f = ['DK.01401342', 'DK.00392993', 'DK.00987577']
sel_m = ['DK.01216473', 'DK.00043278', 'DK.00451868']

In [None]:
# Scratch check of the 365-step positions below 3000 (nine values: 0, 365, ..., 2920)
list(range(0, 3000, 365))

In [None]:
# Load a JSON export into dfw -- presumably weekly data for men, going by the
# file name; TODO confirm. dfw is not used by the cells visible in this notebook.
dfw = pd.read_json('X:/201902 Hb Trajectories/df_weekly_m.json')


In [None]:
fig, ax = plt.subplots(1, 3, figsize=(15, 7), sharex=True, sharey=True)

for a in fig.axes:
    # 'both' applies the label size and padding to the x and y ticks;
    # 'none' is not one of the values tick_params accepts ('x', 'y', 'both')
    a.tick_params(axis='both', labelsize=8, pad=1)
    a.grid(which='major')

# One panel per KeyID in sel_m, each with a red reference line at 8.4
for panel, (a, d_id) in enumerate(zip(ax, sel_m), start=1):
    df_sub = df.loc[df['KeyID'] == d_id, :]
    # Dividing by 365 puts the x axis in years
    # (assumes TimeSinceFirst is a number of days -- TODO confirm)
    a.plot(df_sub['TimeSinceFirst'] / 365, df_sub['Hb'], color='black')
    a.axhline(y=8.4, color='red')
    a.set_title('Donor {}'.format(panel), fontsize=9)

ax[0].set_ylabel('Hb (mmol/L)')
# Label matches the plotted unit: the x values were divided by 365
fig.text(0.5, 0, 'Years since first donation', ha='center', fontsize=9)

plt.show()

In [None]:
sel = ['DK.00419306', 'DK.00772555', 'DK.00200822']
# Red reference line per donor, in the same order as sel
hb_limits = [7.8, 8.4, 8.4]

# Tick positions every 365 units of TimeSinceFirst, labelled 0, 1, 2, ...
# Labels are derived from the positions so the two always have the same length.
year_ticks = list(range(0, 3000, 365))
year_labels = list(range(len(year_ticks)))

fig, ax = plt.subplots(1, 3, figsize=(15, 7), sharex=True, sharey=True)

for a in fig.axes:
    # 'both' applies the label size and padding to the x and y ticks;
    # 'none' is not one of the values tick_params accepts ('x', 'y', 'both')
    a.tick_params(axis='both', labelsize=8, pad=1)
    a.set_xticks(year_ticks)
    a.set_xticklabels(year_labels)

for panel, (a, d_id, limit) in enumerate(zip(ax, sel, hb_limits), start=1):
    df_sub = df.loc[df['KeyID'] == d_id, :]
    a.plot(df_sub['TimeSinceFirst'], df_sub['Hb'], color='black')
    a.axhline(y=limit, color='red')
    a.set_title('Donor {}'.format(panel), fontsize=9)
    # Thin grey vertical line at every row's TimeSinceFirst value
    for t in df_sub['TimeSinceFirst']:
        a.axvline(t, color='grey', lw=0.8)

ax[0].set_ylabel('Hb (mmol/L)')
# The tick labels count 365-unit steps, i.e. years if TimeSinceFirst is in days
fig.text(0.5, 0, 'Years since first donation', ha='center', fontsize=9)

plt.show()