# Patient Demographics
Charts showing the demographic makeup of the patient population, based on historical data from Thousand Smiles clinics.


In [1]:
import pandas as pd
import altair as alt
import sys
import numpy as np
import math
from datetime import datetime
from dateutil.relativedelta import relativedelta
import random
from altair.expr import datum, if_


sys.path.append('..')
import TSD

In [2]:
def compute_age(row):
    """Return the age in whole years for one record.

    Args:
        row: A mapping-like record (for example a DataFrame row) providing
            a ``"start"`` date and a ``"dob"`` date.

    Returns:
        The ``years`` component of the ``relativedelta`` measured from
        ``row["dob"]`` to ``row["start"]``.
    """
    elapsed = relativedelta(row["start"], row["dob"])
    return elapsed.years

# NOTE: this is a plain assignment, not a copy, so the age_in_years column
# added below is also visible through TSD.df_merged.
merged_df = TSD.df_merged

# Age in whole years for every record (compute_age is applied row by row).
merged_df["age_in_years"] = merged_df.apply(compute_age, axis=1)
age_row = merged_df["age_in_years"]

# Take a copy so that narrowing to one clinic does not touch merged_df.
# Because the copy is made after the assignment above, it already carries
# the age_in_years column.
df_merged_latest = TSD.df_merged.copy()

# Find the id of the last clinic: the clinic of the record with the
# largest `timein` value.
most_recent = df_merged_latest.timein.max()
last_clinic = most_recent
last_clinic_id = df_merged_latest.query('timein == @last_clinic')['clinic_id'].iloc[0]

# Filter the data so it only includes the latest clinic.
df_merged_latest = df_merged_latest.query('clinic_id == @last_clinic_id').copy()

# `age` is the same start/dob year difference already stored in
# age_in_years, so reuse that column instead of recomputing it row by row.
df_merged_latest['age'] = df_merged_latest['age_in_years']

In [3]:
# Calculate the historical average number of patients per clinic, grouped
# into 5-year age buckets.

# Number of records for every (clinic, age) pair.
clinic_counts = merged_df.groupby(['clinic_id','age_in_years']).size().reset_index(name='count')

# Find the number of clinics.
total_clinics = merged_df.clinic_id.nunique()

# Average count per age across ALL clinics. Dividing the total by the
# number of clinics (rather than taking the mean of the rows present)
# treats a clinic that saw nobody of a given age as contributing zero,
# so rarely-seen ages are not overstated. Selecting only the `count`
# column also avoids averaging clinic_id.
avg_clinic = (
    clinic_counts.groupby('age_in_years')['count'].sum() / total_clinics
).reset_index()

# Collapse single ages into 5-year buckets (0-4 -> 0, 5-9 -> 1, ...) and
# add up the per-age averages inside each bucket.
avg_clinic['age_in_years'] = avg_clinic['age_in_years'] // 5
grouped_avg = avg_clinic.groupby('age_in_years').sum().reset_index()

# Convert the bucket index back to the bucket's starting age in years.
grouped_avg['age_in_years'] = grouped_avg['age_in_years'] * 5

In [4]:
# Chart the most recent clinic as bars, with a line showing the historical
# average.
#
# Both layers share their x and y axes. When layers give a shared axis
# different titles, the titles are joined with a comma, so each axis is
# given the same explicit title in both layers.
line = alt.Chart(grouped_avg).mark_line(color='blue', strokeOpacity=0.2).encode(
    x=alt.X('age_in_years:Q', title='Age in Years', bin=alt.Bin(maxbins=20)),
    y=alt.Y('count:Q', title='Number of Patients'),
)

bars = alt.Chart(df_merged_latest).mark_bar().encode(
    x=alt.X('age_in_years', title='Age in Years', bin=alt.Bin(maxbins=20)),
    y=alt.Y('count()', title='Number of Patients'),
    color=alt.Color('age_in_years', scale=alt.Scale(scheme="spectral"), legend=None),
)

# Keep the layered chart as the final expression so the notebook renders it.
alt.layer(bars, line).properties(
    width=700,
    height=400,
    title=alt.Title(
        f'Patients Treated on {most_recent.strftime("%B %d, %Y")}',
        subtitle=f" (Historical data from previous {total_clinics} clinics shown by blue line.)",
    ),
)