In [1]:
import pandas as pd
import json
import bokeh.plotting as bpl
import os
import numpy as np
from bokeh.plotting import figure, output_file, show, gridplot
from bokeh.models import ColumnDataSource, LabelSet, HoverTool

In [2]:
# Move up to the project root so the relative 'data/...' and 'plots/...' paths
# used in this notebook resolve. The guard makes the cell safe to re-run:
# without it every execution climbs one more directory level and the
# open() calls below start failing.
if not os.path.isdir('data/raw'):
    os.chdir('../')

# Cohort demographics, keyed by cohort name.
with open('data/raw/cohort_demographics_test_data.json', 'r') as fb:
    cohorts_dic = json.load(fb)

# Reference population(s) the cohorts are compared against.
with open('data/raw/Reference_population.json', 'r') as fb:
    ref_dic = json.load(fb)

In [3]:
# Dump the raw record for one cohort to inspect how the loaded JSON is nested.
print(cohorts_dic['UK Biobank'])

{'Age': {'0-4': 0, '5-9': 0, '10-14': 0, '15-19': 0, '20-24': 0, '25-29': 0, '30-34': 0, '35-39': 0, '40-44': 53960, '45-49': 66438, '50-54': 76808, '55-59': 91953, '60-64': 121419, '65-70': 92739, '70-74': 0, '75-79': 0, '80-84': 0, '85+': 0, 'Missing': 0}, 'Gender': {'Female': 273824, 'Male': 229486, 'Missing': 0}, 'Ethnicity': {'White': {'White': 581, 'White British': 472204, 'White Irish': 14000, 'White other': 17034}, 'Black': {'Black': 27, 'Black Carribian': 4608, 'Black African': 3461, 'Black Other': 126}, 'Asian': {'Asian': 44, 'Indian': 6109, 'Pakistani': 1905, 'Bangledeshi': 240, 'Sri Lankan': 0, 'Asian Other': 1856}, 'Chinese': {'Chinese': 1653}, 'Mixed': {'Mixed': 50, 'White and Black Carribean': 650, 'White and Black Caribean': 449, 'White and Asian': 864, 'Other': 1082}, 'Other': {'Other': 4697}, 'Missing': {'Missing': 1760}}, 'Socioeconomic Status': {'I': 81610, 'II': 106680, 'III(non manual)': 133350, 'III(manual)': 160019, 'IV': 40005, 'V': 10668, 'Armed Forces': 1067,

## Testing bar plot

In [4]:
# Work on a copy of the age counts so the loaded JSON stays untouched.
UKB_age_dic = cohorts_dic['UK Biobank']['Age'].copy()

# One row per age band: (band label, head count).
ukbiobank_age = pd.DataFrame(
    list(UKB_age_dic.items()),
    columns=['Age range', 'Value'],
)
print(ukbiobank_age)

   Age range   Value
0        0-4       0
1        5-9       0
2      10-14       0
3      15-19       0
4      20-24       0
5      25-29       0
6      30-34       0
7      35-39       0
8      40-44   53960
9      45-49   66438
10     50-54   76808
11     55-59   91953
12     60-64  121419
13     65-70   92739
14     70-74       0
15     75-79       0
16     80-84       0
17       85+       0
18   Missing       0


In [5]:
# All subsequent show() calls write to this HTML file until output_file is called again.
output_file('plots/bar_test.html')

# Categorical x axis: one factor per age band, in DataFrame order.
p = figure(
    title='Test bar plot',
    x_axis_label='Age',
    x_range=ukbiobank_age['Age range'],
)
p.vbar(
    x=ukbiobank_age['Age range'],
    top=ukbiobank_age['Value'],
    width=0.9,
)


## Testing spider plot

In [6]:
# Work on a copy of the ethnicity counts so the loaded JSON stays untouched.
UKB_eth_dic = cohorts_dic['UK Biobank']['Ethnicity'].copy()

# Collapse each top-level group to the total of its sub-group counts.
ukbiobank_eth = pd.DataFrame(
    [(group, sum(sub_counts.values())) for group, sub_counts in UKB_eth_dic.items()],
    columns=['Ethnicity', 'Value'],
)

# Fold row 3 into row 2, then remove row 3. The remaining index labels are
# deliberately left un-reset (0, 1, 2, 4, 5, 6).
ukbiobank_eth.loc[2, 'Value'] += ukbiobank_eth.loc[3, 'Value']
ukbiobank_eth = ukbiobank_eth.drop(index=3)
print(ukbiobank_eth)

  Ethnicity   Value
0     White  503819
1     Black    8222
2     Asian   11807
4     Mixed    3095
5     Other    4697
6   Missing    1760


In [7]:
# Number of spokes: every row of ukbiobank_eth except one.
num_vars = len(ukbiobank_eth) - 1

# Evenly spaced spoke angles over a full turn, shifted by a quarter turn
# so that the first axis points straight up.
theta = np.pi/2 + np.linspace(0, 2*np.pi, num_vars, endpoint=False)

def unit_poly_verts(theta):
    """Return the polygon vertices that mark the ends of the plot axes.

    Each angle in ``theta`` (radians) is mapped to a point on the circle of
    radius 0.5 centred at (0.5, 0.5), so every vertex lies inside the unit
    square.

    Returns a list of ``(x, y)`` tuples, one per angle, in input order.
    """
    centre = radius = 0.5
    corners = []
    for angle in theta:
        corners.append((radius*np.cos(angle) + centre,
                        radius*np.sin(angle) + centre))
    return corners

def radar_patch(r, theta):
    """Convert polar radar values to x/y coordinates around (0.5, 0.5).

    ``r`` holds the radial distances and ``theta`` the matching angles in
    radians. Every radius is padded by 0.01 before conversion.

    Returns the pair ``(xt, yt)`` of x and y coordinates.
    """
    padded = r + 0.01
    xt = padded * np.cos(theta) + 0.5
    yt = padded * np.sin(theta) + 0.5
    return xt, yt

# Spoke end points, split into separate x / y coordinate lists.
verts = unit_poly_verts(theta)
x = [vx for vx, _ in verts]
y = [vy for _, vy in verts]

# Counts for every row except the last one.
vals = np.array(ukbiobank_eth['Value'].iloc[:-1])
# Row labelled 6, expressed as a fraction of the total of the plotted rows.
missing_eth = ukbiobank_eth.loc[6, 'Value'] / sum(vals)
# Scale to shares of the total, halved so a 100% share reaches radius 0.5.
vals = vals / (2 * sum(vals))
xt, yt = radar_patch(vals, theta)
label_eth = ukbiobank_eth['Ethnicity'].iloc[:-1].tolist()


In [8]:
# Undo the halving applied to vals to recover each category's share, to 3 d.p.
percents = np.round(2 * vals, 3)

# One two-point segment per spoke, running from the centre (0.5, 0.5) to the spoke end.
x_lines = [[0.5, x_end] for x_end in x]
y_lines = [[0.5, y_end] for y_end in y]

In [9]:
q = figure(title = 'Spider plot test')

# Every column has one entry per spoke so that a single source can feed the
# patch, the spoke lines, the labels and the hover tooltips.
source = ColumnDataSource(data=dict(x=xt,
                                    y=yt,
                                    x_lines=x_lines,
                                    y_lines=y_lines,
                                    label_eth = label_eth,
                                    labs_x_cords = x,
                                    labs_y_cords = y,
                                    values = ukbiobank_eth[:-1]['Value'],
                                    percent = percents
                                   ))
# Category names drawn at the spoke ends, nudged slightly off the line.
# render_mode is intentionally not passed: 'canvas' was the default, and the
# property was removed in Bokeh 3.0, where passing it raises an error.
labels = LabelSet(x='labs_x_cords', y='labs_y_cords', text='label_eth',x_offset = 5,y_offset =-5, source=source)
hover = HoverTool(tooltips=[
        ("Ethnicity", "@label_eth"),
        ("Raw value", "@values"),
        ('Percent', "@percent{0.0 %}"),
    ])
# Glyphs are added in this order: filled value polygon, then the spokes,
# then the grey disc sized by missing_eth.
q.patch(x= 'x', y='y', fill_alpha = 0.2, fill_color = 'blue',source=source)
q.multi_line('x_lines','y_lines',source=source, color = 'navy', line_width = 1)
q.ellipse(x = 0.5,y= 0.5,width = missing_eth, height = missing_eth, fill_alpha = 0.1, fill_color = 'grey')
q.add_layout(labels)
q.add_tools(hover)


## Testing multiple plots

In [10]:
# Place the bar plot (p) and the spider plot (q) on the diagonal of a 2x2 grid;
# the None entries are the off-diagonal cells.
r = gridplot([[p,None],[None,q]])
# Render the combined layout.
show(r)              

## Testing changing individual plot - Relativise

In [11]:
# List the top-level keys of the reference-population JSON.
print(ref_dic.keys())

dict_keys(['2011 Census'])


In [12]:
# Reference age distribution, shaped like ukbiobank_age (one row per age band).
ref_age_dic = ref_dic['2011 Census']['Age'].copy()
ref_age = pd.DataFrame({'Age range':list(ref_age_dic.keys()), 'Value':list(ref_age_dic.values())})

# Side-by-side counts: Value_x is the cohort, Value_y is the reference.
ukb_ref_age = pd.merge(ukbiobank_age,ref_age, how = 'outer', on ='Age range')

# Drop the 'Missing' band. The explicit .copy() gives an independent frame, so
# the column assignments below do not write into a filtered slice of the merge
# result (which raises SettingWithCopyWarning).
ukb_ref_age = ukb_ref_age[ukb_ref_age['Age range'] != 'Missing'].copy()

# Share of each age band in percent. The small 0.0001 offset keeps the
# ratio below defined when a band has a zero share.
ukb_ref_age['ukb_percent'] = (ukb_ref_age['Value_x']/ukb_ref_age['Value_x'].sum() *100) +0.0001
ukb_ref_age['ref_percent'] = ukb_ref_age['Value_y']/ukb_ref_age['Value_y'].sum() *100 + 0.0001

# Cohort share relative to the reference share (100 = same share in both).
# The column name is read by the plotting cell below, so its spelling is unchanged.
ukb_ref_age['reletive_percent'] = ukb_ref_age['ukb_percent']/ukb_ref_age['ref_percent'] * 100
print(ukb_ref_age)

   Age range  Value_x  Value_y  ukb_percent  ref_percent  reletive_percent
0        0-4        0  3913953     0.000100     6.194810          0.001614
1        5-9        0  3516615     0.000100     5.565934          0.001797
2      10-14        0  3669326     0.000100     5.807633          0.001722
3      15-19        0  3996452     0.000100     6.325384          0.001581
4      20-24        0  4297198     0.000100     6.801382          0.001470
5      25-29        0  4306340     0.000100     6.815851          0.001467
6      30-34        0  4154232     0.000100     6.575106          0.001521
7      35-39        0  4165801     0.000100     6.593417          0.001517
8      40-44    53960  4625528    10.720978     7.321038        146.440681
9      45-49    66438  4638163    13.200131     7.341036        179.812927
10     50-54    76808  4104611    15.260463     6.496570        234.900319
11     55-59    91953  3608858    18.269501     5.711929        319.848174
12     60-64   121419  38

In [13]:
# Bar chart of raw cohort counts per age band. The relative percentages are
# carried in the same source as 'yt' but are not drawn by this cell.
s = figure(title='Button change test', x_range=ukb_ref_age['Age range'])
source = ColumnDataSource(data={
    'x': ukb_ref_age['Age range'],
    'y': ukb_ref_age['Value_x'],
    'yt': ukb_ref_age['reletive_percent'],
})
s.vbar(x='x', top='y', width=0.9, source=source)
# NOTE(review): no output_file call in this cell, so show() presumably reuses
# the output target configured earlier in the notebook. Confirm that is intended.
show(s)

## Testing changing all the plots

## Testing all together