In [1]:
import pandas as pd
import json

import bokeh.plotting as bpl
import os
import numpy as np
import math
from bokeh.plotting import figure, output_file, show, gridplot
from bokeh.models import ColumnDataSource, LabelSet, HoverTool, Div, Label, CustomJS, Span, BoxAnnotation,LinearAxis, Range1d
from bokeh.models.widgets import Panel, Tabs
import re

In [2]:
# Move up to the project root so the relative paths below resolve.
# The move is skipped when the cohort file is already reachable from the
# current directory, so re-running this cell does not climb one more
# directory each time.
if not os.path.exists('data/raw/cohort_demographics_test_data.json'):
    os.chdir('../')

# Imported here rather than in the first cell because it is resolved relative
# to the working directory chosen above.
import represetnation_labels.useful_functions as uf

# Raw cohort demographics and the reference population they are compared with.
with open('data/raw/cohort_demographics_test_data.json', 'r') as cohort_file:
    cohorts_dict = json.load(cohort_file)
with open('data/raw/Reference_population.json', 'r') as reference_file:
    reference_dict = json.load(reference_file)

## Format data

In [3]:
# Clean both raw dictionaries, then list the columns available for one
# cohort/variable pair as a quick sanity check.
ref_dict, graph_dict = uf.clean_data(cohorts_dict, reference_dict)
ethnicity_columns = graph_dict['UK Biobank']['Ethnicity'].keys()
print(ethnicity_columns)

dict_keys(['Ethnicity', 'values', 'percent', 'reletive', 'reletive_colours', 'reletive_representative_or_not', 'reletive_rep_threhsold', 'reference standardised', 'missing', 'ref percent', 'description text', 'reletive text', 'abs text'])


## Testing bar split plot 

In [4]:
# Split ("broken axis") horizontal bar chart.
# Panel `p` shows the 0-15 range with the category names; panel `q` shows the
# 75-110 range with the legend. Both panels draw the same two bar layers from
# the same data source.
source = ColumnDataSource(data = graph_dict['UK Biobank']['Ethnicity'])
categories = list(source.data['Ethnicity'])


def _bar_hover():
    """Return a new hover tool for one bar panel (each figure gets its own)."""
    return HoverTool(
        tooltips = [
            ('Ethnicity', '@Ethnicity'),
            ('Raw values', "@{values}"),
            ('Percent/%', "@{percent}{0.0}"),
            ('UK population percent/%', '@{ref percent}{0.0}'),
        ],
        mode = 'mouse',
        name = 'data plot',
    )


def _draw_bars(fig, data_source, cohort_legend = None, reference_legend = None):
    """Draw the filled 'percent' bars and the outlined 'ref percent' bars on fig.

    A legend entry is only created for a layer whose legend text is given.
    """
    cohort_kwargs = {} if cohort_legend is None else {'legend_label': cohort_legend}
    reference_kwargs = {} if reference_legend is None else {'legend_label': reference_legend}

    # Cohort: solid bars without an outline.
    fig.hbar(
        y = 'Ethnicity',
        right = 'percent',
        height = 0.9,
        color = '#003667',
        line_alpha = 0,
        source = data_source,
        **cohort_kwargs
    )
    # Reference population: unfilled bars drawn as a grey outline on top.
    fig.hbar(
        y = 'Ethnicity',
        right = 'ref percent',
        height = 0.9,
        fill_alpha = 0,
        line_color = '#a0a0a0',
        line_width = 4,
        line_alpha = 1,
        source = data_source,
        **reference_kwargs
    )


def _strip_bar_chrome(fig):
    """Hide ticks, axis lines, x tick labels, grid and outline; style background and title."""
    for axis in (fig.xaxis, fig.yaxis):
        axis.major_tick_line_color = None
        axis.minor_tick_line_color = None
        axis.axis_line_color = None
    fig.xaxis.major_label_text_font_size = '0pt'  # hides the x tick labels
    fig.xgrid.grid_line_color = None
    fig.ygrid.grid_line_color = None
    fig.outline_line_width = 0
    fig.background_fill_color = '#f5f5f5'
    fig.background_fill_alpha = 0.9
    fig.title.text_color = '#a0a0a0'
    fig.title.text_font_size = '24pt'
    fig.title.text_font = "helvetica"


# Left panel: low end of the axis, carries the title and the category names.
p = figure(
    y_range = list(categories),
    title = 'Ethnicity',
    x_range = (0,15),
    toolbar_location = None
)
_draw_bars(p, source)
_strip_bar_chrome(p)
p.yaxis.major_label_text_font_size = '10pt'
p.yaxis.major_label_text_font = 'helvetica'
p.yaxis.major_label_text_color = '#a0a0a0'
hover2 = _bar_hover()
p.add_tools(hover2)

# Right panel: high end of the axis, carries the legend and no category names.
q = figure(
    y_range = list(categories),
    x_range = (75,110),
    toolbar_location = None
)
_draw_bars(
    q,
    source,
    cohort_legend = 'UK Biobank percent',
    reference_legend = 'UK Population Ratio',
)
_strip_bar_chrome(q)
q.yaxis.major_label_text_font_size = '0pt'  # names are already shown on the left panel
q.legend.location = 'top_right'
q.legend.label_text_font = "helvetica"
q.legend.label_text_color = "#a0a0a0"
# The right panel gets its own hover tool instead of re-attaching the left one.
hover3 = _bar_hover()
q.add_tools(hover3)

final = gridplot([[p,q]])
show(final)

## Testing dot log plot

In [5]:
# Work on a shallow copy so the extra plotting columns added below do not leak
# into the shared graph_dict entry that other cells read.
dot_dict = dict(graph_dict['UK Biobank']['Ethnicity'])
n_categories = len(dot_dict['Ethnicity'])

# Natural-log positions of the cohort and reference percentages.
# math.log raises ValueError for 0, so every percentage must be positive.
dot_dict['log'] = [math.log(value) for value in dot_dict['percent']]
dot_dict['ref log'] = [math.log(value) for value in dot_dict['ref percent']]

# Guide-line labels (1% ... 100%). There are five of them; they share the data
# source with the per-category columns, so they only line up with five categories.
dot_dict['lab_cords'] = [math.log(tick) for tick in [1,10,25,50,100]]
dot_dict['lab_cords_y'] = [6] * n_categories
dot_dict['label_perc'] = ['1%','10%','25%','50%','100%']

# One row per category (1..n) and the x position of the category name.
dot_dict['new_y'] = list(range(1, n_categories + 1))
dot_dict['label_x'] = [-1.7] * n_categories
print(dot_dict)

{'Ethnicity': ['White', 'Black', 'Asian', 'Mixed Race', 'Other'], 'values': [503819, 8222, 11807, 3095, 4697], 'percent': [94.8, 1.5, 2.2, 0.6, 0.9], 'reletive': [107.1, 48.4, 40.7, 28.6, 0], 'reletive_colours': ['#5FBFCE', '#FF112C', '#FF112C', '#FF112C', '#FF112C'], 'reletive_representative_or_not': ['Over-representative of the UK population', 'Under-representative of the UK population', 'Under-representative of the UK population', 'Under-representative of the UK population', 'Under-representative of the UK population'], 'reletive_rep_threhsold': [100, 100, 100, 100, 100], 'reference standardised': [470501, 16481, 28709, 11164, 4785], 'missing': [1760, 1760, 1760, 1760, 1760], 'ref percent': [88.5, 3.1, 5.4, 2.1, 0.9], 'description text': ['this is description text for this variable', 'this is description text for this variable', 'this is description text for this variable', 'this is description text for this variable', 'this is description text for this variable'], 'reletive text': 

In [6]:
# Dot plot on a natural-log x axis: one row per category, a dot for the cohort
# value, a dot for the reference value and a segment joining the two.
source = ColumnDataSource(data = dot_dict)

log_x_max = max(source.data['log'])*1.1
r = figure(
    title = 'Ethnicity -log values',
    x_range = (-1.7, log_x_max),
    y_range = (0.5, 6.2)
)
r.segment('log', 'new_y', 'ref log', 'new_y', color = '#555555', line_width = 3, source = source)
r.circle(x = 'ref log', y = 'new_y', color = '#a0a0a0', size = 10, legend_label = 'UK Population', source = source)
r.circle(x = 'log', y = 'new_y', color = '#003667', size = 10, legend_label = 'UK Biobank', source = source)

# Faint vertical guide lines at 1%, 10%, 25%, 50% and 100%.
logone, log10, log25, log50, log100 = [
    Span(
        location = math.log(guide_percent),
        dimension = 'height',
        line_color = '#555555',
        line_alpha = 0.2,
        line_width = 3
    )
    for guide_percent in (1, 10, 25, 50, 100)
]

# Shaded bands behind the rows at y = 2 and y = 4.
box1 = BoxAnnotation(top = 1.5, bottom = 2.5, fill_color = '#000000', fill_alpha = 0.2)
box2 = BoxAnnotation(top = 3.5, bottom = 4.5, fill_color = '#000000', fill_alpha = 0.2)

hover4 = HoverTool(
    tooltips = [
        ('Ethnicity', '@Ethnicity'),
        ('Raw values', "@{values}"),
        ('Percent/%', "@{percent}{0.0}"),
        ('UK population percent/%', '@{ref percent}{0.0}'),
    ],
    mode = 'mouse',
    name = 'data plot'
)

# Percentage captions for the guide lines.
labels = LabelSet(
    x = 'lab_cords',
    y = 'lab_cords_y',
    text = 'label_perc',
    text_align = 'right',
    text_font = 'helvetica',
    text_color = 'grey',
    source = source
)

# Category names at the left edge of each row.
labels2 = LabelSet(
    x = 'label_x',
    y = 'new_y',
    text = 'Ethnicity',
    text_align = 'left',
    text_font = 'helvetica',
    text_color = 'grey',
    source = source
)

# Remove ticks, axis lines, tick labels, grid and outline.
for axis in (r.xaxis, r.yaxis):
    axis.major_label_text_font_size = '0pt'
    axis.major_tick_line_color = None
    axis.minor_tick_line_color = None
    axis.axis_line_color = None
r.xgrid.grid_line_color = None
r.ygrid.grid_line_color = None
r.outline_line_width = 0
r.background_fill_color = '#f5f5f5'
r.background_fill_alpha = 0.9

r.title.text_color = '#a0a0a0'
r.title.text_font_size = '24pt'
r.title.text_font = "helvetica"

# Attach the annotations in a fixed order: bands, guide lines, then text.
for annotation in (box1, box2, logone, log10, log25, log50, log100, labels2):
    r.add_layout(annotation)
r.add_tools(hover4)
r.add_layout(labels)

r.legend.location = 'top_left'
r.legend.label_text_font = "helvetica"
r.legend.label_text_color = "#a0a0a0"

output_file('plots/ethnicitylogs.html')
show(r)

## Split dotplot

In [7]:
# Category names are anchored at x = 0 for the split (linear-axis) dot plot;
# one entry per category rather than a hard-coded count.
dot_dict['label_x'] = [0] * len(dot_dict['Ethnicity'])
dot_dict.keys()

dict_keys(['Ethnicity', 'values', 'percent', 'reletive', 'reletive_colours', 'reletive_representative_or_not', 'reletive_rep_threhsold', 'reference standardised', 'missing', 'ref percent', 'description text', 'reletive text', 'abs text', 'log', 'ref log', 'lab_cords', 'lab_cords_y', 'label_perc', 'new_y', 'label_x'])

In [8]:
def dot_plot(source,x_range,plot_width):
    """Build one panel of the split dot plot.

    A panel whose x_range starts below zero is the left panel: it is titled
    'Ethnicity', shows the category names and hides its legend. Any other
    panel is the right panel: untitled, no category names, legend shown at
    the top right.

    Parameters
    ----------
    source : ColumnDataSource
        Must provide the columns 'percent', 'ref percent', 'new_y',
        'Ethnicity' and 'values'; the left panel also reads 'label_x'.
    x_range : tuple
        (start, end) of the x axis for this panel.
    plot_width : int
        Width passed to the figure.

    Returns
    -------
    The configured Bokeh figure.
    """
    # Decide once which panel this is, so the title, labels and legend agree.
    is_left_panel = x_range[0] < 0

    if is_left_panel:
        line_val = x_range[1]   # guide line at the cut edge of the axis
        line_end = 0            # guide line at the true start of the axis
        title = 'Ethnicity'
        place = 'right'
        other_place = 'left'
        end_line_width = 1.5
    else:
        line_val = x_range[0]   # guide line at the cut edge of the axis
        line_end = 100          # guide line at the true end of the axis
        title = ''
        place = 'left'
        other_place = 'right'
        end_line_width = 3

    # y_range, the caption height (y = 6) and the shaded bands are laid out for
    # rows at y = 1..5.
    r = figure(title = title,x_range=x_range,y_range=(0.5,6.2),plot_width = plot_width)
    r.segment('percent','new_y','ref percent','new_y', color = '#555555',line_width = 3,source = source)
    r.circle(x = 'ref percent',y = 'new_y', color = '#a0a0a0',size = 10,legend_label = 'UK Population',source = source)
    r.circle(x = 'percent',y = 'new_y', color = '#003667',size = 10 ,legend_label = 'UK Biobank',source = source)

    line = Span(location = line_val,dimension = 'height', line_color = '#555555',line_alpha =0.2, line_width = 3)
    end_line = Span(location = line_end,dimension = 'height', line_color = '#555555',line_alpha =0.2, line_width = end_line_width)

    # Shaded bands behind the rows at y = 2 and y = 4.
    box1 = BoxAnnotation(bottom = 1.5, top = 2.5, fill_color = '#000000',fill_alpha = 0.1)
    box2 = BoxAnnotation(bottom = 3.5, top = 4.5, fill_color = '#000000',fill_alpha = 0.1)

    hover4 = HoverTool(tooltips = [
    ('Ethnicity', '@Ethnicity'),
    ('Raw values', "@{values}"),
    ('Percent/%', "@{percent}{0.0}"),
    ('UK population percent/%', '@{ref percent}{0.0}')
    ],
           mode = 'mouse', name= 'data plot')

    # Percentage captions above the two guide lines.
    label = Label(x=line_val, y=6,
                     text=str(line_val) + '%', render_mode='canvas',text_align = place,
                     border_line_alpha=0,
                     background_fill_alpha=0,
                     text_font = 'helvetica',
                     text_color = '#a0a0a0'
                    )

    label_end = Label(x=line_end, y=6,
                     text=str(line_end) + '%', render_mode='canvas',text_align = other_place,
                     border_line_alpha=0,
                     background_fill_alpha=0,
                     text_font = 'helvetica',
                     text_color = '#a0a0a0'
                    )

    # Remove ticks, axis lines, tick labels, grid and outline.
    r.yaxis.major_label_text_font_size = '0pt'
    r.yaxis.major_tick_line_color = None
    r.yaxis.minor_tick_line_color = None
    r.xaxis.major_tick_line_color = None
    r.xaxis.minor_tick_line_color = None
    r.yaxis.axis_line_color = None
    r.xaxis.axis_line_color = None
    r.xaxis.major_label_text_font_size = '0pt'
    r.xgrid.grid_line_color = None
    r.ygrid.grid_line_color = None
    r.outline_line_width = 0
    r.background_fill_color = '#f5f5f5'
    r.background_fill_alpha = 0.9
    r.min_border = 0
    r.title.text_color = '#a0a0a0'
    r.title.text_font_size = '24pt'
    r.title.text_font = "helvetica"

    r.add_layout(box1)
    r.add_layout(box2)
    r.add_layout(line)
    r.add_layout(end_line)
    if is_left_panel:
        # Category names are drawn only on the left panel.
        labels2 = LabelSet(
                x='label_x',
                y='new_y',
                text='Ethnicity',
                text_align='right',
                text_font ='helvetica',
                text_color = 'grey',
                source=source
            )
        r.add_layout(labels2)
    r.add_tools(hover4)

    r.add_layout(label)
    r.add_layout(label_end)

    if is_left_panel:
        # Collapse the left panel's legend; the right panel shows it.
        r.legend.glyph_height = 0
        r.legend.glyph_width = 0
        r.legend.label_text_font_size = '0pt'
        r.legend.background_fill_alpha = 0
        r.legend.border_line_alpha = 0
    else:
        r.legend.location = 'top_right'
        r.legend.label_text_font = "helvetica"
        r.legend.label_text_color = "#a0a0a0"
    return r

In [9]:
# Lay the two dot-plot panels side by side on a nominal 600 px canvas.
source = ColumnDataSource(data = dot_dict)

left_range = (-4.75, 10)
right_range = (85, 100)
left_start, left_stop = left_range
right_start, right_stop = right_range

total_length = left_stop - left_start + right_stop - right_start

# Pixels reserved for the category names (the negative part of the left axis).
name_width = round(abs(left_start) / total_length * 600)
data_width = 600 - name_width

# NOTE(review): both data widths are fractions of total_length, which still
# includes the name area, so with these ranges the two panels total 519 px
# rather than 600. Confirm whether that is intended.
left_plot_width = round(left_stop / total_length * data_width)
right_plot_width = round((right_stop - right_start) / total_length * data_width)

left = dot_plot(source, left_range, left_plot_width + name_width)
right = dot_plot(source, right_range, right_plot_width)
full_plot = gridplot([[left, right]])

output_file('plots/split.html')
show(full_plot)

In [10]:
# Show the computed pixel widths: left data area, right data area, name area.
for panel_width in (left_plot_width, right_plot_width, name_width):
    print(panel_width)

169
254
96


## Boxy Sankey

In [35]:
# Work on a shallow copy so the polygon columns added below do not leak into
# the shared graph_dict entry that other cells read.
box_dict = dict(graph_dict['UK Biobank']['Ethnicity'])
perc = box_dict['percent']
ref_p = box_dict['ref percent']

# The first band starts at this value instead of 0, on both sides.
BOX_BASELINE = 80

# One quadrilateral per category, corners in the order
# (cohort bottom, cohort top, reference top, reference bottom).
# Each band stacks on the running total of the categories before it.
box_dict['y_coords'] = [
    [
        BOX_BASELINE if i == 0 else sum(perc[:i]),
        sum(perc[:i+1]),
        sum(ref_p[:i+1]),
        BOX_BASELINE if i == 0 else sum(ref_p[:i]),
    ]
    for i in range(len(perc))
]

# Cohort edge at x = 0, reference edge at x = 100.
box_dict['x_coords'] = [[0,0,100,100] for _ in perc]
box_dict['colours'] = ["#003667","#ed6b00","#87f5fb","#a882dd","#721817"]
print(box_dict['y_coords'])
print(box_dict['x_coords'])

[[80, 94.8, 88.5, 80], [94.8, 96.3, 91.6, 88.5], [96.3, 98.5, 97.0, 91.6], [98.5, 99.1, 99.1, 97.0], [99.1, 100.0, 100.0, 99.1]]
[[0, 0, 100, 100], [0, 0, 100, 100], [0, 0, 100, 100], [0, 0, 100, 100], [0, 0, 100, 100]]


In [37]:
# First, unstyled version of the box plot: one filled polygon per category.
source = ColumnDataSource(data = box_dict)
q = figure(title = 'Ethnicity')
# `colours` is also read by the pie-chart data cell further down.
colours = ["#003667","#ed6b00","#87f5fb","#a882dd","#721817"]

q.patches(xs='x_coords', ys='y_coords',color='colours',source = source)
hover4 = HoverTool(tooltips = [
    ('Ethnicity', '@Ethnicity'),
    ('Raw values', "@{values}"),
    ('Percent/%', "@{percent}{0.0}"),
    ('UK population percent/%', '@{ref percent}{0.0}')
    ],
           mode = 'mouse', name= 'data plot')

q.add_tools(hover4)
# show() saves to the most recently configured output file. Give this plot its
# own target so it does not overwrite 'plots/split.html' from the split dot
# plot cell.
output_file('plots/boxysanky_basic.html')
show(q)

In [51]:
# Styled version of the box plot, with a percentage caption at every band
# boundary on both the cohort (left) and the reference (right) edge.
source = ColumnDataSource(data = box_dict)
q = figure(title = 'Ethnicity', x_range = (-10, 110))
colours = ["#003667","#ed6b00","#87f5fb","#a882dd","#721817"]

q.patches(xs = 'x_coords', ys = 'y_coords', color = 'colours', legend_field = 'Ethnicity', source = source)

band_corners = source.data['y_coords']

# Cohort edge: lower corner of every band, plus the 100% top.
perc_lab_cords = np.array([corners[0] for corners in band_corners] + [100])
perc_x_lab_cords = np.array([0] * len(perc_lab_cords))
y_labels = [str(boundary) + '%' for boundary in perc_lab_cords]
perc_lab_cords = perc_lab_cords - 0.5   # the captions are drawn 0.5 below the boundary

# Reference edge: same construction from the fourth corner of every band.
ref_p_lab_cords = np.array([corners[3] for corners in band_corners] + [100])
ref_p_x_lab_cords = np.array([100] * len(perc_lab_cords))
ref_y_labels = [str(boundary) + '%' for boundary in ref_p_lab_cords]
ref_p_lab_cords = ref_p_lab_cords - 0.5

hover4 = HoverTool(
    tooltips = [
        ('Ethnicity', '@Ethnicity'),
        ('Raw values', "@{values}"),
        ('Percent/%', "@{percent}{0.0}"),
        ('UK population percent/%', '@{ref percent}{0.0}'),
    ],
    mode = 'mouse',
    name = 'data plot'
)

# Appearance shared by every text annotation in this figure.
plain_text = dict(
    render_mode = 'canvas',
    border_line_alpha = 0,
    background_fill_alpha = 0,
    text_font = 'helvetica',
    text_color = '#a0a0a0'
)

for row in range(len(perc_lab_cords)):
    label = Label(
        x = perc_x_lab_cords[row],
        y = perc_lab_cords[row],
        text = y_labels[row],
        text_align = 'right',
        text_font_size = '10pt',
        **plain_text
    )
    label2 = Label(
        x = ref_p_x_lab_cords[row],
        y = ref_p_lab_cords[row],
        text = ref_y_labels[row],
        text_align = 'left',
        text_font_size = '10pt',
        **plain_text
    )
    q.add_layout(label)
    q.add_layout(label2)

# Column headings for the two edges.
dataset_lab = Label(x = 20, y = 100, text = 'UK Biobank', text_align = 'right', **plain_text)
ref_lab = Label(x = 100, y = 100, text = 'UK Population', text_align = 'right', **plain_text)

# Remove ticks, axis lines, tick labels, grid and outline.
for axis in (q.xaxis, q.yaxis):
    axis.major_label_text_font_size = '0pt'
    axis.major_tick_line_color = None
    axis.minor_tick_line_color = None
    axis.axis_line_color = None
q.xgrid.grid_line_color = None
q.ygrid.grid_line_color = None
q.outline_line_width = 0
q.background_fill_color = '#f5f5f5'
q.background_fill_alpha = 0.9
q.title.text_color = '#a0a0a0'
q.title.text_font_size = '24pt'
q.title.text_font = "helvetica"
q.legend.location = (46, 24)
q.legend.label_text_font = "helvetica"
q.legend.label_text_color = "#a0a0a0"

q.add_layout(dataset_lab)
q.add_layout(ref_lab)
q.add_tools(hover4)

output_file('plots/boxysanky.html')
show(q)



In [13]:
# Compare the caption heights with a version shifted down by a further 5.
shifted_lab_cords = perc_lab_cords - 5
print(perc_lab_cords)
print(shifted_lab_cords)

[79.5 94.3 95.8 98.  98.6 99.5]
[74.5 89.3 90.8 93.  93.6 94.5]


## Multi pie plot

In [14]:
def _wedge_angles(shares):
    """Turn a list of shares into wedge sizes plus start/end angles in radians.

    Each share is scaled by the total of its own list, so the wedges always
    fill the full circle. Returns (angles, start_angles, end_angles).
    """
    total = sum(shares)
    angles = [share / total * 2 * math.pi for share in shares]
    end_angles = [sum(angles[:k + 1]) for k in range(len(angles))]
    # Every wedge starts where the previous one ended; the first starts at 0.
    start_angles = [0] + end_angles[:-1]
    return angles, start_angles, end_angles


# Work on a shallow copy so the angle columns added below do not leak into the
# shared graph_dict entry that other cells read.
pie_dict = dict(graph_dict['UK Biobank']['Ethnicity'])

# Outer ring: cohort percentages.
(pie_dict['angles'],
 pie_dict['start_angle'],
 pie_dict['end_angle']) = _wedge_angles(pie_dict['percent'])

# Inner ring: reference percentages, scaled by their own total rather than by
# the cohort total.
(pie_dict['ref_angles'],
 pie_dict['ref_start_angle'],
 pie_dict['ref_end_angle']) = _wedge_angles(pie_dict['ref percent'])

# `colours` is the palette list defined in the box-plot cells above.
pie_dict['colours'] = colours
print(pie_dict['start_angle'])

[0, 5.956459671206248, 6.050707450813942, 6.188937527571893, 6.226636639414971]


In [15]:
# Two concentric rings: the outer ring is the cohort, the inner ring is the
# reference population. Both use the same colour per category.
source = ColumnDataSource(data = pie_dict)
s = figure(title = 'Ethnicity', x_range = (-0.6, 0.8), y_range = (-0.6, 0.6))

# Outer ring; this is the layer that creates the legend entries.
s.annular_wedge(
    x = 0,
    y = 0,
    inner_radius = 0.31,
    outer_radius = 0.5,
    start_angle = 'start_angle',
    end_angle = 'end_angle',
    color = 'colours',
    legend_field = 'Ethnicity',
    source = source
)

# Inner ring.
s.annular_wedge(
    x = 0,
    y = 0,
    inner_radius = 0.1,
    outer_radius = 0.29,
    start_angle = 'ref_start_angle',
    end_angle = 'ref_end_angle',
    color = 'colours',
    source = source
)

hover4 = HoverTool(
    tooltips = [
        ('Ethnicity', '@Ethnicity'),
        ('Raw values', "@{values}"),
        ('Percent/%', "@{percent}{0.0}"),
        ('UK population percent/%', '@{ref percent}{0.0}'),
    ],
    mode = 'mouse',
    name = 'data plot'
)

# Captions naming each ring; both share one text style.
ring_caption_style = dict(
    render_mode = 'canvas',
    text_align = 'right',
    border_line_alpha = 0,
    background_fill_alpha = 0,
    text_font = 'helvetica',
    text_color = '#a0a0a0'
)
dataset_lab = Label(x = 0.6, y = 0, text = 'UK Biobank', **ring_caption_style)
ref_lab = Label(x = 0.25, y = 0, text = 'UK Population', **ring_caption_style)

# Remove ticks, axis lines, tick labels, grid and outline.
for axis in (s.xaxis, s.yaxis):
    axis.major_label_text_font_size = '0pt'
    axis.major_tick_line_color = None
    axis.minor_tick_line_color = None
    axis.axis_line_color = None
s.xgrid.grid_line_color = None
s.ygrid.grid_line_color = None
s.outline_line_width = 0
s.background_fill_color = '#f5f5f5'
s.background_fill_alpha = 0.9
s.title.text_color = '#a0a0a0'
s.title.text_font_size = '24pt'
s.title.text_font = "helvetica"
s.legend.label_text_font = "helvetica"
s.legend.label_text_color = "#a0a0a0"

s.add_layout(dataset_lab)
s.add_layout(ref_lab)
s.add_tools(hover4)

output_file('plots/donuts.html')
show(s)