In [44]:
import pandas as pd
import json

import bokeh.plotting as bpl
import os
import numpy as np
import math
from bokeh.plotting import figure, output_file, show, gridplot
from bokeh.models import ColumnDataSource, LabelSet, HoverTool, Div, Label, CustomJS, Span, BoxAnnotation,LinearAxis, Range1d
from bokeh.models.widgets import Panel, Tabs
import re

In [2]:
# Work from the directory one level above the notebook: the relative
# 'data/raw/...' paths below are resolved against the current directory.
# The chdir is guarded so that re-running this cell does not climb one
# further level each time (which would break every relative path below).
if not os.path.isdir('data/raw'):
    os.chdir('../')

# Imported here rather than in the top import cell because it has to run after
# the chdir above.
# NOTE(review): presumably the package is resolved relative to the working
# directory — confirm, or install it so it can move to the imports cell.
import represetnation_labels.useful_functions as uf

# Load the raw cohort and reference-population dictionaries.
# JSON is UTF-8 by specification, so do not rely on the platform default encoding.
with open('data/raw/cohort_demographics_test_data.json', 'r', encoding='utf-8') as fb:
    cohorts_dict = json.load(fb)
with open('data/raw/Reference_population.json', 'r', encoding='utf-8') as fb:
    reference_dict = json.load(fb)

## Format data

In [3]:
# Hand both raw dictionaries to the project's cleaning helper; it returns a
# pair that is unpacked into the reference data and the per-cohort graph data.
# NOTE(review): the exact structure of both results is defined in
# useful_functions.clean_data — confirm there.
ref_dict, graph_dict = uf.clean_data(cohorts_dict, reference_dict)

# Sanity check: list the columns available for one cohort / variable pair.
ethnicity_columns = graph_dict['UK Biobank']['Ethnicity']
print(ethnicity_columns.keys())

dict_keys(['Ethnicity', 'values', 'percent', 'reletive', 'reference standardised', 'missing', 'ref percent', 'description text', 'reletive text', 'abs text'])


## Testing bar split plot 

In [4]:
# Split-axis horizontal bar chart.
# Two figures are placed side by side and fed from the same data source: the
# left panel (p) shows the 0-15 part of the x axis, the right panel (q) shows
# the 75-110 part. Solid bars are the cohort percentages; outlined bars are the
# reference-population percentages.
source = ColumnDataSource(data = graph_dict['UK Biobank']['Ethnicity'])

# Tooltip rows used by the hover tool of each panel.
BAR_TOOLTIPS = [
    ('Ethnicity', '@Ethnicity'),
    ('Raw values', "@{values}"),
    ('Percent/%', "@{percent}{0.0}"),
    ('UK population percent/%', '@{ref percent}{0.0}'),
]


def _make_hover():
    """Return a new HoverTool showing the BAR_TOOLTIPS rows."""
    return HoverTool(tooltips = list(BAR_TOOLTIPS), mode = 'mouse', name = 'data plot')


def _draw_bars(fig, cohort_label = None, reference_label = None):
    """Draw the cohort bars and the outlined reference bars on ``fig``.

    A legend entry is created for a bar series only when its label is given.
    """
    cohort_kwargs = {} if cohort_label is None else {'legend_label': cohort_label}
    reference_kwargs = {} if reference_label is None else {'legend_label': reference_label}

    # Solid bars: cohort percentage.
    fig.hbar(
        y = 'Ethnicity',
        right = 'percent',
        height = 0.9,
        color = '#003667',
        line_alpha = 0,
        source = source,
        **cohort_kwargs
    )
    # Outline-only bars (no fill): reference percentage.
    fig.hbar(
        y = 'Ethnicity',
        right = 'ref percent',
        height = 0.9,
        fill_alpha = 0,
        line_color = '#a0a0a0',
        line_width = 4,
        line_alpha = 1,
        source = source,
        **reference_kwargs
    )


def _strip_chrome(fig):
    """Apply the styling shared by both panels: no ticks, axis lines, grid or outline."""
    for axis in (fig.xaxis, fig.yaxis):
        axis.major_tick_line_color = None  # turn off major ticks
        axis.minor_tick_line_color = None  # turn off minor ticks
        axis.axis_line_color = None
    fig.xaxis.major_label_text_font_size = '0pt'  # hide the x-axis tick labels
    fig.xgrid.grid_line_color = None
    fig.ygrid.grid_line_color = None
    fig.outline_line_width = 0
    fig.background_fill_color = '#f5f5f5'
    fig.background_fill_alpha = 0.9
    fig.title.text_color = '#a0a0a0'
    fig.title.text_font_size = '24pt'
    fig.title.text_font = "helvetica"


# Left panel: low end of the x axis, carries the title and the category labels.
p = figure(
    y_range = list(source.data['Ethnicity']),
    title = 'Ethnicity',
    x_range = (0,15),
    toolbar_location = None
)
_draw_bars(p)
_strip_chrome(p)
p.yaxis.major_label_text_font_size = '10pt'
p.yaxis.major_label_text_font = 'helvetica'
p.yaxis.major_label_text_color = '#a0a0a0'
hover2 = _make_hover()
p.add_tools(hover2)

# Right panel: high end of the x axis, carries the legend; its category labels
# are hidden because the left panel already shows them.
q = figure(
    y_range = list(source.data['Ethnicity']),
    x_range = (75,110),
    toolbar_location = None
)
_draw_bars(q, cohort_label = 'UK Biobank percent', reference_label = 'UK Population Ratio')
_strip_chrome(q)
q.yaxis.major_label_text_font_size = '0pt'
q.legend.location = 'top_right'
q.legend.label_text_font = "helvetica"
q.legend.label_text_color = "#a0a0a0"
# Fix: the right panel gets its own hover tool. Previously hover3 was created
# but hover2 (the left panel's tool) was attached here instead.
hover3 = _make_hover()
q.add_tools(hover3)

final = gridplot([[p,q]])
show(final)

## Testing dot log plot

In [70]:
# Column data for the log-scale dot plot.
# Shallow copy: the extra plotting columns added below must not be written back
# into graph_dict, which the bar-plot cell also reads.
dot_dict = dict(graph_dict['UK Biobank']['Ethnicity'])
n_groups = len(dot_dict['Ethnicity'])

# Percentages at which a labelled reference position is placed on the x axis.
tick_percents = [1, 10, 25, 50, 100]

# Natural log of the cohort and reference percentages.
# Note: math.log raises ValueError for a value of 0 or less.
dot_dict['log'] = [math.log(pct) for pct in dot_dict['percent']]
dot_dict['ref log'] = [math.log(pct) for pct in dot_dict['ref percent']]

# Tick-label columns: x position, fixed y position (6) and text, one per tick.
# NOTE(review): these share the dict with the per-group columns, so all columns
# have equal length only when there are exactly len(tick_percents) groups.
dot_dict['lab_cords'] = [math.log(pct) for pct in tick_percents]
dot_dict['lab_cords_y'] = [6] * len(tick_percents)
dot_dict['label_perc'] = ['%d%%' % pct for pct in tick_percents]

# Per-group columns: row number (1..n) and the fixed x position of the group label.
dot_dict['new_y'] = list(range(1, n_groups + 1))
dot_dict['label_x'] = [-1.7] * n_groups
print(dot_dict)

{'Ethnicity': ['White', 'Black', 'Asian', 'Mixed Race', 'Other'], 'values': [503819, 8222, 11807, 3095, 4697], 'percent': [94.8, 1.5, 2.2, 0.6, 0.9], 'reletive': [107.1, 48.4, 40.7, 28.6, 0], 'reference standardised': [470501, 16481, 28709, 11164, 4785], 'missing': [1760, 1760, 1760, 1760, 1760], 'ref percent': [88.5, 3.1, 5.4, 2.1, 0.9], 'description text': ['this is description text for this variable', 'this is description text for this variable', 'this is description text for this variable', 'this is description text for this variable', 'this is description text for this variable'], 'reletive text': ['there is 107% of this group needed to be representative', 'there is 48% of this group needed to be representative', 'there is 41% of this group needed to be representative', 'there is 29% of this group needed to be representative', 'there is 0% of this group needed to be representative'], 'abs text': ['This group is overrepresenet. there can be 33318 fewer people in this group to be re

In [72]:
# Log-scale dot plot: for each group (one row per 'new_y' value) a dot for the
# cohort and a dot for the reference population, joined by a segment, with the
# x positions taken from the natural-log columns of dot_dict.
source = ColumnDataSource(data = dot_dict)

r = figure(title = 'Ethnicity -log values',x_range=(-1.7,max(source.data['log'])*1.1),y_range=(0.5,6.2))
r.segment('log','new_y','ref log','new_y', color = '#555555',line_width = 3,source = source)
r.circle(x = 'ref log',y = 'new_y', color = '#a0a0a0',size = 10,legend_label = 'UK Population',source = source)
r.circle(x = 'log',y = 'new_y', color = '#003667',size = 10 ,legend_label = 'UK Biobank',source = source)

# Faint vertical guide lines at log(1), log(10), log(25), log(50) and log(100).
reference_spans = [
    Span(location = math.log(pct), dimension = 'height', line_color = '#555555', line_alpha = 0.2, line_width = 3)
    for pct in (1, 10, 25, 50, 100)
]

# Shaded horizontal bands covering y 1.5-2.5 and 3.5-4.5 (rows 2 and 4).
# Fix: bottom is now the lower bound and top the upper bound; the two were
# previously swapped.
box1 = BoxAnnotation(bottom = 1.5, top = 2.5, fill_color = '#000000',fill_alpha = 0.2)
box2 = BoxAnnotation(bottom = 3.5, top = 4.5, fill_color = '#000000',fill_alpha = 0.2)

hover4 = HoverTool(tooltips = [
('Ethnicity', '@Ethnicity'),
('Raw values', "@{values}"),
('Percent/%', "@{percent}{0.0}"),
('UK population percent/%', '@{ref percent}{0.0}')
],
       mode = 'mouse', name= 'data plot')

# Percentage captions ('1%', '10%', ...) positioned from the lab_cords columns.
labels = LabelSet(
        x='lab_cords', 
        y='lab_cords_y', 
        text='label_perc',
        text_align='right', 
        text_font ='helvetica',
        text_color = 'grey',
        source=source
    )

# Group names, one per row, positioned from the label_x / new_y columns.
labels2 = LabelSet(
        x='label_x', 
        y='new_y', 
        text='Ethnicity',
        text_align='left', 
        text_font ='helvetica',
        text_color = 'grey',
        source=source
    )

# Hide ticks, tick labels and axis lines on both axes; the LabelSets above
# provide the captions instead.
for axis in (r.xaxis, r.yaxis):
    axis.major_label_text_font_size = '0pt'
    axis.major_tick_line_color = None
    axis.minor_tick_line_color = None
    axis.axis_line_color = None
r.xgrid.grid_line_color = None
r.ygrid.grid_line_color = None
r.outline_line_width = 0
r.background_fill_color = '#f5f5f5'
r.background_fill_alpha = 0.9

r.title.text_color = '#a0a0a0'
r.title.text_font_size = '24pt'
r.title.text_font = "helvetica"

# Attach annotations in the same order as before: bands, guide lines, group
# names, hover tool, percentage captions.
r.add_layout(box1)
r.add_layout(box2)
for guide in reference_spans:
    r.add_layout(guide)
r.add_layout(labels2)
r.add_tools(hover4)
r.add_layout(labels)

r.legend.location = 'top_left'
r.legend.label_text_font = "helvetica"
r.legend.label_text_color = "#a0a0a0"

# Make sure the output directory exists before Bokeh writes the HTML file.
os.makedirs('plots', exist_ok = True)
output_file('plots/ethnicitylogs.html')
show(r)