# This notebook is the final product of DataCamp's Interactive graphs with Bokeh course. I do not own or teach the course; I only share the work I did in it. It uses the Gapminder data set. In addition to what the course walks you through, I added some features of my own devising to the graph: axes that update with the selected data, and an input box to search for and highlight a specific country in the graph.

## On top of the inspiration from DataCamp, I also let the notebook create the figure inline, without going to the Bokeh server. If you enjoy this, please check out DataCamp and the other notebooks I share that were inspired by their courses.

In [1]:
import pandas as pd
import numpy as np

# Load the tidy Gapminder table with the 'Year' column as the index, so that
# data.loc[<year>] selects every row recorded for that year.
data = pd.read_csv('gapminder_tidy.csv',index_col='Year')
# Preview the first rows of the table.
data.head()

Unnamed: 0_level_0,Country,fertility,life,population,child_mortality,gdp,region
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1964,Afghanistan,7.671,33.639,10474903.0,339.7,1182.0,South Asia
1965,Afghanistan,7.671,34.152,10697983.0,334.1,1182.0,South Asia
1966,Afghanistan,7.671,34.662,10927724.0,328.7,1168.0,South Asia
1967,Afghanistan,7.671,35.17,11163656.0,323.3,1173.0,South Asia
1968,Afghanistan,7.671,35.674,11411022.0,318.1,1187.0,South Asia


In [2]:
from bokeh.io import output_notebook, show
# Configure Bokeh to display its output in this notebook (see the
# bokeh.io.output_notebook documentation); show() is called further down.
output_notebook()

In [6]:
from bokeh.plotting import figure
from bokeh.models import HoverTool, ColumnDataSource, Slider, CategoricalColorMapper, Select,LabelSet, TextInput
from bokeh.layouts import widgetbox,row, column
from bokeh.palettes import Category10

# Make a list of the unique values from the region column: regions_list
# (used below as the factors of the CategoricalColorMapper that colors the points)
regions_list = data.region.unique().tolist()

#Make a function that will take a subset of our dataframe based on country name: i.e. United States
def new_df(country_name, frame=None):
    """Return the rows that belong to a single country.

    Args:
        country_name: value matched exactly (case-sensitive) against the
            ``Country`` column, e.g. ``'United States'``.
        frame: table to filter; it must have a ``Country`` column. When
            omitted, the module-level ``data`` frame is used.

    Returns:
        The matching rows with their original index. The result is empty
        when no row matches ``country_name``.
    """
    if frame is None:
        frame = data
    return frame[frame['Country'] == country_name]


#This function builds the Bokeh plot and pushes it to the document along with its updates
def modify_doc(doc):
    """Build the interactive Gapminder scatter plot and add it to ``doc``.

    The layout holds a text box naming one country to highlight, a year
    slider, two dropdowns choosing which columns go on the x and y axes, and
    the scatter plot itself. The function reads the module-level ``data``
    frame (indexed by year) and ``regions_list``, and calls ``new_df``.

    Args:
        doc: the Bokeh document that receives the finished layout through
            ``doc.add_root``.
    """
    # Columns offered in the two axis dropdowns, and the axis title shown
    # for each of them.
    axis_columns = ['fertility', 'life', 'child_mortality', 'gdp']
    axis_titles = {
        'fertility': 'Fertility (children per woman)',
        'life': 'Life Expectancy (years)',
        'child_mortality': 'Child Mortality (0-5 year-olds dying per 1000 born)',
        'gdp': 'Gross Domestic Product per Capita',
    }

    ####################################
    # Small helpers shared by the initial state and by the callbacks

    def marker_size(population):
        # Scale head counts down to a glyph size; the constant offset means
        # no marker is ever smaller than 4.75.
        return (population / 20000000) + 4.75

    def axis_bounds(column_name):
        # Series.min()/max() skip missing values, whereas the builtin
        # min()/max() give order-dependent results once a NaN is involved.
        values = data[column_name]
        return values.min(), values.max()

    def scatter_data(yr, x, y):
        # Columns for the main glyph: every row recorded for year `yr`.
        year_rows = data.loc[yr]
        return {
            'x': year_rows[x],
            'y': year_rows[y],
            'country': year_rows.Country,
            'pop': marker_size(year_rows.population),
            'region': year_rows.region,
        }

    def highlight_data(country_name, yr, x, y):
        # Columns for the single highlighted point. Each value is wrapped in
        # a list because the data source expects one sequence per column.
        country_rows = new_df(country_name.strip())
        if yr not in country_rows.index:
            # Unknown country (e.g. a typo or a half-typed name) or no row
            # for this year: show no highlight instead of raising KeyError
            # inside the widget callback.
            return {'x2': [], 'y2': [], 'country2': [], 'pop2': [], 'region2': []}
        point = country_rows.loc[yr]
        return {
            'x2': [point[x]],
            'y2': [point[y]],
            'country2': [point.Country],
            # Twice the regular size so the highlight stands out around the point.
            'pop2': [marker_size(point.population) * 2],
            'region2': [point.region],
        }

    ####################################
    # Make the widgets that will go into our interactive graph

    # Text box naming the country to highlight (named country_input so the
    # builtin input() is not shadowed).
    country_input = TextInput(title="Country", value="United States")

    # Year slider
    slider = Slider(start=1970, end=2010, step=1, value=1970, title='Year')

    # Dropdowns choosing the column plotted on each axis
    x_select = Select(options=list(axis_columns), value='fertility',
                      title='x-axis data')
    y_select = Select(options=list(axis_columns), value='life',
                      title='y-axis data')

    ####################################
    # Data sources, initialised from the widgets' starting values

    source = ColumnDataSource(
        data=scatter_data(slider.value, x_select.value, y_select.value))
    source2 = ColumnDataSource(
        data=highlight_data(country_input.value, slider.value,
                            x_select.value, y_select.value))

    #######################################
    # Set up the limits for the graph window and initialize the plot

    # Make a color mapper: one palette color per region
    color_mapper = CategoricalColorMapper(factors=regions_list,
                                          palette=Category10[6])

    # The axis ranges span the whole column (all years), not just one year.
    xmin, xmax = axis_bounds(x_select.value)
    ymin, ymax = axis_bounds(y_select.value)

    # Create the figure: plot
    plot = figure(title='Gapminder Data for 1970', plot_height=400, plot_width=700,
                  x_range=(xmin, xmax), y_range=(ymin, ymax))

    # Main glyph: one circle per country, colored by region
    p1 = plot.circle(x='x', y='y', fill_alpha=0.8, source=source,
                     color=dict(field='region', transform=color_mapper),
                     legend='region', size='pop')

    # Set the legend.location attribute of the plot to 'bottom_left'
    plot.legend.location = 'bottom_left'

    # Initial axis labels
    plot.xaxis.axis_label = axis_titles[x_select.value]
    plot.yaxis.axis_label = axis_titles[y_select.value]

    # Instead of creating a label, the searched country is drawn as a black,
    # double-sized circle on top of its regular point.
    plot.circle(x='x2', y='y2', fill_alpha=0.3, source=source2,
                color='black', legend='Text Input', size='pop2')

    ############################################
    # Hover tool for ad hoc analysis: shows the country name on mouse-over.
    # Restricted to p1 so the highlighted country is not labelled twice.
    hover = HoverTool(renderers=[p1], tooltips=[('Country', '@country')])
    plot.add_tools(hover)

    #######################################
    # Callback functions for our various widgets

    def update_plot(attr, old, new):
        # Read the current value off the slider and 2 dropdowns: yr, x, y
        yr, x, y = slider.value, x_select.value, y_select.value

        # Swap in the rows for the selected year and columns
        source.data = scatter_data(yr, x, y)

        # Set the range of all axes
        plot.x_range.start, plot.x_range.end = axis_bounds(x)
        plot.y_range.start, plot.y_range.end = axis_bounds(y)

        # Add title to plot
        plot.title.text = 'Gapminder data for {}'.format(yr)

        # Label the axes with the descriptive titles
        plot.xaxis.axis_label = axis_titles[x]
        plot.yaxis.axis_label = axis_titles[y]

    # This callback moves the highlight so it follows the searched country
    def update_country(attrname, old, new):
        source2.data = highlight_data(country_input.value, slider.value,
                                      x_select.value, y_select.value)

    ####################################################
    # Every widget triggers both callbacks, so the graph and the highlighted
    # country are refreshed together whenever any value changes.
    for widget in (country_input, slider, x_select, y_select):
        widget.on_change('value', update_plot, update_country)

    ####################
    # Set the layout for the plot so it is visually pleasing.
    layout = column(row(country_input, width=400),
                    row(widgetbox(slider, x_select, y_select), plot))
    # Add the layout to the document handed in by the caller
    doc.add_root(layout)
# Pass the modify_doc function itself to show() (not a finished layout);
# modify_doc receives a document and adds the interactive layout to it.
show(modify_doc)


In [4]:
# List every distinct country name. new_df compares the text typed into the
# Country box to this column with ==, so the spelling must match exactly.
data.Country.unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain',
       'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin',
       'Bhutan', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana',
       'Brazil', 'Brunei', 'Bulgaria', 'Burkina Faso', 'Burundi',
       'Cambodia', 'Cameroon', 'Canada', 'Cape Verde',
       'Central African Rep.', 'Chad', 'Channel Islands', 'Chile',
       'China', 'Colombia', 'Comoros', 'Congo, Dem. Rep.', 'Congo, Rep.',
       'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus',
       'Czech Rep.', 'Denmark', 'Djibouti', 'Dominican Rep.', 'Ecuador',
       'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia',
       'Ethiopia', 'Fiji', 'Finland', 'France', 'French Guiana',
       'French Polynesia', 'Gabon', 'Gambia', 'Georgia', 'Germany',
       'Ghana', 'Greece', 'Greenland', 'Grenada', 'Guadeloupe', 'Gu