## Life expectancy versus GDP per capita from 1952 - 2007

#### Function definitions for animated bubble plot (to be run once, but not to be changed)

Define a function to set the colour of the points for each category.

In [None]:
def colour_scheme(category, colourscheme):
    """Return the colour assigned to a category.

    Parameters
    ----------
    category
        Value to look up; it is compared with ``==`` against the entries of
        the first column of ``colourscheme``.
    colourscheme
        Table accessed positionally through ``.iloc``: the first column is
        searched for the category and the second column supplies the colour.

    Returns
    -------
    The second-column entry of the first row whose first-column entry equals
    ``category``, or ``'black'`` when no row matches.
    """
    # walk the category column directly instead of indexing by position
    for row, name in enumerate(colourscheme.iloc[:, 0]):
        if name == category:
            return colourscheme.iloc[row, 1]

    # if no match return black
    return 'black'

Define a function to create an animated bubble plot, with points coloured by category and sized by a numerical variable.

In [None]:
import numpy as np
import plotly.offline as py
import plotly.graph_objs as go

def _axis_layout(values, title, logscale):
    # Build the layout entry for one axis: the 0.5%-99.5% quantile span of the
    # data, padded by 10% of that span on either side.
    low, high = np.nanquantile(np.asarray(values, dtype=float), [0.005, 0.995])
    if logscale:
        # the range of a log axis is given as log10 of the data values
        low, high = np.log10(low), np.log10(high)
    padding = 0.1 * (high - low)
    axis = {"range": [low - padding, high + padding], "zeroline": False, "title": title}
    if logscale:
        axis["type"] = "log"
    return axis


def _category_trace(rows, name, colour, sizeref, variable_x, variable_y, variable_size,
                    variable_time, variable_hover):
    # Build the scatter trace for the rows of one category at one time.
    # np.fmax keeps every bubble at least 10*sizeref in area (and ignores NaN sizes).
    sizes = np.fmax(rows[variable_size].values.astype(float), 10 * sizeref)
    trace = {
        "x": list(rows[variable_x]),
        "y": list(rows[variable_y]),
        "mode": "markers",
        "text": list(rows[variable_time]),
        "marker": {
            "sizemode": "area",
            "sizeref": sizeref,
            "size": sizes.tolist(),
            "color": colour
        },
        "name": name
    }
    # only attach hover text when a hover column was actually requested
    if variable_hover is not None:
        trace["hovertext"] = list(rows[variable_hover])
    return trace


def bubble_animation(dataset, colourscheme, variable_x, variable_y, variable_category, variable_size, variable_time,
                variable_hover=None, logscale_x=False, logscale_y=False):
    """Create an animated bubble plot with one frame per value of the time variable.

    Parameters
    ----------
    dataset
        Table (indexed by column name) holding all of the columns named below.
    colourscheme
        Table passed to ``colour_scheme`` to look up the colour of each category.
    variable_x, variable_y
        Names of the columns plotted along the x and y axes; also used as the
        axis titles.
    variable_category
        Name of the column whose distinct values each become one trace.
    variable_size
        Name of the column that sets the bubble area.
    variable_time
        Name of the column whose distinct values each become one animation
        frame and one slider step; also used as the slider prefix.
    variable_hover
        Optional name of a column of text shown when hovering over points.
    logscale_x, logscale_y
        Whether the x and y axes use a log scale.

    Returns
    -------
    None. The figure is passed to ``plotly.offline.plot`` with the filename
    ``'bubble_animation'`` and then displayed with ``fig.show()``.

    Raises
    ------
    ValueError
        If the time column contains no (non-missing) values.
    """
    # distinct times in ascending order, so the first slider step is the
    # earliest time and matches the traces drawn initially
    times = sorted(set(dataset[variable_time].dropna()))
    if not times:
        raise ValueError("no values found in time column '{}'".format(variable_time))
    # distinct categories in the order they first appear in the dataset
    categories = list(dict.fromkeys(dataset[variable_category].dropna()))
    # look each category's colour up once rather than once per frame
    colours = {category: colour_scheme(category, colourscheme) for category in categories}
    # derive typical point size to scale bubbles
    sizeref = 0.05 * np.nanmedian(np.asarray(dataset[variable_size], dtype=float))

    def traces_at(time):
        # one trace per category, in a fixed order so frames line up with the initial traces
        rows_at_time = dataset[dataset[variable_time] == time]
        return [
            _category_trace(rows_at_time[rows_at_time[variable_category] == category], category,
                            colours[category], sizeref, variable_x, variable_y, variable_size,
                            variable_time, variable_hover)
            for category in categories
        ]

    # make frames and matching slider steps for each time of the animation
    frames = []
    slider_steps = []
    for time in times:
        # frames are looked up by name, so the slider step refers to the same string
        frame_name = str(time)
        frames.append({"data": traces_at(time), "name": frame_name})
        slider_steps.append({
            "args": [
                [frame_name],
                {"frame": {"duration": 500, "redraw": False}, "mode": "immediate", "transition": {"duration": 300}}
            ],
            "label": frame_name, "method": "animate"
        })

    sliders_dict = {
        "active": 0, "yanchor": "top", "xanchor": "left",
        "currentvalue": {
            "font": {"size": 18},
            "prefix": "{}:".format(variable_time),
            "visible": True,
            "xanchor": "right"
        },
        "transition": {"duration": 300, "easing": "cubic-in-out"},
        "pad": {"b": 10, "t": 50}, "len": 0.9, "x": 0.1, "y": 0, "steps": slider_steps
    }

    play_button = {
        "args": [None, {"frame": {"duration": 500, "redraw": False}, "fromcurrent": False,
                        "transition": {"duration": 300, "easing": "quadratic-in-out"}}],
        "label": "Play",
        "method": "animate"
    }
    pause_button = {
        "args": [[None], {"frame": {"duration": 0, "redraw": False}, "mode": "immediate",
                          "transition": {"duration": 0}}],
        "label": "Pause",
        "method": "animate"
    }

    # define figure contents using a dictionary
    fig_dict = {
        # the traces drawn before the animation starts are those of the first frame
        "data": traces_at(times[0]),
        "layout": {
            "xaxis": _axis_layout(dataset[variable_x], variable_x, logscale_x),
            "yaxis": _axis_layout(dataset[variable_y], variable_y, logscale_y),
            "hovermode": "closest",
            "updatemenus": [
                {
                    "buttons": [play_button, pause_button],
                    "direction": "left", "pad": {"r": 10, "t": 87}, "showactive": True, "type": "buttons",
                    "x": 0.1, "xanchor": "right", "y": 0, "yanchor": "top"
                }
            ],
            "sliders": [sliders_dict]
        },
        "frames": frames
    }

    # plot the figure
    fig = go.Figure(data=fig_dict) # for scatter plot
    py.plot(fig, config={'scrollZoom': False}, filename='bubble_animation')
    fig.show()

#### Import bubble plot timeseries dataset, and colourscheme to apply to each category (change file location as desired)

Import the Life and Death timeseries dataset from the `github` repository into a `pandas` dataframe. Note this reads in the `.csv` version of the dataset directly from `github`.

This dataset must include at least three numerical variables, a categorical variable, and a time variable. The column headers are used as function inputs to specify which variable will be plotted along which axis etc. 

In [None]:
import pandas as pd
import warnings
# NOTE(review): with no category or module given, this filter hides every
# warning raised from here on, not only those from the import below
warnings.filterwarnings('ignore')

# read-in the dataset from github into a pandas dataframe
# (the csv is fetched over the network each time this cell is run)
url = "https://raw.githubusercontent.com/xbr209/tutorials/master/Life_and_death_timeseries.csv"
dataset = pd.read_csv(url)

# print out the pandas dataframe to screen
dataset

Import the Life and Death colourscheme dataset from `github` repository into a `pandas` dataframe.

This dataset is formatted so that the first column is the category name (used in the main dataset) and the second column is the colour name to be used for that group. The colour name must be in a format recognised by `plotly`; the full range of colours can be found at https://matplotlib.org/3.1.0/gallery/color/named_colors.html.

In [None]:
# read-in colour scheme for each group from github into a pandas dataframe
# (this rebinds `url`, which previously held the dataset location)
url = "https://raw.githubusercontent.com/xbr209/tutorials/master/Life_and_death_colourscheme.csv"
colourscheme = pd.read_csv(url)

# print out the colour scheme dataframe to screen
colourscheme

#### Produce animated bubble plot from input dataset and colour scheme

Create the animated bubble plot of the life expectancy as a function of GDP per capita, with the population as the point size, the continent as the colour, and the year as the time variable.

The `bubble_animation` function takes the dataset and colourscheme as parameters, followed by the names of the x variable, y variable, categorical variable, point size variable and time variable. It also takes three optional parameters: the name of an additional column of text to display when hovering over points, and two true/false flags stating whether the x and y axes use a log scale.

In [None]:
# draw the animated bubble plot: x axis on a log scale, y axis linear
# NOTE(review): each variable_* string is used to index a column of `dataset`,
# so it must match the csv header exactly - presumably 'GPD per capita' is
# spelled this way in the file; verify before "correcting" it
bubble_animation(dataset, colourscheme, variable_x='GPD per capita', \
                variable_y='Life expectancy at birth, total (years)', variable_category='Continent', \
                variable_size='Population', variable_time='Year', variable_hover='Country', \
                logscale_x=True, logscale_y=False)