In [5]:
import pandas as pd
import geopandas as gpd
import plotly.graph_objects as go

import warnings
# Install a global filter that hides every FutureWarning raised from here on,
# so deprecation notices from the libraries do not clutter the cell output.
warnings.filterwarnings("ignore", category=FutureWarning)

In [6]:
# Load the tabular (non-spatial) data from disk.
data = pd.read_csv('datasets/Normalized_Dataset_EDU.csv')

# Force the two plotted measures to a numeric dtype. With errors='coerce',
# entries that cannot be parsed become NaN instead of raising.
for numeric_column in ('Value', 'Life expectancy'):
    data[numeric_column] = pd.to_numeric(data[numeric_column], errors='coerce')

# Read the country polygons from the Natural Earth low-resolution dataset and
# leave Antarctica out of the map.
# NOTE(review): `gpd.datasets` is deprecated and presumably unavailable in
# geopandas >= 1.0 - confirm the pinned geopandas version before upgrading.
borders_path = gpd.datasets.get_path('naturalearth_lowres')
border = gpd.read_file(borders_path)
border = border.loc[border['name'].ne('Antarctica')]

# Attach the tabular data to the polygons. This is an inner join on the
# country name, so a country only survives when the border 'name' and the
# CSV 'Country' spell it identically.
merged_data = border.merge(data, how='inner', left_on='name', right_on='Country')

# Fall back to WGS84 (EPSG:4326) when the merge result carries no CRS.
if merged_data.crs is None:
    merged_data = merged_data.set_crs('EPSG:4326')

# Build the bivariate class label "<education tertile>_<life expectancy tertile>".
# Each measure is cut into three equal-frequency bins; the second set of labels
# already carries the leading underscore, so plain concatenation yields e.g.
# 'high_low'.
# NOTE(review): rows with a NaN measure presumably end up with 'nan' in that
# half of the label and therefore match none of the nine classes - confirm.
education_tertile = pd.qcut(
    merged_data['Value'], 3, labels=['low', 'medium', 'high']
).astype(str)
life_expectancy_tertile = pd.qcut(
    merged_data['Life expectancy'], 3, labels=['_low', '_medium', '_high']
).astype(str)
merged_data['Bi_Class'] = education_tertile + life_expectancy_tertile

# Define bivariate colors
# Class names read "<education>_<life expectancy>"; they are generated with
# education as the outer loop, giving low_low, low_medium, ..., high_high.
_TERTILES = ('low', 'medium', 'high')
ordered_categories = [
    f'{education}_{life_expectancy}'
    for education in _TERTILES
    for life_expectancy in _TERTILES
]

# One fill color per class, listed in the same order as ordered_categories.
# Trailing comments read: education | life expectancy.
category_colors = dict(zip(ordered_categories, (
    '#d3d3d3',  # low    | low
    '#c098b9',  # low    | medium
    '#ad5b9c',  # low    | high
    '#97c5c5',  # medium | low
    '#898ead',  # medium | medium
    '#7c5592',  # medium | high
    '#52b6b6',  # high   | low
    '#4a839f',  # high   | medium
    '#004529',  # high   | high
)))
# Look up the fill color of every country from its bivariate class; classes
# that are not keys of category_colors map to NaN.
merged_data['color'] = merged_data['Bi_Class'].map(category_colors)

# Prepare the data for Plotly
# Natural Earth marks a missing ISO code with the placeholder -99, which is
# stored as the *string* '-99'. Comparing against the integer -99 therefore
# never matched and the placeholder was passed to Plotly as a location.
# Comparing the string form catches both spellings.
# For affected countries whose ISO-3 code is unambiguous we substitute the real
# code so they are still drawn; any other placeholder becomes None.
# NOTE(review): naturalearth_lowres is known to ship '-99' for France and
# Norway (among others) - confirm against the installed dataset version.
_ISO_A3_FALLBACK = {'France': 'FRA', 'Norway': 'NOR'}
merged_data['iso_a3'] = [
    code if str(code) != '-99' else _ISO_A3_FALLBACK.get(country)
    for code, country in zip(merged_data['iso_a3'], merged_data['name'])
]

fig = go.Figure()

# Add one choropleth trace per bivariate class, in the order of the categories.
# Every trace uses a flat two-stop colorscale (same color at 0 and 1), so all
# countries of a class get exactly that class color regardless of their z value.
for category in ordered_categories:
    subset = merged_data[merged_data['Bi_Class'] == category]

    # Build the hover labels as a plain list. The previous
    # `subset.apply(..., axis=1)` returns an empty DataFrame (not a Series)
    # when a class has no countries, which is not a valid `text` value; a list
    # comprehension simply yields [] in that case.
    hover_text = [
        f"<b>{country}</b><br>Bi-Class: {bi_class}<br>Education: {education}<br>Life Expectancy: {life_expectancy}"
        for country, bi_class, education, life_expectancy in zip(
            subset['name'],
            subset['Bi_Class'],
            subset['Value'],
            subset['Life expectancy'],
        )
    ]

    # Update category names.
    # The suffix (life expectancy half, with its leading underscore) is
    # rewritten first; the remaining lowercase prefix (education half) is
    # rewritten afterwards. The result has the form
    # "<Level> Education_<Level> Education & L.E".
    trace_name = (
        category
        .replace('_low', '_Low Education & L.E')
        .replace('_medium', '_Medium Education & L.E')
        .replace('_high', '_High Education & L.E')
        .replace('low', 'Low Education')
        .replace('medium', 'Medium Education')
        .replace('high', 'High Education')
    )

    fig.add_trace(go.Choropleth(
        locations=subset['iso_a3'],
        z=subset['Value'],
        text=hover_text,
        hoverinfo='text',  # show only the custom label built above
        geo='geo',
        colorscale=[[0, category_colors[category]], [1, category_colors[category]]],
        showscale=False,
        name=trace_name,
    ))

# Base-map styling: black country borders and coastlines on a
# Natural Earth projection.
geo_style = {
    'projection_type': "natural earth",
    'showcountries': True,
    'countrycolor': "black",
    'showcoastlines': True,
    'coastlinecolor': "black",
}
fig.update_geos(**geo_style)

# Overall figure layout: centered title with subtitle, fixed 780x570 canvas,
# a tall bottom margin (room for the explanatory annotation), legend position
# and the map background colors.
fig.update_layout(
    title={
        'text': 'Impact of education on life expectancy<br><sup>Countries with high education levels generally have high life expectancy</sup>',
        'x': 0.5,
        'y': 0.93,
    },
    plot_bgcolor='#cff8d6',
    paper_bgcolor='#cff8d6',
    margin={
        'l': 10,
        'r': 10,
        'b': 135,
        't': 75,
        'pad': 4,
        'autoexpand': True,
    },
    # Fixed size: autosize is switched off so width/height are honored.
    autosize=False,
    width=780,
    height=570,
    legend={
        'title': "Categories",
        'traceorder': 'normal',
        'x': 0.85,
        'y': 0.95,
        'xanchor': 'left',
        'yanchor': 'top',
    },
    geo={
        'lakecolor': '#FFFFFF',
        'bgcolor': '#a8d5f2',
    },
)

# Add bivariate legend: overlay the pre-rendered legend picture, positioned and
# sized in paper coordinates (fractions of the whole figure).
# NOTE(review): `source` is a bare relative file name - confirm it resolves in
# the environment where the figure is rendered.
fig.add_layout_image(
    source='bivar_choro_legend.png',
    xref='paper',
    yref='paper',
    x=0.77,
    y=0.1,
    sizex=0.45,
    sizey=0.45,
)

# Explanatory caption placed below the map. The text is kept as separate
# fragments (each carrying its own <br> line breaks) and joined unchanged.
caption_fragments = (
    'The data, sourced from 2010, is presented using a gradient of three different colors layered over eachother.<br>',
    'This results in 9 different colors, each one corresponding to a specific education level and life expectancy. Each<br>',
    'country is assigned a color according to these two factors. The two variables are normalized to prevent biases introduced<br>',
    'by the size of each country<br><br>',
    '<b>Example:</b> Ukraine has a light blue color. Looking up the color in the legend, the country appears to have<br>',
    'a high level of education, but low life expectancy.',
)

fig.add_annotation(
    text=''.join(caption_fragments),
    # Paper coordinates; the negative y puts the caption in the bottom margin.
    xref='paper',
    yref='paper',
    x=0.04,
    y=-0.3,
    xanchor='left',
    yanchor='bottom',
    align='left',
    showarrow=False,
    font={'size': 12},
)

# Render the finished figure.
fig.show()