In [1]:
import pandas as pd
import geopandas as gpd
import plotly.graph_objects as go

# Read the normalized education dataset from disk.
data = pd.read_csv('datasets/Normalized_Dataset_EDU.csv')

# Force both plotted indicators to numeric dtype; entries that cannot be
# parsed become NaN (errors='coerce') instead of raising.
for column in ('Value', 'Life expectancy'):
    data[column] = pd.to_numeric(data[column], errors='coerce')

# Country outlines from the Natural Earth low-resolution sample dataset.
# NOTE(review): geopandas.datasets appears to be deprecated in newer GeoPandas
# releases - confirm against the installed version before upgrading.
world_path = gpd.datasets.get_path('naturalearth_lowres')
border = gpd.read_file(world_path)

# Antarctica is excluded from the map.
border = border.loc[border['name'] != 'Antarctica']

# Attach the tabular indicators to the geometries by country name.
# merge() defaults to an inner join, so only names present on both sides remain.
merged_data = border.merge(data, left_on='name', right_on='Country')

# Guarantee a coordinate reference system; fall back to WGS84 when none is set.
if merged_data.crs is None:
    merged_data = merged_data.set_crs('EPSG:4326')

# Bivariate class: bucket each indicator into terciles and join the two labels,
# giving keys such as 'low_high' (Value tier first, life-expectancy tier second).
value_tier = pd.qcut(merged_data['Value'], 3, labels=['low', 'medium', 'high']).astype(str)
life_tier = pd.qcut(merged_data['Life expectancy'], 3, labels=['_low', '_medium', '_high']).astype(str)
merged_data['Bi_Class'] = value_tier + life_tier

# Fill colour for every bivariate class. Keys read '<Value tier>_<life
# expectancy tier>'; the figure title and legend label the Value tier as
# education. Insertion order doubles as the plotting/legend order.
category_colors = {
    'low_low': '#ffffe5',        # low education, low life expectancy
    'low_medium': '#f7fcb9',     # low education, medium life expectancy
    'low_high': '#d9f0a3',       # low education, high life expectancy
    'medium_low': '#addd8e',     # medium education, low life expectancy
    'medium_medium': '#78c679',  # medium education, medium life expectancy
    'medium_high': '#41ab5d',    # medium education, high life expectancy
    'high_low': '#238443',       # high education, low life expectancy
    'high_medium': '#006837',    # high education, medium life expectancy
    'high_high': '#004529',      # high education, high life expectancy
}

# Ordered list of class keys, taken from the dict's insertion order.
ordered_categories = list(category_colors)

# Per-country colour looked up from its class; classes without an entry map to NaN.
merged_data['color'] = merged_data['Bi_Class'].map(category_colors)

# Prepare the data for Plotly: blank out placeholder ISO codes so they are not
# passed on as real country locations.
# The placeholder is matched both as the string '-99' and as the integer -99.
# The earlier check compared only against the integer, which can never match
# when the column holds text codes (as the Natural Earth data presumably does -
# verify with merged_data['iso_a3'].unique()).
merged_data['iso_a3'] = merged_data['iso_a3'].apply(
    lambda code: None if code in ('-99', -99) else code
)

# Create the Plotly figure
fig = go.Figure()

# Add one choropleth trace per bivariate class, in category order. Each trace
# uses a two-stop colorscale with the same colour at both ends, so every
# country in the class is drawn in that single colour regardless of its z value.
for category in ordered_categories:
    subset = merged_data[merged_data['Bi_Class'] == category]

    # Hover text is built with zip instead of a row-wise DataFrame.apply so a
    # class that happens to contain no countries simply yields an empty list.
    # The 'Value' column is labelled "Education" to agree with the figure title
    # and legend (it was previously labelled "GDP").
    hover_text = [
        f"{name}<br>Bi-Class: {bi_class}<br>Education: {value}<br>Life Expectancy: {life_exp}"
        for name, bi_class, value, life_exp in zip(
            subset['name'],
            subset['Bi_Class'],
            subset['Value'],
            subset['Life expectancy'],
        )
    ]

    # Readable trace name in the form "<Tier> Education_<Tier> L.E"; the first
    # key part is the Value (education) tier, the second the life-expectancy tier.
    education_tier, life_tier = category.split('_')
    trace_name = f"{education_tier.capitalize()} Education_{life_tier.capitalize()} L.E"

    fig.add_trace(go.Choropleth(
        locations=subset['iso_a3'],
        locationmode='ISO-3',  # explicit; iso_a3 holds three-letter country codes
        z=subset['Value'],  # required by Choropleth; colour is fixed by the colorscale below
        text=hover_text,
        hoverinfo='text',
        geo='geo',
        colorscale=[[0, category_colors[category]], [1, category_colors[category]]],
        showscale=False,
        name=trace_name,
    ))

# Custom legend: one Scattergeo trace per class that plots nothing
# (its only location is None) and exists solely to show a coloured legend entry.
# Substitutions are applied in this exact order to turn a class key such as
# 'low_high' into its legend label.
legend_renames = (
    ('_low', '_Low L.E'),
    ('_medium', '_Medium L.E'),
    ('_high', '_High L.E'),
    ('low', 'Low Education'),
    ('medium', 'Medium Education'),
    ('high', 'High Education'),
)
for category in ordered_categories:
    legend_label = category
    for old, new in legend_renames:
        legend_label = legend_label.replace(old, new)

    fig.add_trace(go.Scattergeo(
        locationmode='ISO-3',
        locations=[None],
        marker={'size': 10, 'color': category_colors[category]},
        showlegend=True,
        name=legend_label,
    ))

# Map styling: black country borders and coastlines on a Natural Earth projection.
geo_style = {
    'showcountries': True,
    'countrycolor': "black",
    'showcoastlines': True,
    'coastlinecolor': "black",
    'projection_type': "natural earth",
}
fig.update_geos(**geo_style)

# Figure-level layout: title, tight margins, legend and geo background colours.
fig.update_layout(
    title_text='Education vs Life Expectancy Bivariate Choropleth',
    margin=dict(r=0, t=50, l=0, b=0),
    legend={
        'title': "Categories",
        'traceorder': 'normal',  # legend entries follow the order traces were added
    },
    geo={
        'lakecolor': '#FFFFFF',
        'bgcolor': '#a8d5f2',  # geo background colour, used here for the water
    },
)

# Render the figure.
fig.show()

  border = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
