In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display



  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
def import_data(data_path):
    df = pd.read_csv(data_path, header=0, encoding = "utf-8", na_values=["NA", "null", "", "NaN"])

    df = df.dropna(axis = 1, thresh=1)

    return df


def pivot_IHME(df):

    pivoted_df = df.pivot_table(index=['location', 'sex', 'cause', 'year'], 
                            columns='measure', 
                            values='val', 
                            aggfunc='first') 

    # Reset the index if needed
    pivoted_df = pivoted_df.reset_index()

    # Rename the columns for clarity
    pivoted_df = pivoted_df.rename(columns={'death': 'death_rate', 'incidence': 'incidence_rate'})

    pivoted_df.to_csv('data/pivotted_dataframe_IHME.csv', index=False)

    return pivoted_df


def pivot_WHO(df):

    pivoted_df = df.pivot_table(index=['ParentLocation', 'Location', 'Period'], 
                            columns='Indicator', 
                            values='Value', 
                            aggfunc='first') 

    # Reset the index if needed
    pivoted_df = pivoted_df.reset_index()

    pivoted_df.to_csv('data/pivotted_dataframe_WHO.csv', index=False)

    return pivoted_df

data_WHO = import_data("data/WHO data.csv")
data_WHO = data_WHO.drop(['ParentLocationCode', 'SpatialDimValueCode', 'IndicatorCode', 'Period type', 'Location type', 'ValueType', 'FactComments', 'FactValueNumeric', 'Language', 'IsLatestYear', 'DateModified'], axis=1)


data_IHME_1 = import_data("data/IHME-1.csv")
data_IHME_2 = import_data("data/IHME-2.csv")
data_IHME_combined = pd.concat([data_IHME_1, data_IHME_2], axis=0, ignore_index=True)
data_IHME_combined = data_IHME_combined.drop(['age', 'metric', 'upper', 'lower'], axis=1)


df_IHME = pivot_IHME(data_IHME_combined)
df_WHO = pivot_WHO(data_WHO)

In [3]:
df_IHME.head(2)

measure,location,sex,cause,year,Deaths,Incidence
0,Afghanistan,Both,Cardiovascular diseases,1980,388.484292,
1,Afghanistan,Both,Cardiovascular diseases,1981,423.291843,


In [4]:
numeric_columns = [
    "Generalist medical practitioners (number)", 
    "Medical doctors (number)", 
    "Medical doctors not further defined (number)", 
    "Specialist medical practitioners (number)"
]

for col in numeric_columns:
    df_WHO[col] = pd.to_numeric(df_WHO[col].astype(str).str.replace(" ", ""), errors='coerce')

df_WHO.head(2)

Indicator,ParentLocation,Location,Period,Generalist medical practitioners (number),Medical doctors (number),"Medical doctors (per 10,000)",Medical doctors not further defined (number),Specialist medical practitioners (number)
0,Africa,Algeria,2002,,35368,11.14,35368.0,
1,Africa,Algeria,2005,,33952,10.26,33952.0,


In [5]:
df = pd.read_csv("data_prep/inner_merged_data.csv")

df.head(2)

Unnamed: 0,location,year,Deaths,Incidence,Location,Period,Generalist medical practitioners (number),Medical doctors (number),"Medical doctors (per 10,000)",Medical doctors not further defined (number),Specialist medical practitioners (number)
0,Afghanistan,2001,1430.246526,420806.62194,Afghanistan,2001,,4104,2.02,4104,
1,Afghanistan,2006,1078.11022,406093.424902,Afghanistan,2006,,4220,1.66,4220,


In [25]:
def plot_ihme_data(df, metric="Deaths"):
    required_columns = {"location", "cause", "year", metric}

    if not required_columns.issubset(df.columns):
        missing_cols = required_columns - set(df.columns)
        raise ValueError(f"Missing required columns: {missing_cols}")

    most_recent_year = df["year"].max()
    df = df[df["year"] == most_recent_year].dropna(subset=["location", "cause", metric])

    locations = sorted(df["location"].unique())
    if not locations:
        raise ValueError("No valid locations found in the dataset.")

    fig = go.Figure()

    for location in locations:
        df_location = df[df["location"] == location].sort_values(by=metric, ascending=False)

        fig.add_trace(go.Bar(
            x=df_location["cause"],
            y=df_location[metric],
            name=f"{metric} ({location})",
            visible=(location == locations[0]),
            marker=dict(
                color="red",
                line=dict(width=0))
        ))

    buttons = [
        dict(
            label=location,
            method="update",
            args=[{"visible": [i == j for j in range(len(locations))]}]
        )
        for i, location in enumerate(locations)
    ]

    fig.update_layout(
        updatemenus=[dict(
            buttons=buttons,
            direction="down",
            showactive=True,
        )],
        title=f"<b>{metric} by Cause</b>",
        xaxis=dict(title="<b>Cause</b>", tickangle=-45),
        yaxis=dict(title=f"<b>{metric}</b>", side="left", showgrid=True),
        template="plotly_white",
        hovermode="x unified",
        legend=dict(
            x=0.5,
            y=1.15,
            xanchor="center",
            orientation="h",
            font=dict(size=12)
        ),
        bargap=0.2
    )

    fig.show()

plot_ihme_data(df_IHME, metric="Deaths")
# plot_ihme_data(df_IHME, metric="Incidence")


In [None]:
def plot_who_data(df):

    most_recent_year = df["Period"].max()
    df_filtered = df[df["Period"] == most_recent_year]

    parent_locations = sorted(df_filtered["ParentLocation"].dropna().unique())

    fig = go.Figure()

    for region in parent_locations:
        df_region = df_filtered[df_filtered["ParentLocation"] == region].sort_values(
            by="Medical doctors (number)", ascending=False
        )

        categories = [
            ("Medical doctors (number)", "blue"),
            ("Specialist medical practitioners (number)", "green"),
            ("Generalist medical practitioners (number)", "red")
        ]

        for category, color in categories:
            fig.add_trace(go.Bar(
                x=df_region["Location"],
                y=df_region[category],
                name=f"{category} ({region})",
                visible=(region == parent_locations[0]),
                marker_color=color
            ))

    buttons = []
    for i, region in enumerate(parent_locations):
        visible_array = [False] * (3 * len(parent_locations))
        visible_array[3 * i] = True
        visible_array[3 * i + 1] = True
        visible_array[3 * i + 2] = True

        buttons.append(dict(
            label=region,
            method="update",
            args=[{"visible": visible_array}]
        ))

    fig.update_layout(
        updatemenus=[dict(
            buttons=buttons,
            direction="down",
            showactive=True,
        )],
        title=f"<b>Medical Practitioners by Location ({most_recent_year})</b>",
        xaxis=dict(title="<b>Location</b>", tickangle=-45),
        yaxis=dict(
            title="<b>Number of Practitioners (in Thousands)</b>",
            tickformat=",",
            showgrid=True
        ),
        barmode="group",
        template="plotly_white",
        hovermode="x unified",
        legend=dict(
            x=0.5, y=1.15, xanchor="center", orientation="h",
            font=dict(size=12)
        )
    )

    fig.show()

plot_who_data(df_WHO)


In [9]:
def plot_metrics_by_country(df, primary_metric="Deaths", secondary_metric="Medical doctors (per 10,000)"):
    if primary_metric not in df.columns:
        raise ValueError(f"primary_metric '{primary_metric}' not found in dataframe columns.")
    
    if secondary_metric not in df.columns:
        raise ValueError(f"secondary_metric '{secondary_metric}' not found in dataframe columns.")

    years = sorted(df["Period"].dropna().unique(), reverse=True)
    most_recent_year = years[0]

    fig = go.Figure()

    for year in years:
        df_year = df[df["Period"] == year].sort_values(by=primary_metric, ascending=False)

        fig.add_trace(go.Bar(
            x=df_year["Location"],
            y=df_year[primary_metric],
            name=f"{primary_metric} ({year})",
            visible=(year == most_recent_year),
            marker_color="red",
            yaxis="y1"
        ))

        fig.add_trace(go.Scatter(
            x=df_year["Location"],
            y=df_year[secondary_metric],
            name=f"{secondary_metric} ({year})",
            visible=(year == most_recent_year),
            mode="lines+markers",
            line=dict(color="blue", dash="dash"),
            yaxis="y2"
        ))

    buttons = []
    for i, year in enumerate(years):
        visible_array = [False] * (2 * len(years))
        visible_array[2 * i] = True
        visible_array[2 * i + 1] = True

        buttons.append(dict(
            label=str(year),
            method="update",
            args=[{"visible": visible_array}]
        ))

    fig.update_layout(
        updatemenus=[dict(
            buttons=buttons,
            direction="down",
            showactive=True,
        )],
        title=f"<b>{primary_metric} & {secondary_metric} by Location</b>",
        xaxis=dict(title="<b>Location</b>", tickangle=-45),
        yaxis=dict(title=f"<b>{primary_metric}</b>", side="left", showgrid=True),
        yaxis2=dict(title=f"<b>{secondary_metric}</b>", overlaying="y", side="right", showgrid=False),
        template="plotly_white",
        hovermode="x unified",
        legend=dict(
            x=0.5,
            y=1.15,
            xanchor="center",
            orientation="h",
            font=dict(size=12)
        ),
        bargap=0.2
    )

    fig.show()


plot_metrics_by_country(df, primary_metric="Deaths", secondary_metric="Medical doctors (per 10,000)")
# plot_metrics_by_country(df, primary_metric="Incidence", secondary_metric="Medical doctors (per 10,000)")



In [10]:
def plot_metrics_over_time(df, primary_metric="Deaths", secondary_metric="Medical doctors (per 10,000)"):
   
    if primary_metric not in ["Deaths", "Incidence"]:
        raise ValueError("primary_metric must be either 'Deaths' or 'Incidence'")
    
    if secondary_metric not in df.columns:
        raise ValueError(f"secondary_metric '{secondary_metric}' not found in dataframe columns.")

    locations = df["Location"].dropna().unique()
    locations.sort()

    fig = go.Figure()

    for location in locations:
        df_loc = df[df["Location"] == location]

        fig.add_trace(go.Scatter(
            x=df_loc["Period"], 
            y=df_loc[primary_metric], 
            name=f"{primary_metric} - {location}",
            visible=(location == locations[0]),
            mode="lines+markers",
            line=dict(color="red" if primary_metric == "Deaths" else "orange"),
            yaxis="y1"
        ))

        fig.add_trace(go.Scatter(
            x=df_loc["Period"], 
            y=df_loc[secondary_metric], 
            name=f"{secondary_metric} - {location}",
            visible=(location == locations[0]),
            mode="lines+markers",
            line=dict(color="blue", dash="dash"),
            yaxis="y2"
        ))

    buttons = []
    for i, location in enumerate(locations):
        visible_array = [False] * (2 * len(locations))
        visible_array[2 * i] = True
        visible_array[2 * i + 1] = True

        buttons.append(dict(
            label=location,
            method="update",
            args=[{"visible": visible_array}]
        ))

    fig.update_layout(
        updatemenus=[dict(
            buttons=buttons,
            direction="down",
            showactive=True,
        )],
        title=f"<b>{primary_metric} & {secondary_metric} by Year</b>",
        xaxis=dict(title="<b>Year</b>"),
        yaxis=dict(title=f"<b>{primary_metric}</b>", side="left", showgrid=True),
        yaxis2=dict(title=f"<b>{secondary_metric}</b>", overlaying="y", side="right", showgrid=False),
        template="plotly_white",
        hovermode="x unified",
        legend=dict(
            x=0.5,
            y=1.15,
            xanchor="center",
            orientation="h",
            font=dict(size=12)
        )
    )

    fig.show()

# plot_metrics_over_time(df, primary_metric="Deaths", secondary_metric="Medical doctors (per 10,000)")
plot_metrics_over_time(df, primary_metric="Incidence", secondary_metric="Medical doctors (per 10,000)")
