## Import raw data and pre-processing

In [None]:
import pandas as pd

# Read raw WHO data
df_raw = pd.read_csv("TB_burden_countries_2025-12-05.csv")

# Map WHO region codes -> readable names
region_map = {
    "AFR": "Africa",
    "AMR": "Americas",
    "EMR": "Eastern Mediterranean",
    "EUR": "Europe",
    "SEAR": "South-East Asia",
    "WPR": "Western Pacific",
}

df = df_raw.copy()
df["g_whoregion"] = df["g_whoregion"].astype(str).str.strip()
df["region"] = df["g_whoregion"].map(region_map)

# Keep only rows with a valid region
df = df[~df["region"].isna()].copy()

# Keep only 2017 and 2023 incidence rates
df_sub = df[df["year"].isin([2017, 2023])].copy()

# Only keep the columns we need
df_sub = df_sub[["country", "iso3", "region", "year", "e_inc_100k"]]

After filtering to 2017–2023: (5099, 51)


## Step 2: Clean data for graphing

In [None]:
# Pivot: one row per country, columns = incidence for 2017 and 2023
wide = df_sub.pivot_table(
    index=["country", "iso3", "region"],
    columns="year",
    values="e_inc_100k"
).reset_index()

# Rename columns for clarity
wide = wide.rename(columns={2017: "inc_2017", 2023: "inc_2023"})

# Compute % reduction from 2017 to 2023
# (inc_2017 - inc_2023) / inc_2017 * 100
wide = wide[wide["inc_2017"].notna()]      # need a baseline
wide = wide[wide["inc_2017"] > 0]          # avoid divide-by-zero

wide["reduction_pct"] = (wide["inc_2017"] - wide["inc_2023"]) / wide["inc_2017"] * 100

# Cap extreme values for nicer color scaling
wide["reduction_pct_capped"] = wide["reduction_pct"].clip(lower=-50, upper=100)

print(wide.head())
print(wide.shape)


year         country iso3                 region  inc_2017  inc_2023  \
0        Afghanistan  AFG  Eastern Mediterranean     209.0     204.0   
1            Albania  ALB                 Europe      20.0      15.0   
2            Algeria  DZA                 Africa      75.0      55.0   
3     American Samoa  ASM        Western Pacific       9.7       3.9   
4            Andorra  AND                 Europe       1.4       4.9   

year  reduction_pct  reduction_pct_capped       reduction_category  
0          2.392344              2.392344          0–25% reduction  
1         25.000000             25.000000         25–50% reduction  
2         26.666667             26.666667         25–50% reduction  
3         59.793814             59.793814         50–75% reduction  
4       -250.000000            -50.000000  Increase / no reduction  
(198, 8)


In [5]:
wide.to_csv("tb_incidence_reduction_map.csv", index=False)