In [112]:
import duckdb
import altair as alt

alt.theme.enable('ggplot2')

jco_colors = [
    "#0073C2",  # blue
    "#EFC000",  # yellow
    "#868686",  # gray
    "#CD534C",  # red
    "#7AA6DC",  # light blue
    "#003C67",  # dark blue
    "#8F7700",  # dark yellow
    "#3B3B3B",  # dark gray
    "#A73030",  # dark red
    "#4A6990",  # slate blue
]

## load data

In [2]:
uri = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-11-18/who_tb_data.csv'

tb_data = duckdb.sql(f"from read_csv('{uri}', nullstr=['', 'NA']);").pl()

In [50]:
tb_data

country,g_whoregion,iso_numeric,iso2,iso3,year,c_cdr,c_newinc_100k,cfr,e_inc_100k,e_inc_num,e_mort_100k,e_mort_exc_tbhiv_100k,e_mort_exc_tbhiv_num,e_mort_num,e_mort_tbhiv_100k,e_mort_tbhiv_num,e_pop_num
str,str,i64,str,str,i64,f64,f64,f64,f64,i64,f64,f64,i64,i64,f64,i64,i64
"""Afghanistan""","""Eastern Mediterranean""",4,"""AF""","""AFG""",2000,19.0,35.0,0.37,190.0,38000,68.0,68.0,14000,14000,0.17,34,20130323
"""Afghanistan""","""Eastern Mediterranean""",4,"""AF""","""AFG""",2001,26.0,50.0,0.35,189.0,38000,63.0,63.0,13000,13000,0.3,61,20284311
"""Afghanistan""","""Eastern Mediterranean""",4,"""AF""","""AFG""",2002,34.0,65.0,0.31,189.0,40000,57.0,57.0,12000,12000,0.27,58,21378110
"""Afghanistan""","""Eastern Mediterranean""",4,"""AF""","""AFG""",2003,32.0,61.0,0.32,189.0,43000,58.0,58.0,13000,13000,0.29,66,22733047
"""Afghanistan""","""Eastern Mediterranean""",4,"""AF""","""AFG""",2004,41.0,78.0,0.28,189.0,44000,52.0,51.0,12000,12000,0.29,67,23560660
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""Zimbabwe""","""Africa""",716,"""ZW""","""ZWE""",2019,69.0,138.0,0.22,199.0,30000,43.0,11.0,1700,6500,31.0,4800,15271376
"""Zimbabwe""","""Africa""",716,"""ZW""","""ZWE""",2020,54.0,101.0,0.24,188.0,29000,44.0,13.0,2000,6800,31.0,4800,15526885
"""Zimbabwe""","""Africa""",716,"""ZW""","""ZWE""",2021,52.0,103.0,0.25,199.0,31000,49.0,15.0,2300,7800,34.0,5400,15797209
"""Zimbabwe""","""Africa""",716,"""ZW""","""ZWE""",2022,54.0,113.0,0.26,209.0,34000,52.0,16.0,2500,8400,37.0,5900,16069054


## exporation

In [70]:
duckdb.sql("""
--sqlbegin

    select iso3,
           country,
           g_whoregion,
           round(avg(e_mort_100k), 2) estimated_mortality_rate
    from tb_data
    group by 1, 2, 3
    order by 4 desc

--sqlend
""")

┌─────────┬──────────────────────────────────────────────────────────┬───────────────────────┬──────────────────────────┐
│  iso3   │                         country                          │      g_whoregion      │ estimated_mortality_rate │
│ varchar │                         varchar                          │        varchar        │          double          │
├─────────┼──────────────────────────────────────────────────────────┼───────────────────────┼──────────────────────────┤
│ LSO     │ Lesotho                                                  │ Africa                │                   332.92 │
│ SWZ     │ Eswatini                                                 │ Africa                │                   284.75 │
│ ZAF     │ South Africa                                             │ Africa                │                   235.88 │
│ NAM     │ Namibia                                                  │ Africa                │                   217.21 │
│ CAF     │ Central Afri

In [None]:
duckdb.sql("""
--sqlbegin

    select iso3,
           country,
           g_whoregion,
           round(max(e_mort_100k) - min(e_mort_100k), 2) as estimated_mortality_change
    from tb_data
    group by 1, 2, 3
    order by 4 desc

--sqlend
""")

┌─────────┬──────────────────────────────────────────────────────────┬───────────────────────┬────────────────────────────┐
│  iso3   │                         country                          │      g_whoregion      │ estimated_mortality_change │
│ varchar │                         varchar                          │        varchar        │           double           │
├─────────┼──────────────────────────────────────────────────────────┼───────────────────────┼────────────────────────────┤
│ SWZ     │ Eswatini                                                 │ Africa                │                      456.0 │
│ ZAF     │ South Africa                                             │ Africa                │                      352.0 │
│ NAM     │ Namibia                                                  │ Africa                │                      263.0 │
│ LSO     │ Lesotho                                                  │ Africa                │                      261.0 │
│ BWA   

In [None]:
duckdb.sql("""
--sqlbegin

    select iso3,
           country,
           g_whoregion,
           round(max(e_inc_100k) - min(e_inc_100k), 2) as estimated_incidence_change
    from tb_data
    group by 1, 2, 3
    order by 4 desc

--sqlend
""")

┌─────────┬───────────────────────────────────────┬───────────────────────┬────────────────────────────┐
│  iso3   │                country                │      g_whoregion      │ estimated_incidence_change │
│ varchar │                varchar                │        varchar        │           double           │
├─────────┼───────────────────────────────────────┼───────────────────────┼────────────────────────────┤
│ SWZ     │ Eswatini                              │ Africa                │                     1250.0 │
│ MHL     │ Marshall Islands                      │ Western Pacific       │                     1096.0 │
│ ZAF     │ South Africa                          │ Africa                │                      843.0 │
│ NAM     │ Namibia                               │ Africa                │                      805.0 │
│ BWA     │ Botswana                              │ Africa                │                      677.0 │
│ LSO     │ Lesotho                               │ Afr

## highest estimated mortality rates and trends

In [105]:
df = duckdb.sql("""
--sqlbegin

    with countries_iso3 as (
            select iso3,
                   round(avg(e_inc_100k), 2)
                   --max(e_mort_100k) - min(e_mort_100k)
            from tb_data
            --where year between 2018 and 2023
            group by 1
            order by 2 desc
            limit 8
    )

    from tb_data
    where iso3 in (select iso3 from countries_iso3)

--sqlend
""")

display(df)

alt.Chart(
    df,
    width=850,
    height=650,
).mark_line(
    size=4.0,
    opacity=0.8,
    #point={'size': 50},
    interpolate='catmull-rom',
).encode(
    x='year',
    y='e_inc_100k',
    color=alt.Color('country', scale=alt.Scale(scheme='set2')),
)

┌──────────────────────────┬─────────────┬─────────────┬─────────┬─────────┬───────┬────────┬───────────────┬────────┬────────────┬───────────┬─────────────┬───────────────────────┬──────────────────────┬────────────┬───────────────────┬──────────────────┬───────────┐
│         country          │ g_whoregion │ iso_numeric │  iso2   │  iso3   │ year  │ c_cdr  │ c_newinc_100k │  cfr   │ e_inc_100k │ e_inc_num │ e_mort_100k │ e_mort_exc_tbhiv_100k │ e_mort_exc_tbhiv_num │ e_mort_num │ e_mort_tbhiv_100k │ e_mort_tbhiv_num │ e_pop_num │
│         varchar          │   varchar   │    int64    │ varchar │ varchar │ int64 │ double │    double     │ double │   double   │   int64   │   double    │        double         │        int64         │   int64    │      double       │      int64       │   int64   │
├──────────────────────────┼─────────────┼─────────────┼─────────┼─────────┼───────┼────────┼───────────────┼────────┼────────────┼───────────┼─────────────┼───────────────────────┼────────────

In [128]:
df = duckdb.sql("""
--sqlbegin

    with countries_iso3 as (
            select iso3,
                   country,
                   g_whoregion,
                   round(avg(e_inc_100k), 2) as metric
                   --max(e_mort_100k) - min(e_mort_100k) as metric
            from tb_data
            --where year between 2018 and 2023
            group by 1, 2, 3
            order by metric desc
            limit 8
    )

    from countries_iso3

--sqlend
""")

display(df)

┌─────────┬───────────────────────────────────────┬─────────────────┬────────┐
│  iso3   │                country                │   g_whoregion   │ metric │
│ varchar │                varchar                │     varchar     │ double │
├─────────┼───────────────────────────────────────┼─────────────────┼────────┤
│ SWZ     │ Eswatini                              │ Africa          │  947.5 │
│ ZAF     │ South Africa                          │ Africa          │ 943.46 │
│ LSO     │ Lesotho                               │ Africa          │ 930.42 │
│ NAM     │ Namibia                               │ Africa          │ 830.13 │
│ PHL     │ Philippines                           │ Western Pacific │ 555.63 │
│ GAB     │ Gabon                                 │ Africa          │ 547.17 │
│ CAF     │ Central African Republic              │ Africa          │  540.0 │
│ PRK     │ Democratic People's Republic of Korea │ South-East Asia │  513.0 │
└─────────┴───────────────────────────────────────┴─

In [113]:
df = duckdb.sql("""
--sqlbegin

select g_whoregion as region,
       year,
       round(avg(e_inc_100k), 2) as avg_e_inc_100k
from tb_data
group by 1, 2

--sqlend
""").pl()

import polars as pl

min_year = df.select(pl.col('year').min()).item()
max_year = df.select(pl.col('year').max()).item()

alt.Chart(
    df,
    width=700,
    height=600,
    title=alt.Title(
        f'Estimated incidence of tuberculosis ({min_year} to {max_year})',
        fontSize=18.5, fontWeight='normal',
        subtitle='Incidence has decreased but we see recent increases in some regions',
        subtitleFontSize=15,
    ),
).mark_line(
    size=3.8,
    opacity=0.7,
    interpolate='catmull-rom',
).encode(
    x=alt.X(
        'year',
         title='',
         axis=alt.Axis(format='d', values=list(range(min_year, max_year+1, 2))),
    ),
    y=alt.Y(
        'avg_e_inc_100k',
        title='incidence per 100,000 people (estimated)',
        axis=alt.Axis(values=range(0, 400+1, 20))
    ),
    color=alt.Color(
        'region',
        title='',
        scale=alt.Scale(range=jco_colors),
    ),
    tooltip='region',
)