In [32]:
import duckdb

# Path to the World Health Organization (WHO) suicide statistics CSV.
data_source = '~/repos/code/suicides/who_suicide_statistics.csv'

# Inner query: one row per (country, age, sex, year) with the suicide rate per
# million people, restricted to countries whose name contains "united states".
# Outer query: average those yearly rates per (country, age, sex).
#
# The inner query intentionally has no LIMIT: capping it (without an ORDER BY)
# would keep only an arbitrary handful of rows, so the "average" would no
# longer cover every year in the data.
# Rows with a missing count or population yield a NULL rate, which mean() skips.
query = f'''
from (from '{data_source}'
      select country, age, sex, year, round(suicides_no / population * 1000000, 5) as rate
      where country ilike '%united states%')
select country, age, sex, round(mean(rate), 1) as avg_rate
group by country, age, sex
order by avg_rate desc;
'''

# Run the query and materialize the result as a pandas DataFrame for plotting.
us_avg_rates = duckdb.sql(query).df()

#display(us_avg_rates)

In [33]:
import altair as alt

# Activate the 'ggplot2' Altair theme.
alt.theme.enable('ggplot2')

# Explicit ordering used to sort the age groups on the y axis.
age_labels = [
    '75+ years',
    '55-74 years',
    '35-54 years',
    '25-34 years',
    '15-24 years',
    '5-14 years',
]

# Title and subtitle shown above the chart.
chart_title = alt.Title(
    'average suicide rates in the united states from 1979 to 2016',
    subtitle=['source: world health organization (who)'],
)

# Bar marks over the aggregated rates; width/height apply to each row facet.
rate_bars = alt.Chart(
    us_avg_rates,
    title=chart_title,
    width=750,
    height=140,
).mark_bar(stroke='black', strokeWidth=0.5, opacity=0.8)

# One facet row per sex, bars coloured by sex, age groups on the y axis.
rate_bars_encoded = rate_bars.encode(
    x=alt.X('avg_rate', title='average suicide rate per million people'),
    y=alt.Y('age:O', title='', sort=alt.Sort(age_labels)),
    row=alt.Row('sex', title=''),
    color=alt.Color('sex:N', title='', scale=alt.Scale(scheme='accent')),
    tooltip='custom_tooltip:N',
)

# Build the tooltip text per bar; the resulting chart is the cell's output.
rate_bars_encoded.transform_calculate(
    custom_tooltip='datum.age + " – " + datum.avg_rate + " suicides per million people"',
)

In [10]:
# Sanity check: (number of rows, number of columns) of the aggregated frame.
us_avg_rates.shape

(12, 4)