In [113]:
import os

import altair as alt
import duckdb

# world health organization (who) data
# the default is the original author-local csv location; set the
# WHO_SUICIDE_CSV environment variable to point the notebook at a copy stored
# elsewhere without editing this cell.
# note: the query cells interpolate this value into sql between single quotes,
# so the path must not itself contain a single quote.
data_source = os.environ.get(
    'WHO_SUICIDE_CSV',
    '~/repos/code/suicides/who_suicide_statistics.csv',
)

# apply the 'ggplot2' theme to the charts rendered by this notebook
alt.theme.enable('ggplot2')

# ten-colour categorical palette shared by the charts in this notebook.
# order matters: the charts consume this list positionally (whole list or a slice).
# presumably the "jco" palette from ggsci -- confirm against the upstream source.
jco_colors = [
    hex_code
    for _shade, hex_code in (
        ("blue", "#0073C2"),
        ("yellow", "#EFC000"),
        ("gray", "#868686"),
        ("red", "#CD534C"),
        ("light blue", "#7AA6DC"),
        ("dark blue", "#003C67"),
        ("dark yellow", "#8F7700"),
        ("dark gray", "#3B3B3B"),
        ("dark red", "#A73030"),
        ("slate blue", "#4A6990"),
    )
]

In [95]:
# u.s. calculated average suicide rates
#
# pools all rows into one rate per (country, age, sex):
# total suicides / total population, scaled to suicides per million people
# and rounded to one decimal place.
#
# both suicides_no and population must be present on a row for it to count:
# sum() skips nulls, so a row with a suicide count but no population would add
# to the numerator without adding to the denominator and inflate the rate.

query = f'''
  from '{data_source}'
  select country, age, sex, round(sum(suicides_no) / sum(population) * 1_000_000, 1) as avg_rate
  where country ilike '%united states%'
    and suicides_no is not null
    and population is not null
  group by country, age, sex
'''

us_avg_rates = duckdb.sql(query).df()

In [145]:
# sort order for the age bands, oldest group first (also used for legend order
# by the yearly line chart further down)
age_labels = [
    '75+ years',
    '55-74 years',
    '35-54 years',
    '25-34 years',
    '15-24 years',
    '5-14 years',
]

avg_chart_title = alt.Title(
    'average suicide rates in the u.s. by age group (1979 to 2015)',
    subtitle=['source: world health organization (who)'],
)

# expression evaluated per row to build the hover text
avg_tooltip_expr = 'datum.age + " – " + datum.avg_rate + " suicides per million people"'

# horizontal bars of the pooled rate per age band, one stacked panel per sex
(
    alt.Chart(us_avg_rates, title=avg_chart_title, width=750, height=140)
    .transform_calculate(custom_tooltip=avg_tooltip_expr)
    .mark_bar(opacity=0.6)
    .encode(
        x=alt.X('avg_rate', title='average suicide rate per million people'),
        y=alt.Y('age:O', title='', sort=alt.Sort(age_labels)),
        row=alt.Row('sex', title=''),
        # bar colours come from the palette entries at index 8 onward
        color=alt.Color(
            'sex:N',
            title='',
            scale=alt.Scale(range=jco_colors[8:]),
        ),
        tooltip='custom_tooltip:N',
    )
)

In [97]:
# u.s. yearly rates
#
# per-row rate: suicides_no / population scaled to suicides per million people,
# rounded to one decimal place. rows are not aggregated, so each source row
# that passes the filter yields one output row.

query2 = f'''
  from '{data_source}'
  select country, age, sex, year, round(suicides_no / population * 1_000_000, 1) as suicide_rate
  where country ilike '%united states%' and suicides_no is not null
'''

us_yearly_rates = duckdb.sql(query2).df()

# quick sanity preview; the fixed seed keeps the displayed rows identical on
# every restart-and-run-all instead of changing with each execution
display(us_yearly_rates.sample(4, random_state=0))

Unnamed: 0,country,age,sex,year,suicide_rate
438,United States of America,15-24 years,male,2015,192.7
156,United States of America,15-24 years,female,1992,36.7
102,United States of America,15-24 years,male,1987,212.6
115,United States of America,25-34 years,male,1988,250.0


In [135]:
yearly_chart_title = alt.Title(
    'suicide rates in the u.s. by age group and gender (1979 to 2015)',
    subtitle=['source: world health organization (who)'],
)

# one smoothed line per age group, with a side-by-side panel for each sex
(
    alt.Chart(us_yearly_rates, title=yearly_chart_title, width=500, height=450)
    .mark_line(size=4.0, opacity=0.6, interpolate='catmull-rom')
    .encode(
        column=alt.Column('sex', title=''),
        x=alt.X(
            'year',
            title='',
            # 'd' formats tick labels as plain integers
            axis=alt.Axis(format='d'),
            scale=alt.Scale(domain=[1979, 2015]),
        ),
        y=alt.Y('suicide_rate:Q', title='suicide rate (per one million people)'),
        color=alt.Color(
            'age:N',
            sort=alt.Sort(age_labels),
            scale=alt.Scale(range=jco_colors),
            # legend swatch stroke width set to the same value as the line size
            legend=alt.Legend(symbolStrokeWidth=4.0),
        ),
    )
)