In [1]:
import pandas as pd
import altair as alt
import matplotlib.pyplot as plt

In [2]:
USAdata = pd.read_csv("data/usa_deaths_data.csv")
USAdata

Unnamed: 0.1,Unnamed: 0,YEAR,AGE_GROUP,GENDER,DEATHS,COUNTRY
0,0,2020,0 to 29,female,1854,USA
1,1,2020,0 to 29,male,3318,USA
2,2,2020,30 to 49,female,5000,USA
3,3,2020,30 to 49,male,10289,USA
4,4,2020,50 +,female,680377,USA
5,5,2020,50 +,male,792134,USA
6,6,2021,0 to 29,female,6078,USA
7,7,2021,0 to 29,male,9207,USA
8,8,2021,30 to 49,female,16751,USA
9,9,2021,30 to 49,male,26996,USA


In [3]:
CAdata = pd.read_csv("data/canada_deaths_data.csv")
CAdata

Unnamed: 0.1,Unnamed: 0,YEAR,AGE_GROUP,GENDER,DEATHS,COUNTRY
0,0,2020,0 to 29,female,11,Canada
1,1,2020,0 to 29,male,11,Canada
2,2,2020,30 to 49,male,98,Canada
3,3,2020,30 to 49,female,43,Canada
4,4,2020,50 +,male,4405,Canada
5,5,2020,50 +,female,4408,Canada
6,6,2021,0 to 29,female,36,Canada
7,7,2021,0 to 29,male,41,Canada
8,8,2021,30 to 49,male,334,Canada
9,9,2021,30 to 49,female,195,Canada


In [4]:
USAdata1 = pd.read_csv("data/usa_w_population.csv")
USAdata1 

Unnamed: 0.1,Unnamed: 0,YEAR,AGE_GROUP,TOTAL_DEATHS,POPULATION,MORTALITY_RATE
0,0,2020,0 to 29,5172,254210460,2.034535
1,1,2020,30 to 49,15289,172198146,8.878725
2,2,2020,50 +,1472511,236593554,622.380016
3,3,2021,0 to 29,15285,252095620,6.063176
4,4,2021,30 to 49,43747,172575984,25.349414
5,5,2021,50 +,1681849,239115886,703.361465
6,6,2022,0 to 29,6478,251414856,2.576618
7,7,2022,30 to 49,12549,173851316,7.218237
8,8,2022,50 +,927600,241276650,384.45494
9,9,2023,0 to 29,655,250806986,0.261157


In [5]:
CAdata1 = pd.read_csv("data/ca_w_population.csv")
CAdata1 

Unnamed: 0.1,Unnamed: 0,YEAR,AGE_GROUP,TOTAL_DEATHS,POPULATION,MORTALITY_RATE
0,0,2020,0 to 29,22,13332834,0.165006
1,1,2020,30 to 49,141,10131696,1.391672
2,2,2020,50 +,8813,14564108,60.511773
3,3,2021,0 to 29,77,13228492,0.582077
4,4,2021,30 to 49,529,10234175,5.168956
5,5,2021,50 +,9259,14777197,62.65735
6,6,2022,0 to 29,93,13469283,0.69046
7,7,2022,30 to 49,312,10473634,2.978909
8,8,2022,50 +,14154,14993017,94.403948
9,9,2023,0 to 29,27,13923612,0.193915


In [6]:
CAdata['YEAR'] = CAdata['YEAR'].astype(str)
USAdata['YEAR'] = USAdata['YEAR'].astype(str)
CAdata1['YEAR'] = CAdata1['YEAR'].astype(str)
USAdata1['YEAR'] = USAdata1['YEAR'].astype(str)

## 1. How did the age distribution of COVID-19 deaths differ between Canada and the USA over time?

In [7]:

age_line1 = alt.Chart(USAdata1).mark_line(point=True).encode(
    x='YEAR:N',
    y='MORTALITY_RATE:Q',
    color = alt.value("orange"),
    tooltip=['YEAR', 'AGE_GROUP', 'MORTALITY_RATE']
).facet(
    facet='AGE_GROUP:N',
    columns=3
).resolve_scale(
    y='independent'
).properties(
    title='COVID-19 Deaths Over Time by Age Group (USA)'
)

age_line2 = alt.Chart(CAdata1).mark_line(point=True).encode(
    x='YEAR:N',
    y='MORTALITY_RATE:Q',
    tooltip=['YEAR', 'AGE_GROUP', 'MORTALITY_RATE']
).facet(
    facet='AGE_GROUP:N',
    columns=3
).resolve_scale(
    y='independent'
).properties(
    title='COVID-19 Deaths Over Time by Age Group (CANADA)'
)


alt.hconcat(age_line1, age_line2)


##  2. Were there specific age groups that experienced disproportionately higher mortality rates in one country compared to the other?

In [8]:

age_bar1 = alt.Chart(USAdata1).mark_bar().encode(
    x=alt.X('AGE_GROUP:N', title='Age Group'),
    y=alt.Y('MORTALITY_RATE:Q', title='MORTALITY_RATE'),
    color = alt.value("orange"),
    tooltip=['YEAR', 'AGE_GROUP', 'MORTALITY_RATE']
).facet(
    column=alt.Column('YEAR:N', title='Year')
).properties(title='USA: Age-Specific COVID-19 Deaths by Year')


age_bar2 = alt.Chart(CAdata1).mark_bar().encode(
    x=alt.X('AGE_GROUP:N', title='Age Group'),
    y=alt.Y('MORTALITY_RATE:Q', title='MORTALITY_RATE'),
    tooltip=['YEAR', 'AGE_GROUP', 'MORTALITY_RATE']
).facet(
    column=alt.Column('YEAR:N', title='Year')
).properties(title='Canada: Age-Specific COVID-19 Deaths by Year')


alt.hconcat(age_bar1, age_bar2)


## 3. How did the pandemic impact mortality rates differently for males and females across various age groups in Canada and the United States?

In [9]:
stacked1 = alt.Chart(USAdata).mark_bar().encode(
    y=alt.Y('AGE_GROUP:N', title='Age Group'),
    x=alt.X('DEATHS:Q', stack='zero', title='COVID-19 Deaths'),
    color=alt.Color('GENDER:N', title='Gender'),
    tooltip=['YEAR', 'COUNTRY', 'AGE_GROUP', 'GENDER', 'DEATHS']
).facet('YEAR:N', columns = 1).resolve_scale(x='independent').properties(
    title='COVID-19 Deaths by Gender, Age Group, Year, in USA')


stacked2 = alt.Chart(CAdata).mark_bar().encode(
    y=alt.Y('AGE_GROUP:N', title='Age Group'),
    x=alt.X('DEATHS:Q', stack='zero', title='COVID-19 Deaths'),
    color=alt.Color('GENDER:N', title='Gender'),
    tooltip=['YEAR', 'COUNTRY', 'AGE_GROUP', 'GENDER', 'DEATHS']
).facet('YEAR:N', columns = 1).resolve_scale(x='independent').properties(
    title='COVID-19 Deaths by Gender, Age Group, Year, in CANADA')


alt.hconcat(stacked1,stacked2)