In [1]:
#import libraries
import numpy as np
import matplotlib.pyplot as plt
import os
import seaborn as sns
import pandas as pd
import sklearn

In [2]:
# Retrieve the cleaned data frames via IPython's %store magic (-r = restore).
# Each variable must have been saved earlier with `%store <name>`; every cell
# below reads these three frames.
%store -r gdf_quarter
%store -r gdf_semi
%store -r gdf_annual

### Sparsity Rate Tables

In [5]:
#annual
# One summary row per year: total deaths and the sparsity rate, i.e. the
# percentage of rows in gdf_annual for that year whose 'deaths' value is 0.
years = range(2014, 2023)  # 2014 through 2022 inclusive

# Collect plain records and build the frame once; concatenating onto an
# initially empty DataFrame inside the loop is quadratic and deprecated.
annual_records = []
for year in years:
    year_df = gdf_annual[gdf_annual['year'] == year]
    total_count = len(year_df)
    zero_deaths_count = int((year_df['deaths'] == 0).sum())
    # A year with no rows has an undefined rate; report NaN instead of
    # raising ZeroDivisionError.
    sparsity_rate = (zero_deaths_count / total_count) * 100 if total_count else float('nan')
    total_deaths = year_df['deaths'].sum()
    annual_records.append({'year': year, 'Total Deaths': total_deaths, 'Sparsity Rate': sparsity_rate})

sparsity_df_annual = pd.DataFrame(annual_records, columns=['year', 'Total Deaths', 'Sparsity Rate'])

# Keep the numeric rates before the column is replaced by display strings.
raw_sparsity = sparsity_df_annual['Sparsity Rate'].copy()
sparsity_df_annual['Sparsity Rate'] = raw_sparsity.round(3).astype(str) + '%'
print(sparsity_df_annual)

   year Total Deaths Sparsity Rate
0  2014          209       86.036%
1  2015          616       67.342%
2  2016         1029       53.754%
3  2017         1109       53.303%
4  2018         1078       54.429%
5  2019         1210       52.102%
6  2020         1687       43.619%
7  2021         1790       43.168%
8  2022         1849       43.018%


In [34]:
##quarterly sparsity table

period_keys = ['year', 'quarter']

# Calculate sparsity rate.
# Flag zero-death rows first and aggregate the flag over ALL rows, so a
# period in which no row has zero deaths still gets a count of 0 (0% sparsity).
# Filtering to zero-death rows before grouping would drop such a period from
# the inner merges below and silently remove it from the table.
zero_flags = gdf_quarter[period_keys].assign(is_zero=gdf_quarter['deaths'] == 0)
flags_by_period = zero_flags.groupby(period_keys)['is_zero']
zero_deaths_count = flags_by_period.sum().astype(int).reset_index(name='Zero Deaths Count')
total_tracts_count = flags_by_period.size().reset_index(name='Total Tracts Count')

sparsity_df_quarter = pd.merge(zero_deaths_count, total_tracts_count, on=period_keys)
sparsity_df_quarter['Sparsity Rate'] = (sparsity_df_quarter['Zero Deaths Count'] / sparsity_df_quarter['Total Tracts Count']) * 100
sparsity_df_quarter['Sparsity Rate'] = sparsity_df_quarter['Sparsity Rate'].round(3).astype(str) + '%'

# Total deaths per period, joined with the formatted sparsity rate; the
# 'deaths' column is renamed for display.
quarterly_deaths = (
    gdf_quarter.groupby(period_keys)['deaths'].sum().reset_index()
    .merge(sparsity_df_quarter[period_keys + ['Sparsity Rate']], on=period_keys)
    .rename(columns={'deaths': 'Total Deaths'})
)

# Exclude 2023 from the table
quarterly_deaths = quarterly_deaths[quarterly_deaths['year'] != 2023]

# Render one line per period, with a blank line before each new year.
table_lines = []
current_year = None
for year, quarter, total_deaths, sparsity_rate in zip(
    quarterly_deaths['year'],
    quarterly_deaths['quarter'],
    quarterly_deaths['Total Deaths'],
    quarterly_deaths['Sparsity Rate'],
):
    if year != current_year:
        table_lines.append('')  # blank separator line between years
        current_year = year
    table_lines.append(f"Year: {year}, Quarter: {quarter}, Total Deaths: {total_deaths}, Sparsity Rate: {sparsity_rate}")

formatted_table = ''.join(line + '\n' for line in table_lines)

# Print the resulting table
print(formatted_table)



Year: 2014, Total Deaths: 209, Sparsity Rate: 86.036%

Year: 2015, Total Deaths: 616, Sparsity Rate: 67.342%

Year: 2016, Total Deaths: 1029, Sparsity Rate: 53.754%

Year: 2017, Total Deaths: 1109, Sparsity Rate: 53.303%

Year: 2018, Total Deaths: 1078, Sparsity Rate: 54.429%

Year: 2019, Total Deaths: 1210, Sparsity Rate: 52.102%

Year: 2020, Total Deaths: 1687, Sparsity Rate: 43.619%

Year: 2021, Total Deaths: 1790, Sparsity Rate: 43.168%

Year: 2022, Total Deaths: 1849, Sparsity Rate: 43.018%



In [23]:
#semi-annual
period_keys = ['year', 'semiannual']

# Calculate sparsity rate.
# Flag zero-death rows first and aggregate the flag over ALL rows, so a
# period in which no row has zero deaths still gets a count of 0 (0% sparsity).
# Filtering to zero-death rows before grouping would drop such a period from
# the inner merges below and silently remove it from the table.
zero_flags = gdf_semi[period_keys].assign(is_zero=gdf_semi['deaths'] == 0)
flags_by_period = zero_flags.groupby(period_keys)['is_zero']
zero_deaths_count = flags_by_period.sum().astype(int).reset_index(name='Zero Deaths Count')
total_tracts_count = flags_by_period.size().reset_index(name='Total Tracts Count')

sparsity_df_semi = pd.merge(zero_deaths_count, total_tracts_count, on=period_keys)
sparsity_df_semi['Sparsity Rate'] = (sparsity_df_semi['Zero Deaths Count'] / sparsity_df_semi['Total Tracts Count']) * 100
sparsity_df_semi['Sparsity Rate'] = sparsity_df_semi['Sparsity Rate'].round(3).astype(str) + '%'

# Total deaths per period, joined with the formatted sparsity rate; the
# 'deaths' column is renamed for display.
# NOTE(review): unlike the quarterly table, no year is filtered out here --
# confirm whether 2023 should also be excluded from this table.
semiannual_deaths = (
    gdf_semi.groupby(period_keys)['deaths'].sum().reset_index()
    .merge(sparsity_df_semi[period_keys + ['Sparsity Rate']], on=period_keys)
    .rename(columns={'deaths': 'Total Deaths'})
)

#format
# Render one line per period, with a blank line before each new year.
table_lines = []
current_year = None
for year, semiannual, total_deaths, sparsity_rate in zip(
    semiannual_deaths['year'],
    semiannual_deaths['semiannual'],
    semiannual_deaths['Total Deaths'],
    semiannual_deaths['Sparsity Rate'],
):
    if year != current_year:
        table_lines.append('')  # blank separator line between years
        current_year = year
    table_lines.append(f"Year: {year}, Semiannual: {semiannual}, Total Deaths: {total_deaths}, Sparsity Rate: {sparsity_rate}")

formatted_table = ''.join(line + '\n' for line in table_lines)

# Print the resulting table
print(formatted_table)





Year: 2014, Semiannual: 1, Total Deaths: 1, Sparsity Rate: 99.925%
Year: 2014, Semiannual: 2, Total Deaths: 208, Sparsity Rate: 86.111%

Year: 2015, Semiannual: 1, Total Deaths: 278, Sparsity Rate: 82.282%
Year: 2015, Semiannual: 2, Total Deaths: 338, Sparsity Rate: 79.73%

Year: 2016, Semiannual: 1, Total Deaths: 533, Sparsity Rate: 70.195%
Year: 2016, Semiannual: 2, Total Deaths: 496, Sparsity Rate: 72.898%

Year: 2017, Semiannual: 1, Total Deaths: 510, Sparsity Rate: 72.823%
Year: 2017, Semiannual: 2, Total Deaths: 599, Sparsity Rate: 69.144%

Year: 2018, Semiannual: 1, Total Deaths: 518, Sparsity Rate: 72.523%
Year: 2018, Semiannual: 2, Total Deaths: 560, Sparsity Rate: 70.796%

Year: 2019, Semiannual: 1, Total Deaths: 528, Sparsity Rate: 71.922%
Year: 2019, Semiannual: 2, Total Deaths: 682, Sparsity Rate: 66.141%

Year: 2020, Semiannual: 1, Total Deaths: 914, Sparsity Rate: 60.21%
Year: 2020, Semiannual: 2, Total Deaths: 773, Sparsity Rate: 63.063%

Year: 2021, Semiannual: 1, Tot

In [33]:
#number of census tracts with 5+ deaths in a year
first_year, last_year = 2014, 2022   # inclusive bounds of the reporting window
min_deaths = 5                       # keep in sync with the "5+" in the printed heading

# Restrict to the reporting window (between() is inclusive on both ends).
filtered_df = gdf_annual[gdf_annual['year'].between(first_year, last_year)]

# Count, per year, the rows whose deaths reach the threshold. Summing a
# boolean mask grouped by year avoids groupby.apply with a Python lambda,
# which is slower and deprecated when it touches the grouping column.
result = (
    filtered_df['deaths'].ge(min_deaths)
    .groupby(filtered_df['year'])
    .sum()
    .astype(int)
    .reset_index()
)

result.columns = ['Year', '   # of Tracts']
print("Number of Census Tracts with\n  5+ Deaths in Given Year\n")
print(result)


Number of Census Tracts with
  5+ Deaths in Given Year

   Year     # of Tracts
0  2014               0
1  2015               6
2  2016              25
3  2017              29
4  2018              30
5  2019              33
6  2020              69
7  2021              76
8  2022              96
