In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import seaborn as sns
import pandas as pd
import sklearn

In [5]:
%store -r cook_county_gdf

In [4]:
cook_county_gdf['quarter'] = cook_county_gdf['DEATH_DATE'].dt.quarter

KeyError: 'DEATH_DATE'

In [None]:
columns_to_keep = ['STATEFP', 'COUNTYFP', 'TRACTCE','NAME', 'NAMELSAD', 'MTFCC', 'FUNCSTAT', 'ALAND', 'AWATER',
       'INTPTLAT', 'INTPTLON', 'geometry']

gdf_quarterly = cook_county_gdf.groupby(['GEOID', 'quarter']).agg({'deaths': 'sum', **{col: 'first' for col in columns_to_keep}}).reset_index()

## Account for missing tracts and quarters

In [7]:
#Takes the rows in CC_nodeaths and adds them to df_quarterly 

for index, row in CC_nodeaths.iterrows():
    tract_geoID = row['GEOID']
    
    for year in range(2014, 2024):
        for quarter in range(1, 5):
            existing_row = df_quarterly[(df_quarterly['tract_geoID'] == tract_geoID) & (df_quarterly['year'] == year) & (df_quarterly['quarter'] == quarter)]
            
            if existing_row.empty:
                new_row = pd.DataFrame([[tract_geoID, year, quarter, 0]], columns=df_quarterly.columns)
                df_quarterly = pd.concat([df_quarterly, new_row], ignore_index=True)


In [8]:
all_quarters = []
new_rows = []

for year in range(2014, 2024):
    for quarter in range(1, 5):
        all_quarters.append((year, quarter))

for quarter in all_quarters:
    if not ((df_quarterly['year'] == quarter[0]) & (df_quarterly['quarter'] == quarter[1])).any():
        df_quarterly = df_quarterly.append({'year': quarter[0], 'quarter': quarter[1], 'deaths_sum': 0}, ignore_index=True)

df_quarterly = df_quarterly.sort_values(by=['tract_geoID', 'year', 'quarter']).reset_index(drop=True)

In [9]:
df_quarterly.head()

Unnamed: 0,tract_geoID,year,quarter,deaths_sum
0,17031010100,2014,4,1
1,17031010100,2015,4,1
2,17031010100,2016,2,2
3,17031010100,2016,4,1
4,17031010100,2017,1,1


In [10]:
df_quarterly['tract_geoID'].nunique()

1332

In [None]:
df_quarterly.shape

In [13]:
missing_quarters = []

for tract_geoID in df_quarterly['tract_geoID'].unique():
    for  quarter in all_quarters:
        if not ((df_quarterly['tract_geoID'] == tract_geoID) & (df_quarterly['year'] == year) & (df_quarterly['quarter'] == quarter)).any():
            missing_quarters.append((tract_geoID, year, quarter))

if missing_quarters:
    print("The following census tracts are missing at least one quarter between 2014 and 2023:")
    for missing in missing_quarters:
        print("Tract GEOID:", missing[0], "- Year:", missing[1], "- Quarter:", missing[2])
else:
    print("All census tracts have data for every quarter between 2014 and 2023.")


KeyboardInterrupt: 

### Tables and Figures

In [None]:
quarterly_deaths = df_quarterly.groupby(['year', 'quarter'])['deaths_sum'].sum().reset_index()

quarterly_deaths['Sparsity Rate'] = (df_quarterly['deaths_sum'] == 0).groupby([df_quarterly['year'], df_quarterly['quarter']]).transform('mean') * 100

filtered_quarters = quarterly_deaths[(quarterly_deaths['year'] != 2023)]

for year in range(2014, 2023):
    yearly_data = filtered_quarters[filtered_quarters['year'] == year]
    print(f"Year: {year}")
    print(yearly_data)
    print()


In [None]:
quarter_labels = ['Q{} {}'.format(q, str(y)[-2:]) for y, q in zip(filtered_quarters['year'], filtered_quarters['quarter'])]

plt.figure(figsize=(10, 6))
plt.plot(filtered_quarters.index, filtered_quarters['Sparsity Rate'], color='darkblue')
plt.xlabel('Quarter')
plt.ylabel('Sparsity Rate (%)')
plt.title('Quarterly Sparsity Rate')
plt.xticks(filtered_quarters.index, quarter_labels, rotation='vertical')  # Set the custom x-axis labels and rotate vertically
plt.grid(True)
plt.show()
