In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import seaborn as sns
import pandas as pd
import sklearn

In [2]:
# Restore variables that were previously saved with `%store` (IPython's
# storemagic), so this notebook depends on an earlier session/notebook having
# stored them; on a fresh machine these lines restore nothing.
# `d`: used below as a DataFrame with `DEATH_DATE` and `tract_geoID` columns
#      (presumably one row per death record -- TODO confirm against the
#      notebook that stores it).
%store -r d
# `CC_nodeaths`: used below as a DataFrame with a `GEOID` column (presumably
#      the census tracts to be zero-filled -- TODO confirm).
%store -r CC_nodeaths

In [4]:
# Derive calendar year and quarter from the death date, then count the
# number of rows in `d` per (tract, year, quarter).
death_date_parts = d['DEATH_DATE'].dt
d['year'], d['quarter'] = death_date_parts.year, death_date_parts.quarter

group_keys = ['tract_geoID', 'year', 'quarter']
df_quarterly = (
    d.groupby(group_keys)
    .size()
    .rename('deaths_sum')
    .reset_index()
)

In [None]:
# Zero-fill: make sure every tract listed in CC_nodeaths['GEOID'] has a row for
# every quarter of 2014-2023. Any (tract, year, quarter) combination that is
# not already present in df_quarterly is added with deaths_sum = 0.
#
# This is done in one vectorized pass (build the full grid, keep the
# combinations that are absent, concatenate once) instead of filtering the
# whole frame and calling pd.concat inside a triple nested loop, which is
# quadratic in the number of rows.
key_cols = ['tract_geoID', 'year', 'quarter']
years = range(2014, 2024)   # 2014..2023 inclusive
quarters = range(1, 5)      # Q1..Q4

# Every (tract, year, quarter) that should exist. `.unique()` keeps first-seen
# tract order and collapses repeated GEOIDs, which the original loop also
# effectively did (a repeated tract found its rows already present).
full_grid = pd.MultiIndex.from_product(
    [CC_nodeaths['GEOID'].unique(), years, quarters],
    names=key_cols,
)

# Combinations already present in the aggregated table.
existing_keys = pd.MultiIndex.from_frame(df_quarterly[key_cols])

# Boolean indexing preserves grid order, so new rows are appended in the same
# tract -> year -> quarter order as the original loop produced.
missing_keys = full_grid[~full_grid.isin(existing_keys)]

if len(missing_keys) > 0:
    zero_rows = missing_keys.to_frame(index=False).assign(deaths_sum=0)
    df_quarterly = pd.concat(
        [df_quarterly, zero_rows[df_quarterly.columns]],
        ignore_index=True,
    )


In [None]:
# Safety net: if some (year, quarter) in 2014-2023 has no row at all (for any
# tract), add a single placeholder row with deaths_sum = 0 so the quarter still
# shows up in the per-quarter tables. Such a row has no tract, so its
# `tract_geoID` is NaN.
#
# `DataFrame.append` was removed in pandas 2.0, so the missing quarters are
# collected first and added with a single `pd.concat`. This also keeps the
# integer dtype of year/quarter/deaths_sum.
all_quarters = [
    (year, quarter)
    for year in range(2014, 2024)
    for quarter in range(1, 5)
]

# (year, quarter) pairs that already have at least one row.
present_quarters = set(zip(df_quarterly['year'], df_quarterly['quarter']))
missing_quarters = [yq for yq in all_quarters if yq not in present_quarters]

if missing_quarters:
    placeholder_rows = pd.DataFrame(
        missing_quarters, columns=['year', 'quarter']
    ).assign(deaths_sum=0)
    df_quarterly = pd.concat([df_quarterly, placeholder_rows], ignore_index=True)

# Canonical ordering: tract, then chronological.
df_quarterly = (
    df_quarterly
    .sort_values(by=['tract_geoID', 'year', 'quarter'])
    .reset_index(drop=True)
)

### Tables and Figures

In [None]:
# Per-quarter summary across all tracts:
#   deaths_sum    -- total deaths in that (year, quarter)
#   Sparsity Rate -- percentage of df_quarterly rows in that (year, quarter)
#                    whose deaths_sum is 0
#
# Both columns come from the same groupby, so each rate is attached to its
# quarter by key. The previous version used `.transform('mean')`, which returns
# one value per row of df_quarterly (indexed 0..N-1), and assigned it to the
# much shorter per-quarter table by index label. Row i of the summary then
# received the rate of row i of df_quarterly, which is only correct if the
# first tract in sort order happens to have every quarter present and in order.
quarterly_deaths = (
    df_quarterly
    .assign(is_zero=df_quarterly['deaths_sum'].eq(0))
    .groupby(['year', 'quarter'])
    .agg(deaths_sum=('deaths_sum', 'sum'), zero_share=('is_zero', 'mean'))
    .reset_index()
)
quarterly_deaths['Sparsity Rate'] = quarterly_deaths.pop('zero_share') * 100

# 2023 is excluded from the reported tables and figures.
filtered_quarters = quarterly_deaths[(quarterly_deaths['year'] != 2023)]

# Print one small table per year.
for year in range(2014, 2023):
    yearly_data = filtered_quarters[filtered_quarters['year'] == year]
    print(f"Year: {year}")
    print(yearly_data)
    print()


In [None]:
# Line chart of the quarterly sparsity rate, one tick per quarter labelled
# like "Q1 14" (quarter number plus two-digit year).
quarter_labels = [
    'Q{} {}'.format(qtr, str(yr)[-2:])
    for yr, qtr in zip(filtered_quarters['year'], filtered_quarters['quarter'])
]
tick_positions = filtered_quarters.index

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(tick_positions, filtered_quarters['Sparsity Rate'], color='darkblue')
ax.set_xlabel('Quarter')
ax.set_ylabel('Sparsity Rate (%)')
ax.set_title('Quarterly Sparsity Rate')
# Custom x-axis labels, rotated vertically so all quarters fit.
ax.set_xticks(tick_positions)
ax.set_xticklabels(quarter_labels, rotation='vertical')
ax.grid(True)
plt.show()
