In [38]:
%load_ext autoreload
%autoreload 2
import geopandas as gpd
import numpy as np
import pandas as pd

# local import
from make_datasets import make_data
from models import all_zeroes_model

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:

# Location of the geospatial dataset; geopandas reads it into a GeoDataFrame
data_path = './clean_annual_tract/'
data_gdf = gpd.read_file(data_path)

Re-index the dataframe with a MultiIndex on location and time.

In [19]:

# --- Key column names --------------------------------------------------------
timestep_col = 'timestep'
geography_col = 'geoid'
outcome_col = 'deaths'

# --- Candidate columns -------------------------------------------------------
# Every column that may end up in the X dataframe
x_idx_cols = [
    geography_col,
    'lat',
    'lon',
    timestep_col,
    'theme_1_pc',
    'theme_2_pc',
    'theme_3_pc',
    'theme_4_pc',
    'svi_pctile',
    'year',
    'neighbor_t',
    'deaths',
]

# Every column that may end up in the Y dataframe
y_idx_cols = [geography_col, timestep_col, outcome_col]

# --- Feature selection -------------------------------------------------------
# Toggle the optional feature groups; 'deaths' is always included.
add_spacetime = True
add_svi = True

# Order matters: deaths first, then space/time columns, then the SVI columns.
features_only = (
    ['deaths']
    + (['lat', 'lon', timestep_col] if add_spacetime else [])
    + (['theme_1_pc', 'theme_2_pc', 'theme_3_pc', 'theme_4_pc', 'svi_pctile']
       if add_svi else [])
)

# --- Evaluation window -------------------------------------------------------
# The test period is given both as calendar years and as timestep indices.
first_test_year, last_test_year = 2019, 2020
first_test_timestep, last_test_timestep = 19, 20
lookback_years = 5

In [4]:
# Index every row by (location, timestep). set_index moves both columns into
# the index, so the timestep values are copied back from the index level to
# keep them available as a regular feature column.
multiindexed_gdf = (
    data_gdf
    .set_index([geography_col, timestep_col])
    .assign(**{timestep_col: lambda frame: frame.index.get_level_values(timestep_col)})
)

# Number of distinct locations present in the raw data
num_geoids = len(data_gdf[geography_col].unique())

In [11]:
# Build the feature and target tensors for the test years from the indexed frame
x_BSF, y_BS = make_data(
    multiindexed_gdf,
    first_test_year,
    last_test_year,
    lookback_years,
    features_only,
    num_geoids,
)

In [12]:
# Sanity-check the shape of the feature tensor returned by make_data.
# NOTE(review): the trailing axis presumably holds the selected features
# flattened across the lookback years — confirm in make_datasets.
x_BSF.shape

TensorShape([2, 1620, 45])

In [13]:
# Sanity-check the shape of the target tensor returned by make_data.
# NOTE(review): expected to match the leading axes of x_BSF — confirm in make_datasets.
y_BS.shape

TensorShape([2, 1620])

In [40]:
# Evaluate the all-zeroes baseline over the test timesteps; the result is
# indexed by test timestep below (element 0 and element 1).
bpr_over_time_zeroes = all_zeroes_model(
    multiindexed_gdf,
    first_test_timestep,
    last_test_timestep,
    num_geoids,
)

In [48]:
# Mean of the first test timestep's samples on its own
print("2019 Average: {}".format(np.mean(bpr_over_time_zeroes[0])))

# Average the two test timesteps sample-by-sample
bpr_samples_both_years = np.add(
    np.array(bpr_over_time_zeroes[0]),
    np.array(bpr_over_time_zeroes[1]),
) / 2

# Report the mean and the 2.5th-97.5th percentile range, scaled to percent.
# The line breaks and leading spaces are part of the printed text.
print(
    "Zeroes model (Mean, 95% CI): "
    f"{np.mean(bpr_samples_both_years) * 100:.1f},\n"
    f"      ({np.percentile(bpr_samples_both_years, 2.5) * 100:.1f}-\n"
    f"       {np.percentile(bpr_samples_both_years, 97.5) * 100:.1f})"
)


2019 Average: 0.25949707969445374
Zeroes model (Mean, 95% CI): 25.5,
      (25.1-
       25.8)


In [27]:
# Element-wise sum of the samples from the two test timesteps (not yet averaged)
np.add(np.array(bpr_over_time_zeroes[0]), np.array(bpr_over_time_zeroes[1]))

array([0.48629182, 0.48559692, 0.48705123, 0.48498266, 0.484696  ,
       0.4873533 , 0.48697484, 0.48675582, 0.48734671, 0.48648371,
       0.48578552, 0.4873418 , 0.48901325, 0.48755974, 0.48740589,
       0.4882732 , 0.48567307, 0.48605499, 0.48985796, 0.48621481,
       0.48739747, 0.48636345, 0.48664498, 0.48752345, 0.48890475,
       0.4845332 , 0.48582379, 0.48640119, 0.48660364, 0.48638195,
       0.48910175, 0.48875888, 0.4875153 , 0.48829089, 0.48819095,
       0.48857556, 0.4868849 , 0.48639409, 0.48513304, 0.4871981 ,
       0.48719583, 0.48846208, 0.48926455, 0.48683662, 0.48664287,
       0.48672532, 0.48646148, 0.48786413, 0.48924681, 0.48803656])