In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the pickled Python object stored in pairs_full_12.npy. The next cell
# indexes it as pairs[cbg][poi], i.e. a two-level mapping keyed by CBG first
# and by POI second.
# NOTE(review): allow_pickle=True unpickles arbitrary objects -- only load
# this file if it comes from a trusted source.
pairs = np.load('../data/pairs_full_12.npy', allow_pickle=True).item()

In [None]:
from tqdm import tqdm

# Invert the CBG -> POI nesting of `pairs` into POI -> CBG, carrying every
# stored value across unchanged: poi2cbg[poi][cbg] == pairs[cbg][poi].
poi2cbg = {}

for cbg in tqdm(pairs):
    entries_for_cbg = pairs[cbg]
    for poi in entries_for_cbg:
        # setdefault creates the inner dict the first time a POI is seen.
        poi2cbg.setdefault(poi, {})[cbg] = entries_for_cbg[poi]

In [4]:
# Load the per-CBG census table, tolerating malformed CSV rows.
# `error_bad_lines` was deprecated in pandas 1.3 and removed in pandas 2.0
# (where passing it raises TypeError); `on_bad_lines` is its replacement.
# 'warn' keeps dropped rows visible in the output. The legacy keyword is kept
# only as a fallback for pandas versions that predate `on_bad_lines`.
try:
    CBG_data = pd.read_csv('../data/census_cbg_with_predicted_hesitancy_vaccincation.csv', on_bad_lines='warn')
except TypeError:
    CBG_data = pd.read_csv('../data/census_cbg_with_predicted_hesitancy_vaccincation.csv', error_bad_lines=False)
prediction_vac = pd.read_csv('../data/vac_inferred_lvm.csv')

# Group key: the CBG id with its last 7 digits dropped.
# NOTE(review): presumably the 5-digit county FIPS prefix of a 12-digit CBG id -- confirm.
CBG_data['FIPS Code'] = CBG_data['census_block_group'] // 10000000
# Inner join: CBGs without a row in prediction_vac are dropped here.
CBG_data = CBG_data.merge(prediction_vac, on='census_block_group')

# Population-weighted mean of vac_rate_inferred within each 'FIPS Code' group:
# sum(rate * population) / sum(population).
CBG_data['vac_rate_inferred_times_total_population'] = CBG_data['vac_rate_inferred'] * CBG_data['total_population']

CBG_data_sum = (
    CBG_data
    .groupby('FIPS Code')[['vac_rate_inferred_times_total_population', 'total_population']]
    .sum()
    .reset_index()
)
CBG_data_sum['county_level_weighted_average'] = (
    CBG_data_sum['vac_rate_inferred_times_total_population'] / CBG_data_sum['total_population']
)

CBG_data = CBG_data.merge(CBG_data_sum[['FIPS Code', 'county_level_weighted_average']], on='FIPS Code')

# Convert the rates (divided by 100 here, so treated as percentages) into the
# complementary fraction, clamped to [0, 1]. NaN values stay NaN.
CBG_data['E_estimate_unsure'] = (
    1 - CBG_data['vac_rate_inferred'] / 100.0
).clip(lower=0.0, upper=1.0)
CBG_data['Estimated hesitant or unsure'] = (
    1 - CBG_data['county_level_weighted_average'] / 100.0
).clip(lower=0.0, upper=1.0)

# Alias used by the later cells (same DataFrame object, not a copy).
vaccine = CBG_data

In [5]:
# Map every census block group id in `vaccine` to its total population.
# When an id occurs in several rows, the last row wins.
cbg2population = {
    row['census_block_group']: row['total_population']
    for _, row in vaccine[['census_block_group', 'total_population']].iterrows()
}

In [7]:
import gc

# POIs with at least one recorded CBG.
pois = set([poi for poi, visitors in poi2cbg.items() if len(visitors) >= 1])

# CBGs whose population is greater than 1.
# NOTE: the sort only fixes the insertion order into the set; iterating the
# set afterwards is not guaranteed to be sorted.
cbgs = set(sorted(cbg for cbg, population in cbg2population.items() if population > 1))

# Consecutive integer ids (row index per POI, column index per CBG), assigned
# in set-iteration order.
poi2idx = {poi: idx for idx, poi in enumerate(pois)}
cbg2idx = {cbg: idx for idx, cbg in enumerate(cbgs)}

# del pairs

gc.collect()

# COO-style triplets for the POI x CBG matrix: one entry per (POI, CBG) pair
# whose POI and CBG both passed the filters above.
rows, cols, vals = [], [], []

for poi, visits_by_cbg in poi2cbg.items():
    if poi not in pois:
        continue
    for cbg, visit_value in visits_by_cbg.items():
        if cbg not in cbgs:
            continue
        rows.append(poi2idx[poi])
        cols.append(cbg2idx[cbg])
        vals.append(visit_value)

In [None]:
# Per-POI lookup tables stored as pickled dicts inside .npy files.
# NOTE(review): allow_pickle=True executes pickle data -- trusted files only.
poi2areas = np.load(
    '../data/poi2area.npy', allow_pickle=True
).item()
poi2dwell_corrects_total = np.load(
    '../data/poi2dwell_corrects_total.npy', allow_pickle=True
).item()

# Lay both lookups out as arrays in poi2idx key order. A POI missing from
# either table raises KeyError here.
poi_areas = np.array([poi2areas[poi_id] for poi_id in poi2idx.keys()])
poi_dwell_time_correction_factors = np.array(
    [poi2dwell_corrects_total[poi_id] for poi_id in poi2idx.keys()]
)


In [8]:
from scipy.sparse import csr_matrix
# Assemble the sparse matrix from the (vals, (rows, cols)) triplets built in an
# earlier cell: one row per entry of poi2idx, one column per entry of cbg2idx.
bipartite = csr_matrix((vals, (rows, cols)), shape=(len(poi2idx), len(cbg2idx)))

# Reminder: free memory that is no longer needed at this point (e.g. delete
# large intermediates, then run gc.collect()).

In [15]:
# Container for the parameters collected in the following cells and later
# written to disk with np.save.
dict_param = {}

In [30]:
# Initialise these dict_param entries as empty dicts. Each key gets its own
# separate dict object.
dict_param.update({
    placeholder_key: {}
    for placeholder_key in (
        'all_states',
        'all_hours',
        'cbg_idx_groups_to_track',
        'cbg_day_prop_out',
        'intervention_cost',
        'poi_subcategory_types',
        'cbgs_idxs',
    )
})


In [31]:
# Per-index centrality score taken from `bipartite_normed_product`.
# NOTE(review): this cell relies on kernel state. The only visible definition
# of `bipartite_normed_product` is in the last cell of the notebook, where it
# is a 1-D vector (bipartite.T @ (bipartite @ ones)). For a 1-D input the
# original `.sum(axis=1)` / `[:, 0]` raised, so both forms are accepted here:
#   * 1-D vector -> already one score per index, used as-is (copied);
#   * 2-D matrix -> row sums, flattened to 1-D (the original behaviour).
if np.ndim(bipartite_normed_product) == 1:
    centrality_scores_array = np.array(bipartite_normed_product)
else:
    centrality_scores_array = np.array(bipartite_normed_product.sum(axis=1)).ravel()

# List of (index, score) pairs ordered from highest to lowest score. Reversing
# the ascending stable sort keeps the original tie order (higher index first).
centrality_scores = sorted(
    enumerate(centrality_scores_array), key=lambda item: item[1]
)[::-1]

In [36]:
import copy

# Fix: the original cell contained `dict = {}`, which was never used and
# shadowed the builtin `dict` for the rest of the kernel session. Removed.

# POI x CBG matrix, wrapped in a one-element list.
dict_param['poi_cbg_visits_list'] = [bipartite]
# Column vector of ones, shape (number of POIs, 1).
dict_param['poi_time_counts'] = np.array([np.ones(len(poi2idx))]).T

# Per-POI areas in poi2idx order. A POI missing from the table raises KeyError.
# NOTE(review): allow_pickle=True executes pickle data -- trusted files only.
poi2areas = np.load('../data/poi2area.npy', allow_pickle=True).item()
dict_param['poi_areas'] = np.array([poi2areas[poi] for poi in poi2idx])

# Per-POI dwell-time correction factors in poi2idx order. POIs missing from
# the table get 0.0.
poi2dwell_corrects_total = np.load('../data/poi2dwell_corrects_total_12.npy', allow_pickle=True).item()

dict_param['poi_dwell_time_correction_factors'] = np.array(
    [poi2dwell_corrects_total.get(poi, 0.0) for poi in poi2idx]
)

# Rebuild the CBG -> population lookup from the `vaccine` frame.
cbg2population = {
    row['census_block_group']: row['total_population']
    for _, row in vaccine[['census_block_group', 'total_population']].iterrows()
}

# CBG ids in column-index order. Taken from cbg2idx (the mapping that also
# defines the columns of `bipartite` and the layout of `unvax`) rather than
# from a fresh iteration over the `cbgs` set, so that cbg_sizes, cbgs_to_idxs,
# unvax and the matrix columns cannot drift apart.
dict_param['all_unique_cbgs'] = sorted(cbg2idx, key=cbg2idx.get)
dict_param['cbg_sizes'] = np.array([cbg2population[int(cbg)] for cbg in dict_param['all_unique_cbgs']])

# NOTE(review): list multiplication makes every element refer to the SAME
# dict object; mutating one entry downstream would change all of them.
dict_param['poi_cbg_proportions'] = [{1: 0.9}] * len(poi2idx)

# One value per matrix column: default to the median E_estimate_unsure, then
# overwrite with the CBG's own value wherever the CBG has a column index.
unvax = np.ones(bipartite.shape[1]) * np.median(vaccine['E_estimate_unsure'])

# Iterate only the two needed columns instead of materialising every full row
# with DataFrame.iterrows().
for cbg, unsure_fraction in zip(vaccine['census_block_group'], vaccine['E_estimate_unsure']):
    column = cbg2idx.get(cbg)
    if column is not None:
        unvax[column] = unsure_fraction

dict_param['unvax'] = copy.deepcopy(unvax)

# CBG id -> position within dict_param['all_unique_cbgs'].
dict_param['cbgs_to_idxs'] = {
    cbg: position for position, cbg in enumerate(dict_param['all_unique_cbgs'])
}



In [17]:
# Write the assembled parameter dictionary to disk.
# NOTE(review): dict_param is a plain dict, so reading it back presumably needs
# np.load(..., allow_pickle=True).item(), as this notebook does for its other
# .npy dictionaries -- confirm.
np.save('../data/dict_param_all_12.npy', dict_param)


# Clear memory and delete data that is no longer needed before continuing.

In [None]:
import gc

from scipy.sparse import csr_matrix

# NOTE(review): this cell relies on kernel state from earlier cells:
# poi2cbg, cbg2population, poi2dwell_corrects_total and poi2areas.

# POIs with at least one recorded CBG.
pois = set([poi for poi, visitors in poi2cbg.items() if len(visitors) >= 1])

# CBGs whose population is greater than 1. The sort only fixes the insertion
# order into the set; set iteration itself is not guaranteed to be sorted.
cbgs = set(sorted(cbg for cbg, population in cbg2population.items() if population > 1))

# Consecutive integer ids (row per POI, column per CBG) in set-iteration order.
# Every POI in `pois` gets a row, including POIs skipped below for lacking a
# dwell correction or an area, so the matrix shape still matches poi2idx.
poi2idx = {poi: idx for idx, poi in enumerate(pois)}
cbg2idx = {cbg: idx for idx, cbg in enumerate(cbgs)}

# del pairs

gc.collect()

# COO-style triplets of the weighted POI x CBG matrix. Each stored value is
#   poi2cbg[poi][cbg] * sqrt(dwell_correction[poi] / area[poi]).
rows, cols, vals = [], [], []

for poi, visits_by_cbg in poi2cbg.items():
    if poi not in pois or poi not in poi2dwell_corrects_total or poi not in poi2areas:
        continue

    kept = [
        (cbg2idx[cbg], visit_value)
        for cbg, visit_value in visits_by_cbg.items()
        if cbg in cbgs
    ]
    if not kept:
        # No entry to emit, so the weight is never evaluated for this POI
        # (same as the original loop).
        continue

    # The weight depends only on the POI, so compute it once per POI instead
    # of once per (POI, CBG) entry.
    poi_weight = np.sqrt(poi2dwell_corrects_total[poi] / poi2areas[poi])
    poi_row = poi2idx[poi]
    for cbg_col, visit_value in kept:
        rows.append(poi_row)
        cols.append(cbg_col)
        vals.append(visit_value * poi_weight)

# Print a short summary rather than the entire value list, which would flood
# the notebook output.
print(len(vals), 'weighted entries; first 5:', vals[:5])

bipartite = csr_matrix((vals, (rows, cols)), shape=(len(poi2idx), len(cbg2idx)))
# np.save('bipartite_weight_12.npy', bipartite)

In [3]:
# Compute bipartite.T @ (bipartite @ 1): first the row sums of `bipartite`
# (one value per row), then their projection back onto the columns. This
# equals the row sums of bipartite.T @ bipartite without ever forming that
# product matrix.
# The ones-vector length is taken from the matrix itself; the original
# hard-coded 214697, which only works when the column count is exactly that.
right = bipartite @ np.ones(bipartite.shape[1])
bipartite_normed_product = bipartite.T @ right
np.save('../results/centrality_files/bipartite_normed_product_all_12.npy', bipartite_normed_product)