In [26]:
"""Run a zero-inflated GP on opioid data"""
import os
import sys
from glob import glob

import numpy as np
import pandas as pd
idx = pd.IndexSlice
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

import copy

import geopandas as gpd

from sklearn.metrics import mean_absolute_error

from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor

import gpflow
import tensorflow as tf
import sys
code_dir = '/cluster/home/kheuto01/code/zero-inflated-gp/'
sys.path.append(code_dir)
from math import radians, cos, sin, asin, sqrt
from onoffgpf import OnOffSVGP, OnOffLikelihood

import pickle

from math import radians, cos, sin, asin, sqrt

def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance in kilometers between two points
    on the earth (specified in decimal degrees)
    https://stackoverflow.com/a/4913653/1748679
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a))
    r = 6371 # Radius of earth in kilometers. Use 3956 for miles. Determines return value units.
    return c * r

In [4]:
data_dir = '/cluster/tufts/hugheslab/datasets/NSF_OD/'
result_dir = os.path.join(data_dir, 'results_20220606_update')
mass_shapefile = os.path.join(data_dir,'shapefiles','MA_2021')

In [6]:
town_death_file = os.path.join(result_dir, 'clean_annual_town')
town_deaths = gpd.read_file(town_death_file)

In [12]:
tract_death_file = os.path.join(result_dir, 'clean_annual_tract')
tract_deaths = gpd.read_file(tract_death_file)

In [13]:
just_towns = town_deaths[town_deaths['timestep']==town_deaths['timestep'].min()]
just_tracts = tract_deaths[tract_deaths['timestep']==tract_deaths['timestep'].min()]

In [18]:
%%capture
town_tract_map = pd.DataFrame()
for town_id in just_towns.geoid.unique():
    
    this_town = just_towns[(just_towns['geoid']==town_id)]
    town_geo =this_town['geometry'].values[0]
    
    tracts_containing_town = just_tracts[just_tracts.geometry.intersects(town_geo.centroid)]
    tracts_in_town = just_tracts[just_tracts.geometry.centroid.intersects(town_geo)]

    child_tracts = set(np.append(tracts_containing_town.geoid.values,
                                 tracts_in_town.geoid.values))
    
    these_tracts_mapped = pd.DataFrame({'child_tracts':list(child_tracts), 'parent_town':town_id})
    town_tract_map =town_tract_map.append(these_tracts_mapped)
    


In [23]:
town_tract_map.to_csv(os.path.join(result_dir, 'town_tract_map.csv'), index=False)

Unnamed: 0,child_tracts,parent_town
0,25001012900,2500103690
1,25001012800,2500103690
2,25001013002,2500103690
3,25001013200,2500103690
4,25001012502,2500103690
...,...,...
1620,25027730802,2502782000
1621,25027731900,2502782000
1622,25027731700,2502782000
1623,25027730402,2502782000


In [27]:
svi_file = os.path.join(result_dir, 'svi_month')
svi_gdf = gpd.read_file(svi_file)
# Call it "grid_squar" because geopandas only supports len 10 columns
svi_gdf = svi_gdf.rename(columns={'INTPTLAT': 'lat', 'INTPTLON': 'lon', 'GEOID': 'geoid'})
# Make lat and lon floats
svi_gdf.loc[:, 'lat'] = svi_gdf.lat.astype(float)
svi_gdf.loc[:, 'lon'] = svi_gdf.lon.astype(float)
deaths_gdf = svi_gdf


# m
five_aggregate = {}
just_grid = deaths_gdf.loc[
        (deaths_gdf['year'] == 2000) & (deaths_gdf['month'] == 1), ['geoid', 'geometry', 'lat', 'lon']]

filtering_list = just_grid.copy()
for r, row in just_grid.iterrows():
    filtering_list.loc[:, 'haversine'] = filtering_list.apply(lambda x: haversine(row['lon'], row['lat'],
                                                                        x['lon'], x['lat']),
                                                    axis=1)
    sorted_dist = filtering_list.sort_values('haversine')
    closest_five = sorted_dist.iloc[:5].geoid.unique()
    five_aggregate[r] = closest_five.tolist()
    filtering_list = filtering_list[~filtering_list['geoid'].isin(closest_five)]
    just_grid.loc[just_grid['geoid'].isin(closest_five), 'grouping'] = r
    
    if len(filtering_list) == 0:
        break
        

In [36]:
just_grid.to_file(os.path.join(result_dir, 'tract_group_map'))