In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import folium
import seaborn as sns
import plotly.express as px
import pyarrow.parquet as pq
import matplotlib.patches as mpatches

In [None]:
# Read the provider table with pyarrow, then convert the Arrow table to a pandas DataFrame.
providers_table = pq.read_table(source='../data/vision_providers_minimal.parquet')
vision_providers_minimal = providers_table.to_pandas()
vision_providers_minimal

In [None]:
# Load the county layer from a GeoPackage file into a GeoDataFrame.
# NOTE(review): this is an absolute, machine-specific path — consider a configurable data directory.
county_layer_path = "/srv/data/my_shared_data_folder/pb-data/county.gpkg"
counties = gpd.read_file(county_layer_path)
counties

In [None]:
# Keep only the rows whose "Entity Type Code" equals 1 and renumber the index from 0.
# NOTE(review): presumably code 1 marks individual (non-organization) providers — confirm against the data dictionary.
keep_rows = vision_providers_minimal["Entity Type Code"].eq(1)
vision_providers_minimal = vision_providers_minimal.loc[keep_rows].reset_index(drop=True)
vision_providers_minimal

In [None]:
# Turn the provider table into a GeoDataFrame of points built from the
# Longitude (x) / Latitude (y) columns, tagged as EPSG:4326.
provider_points = gpd.points_from_xy(
    vision_providers_minimal.Longitude,
    vision_providers_minimal.Latitude,
)
vision_providers_minimal = gpd.GeoDataFrame(
    vision_providers_minimal,
    geometry=provider_points,
    crs="EPSG:4326",
)
vision_providers_minimal

In [None]:
# Put the county polygons in the same CRS as the provider points before joining.
provider_crs = vision_providers_minimal.crs
counties = counties.to_crs(provider_crs)

# Attach county attributes to every provider point that lies within a county polygon.
joined = gpd.sjoin(
    vision_providers_minimal,
    counties,
    predicate='within',
)
joined

In [None]:
# Flag each provider by taxonomy-code prefix. `Taxonomy` holds one or more codes
# separated by '|'; a provider is flagged for a category when ANY of its codes
# starts with that category's prefix, so one provider can be flagged in several
# categories at once.
# NOTE(review): the '207' prefix may be broader than ophthalmology alone —
# confirm the input file is already restricted to vision-related codes.
taxonomy_prefixes = {
    'Optometry': '152',
    'Ophthalmology': '207',
    'Others': '156',
}

for category, prefix in taxonomy_prefixes.items():
    # A code "starts with" the prefix when the prefix sits at the very start of
    # the string or directly after a '|' separator.
    prefix_pattern = rf'(?:^|\|){prefix}'
    # na=False: a missing Taxonomy value counts as "no match" instead of raising
    # (the previous x.split('|') lambda failed on NaN/None).
    joined[category] = joined['Taxonomy'].str.contains(prefix_pattern, regex=True, na=False)

joined

In [None]:
# Count flagged providers per county: summing the boolean flag columns within
# each GEOID yields the number of True values per category.
category_columns = ['Optometry', 'Ophthalmology', 'Others']
flags_by_county = joined.groupby('GEOID')[category_columns]
grouped = flags_by_county.sum().reset_index()
grouped

In [None]:
# Attach the per-county provider counts to every county polygon. The left join
# keeps counties with no matched provider; their count columns come back as NaN.
provider_columns = ['Optometry', 'Ophthalmology', 'Others']

grouped_geo = pd.merge(counties, grouped, how='left', on='GEOID').reset_index(drop=True)

# Fill by assignment rather than `grouped_geo[col].fillna(0, inplace=True)`:
# the chained in-place form acts on a temporary object, triggers a warning in
# pandas 2.x and leaves the frame unchanged under Copy-on-Write, after which the
# integer cast would fail on NaN.
grouped_geo[provider_columns] = grouped_geo[provider_columns].fillna(0).astype('int')

# Total is the sum of the three category counts. A provider flagged in more than
# one category is counted once per category.
grouped_geo['Total_Providers'] = grouped_geo[provider_columns].sum(axis=1).astype('int')

grouped_geo

In [None]:
# Export the county-level counts for all states to CSV, omitting the row index.
grouped_geo.to_csv(path_or_buf='counties_counts.csv', index=False)

In [None]:
# Oklahoma subset: rows whose state FIPS code (STATEFP) is '40', re-indexed from 0.
in_oklahoma = grouped_geo["STATEFP"].eq('40')
grouped_geo_OK = grouped_geo.loc[in_oklahoma].reset_index(drop=True)
grouped_geo_OK

In [None]:
# Illinois subset: rows whose state FIPS code (STATEFP) is '17', re-indexed from 0.
in_illinois = grouped_geo["STATEFP"].eq('17')
grouped_geo_IL = grouped_geo.loc[in_illinois].reset_index(drop=True)
grouped_geo_IL

In [None]:
# Export the Illinois county counts to CSV, omitting the row index.
grouped_geo_IL.to_csv(path_or_buf='counties_counts_IL.csv', index=False)

In [None]:
# Static choropleth of total provider counts for Oklahoma.
# Uses the `grouped_geo_OK` subset built in the Oklahoma filter cell; the
# duplicate re-filter that used to live here was removed.
fig, ax = plt.subplots(figsize=(10, 4))

grouped_geo_OK.plot(
    column='Total_Providers',
    cmap='Reds',
    linewidth=0.8,
    ax=ax,
    edgecolor='0.8',
    legend=True,
    missing_kwds={'color': 'lightgrey'},  # polygons without a value are drawn in grey
)

ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')

# The frame has one row per county polygon (it is derived from `counties`),
# so the title names counties rather than tracts.
ax.set_title('Oklahoma: Providers by County')
plt.show()

In [None]:
# Interactive choropleth of total providers for Oklahoma.
# The interior-point columns are coerced to numbers (unparseable values become
# NaN) so their means can be used as the map centre.
for centroid_column in ('INTPTLAT', 'INTPTLON'):
    grouped_geo_OK[centroid_column] = pd.to_numeric(grouped_geo_OK[centroid_column], errors='coerce')

ok_map_center = {
    "lat": grouped_geo_OK['INTPTLAT'].mean(),
    "lon": grouped_geo_OK['INTPTLON'].mean(),
}

# Polygons are matched to rows through the frame's index.
fig = px.choropleth_mapbox(
    grouped_geo_OK,
    geojson=grouped_geo_OK.geometry,
    locations=grouped_geo_OK.index,
    color='Total_Providers',
    color_continuous_scale="greens",
    mapbox_style="carto-positron",
    zoom=5,
    center=ok_map_center,
    opacity=0.8,
    hover_data=["NAMELSAD"],
    labels={"Total_Providers": 'Total Providers'},
)

# Remove the figure margins so the map fills the output area.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))

fig.show()


In [None]:
from libpysal import weights

# Queen contiguity weights built from the Illinois county frame.
w = weights.Queen.from_dataframe(grouped_geo_IL)

# Row-standardise: each location's neighbour weights sum to 1.
w.transform = 'R'

# Spatial lag of the total provider count, stored as a new column.
il_total_providers = grouped_geo_IL['Total_Providers']
grouped_geo_IL['lag_Total_Providers'] = weights.lag_spatial(w, il_total_providers)
grouped_geo_IL

In [None]:
from pysal.explore import esda

# Fixed seed for the permutation-based inference so that the pseudo p-values
# (lisa.p_sim, used by the significance cells below) are the same on every run.
LISA_SEED = 42

y = grouped_geo_IL['Total_Providers']

# Local Moran's I (LISA) of the provider counts under the contiguity weights `w`.
# The unused spatial-lag computation that used to precede this call was dropped.
lisa = esda.Moran_Local(y, w, seed=LISA_SEED)

# Store the local statistic for each county.
grouped_geo_IL['lisa_values'] = lisa.Is

# Map the LISA values, binned by quantiles.
fig, ax = plt.subplots(figsize=(12, 10))
grouped_geo_IL.plot(column='lisa_values', scheme='quantiles', cmap='Reds', legend=True, ax=ax)
ax.set_title("Illinois: Local Moran's I of Total Providers by County")
plt.show()


In [None]:
# Significance threshold applied to the simulated LISA p-values.
alpha = 0.05

# True where a county's simulated p-value falls below the threshold.
is_significant = lisa.p_sim < alpha
grouped_geo_IL['significant_clusters'] = is_significant

# Map significant vs. non-significant counties.
fig, ax = plt.subplots(figsize=(12, 10))
grouped_geo_IL.plot(
    column='significant_clusters',
    legend=True,
    ax=ax,
)
plt.show()


In [None]:
# Label each county with its LISA quadrant, but only where the simulated p-value
# is below `alpha`; every other county stays 'Not significant'.
quadrant_labels = {
    1: 'High-High',
    2: 'Low-High',
    3: 'Low-Low',
    4: 'High-Low',
}
below_alpha = lisa.p_sim < alpha

grouped_geo_IL['lisa_type'] = 'Not significant'
for quadrant, label in quadrant_labels.items():
    grouped_geo_IL.loc[(lisa.q == quadrant) & below_alpha, 'lisa_type'] = label

grouped_geo_IL

In [None]:
# Export the Illinois frame, including the columns added above, to CSV without the row index.
grouped_geo_IL.to_csv(path_or_buf='counties__IL.csv', index=False)

In [None]:
# Static choropleth of the spatially lagged provider count for Illinois.
# `lag_Total_Providers` is only computed on `grouped_geo_IL`; the Oklahoma
# frame that was plotted here before has no such column.
fig, ax = plt.subplots(figsize=(10, 4))

grouped_geo_IL.plot(
    column='lag_Total_Providers',
    cmap='Reds',
    linewidth=0.8,
    ax=ax,
    edgecolor='0.8',
    legend=True,
    missing_kwds={'color': 'lightgrey'},  # polygons without a value are drawn in grey
)

ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')

# The frame has one row per county polygon, so the title names counties rather than tracts.
ax.set_title('Illinois: Lag Providers by County')
plt.show()

In [None]:
# Interactive choropleth of total providers for Illinois.
# Work on a derived copy instead of rebuilding `grouped_geo_IL` from
# `grouped_geo`: re-filtering here used to throw away the spatial-lag and LISA
# columns that earlier cells added to `grouped_geo_IL`.
grouped_geo_IL_map = grouped_geo_IL.assign(
    # Coerce the interior-point columns to numbers (unparseable values become
    # NaN) so their means can be used as the map centre.
    INTPTLAT=pd.to_numeric(grouped_geo_IL['INTPTLAT'], errors='coerce'),
    INTPTLON=pd.to_numeric(grouped_geo_IL['INTPTLON'], errors='coerce'),
)

il_map_center = {
    "lat": grouped_geo_IL_map['INTPTLAT'].mean(),
    "lon": grouped_geo_IL_map['INTPTLON'].mean(),
}

# Polygons are matched to rows through the frame's index.
fig = px.choropleth_mapbox(
    grouped_geo_IL_map,
    geojson=grouped_geo_IL_map.geometry,
    locations=grouped_geo_IL_map.index,
    color='Total_Providers',
    color_continuous_scale="greens",
    mapbox_style="carto-positron",
    zoom=5,
    center=il_map_center,
    opacity=0.8,
    hover_data=["NAMELSAD"],
    labels={"Total_Providers": 'Total Providers'},
)

# Remove the figure margins so the map fills the output area.
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})

fig.show()
