In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px
import pyarrow.parquet as pq
from libpysal import weights
from pysal.explore import esda

In [None]:
# Read the provider table from parquet with pyarrow and convert it to a pandas DataFrame.
vision_providers_minimal = (
    pq.read_table(source='../data/vision_providers_minimal.parquet')
    .to_pandas()
)
vision_providers_minimal

In [None]:
# Keep only the rows whose "Entity Type Code" equals 1, then renumber the index from 0.
# NOTE(review): presumably code 1 marks individual providers rather than organizations — confirm.
vision_providers_minimal = (
    vision_providers_minimal
    .loc[lambda providers: providers["Entity Type Code"] == 1]
    .reset_index(drop=True)
)
vision_providers_minimal

In [None]:
# Turn the Longitude/Latitude columns into point geometries and wrap the table
# in a GeoDataFrame tagged with the EPSG:4326 coordinate reference system.
provider_points = gpd.points_from_xy(
    vision_providers_minimal["Longitude"],
    vision_providers_minimal["Latitude"],
)
vision_providers_minimal = gpd.GeoDataFrame(
    vision_providers_minimal,
    geometry=provider_points,
    crs="EPSG:4326",
)
vision_providers_minimal

States

In [None]:
# Load the state boundary layer from the zipped dataset.
states = gpd.read_file(filename="../data/US_State_Boundaries.zip")
states

In [None]:
# Remove the rows labelled 21 and 22, then rebuild a 0-based index.
# reset_index() without drop=True keeps the old labels in a new 'index' column.
# NOTE(review): which boundaries rows 21/22 are is not visible here. Re-running this
# cell on the already-reset frame would drop two further rows, so run it once per kernel.
states = states.drop(index=[21, 22]).reset_index()
states

In [None]:
# Reproject the state polygons into the providers' CRS so both layers line up.
states = states.to_crs(crs=vision_providers_minimal.crs)

# Spatial join: keep each provider point that lies within a state polygon,
# carrying the matching state's attributes along.
joined_states = gpd.sjoin(
    left_df=vision_providers_minimal,
    right_df=states,
    predicate='within',
)
joined_states

In [None]:
# Flag each provider by specialty: a flag is True when any of the '|'-separated
# codes in 'Taxonomy' starts with the prefix listed for that specialty.
taxonomy_prefixes = {'Optometry': '152', 'Ophthalmology': '207', 'Others': '156'}

for specialty, prefix in taxonomy_prefixes.items():
    # prefix is bound as a default argument so each lambda keeps its own value.
    joined_states[specialty] = joined_states['Taxonomy'].apply(
        lambda taxonomy, prefix=prefix: any(
            code.startswith(prefix) for code in taxonomy.split('|')
        )
    )

joined_states

In [None]:
# Count flagged providers per state name (summing booleans counts the True values).
specialty_columns = ['Optometry', 'Ophthalmology', 'Others']
grouped_states = (
    joined_states
    .groupby('NAME')[specialty_columns]
    .sum()
    .reset_index()
)
grouped_states.head()

In [None]:
# Attach the per-state provider counts to the state polygons.
# Fix: the counts frame built above is `grouped_states`; the original referenced an
# undefined name `grouped`, which raises NameError on a fresh kernel.
# NOTE(review): how='inner' drops states without any matched provider, so the fill
# below is a no-op unless the counts themselves contain NaN. The county and tract
# sections use how='left' — confirm which behaviour is intended here.
grouped_geo_states = pd.merge(
    states, grouped_states, how='inner', left_on='NAME', right_on='NAME'
).reset_index(drop=True)

# Replace missing counts with 0. The result is assigned back to the column instead of
# calling Series.fillna(inplace=True) on a selected column: that is chained assignment,
# which recent pandas warns about and which stops updating the parent frame once
# Copy-on-Write is enabled.
count_columns = ['Optometry', 'Ophthalmology', 'Others']
for count_column in count_columns:
    grouped_geo_states[count_column] = grouped_geo_states[count_column].fillna(0)

grouped_geo_states = grouped_geo_states.astype({'Optometry': 'int', 'Ophthalmology': 'int', 'Others': 'int'})

grouped_geo_states

In [None]:
# Provider counts per 100,000 people, using the 'POP' column of the merged frame.
# Fix: the denominator is read from `grouped_geo_states`; the original referenced an
# undefined name `grouped_geo`, which raises NameError on a fresh kernel.
# NOTE(review): 'POP' is presumably the state population from the boundary file — confirm.
population = grouped_geo_states['POP']
grouped_geo_states['Optometrists_Per_100000'] = grouped_geo_states['Optometry'] / population * 100000
grouped_geo_states['Ophthalmologists_Per_100000'] = grouped_geo_states['Ophthalmology'] / population * 100000
grouped_geo_states

In [None]:
# Build Queen contiguity weights from the state geometries.
w = weights.Queen.from_dataframe(grouped_geo_states)

# Switch the weights to the 'R' transformation (row-standardised in libpysal).
w.transform = 'R'

# Spatial lag of the optometry and ophthalmology provider counts.
for specialty in ('Optometry', 'Ophthalmology'):
    grouped_geo_states[f'lag_{specialty}'] = weights.lag_spatial(w, grouped_geo_states[specialty])

grouped_geo_states

In [None]:
# Variable under study: optometry provider counts per state.
y = grouped_geo_states['Optometry']

# Spatial lag of y under the weights built in the previous cell.
ylag = weights.lag_spatial(w, y)

# Local Moran's I (LISA). Its pseudo p-values (p_sim) come from random permutations,
# so a fixed seed is passed to make the significance maps below reproducible
# across Restart & Run All.
lisa = esda.Moran_Local(y, w, seed=12345)

# Store the local statistics alongside the geometries for plotting.
grouped_geo_states['lisa_values'] = lisa.Is

In [None]:
# Plot the LISA values
# Choropleth of the local Moran's I values, binned into quantiles.
fig, ax = plt.subplots(figsize=(12, 10))
grouped_geo_states.plot(
    column='lisa_values',
    scheme='quantiles',
    cmap='Reds',
    legend=True,
    ax=ax,
)
# Title added so the figure can be told apart from the county and tract versions.
ax.set_title("Local Moran's I of optometry provider counts by state")
plt.show()

In [None]:
# Define the significance level, for example, 0.05
# Significance threshold applied to the simulated p-values.
alpha = 0.05

# True where the local statistic's simulated p-value falls below alpha.
grouped_geo_states['significant_clusters'] = lisa.p_sim < alpha

# Map the significant / not significant flag.
fig, ax = plt.subplots(figsize=(12, 10))
grouped_geo_states.plot(column='significant_clusters', legend=True, ax=ax)
# Title added so the figure stands on its own when the notebook is skimmed.
ax.set_title(f"States with significant local Moran's I for optometry counts (p_sim < {alpha})")
plt.show()

In [None]:
# Label each state with its cluster type: the quadrant code from lisa.q when the
# simulated p-value is below alpha, otherwise 'Not significant'.
quadrant_labels = {1: 'High-High', 2: 'Low-High', 3: 'Low-Low', 4: 'High-Low'}
is_significant = lisa.p_sim < alpha

grouped_geo_states['lisa_type'] = 'Not significant'
for quadrant, label in quadrant_labels.items():
    grouped_geo_states.loc[(lisa.q == quadrant) & is_significant, 'lisa_type'] = label

In [None]:
# Categorical map of the LISA cluster labels.
fig, ax = plt.subplots(figsize=(12, 10))
grouped_geo_states.plot(column='lisa_type', legend=True, cmap='coolwarm', ax=ax)
# Title added so the figure can be told apart from the county and tract versions.
ax.set_title("LISA cluster types for optometry provider counts by state")
plt.show()

Counties

In [None]:
# Load the county layer from a GeoPackage.
# NOTE(review): this is an absolute, machine-specific path, unlike the '../data'
# inputs used elsewhere in the notebook. Consider moving the file under the data folder.
counties = gpd.read_file(filename="/srv/data/my_shared_data_folder/pb-data/county.gpkg")
counties

In [None]:
# Reproject the county polygons into the providers' CRS so both layers line up.
counties = counties.to_crs(crs=vision_providers_minimal.crs)

# Spatial join: keep each provider point that lies within a county polygon.
joined_counties = gpd.sjoin(
    left_df=vision_providers_minimal,
    right_df=counties,
    predicate='within',
)
joined_counties

In [None]:
# Flag each provider by specialty: a flag is True when any of the '|'-separated
# codes in 'Taxonomy' starts with the prefix listed for that specialty.
taxonomy_prefixes = {'Optometry': '152', 'Ophthalmology': '207', 'Others': '156'}

for specialty, prefix in taxonomy_prefixes.items():
    # prefix is bound as a default argument so each lambda keeps its own value.
    joined_counties[specialty] = joined_counties['Taxonomy'].apply(
        lambda taxonomy, prefix=prefix: any(
            code.startswith(prefix) for code in taxonomy.split('|')
        )
    )

joined_counties

In [None]:
# Count flagged providers per county GEOID (summing booleans counts the True values).
specialty_columns = ['Optometry', 'Ophthalmology', 'Others']
grouped_counties = (
    joined_counties
    .groupby('GEOID')[specialty_columns]
    .sum()
    .reset_index()
)
grouped_counties

In [None]:
# Attach the per-county provider counts to every county polygon. The left join keeps
# counties with no matched provider; their counts come back as NaN.
grouped_geo_counties = pd.merge(
    counties, grouped_counties, how='left', left_on='GEOID', right_on='GEOID'
).reset_index(drop=True)

# Replace missing counts with 0. The result is assigned back to the column instead of
# calling Series.fillna(inplace=True) on a selected column: that is chained assignment,
# which recent pandas warns about and which stops updating the parent frame once
# Copy-on-Write is enabled. The NaNs would then survive and the int cast below would fail.
count_columns = ['Optometry', 'Ophthalmology', 'Others']
for count_column in count_columns:
    grouped_geo_counties[count_column] = grouped_geo_counties[count_column].fillna(0)

grouped_geo_counties = grouped_geo_counties.astype({'Optometry': 'int', 'Ophthalmology': 'int', 'Others': 'int'})

grouped_geo_counties

In [None]:
# Convert the INTPTLAT / INTPTLON columns to numbers; values that cannot be
# parsed become NaN (errors='coerce').
grouped_geo_counties['INTPTLAT'] = pd.to_numeric(grouped_geo_counties['INTPTLAT'], errors='coerce')
grouped_geo_counties['INTPTLON'] = pd.to_numeric(grouped_geo_counties['INTPTLON'], errors='coerce')

# Disabled interactive choropleth, kept for reference. It was previously wrapped in a
# string literal opened with a stray fourth quote (''''), so the cell echoed the whole
# snippet as its output. As comments it is inert. Uncomment to render the map.
#
# fig = px.choropleth_mapbox(grouped_geo_counties,
#                            geojson = grouped_geo_counties.geometry,
#                            locations = grouped_geo_counties.index,
#                            color = 'Optometry',
#                            color_continuous_scale = "greens",
#                            mapbox_style = "carto-positron",
#                            zoom = 5,
#                            center = {"lat": grouped_geo_counties.INTPTLAT.mean(), "lon": grouped_geo_counties.INTPTLON.mean()},
#                            opacity = 0.8,
#                            hover_data = ["NAMELSAD"],
#                            labels = {"Optometry":'Optometry'}
#                           )
#
# fig.update_layout(margin = {"r":0,"t":0,"l":0,"b":0})
#
# fig.show()

In [None]:
# Build Queen contiguity weights from the county geometries.
w = weights.Queen.from_dataframe(grouped_geo_counties)

# Switch the weights to the 'R' transformation (row-standardised in libpysal).
w.transform = 'R'

# Spatial lag of the optometry and ophthalmology provider counts.
for specialty in ('Optometry', 'Ophthalmology'):
    grouped_geo_counties[f'lag_{specialty}'] = weights.lag_spatial(w, grouped_geo_counties[specialty])
grouped_geo_counties

In [None]:
# Variable under study: optometry provider counts per county.
y = grouped_geo_counties['Optometry']

# Spatial lag of y under the weights built in the previous cell.
ylag = weights.lag_spatial(w, y)

# Local Moran's I (LISA). Its pseudo p-values (p_sim) come from random permutations,
# so a fixed seed is passed to make the significance maps below reproducible
# across Restart & Run All.
lisa = esda.Moran_Local(y, w, seed=12345)

# Store the local statistics alongside the geometries for plotting.
grouped_geo_counties['lisa_values'] = lisa.Is

In [None]:
# Plot the LISA values
# Choropleth of the local Moran's I values, binned into quantiles.
fig, ax = plt.subplots(figsize=(12, 10))
grouped_geo_counties.plot(
    column='lisa_values',
    scheme='quantiles',
    cmap='Reds',
    legend=True,
    ax=ax,
)
# Title added so the figure can be told apart from the state and tract versions.
ax.set_title("Local Moran's I of optometry provider counts by county")
plt.show()

In [None]:
# Define the significance level, for example, 0.05
# Significance threshold applied to the simulated p-values.
alpha = 0.05

# True where the local statistic's simulated p-value falls below alpha.
grouped_geo_counties['significant_clusters'] = lisa.p_sim < alpha

# Map the significant / not significant flag.
fig, ax = plt.subplots(figsize=(12, 10))
grouped_geo_counties.plot(column='significant_clusters', legend=True, ax=ax)
# Title added so the figure stands on its own when the notebook is skimmed.
ax.set_title(f"Counties with significant local Moran's I for optometry counts (p_sim < {alpha})")
plt.show()

In [None]:
# Label each county with its cluster type: the quadrant code from lisa.q when the
# simulated p-value is below alpha, otherwise 'Not significant'.
quadrant_labels = {1: 'High-High', 2: 'Low-High', 3: 'Low-Low', 4: 'High-Low'}
is_significant = lisa.p_sim < alpha

grouped_geo_counties['lisa_type'] = 'Not significant'
for quadrant, label in quadrant_labels.items():
    grouped_geo_counties.loc[(lisa.q == quadrant) & is_significant, 'lisa_type'] = label

In [None]:
# Categorical map of the LISA cluster labels.
fig, ax = plt.subplots(figsize=(12, 10))
grouped_geo_counties.plot(column='lisa_type', legend=True, cmap='coolwarm', ax=ax)
# Title added so the figure can be told apart from the state and tract versions.
ax.set_title("LISA cluster types for optometry provider counts by county")
plt.show()

Census Tracts

In [None]:
# Load the census tract layer from the zipped dataset.
tracts = gpd.read_file(filename="../data/ILCensusTracts.zip")
tracts

In [None]:
# Reproject the tract polygons into the providers' CRS so both layers line up.
tracts = tracts.to_crs(vision_providers_minimal.crs)

# Spatial join: keep each provider point that lies within a tract polygon.
# Fix: the result is named `joined_tracts`, which is the name every later tract cell
# reads. The original assigned it to `joined`, so those cells raised NameError on a
# fresh kernel.
joined_tracts = gpd.sjoin(vision_providers_minimal, tracts, predicate='within')
joined_tracts

In [None]:
# Flag each provider by specialty: a flag is True when any of the '|'-separated
# codes in 'Taxonomy' starts with the prefix listed for that specialty.
taxonomy_prefixes = {'Optometry': '152', 'Ophthalmology': '207', 'Others': '156'}

for specialty, prefix in taxonomy_prefixes.items():
    # prefix is bound as a default argument so each lambda keeps its own value.
    joined_tracts[specialty] = joined_tracts['Taxonomy'].apply(
        lambda taxonomy, prefix=prefix: any(
            code.startswith(prefix) for code in taxonomy.split('|')
        )
    )

joined_tracts

In [None]:
# Count flagged providers per tract GEOID (summing booleans counts the True values).
specialty_columns = ['Optometry', 'Ophthalmology', 'Others']
grouped_tracts = (
    joined_tracts
    .groupby('GEOID')[specialty_columns]
    .sum()
    .reset_index()
)
grouped_tracts

In [None]:
# Attach the per-tract provider counts to every tract polygon. The left join keeps
# tracts with no matched provider; their counts come back as NaN.
grouped_geo_tracts = pd.merge(
    tracts, grouped_tracts, how='left', left_on='GEOID', right_on='GEOID'
).reset_index(drop=True)

# Replace missing counts with 0. The result is assigned back to the column instead of
# calling Series.fillna(inplace=True) on a selected column: that is chained assignment,
# which recent pandas warns about and which stops updating the parent frame once
# Copy-on-Write is enabled. The NaNs would then survive and the int cast below would fail.
count_columns = ['Optometry', 'Ophthalmology', 'Others']
for count_column in count_columns:
    grouped_geo_tracts[count_column] = grouped_geo_tracts[count_column].fillna(0)

grouped_geo_tracts = grouped_geo_tracts.astype({'Optometry': 'int', 'Ophthalmology': 'int', 'Others': 'int'})

grouped_geo_tracts

In [None]:
# Build Queen contiguity weights from the tract geometries.
w = weights.Queen.from_dataframe(grouped_geo_tracts)

# Switch the weights to the 'R' transformation (row-standardised in libpysal).
w.transform = 'R'

# Spatial lag of the optometry and ophthalmology provider counts.
for specialty in ('Optometry', 'Ophthalmology'):
    grouped_geo_tracts[f'lag_{specialty}'] = weights.lag_spatial(w, grouped_geo_tracts[specialty])

grouped_geo_tracts

In [None]:
# Variable under study: optometry provider counts per census tract.
y = grouped_geo_tracts['Optometry']

# Spatial lag of y under the weights built in the previous cell.
ylag = weights.lag_spatial(w, y)

# Local Moran's I (LISA). Its pseudo p-values (p_sim) come from random permutations,
# so a fixed seed is passed to make the significance maps below reproducible
# across Restart & Run All.
lisa = esda.Moran_Local(y, w, seed=12345)

# Store the local statistics alongside the geometries for plotting.
grouped_geo_tracts['lisa_values'] = lisa.Is

In [None]:
# Choropleth of the local Moran's I values, binned into quantiles.
fig, ax = plt.subplots(figsize=(12, 10))
grouped_geo_tracts.plot(
    column='lisa_values',
    scheme='quantiles',
    cmap='Reds',
    legend=True,
    ax=ax,
)
# Title added so the figure can be told apart from the state and county versions.
ax.set_title("Local Moran's I of optometry provider counts by census tract")
plt.show()

In [None]:
# Define the significance level, for example, 0.05
# Significance threshold applied to the simulated p-values.
alpha = 0.05

# True where the local statistic's simulated p-value falls below alpha.
grouped_geo_tracts['significant_clusters'] = lisa.p_sim < alpha

# Map the significant / not significant flag.
fig, ax = plt.subplots(figsize=(12, 10))
grouped_geo_tracts.plot(column='significant_clusters', legend=True, ax=ax)
# Title added so the figure stands on its own when the notebook is skimmed.
ax.set_title(f"Census tracts with significant local Moran's I for optometry counts (p_sim < {alpha})")
plt.show()

In [None]:
# Label each tract with its cluster type: the quadrant code from lisa.q when the
# simulated p-value is below alpha, otherwise 'Not significant'.
quadrant_labels = {1: 'High-High', 2: 'Low-High', 3: 'Low-Low', 4: 'High-Low'}
is_significant = lisa.p_sim < alpha

grouped_geo_tracts['lisa_type'] = 'Not significant'
for quadrant, label in quadrant_labels.items():
    grouped_geo_tracts.loc[(lisa.q == quadrant) & is_significant, 'lisa_type'] = label

In [None]:
# Categorical map of the LISA cluster labels.
fig, ax = plt.subplots(figsize=(12, 10))
grouped_geo_tracts.plot(column='lisa_type', legend=True, cmap='coolwarm', ax=ax)
# Title added so the figure can be told apart from the state and county versions.
ax.set_title("LISA cluster types for optometry provider counts by census tract")
plt.show()