In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import folium
import seaborn as sns
import plotly.express as px
import pyarrow.parquet as pq
import matplotlib.patches as mpatches

In [None]:
# Load the census-tract layer from the zipped archive in the data directory.
tracts_path = "../data/ILCensusTracts.zip"
tracts = gpd.read_file(tracts_path)
tracts

In [None]:
# Read the provider extract as an Arrow table, then convert it to a pandas DataFrame.
providers_table = pq.read_table(source='../data/vision_providers_minimal.parquet')
vision_providers_minimal = providers_table.to_pandas()
vision_providers_minimal

In [None]:
# Keep only tracts whose state FIPS code is '17' (Illinois) and renumber the rows from zero.
in_state_17 = tracts['STATEFP'] == '17'
tracts_IL = tracts.loc[in_state_17].reset_index(drop=True)
tracts_IL

In [None]:
# Step 1: keep rows with Entity Type Code 1.
# NOTE(review): presumably code 1 marks individual providers (vs. organisations) - confirm against the data dictionary.
is_entity_type_1 = vision_providers_minimal["Entity Type Code"] == 1
vision_providers_minimal = vision_providers_minimal.loc[is_entity_type_1].reset_index(drop=True)

# Step 2: of those, keep providers whose business *mailing* address state is "IL".
has_il_mailing_state = vision_providers_minimal["Provider Business Mailing Address State Name"] == "IL"
vision_providers_illinois = vision_providers_minimal.loc[has_il_mailing_state].reset_index(drop=True)
vision_providers_illinois

In [None]:
# Build one point per provider from its Longitude/Latitude columns (x = longitude, y = latitude)
# and attach it as the geometry, declared as EPSG:4326.
provider_points = gpd.points_from_xy(
    x=vision_providers_illinois['Longitude'],
    y=vision_providers_illinois['Latitude'],
)
vision_providers_illinois = gpd.GeoDataFrame(
    vision_providers_illinois,
    geometry=provider_points,
    crs="EPSG:4326",
)
vision_providers_illinois

In [None]:
# Reproject the tract polygons into the providers' CRS so both layers share coordinates.
provider_crs = vision_providers_illinois.crs
tracts_IL = tracts_IL.to_crs(provider_crs)

# Spatial join: each provider point that falls within a tract polygon picks up that tract's attributes.
joined = gpd.sjoin(
    left_df=vision_providers_illinois,
    right_df=tracts_IL,
    predicate='within',
)
joined

In [None]:
def classify_provider(taxonomy):
    """Label a provider by the specialty families present in its taxonomy codes.

    Parameters
    ----------
    taxonomy : str
        Pipe-delimited taxonomy codes, e.g. ``'152W00000X|207W00000X'``.

    Returns
    -------
    str
        Every matched label, alphabetically sorted and joined with ``', '``
        (e.g. ``'Ophthalmology, Optometry'``), or ``'Unknown'`` when no code
        starts with a recognised prefix.
    """
    # Code prefix -> label for each family this notebook distinguishes.
    prefix_labels = (
        ('152', 'Optometry'),
        ('207', 'Ophthalmology'),
        ('156', 'Other'),
    )

    # A set de-duplicates labels when several codes share a family.
    provider_types = {
        label
        for code in taxonomy.split('|')
        for prefix, label in prefix_labels
        if code.startswith(prefix)
    }

    if not provider_types:
        return 'Unknown'

    # Sorting makes the label deterministic for any number of matches; picking
    # an arbitrary set element would drop labels and could vary between runs.
    return ', '.join(sorted(provider_types))

# Derive a readable provider-type label from each row's pipe-delimited taxonomy string.
provider_type_labels = joined['Taxonomy'].apply(classify_provider)
joined['Provider Type'] = provider_type_labels

joined

In [None]:
#joined.to_csv('IL_tracts_scatter.csv', index=False)


In [None]:
# One boolean column per specialty family: True when any of the provider's
# pipe-delimited taxonomy codes starts with that family's prefix.
flag_prefixes = {
    'Optometry': '152',
    'Ophthalmology': '207',
    'Others': '156',
}
for flag_column, code_prefix in flag_prefixes.items():
    # Bind the prefix as a default argument so each lambda keeps its own value.
    joined[flag_column] = joined['Taxonomy'].apply(
        lambda taxonomy, code_prefix=code_prefix: any(
            code.startswith(code_prefix) for code in taxonomy.split('|')
        )
    )

joined

In [None]:
# Count, per tract GEOID, how many providers carry each specialty flag
# (summing booleans counts the True values).
flag_columns = ['Optometry', 'Ophthalmology', 'Others']
flags_by_tract = joined.groupby('GEOID')[flag_columns]
grouped = flags_by_tract.sum().reset_index()
grouped

In [None]:
# Attach the per-tract provider counts to every Illinois tract. The left merge
# keeps tracts with no matched provider; their count columns come back as NaN.
count_columns = ['Optometry', 'Ophthalmology', 'Others']
grouped_geo = pd.merge(tracts_IL, grouped, how='left', on='GEOID').reset_index(drop=True)

# Fill by assigning the result back. Calling `.fillna(..., inplace=True)` on a
# selected column is chained in-place mutation: it warns on recent pandas and
# does not update the frame under Copy-on-Write, so the integer cast below
# would then fail on the remaining NaN.
grouped_geo[count_columns] = grouped_geo[count_columns].fillna(0)

# Total is computed after filling, so tracts without providers get 0.
grouped_geo['Total_Providers'] = grouped_geo[count_columns].sum(axis=1)

# Counts are whole numbers; cast them from float (introduced by the NaN) back to int.
grouped_geo = grouped_geo.astype(
    {column: 'int' for column in [*count_columns, 'Total_Providers']}
)

grouped_geo

In [None]:
#grouped_geo.to_csv('IL_tracts_v2.csv', index=False)

In [None]:
fig, ax = plt.subplots(figsize=(8, 8))

# Grey base layer showing every tract outline.
tracts_IL.plot(ax=ax, color='lightgrey', edgecolor='grey')

# Choropleth of provider counts drawn on top; tracts with missing values fall back to grey.
choropleth_style = dict(
    cmap='Reds',
    linewidth=0.8,
    edgecolor='0.8',
    legend=True,
    missing_kwds={'color': 'lightgrey'},
)
grouped_geo.plot(column='Total_Providers', ax=ax, **choropleth_style)

ax.set(xlabel='Longitude', ylabel='Latitude')
ax.set_title('Illinois: Providers by Tract')
plt.show()

In [None]:
from libpysal import weights

# Queen contiguity weights built from the tract geometries.
w = weights.Queen.from_dataframe(grouped_geo)

# Row-standardise so that each tract's neighbour weights sum to 1.
w.transform = 'R'

# Spatial lag of the provider count under those weights, stored as a new column.
total_providers_lag = weights.lag_spatial(w, grouped_geo['Total_Providers'])
grouped_geo['lag_Total_Providers'] = total_providers_lag
grouped_geo

In [None]:
fig, ax = plt.subplots(figsize=(10, 4))

# Choropleth of the spatially lagged provider count; missing values are drawn in grey.
lag_map_style = dict(
    cmap='Reds',
    linewidth=0.8,
    edgecolor='0.8',
    legend=True,
    missing_kwds={'color': 'lightgrey'},
)
grouped_geo.plot(column='lag_Total_Providers', ax=ax, **lag_map_style)

ax.set(xlabel='Longitude', ylabel='Latitude')
ax.set_title('Illinois: Lag Providers by Tract')
plt.show()

In [None]:
from pysal.explore import esda

# Local Moran's pseudo p-values (`p_sim`) come from random permutations. A fixed
# seed keeps the significance flags, cluster labels and exported CSV derived
# from them identical across Restart & Run All.
LISA_SEED = 42

y = grouped_geo['Total_Providers']

# Spatial lag of the provider count (not consumed by the Moran_Local call below).
ylag = weights.lag_spatial(w, y)

# Local Moran's I (LISA) for every tract. The seed is passed to the estimator
# itself, as the esda documentation requires, rather than set globally.
lisa = esda.Moran_Local(y, w, seed=LISA_SEED)

# Store the local I statistic of each tract alongside its geometry.
grouped_geo['lisa_values'] = lisa.Is

# Map the local statistics in quantile classes.
fig, ax = plt.subplots(figsize=(12,10))
grouped_geo.plot(column='lisa_values', scheme='quantiles', cmap='Reds', legend=True, ax=ax)
ax.set_title("Illinois: Local Moran's I of Providers by Tract")
plt.show()


In [None]:
# Significance threshold applied to the LISA pseudo p-values.
alpha = 0.05

# Flag tracts whose simulated p-value falls below the threshold.
below_alpha = lisa.p_sim < alpha
grouped_geo['significant_clusters'] = below_alpha

# Map the True/False flag.
fig, ax = plt.subplots(figsize=(12,10))
grouped_geo.plot(ax=ax, column='significant_clusters', legend=True)
plt.show()


In [None]:
# Label each tract with its Moran quadrant, but only where the local statistic
# is significant at `alpha`; every other tract stays 'Not significant'.
quadrant_labels = {
    1: 'High-High',
    2: 'Low-High',
    3: 'Low-Low',
    4: 'High-Low',
}
significant_mask = lisa.p_sim < alpha

grouped_geo['lisa_type'] = 'Not significant'
for quadrant, label in quadrant_labels.items():
    grouped_geo.loc[(lisa.q == quadrant) & significant_mask, 'lisa_type'] = label
grouped_geo

In [None]:
# Export the tract table (counts, lag, LISA columns) to CSV in the working directory, without the row index.
grouped_geo.to_csv(path_or_buf='IL_tracts_clusters.csv', index=False)

In [None]:
# The tract centroid columns are coerced to numbers so their means can centre the map
# (unparseable values become NaN and are skipped by `.mean()`).
grouped_geo['INTPTLAT'] = pd.to_numeric(grouped_geo['INTPTLAT'], errors='coerce')
grouped_geo['INTPTLON'] = pd.to_numeric(grouped_geo['INTPTLON'], errors='coerce')

# `lisa_type` is categorical, so its colours must come from a discrete map;
# `color_continuous_scale` only applies to numeric colour columns.
# Warm colours mark high-value tracts, cool colours low-value tracts.
lisa_colors = {
    'High-High': '#d7191c',
    'High-Low': '#fdae61',
    'Low-High': '#abd9e9',
    'Low-Low': '#2c7bb6',
    'Not significant': 'lightgrey',
}

fig = px.choropleth_mapbox(
    grouped_geo,
    geojson=grouped_geo.geometry,
    locations=grouped_geo.index,
    color='lisa_type',
    color_discrete_map=lisa_colors,
    # Fix the legend order instead of relying on first appearance in the data.
    category_orders={'lisa_type': list(lisa_colors)},
    mapbox_style="carto-positron",
    zoom=5,
    center={"lat": grouped_geo.INTPTLAT.mean(), "lon": grouped_geo.INTPTLON.mean()},
    opacity=0.8,
    hover_data=["NAMELSAD"],
    labels={"lisa_type": 'lisa type'},
)

# Remove the figure margins so the map fills the output area.
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})

fig.show()


In [None]:
# Independent table holding only the tracts with a significant LISA label, renumbered from zero.
has_cluster_label = grouped_geo['lisa_type'] != 'Not significant'
significant_grouped_geo = grouped_geo.loc[has_cluster_label].reset_index(drop=True)

# Make sure the centroid columns are numeric (unparseable values become NaN).
for centroid_column in ('INTPTLAT', 'INTPTLON'):
    significant_grouped_geo[centroid_column] = pd.to_numeric(
        significant_grouped_geo[centroid_column], errors='coerce'
    )
significant_grouped_geo

In [None]:
# Static map of the significant tracts only, coloured by LISA cluster type.
fig, ax = plt.subplots(figsize=(12,10))
significant_grouped_geo.plot(
    ax=ax,
    column='lisa_type',
    cmap='coolwarm',
    legend=True,
)
plt.show()

In [None]:
# `lisa_type` is categorical, so its colours must come from a discrete map;
# `color_continuous_scale` only applies to numeric colour columns.
# Warm colours mark high-value tracts, cool colours low-value tracts.
significant_lisa_colors = {
    'High-High': '#d7191c',
    'High-Low': '#fdae61',
    'Low-High': '#abd9e9',
    'Low-Low': '#2c7bb6',
}

fig = px.choropleth_mapbox(
    significant_grouped_geo,
    geojson=significant_grouped_geo.geometry,
    locations=significant_grouped_geo.index,
    color='lisa_type',
    color_discrete_map=significant_lisa_colors,
    # Fix the legend order instead of relying on first appearance in the data.
    category_orders={'lisa_type': list(significant_lisa_colors)},
    mapbox_style="carto-positron",
    zoom=5,
    # Centre on the mean centroid of the significant tracts.
    center={
        "lat": significant_grouped_geo.INTPTLAT.mean(),
        "lon": significant_grouped_geo.INTPTLON.mean(),
    },
    opacity=0.8,
    hover_data=["NAMELSAD"],
    labels={"lisa_type": 'LISA Type'},
)

# Remove the figure margins so the map fills the output area.
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})

fig.show()
