In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import folium
import seaborn as sns
import plotly.express as px
import pyarrow.parquet as pq
import matplotlib.patches as mpatches

In [None]:
# Read the provider table from Parquet with pyarrow and convert it to a
# pandas DataFrame; the bare name on the last line displays it.
vision_providers_minimal = (
    pq.read_table(source='../data/vision_providers_minimal.parquet')
    .to_pandas()
)
vision_providers_minimal

In [None]:
# Load the state boundary layer from the zipped dataset into a GeoDataFrame.
states = gpd.read_file(
    "../data/US_State_Boundaries.zip"
)
states

In [None]:
# Remove the boundary rows labelled 21 and 22, then renumber the remaining rows.
# reset_index() is called without drop=True, so the previous row labels are
# kept as a regular column.
# NOTE(review): the labels 21/22 are magic numbers -- confirm which boundaries
# they refer to. Because the index is renumbered afterwards, re-running this
# cell on its own would remove two different rows.
states = states.drop(index=[21, 22]).reset_index()
states

In [None]:
# Keep only the rows whose "Entity Type Code" equals 1 and renumber them.
# NOTE(review): presumably code 1 marks individual (non-organisation)
# providers -- confirm against the data dictionary.
vision_providers_minimal = (
    vision_providers_minimal
    .loc[vision_providers_minimal["Entity Type Code"].eq(1)]
    .reset_index(drop=True)
)
vision_providers_minimal

In [None]:
# Turn the provider table into a GeoDataFrame of points: Longitude supplies x
# and Latitude supplies y, with the coordinates declared as EPSG:4326.
vision_providers_minimal = gpd.GeoDataFrame(
    vision_providers_minimal,
    geometry=gpd.points_from_xy(
        vision_providers_minimal["Longitude"],
        vision_providers_minimal["Latitude"],
    ),
    crs="EPSG:4326",
)
vision_providers_minimal

In [None]:
# Reproject the state polygons into the CRS of the provider points so both
# layers share one coordinate system before the spatial join.
states = states.to_crs(vision_providers_minimal.crs)

# Attach state attributes to every provider point that lies within a state
# polygon. The join type is left at its default, so points that fall inside
# no polygon do not appear in the result.
joined = vision_providers_minimal.sjoin(states, predicate='within')
joined

In [None]:
# Taxonomy-code prefix that flags a provider for each category column.
# NOTE(review): '207' is a broad prefix -- confirm the input only contains
# vision-related codes under it, otherwise narrow the prefix.
TAXONOMY_PREFIXES = {
    'Optometry': '152',
    'Ophthalmology': '207',
    'Others': '156',
}


def has_taxonomy_prefix(taxonomy, prefix):
    """Return True if any '|'-separated code in `taxonomy` starts with `prefix`.

    Values that are not strings (for example a missing Taxonomy stored as
    NaN/None) yield False instead of raising AttributeError on `.split`.
    """
    if not isinstance(taxonomy, str):
        return False
    return any(code.startswith(prefix) for code in taxonomy.split('|'))


# One boolean column per category; a provider listing codes from several
# categories is flagged in each of them.
for category, prefix in TAXONOMY_PREFIXES.items():
    joined[category] = joined['Taxonomy'].apply(has_taxonomy_prefix, args=(prefix,))

joined

In [None]:
# Count flagged providers per state: summing the boolean category columns
# within each NAME group gives the number of True values. NAME is kept as an
# ordinary column rather than becoming the index.
grouped = joined.groupby('NAME', as_index=False)[
    ['Optometry', 'Ophthalmology', 'Others']
].sum()
grouped.head()

In [None]:
# Category count columns carried over from `grouped`.
count_columns = ['Optometry', 'Ophthalmology', 'Others']

# Attach the per-state counts to the state geometries. Merging through the
# GeoDataFrame method keeps the result a GeoDataFrame.
# NOTE(review): with how='inner', states that have no row in `grouped` are
# dropped, so the fill below never finds a missing value. Switch to how='left'
# if states with zero providers should be kept (they would then get 0 counts).
grouped_geo = states.merge(grouped, how='inner', on='NAME').reset_index(drop=True)

# Fill missing counts with explicit assignment. The previous
# `grouped_geo[col].fillna(0, inplace=True)` form is chained in-place
# assignment, which warns in pandas 2.x and has no effect under Copy-on-Write.
grouped_geo[count_columns] = grouped_geo[count_columns].fillna(0)

# Total is the sum of the three category counts; a provider flagged in more
# than one category contributes once per category.
grouped_geo['Total_Providers'] = (
    grouped_geo['Optometry'] + grouped_geo['Ophthalmology'] + grouped_geo['Others']
)

grouped_geo = grouped_geo.astype({'Optometry': 'int', 'Ophthalmology': 'int', 'Others': 'int', 'Total_Providers': 'int'})

grouped_geo

In [None]:
# Optometry count divided by the POP column, scaled to a rate per 100,000.
# NOTE(review): POP is presumably the state population from the boundary
# file -- confirm its source and reference year.
grouped_geo['Optometrists_Per_100000'] = (
    grouped_geo['Optometry']
    .div(grouped_geo['POP'])
    .mul(100000)
)
grouped_geo

In [None]:
# Ophthalmology count divided by the POP column, scaled to a rate per 100,000.
# NOTE(review): POP is presumably the state population from the boundary
# file -- confirm its source and reference year.
grouped_geo['Ophthalmologists_Per_100000'] = (
    grouped_geo['Ophthalmology']
    .div(grouped_geo['POP'])
    .mul(100000)
)
grouped_geo