In [1]:
import json

In [2]:
import geopandas as gpd
import numpy as np
import pandas as pd

## Open State Data

In [3]:
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_gdf = gpd.read_file(file_path)

In [4]:
states_gdf = states_gdf[["STUSPS", "NAME", "geometry"]]

In [5]:
states_gdf = states_gdf[
    ~states_gdf["STUSPS"].isin(["MP", "PR", "VI", "GU"])
].reset_index()[["geometry"]]

## Open 2023 Population Estimates

In [25]:
county_populations = pd.read_csv(
    "data/co-est2023-alldata.csv", sep=",", encoding="latin-1"
)

In [26]:
county_populations["FIPS"] = county_populations.apply(
    lambda row: f"{row['STATE']:02d}{row['COUNTY']:03d}", axis=1
)

In [27]:
county_populations_2023_df = county_populations[["POPESTIMATE2023", "FIPS"]].dropna()

## Open County Data (Better Geometryy Values)

In [28]:
file_path = "data/cb_2018_us_county_500k/cb_2018_us_county_500k.shp"
counties__better_geo_gdf = gpd.read_file(file_path)

In [29]:
usa_union = counties__better_geo_gdf.geometry.union_all()

## Open County Data (Correct CT Values)

In [11]:
file_path = "data/tl_2024_us_county/tl_2024_us_county.shp"
counties_gdf = gpd.read_file(file_path)

In [12]:
counties_gdf = counties_gdf.rename(columns={"GEOID": "FIPS"})

In [13]:
county_populations_2023_gdf = counties_gdf.merge(
    county_populations_2023_df, on="FIPS", how="left"
)

## Clip Counties

In [17]:
clipped_counties_gdf = gpd.clip(county_populations_2023_gdf, mask=usa_union)

## Get McDonalds Data

In [30]:
mcdonalds_gdf = gpd.read_file("data/mcdonalds_va_h3_5.gpkg")
mcdonalds_gdf = mcdonalds_gdf.to_crs(clipped_counties_gdf.crs)

## Intersect Data

In [32]:
county_populations_2023_gdf = county_populations_2023_gdf.to_crs(mcdonalds_gdf.crs.srs)

In [33]:
county_with_counts_gdf = county_populations_2023_gdf.join(
    gpd.sjoin(mcdonalds_gdf, county_populations_2023_gdf)
    .groupby("index_right")
    .size()
    .rename("stores"),
    how="left",
)

In [34]:
county_with_counts_gdf = county_with_counts_gdf.fillna(0)

In [35]:
county_with_counts_gdf["stores"] = county_with_counts_gdf["stores"].astype(int)

In [36]:
county_with_counts_gdf["stores"] = county_with_counts_gdf["stores"].astype(int)
county_with_counts_gdf = county_with_counts_gdf[
    ["STATEFP", "FIPS", "geometry", "stores", "NAME", "POPESTIMATE2023"]
]

In [44]:
county_with_counts_gdf["per_capita"] = county_with_counts_gdf["POPESTIMATE2023"].div(
    county_with_counts_gdf["stores"]
)

In [45]:
county_with_counts_gdf.replace(np.inf, 0, inplace=True)
county_with_counts_gdf = county_with_counts_gdf.dropna()

In [46]:
county_with_counts_gdf["per_capita"] = county_with_counts_gdf["per_capita"].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [47]:
county_with_counts_gdf["per_100k"] = county_with_counts_gdf["stores"] / (
    county_with_counts_gdf["POPESTIMATE2023"] / 100_000
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [52]:
county_with_counts_gdf.replace([np.inf, -np.inf], np.nan, inplace=True)

In [56]:
county_with_counts_gdf["per_100k"].std()

np.float64(3.7198902821990356)

In [None]:
county_with_counts_gdf = county_with_counts_gdf.to_crs(9311)

In [None]:
county_with_counts_gdf

In [None]:
len(county_with_counts_gdf)

In [None]:
county_with_counts_clipped_gdf = gpd.clip(
    county_with_counts_gdf,
    mask=states_gdf.to_crs(county_with_counts_gdf.crs).union_all(),
    keep_geom_type=True,
)

In [105]:
county_with_counts_clipped_gdf = county_with_counts_clipped_gdf.dissolve(by="FIPS")

In [106]:
county_with_counts_clipped_gdf.to_file("data/counties_per_capita.gpkg")

In [None]:
len(county_with_counts_gdf)