In [4]:
import geopandas as gpd

## Get County Data

In [5]:
# Read the county boundary shapefile into a GeoDataFrame; every later cell
# that counts stores per county starts from this frame.
file_path = "data/cb_2018_us_county_500k/cb_2018_us_county_500k.shp"
counties_gdf = gpd.read_file(file_path)

In [6]:
# Reproject the counties to EPSG:9311, the CRS the store layers are also
# converted to, so the spatial joins below compare like with like.
counties_gdf = counties_gdf.to_crs(epsg=9311)

## Open Dollar General Data

In [7]:
# Load the Dollar General locations and reproject them to EPSG:9311 in one
# step so they share a CRS with the county polygons.
dollar_general_gdf = gpd.read_file("data/dollar_generals.gpkg").to_crs(9311)

## Open Hunt Brothers Data

In [8]:
# Load the Hunt Brothers locations and reproject them to EPSG:9311 in one
# step so they share a CRS with the county polygons.
hunt_brothers_gdf = gpd.read_file("data/hunt_brothers.gpkg").to_crs(9311)

## Intersect Data

In [9]:
# Step 1: spatially join each Dollar General location to the county it
# matches; sjoin records the matching county's row index as "index_right".
dollar_general_in_county = gpd.sjoin(dollar_general_gdf, counties_gdf)

# Step 2: count matched locations per county index.
dollar_general_per_county = (
    dollar_general_in_county.groupby("index_right").size().rename("stores")
)

# Step 3: attach the counts to the county frame by index. The left join keeps
# every county; counties without a match get NaN in "stores".
counties_dollar_general_count_gdf = counties_gdf.join(
    dollar_general_per_county, how="left"
)

In [10]:
# Counties with no matching store come out of the left join with NaN in
# "stores". Zero-fill only that column: a frame-wide fillna(0) would also
# overwrite genuine nulls in unrelated columns (and the geometry column) with 0.
# NOTE: this cell drops "stores", so re-running it without first re-running
# the join cell above raises KeyError.
counties_dollar_general_count_gdf = counties_dollar_general_count_gdf.assign(
    dg=counties_dollar_general_count_gdf["stores"].fillna(0).astype(int)
)[["GEOID", "geometry", "dg", "NAME"]]

In [11]:
# Step 1: spatially join each Hunt Brothers location to the county it
# matches; sjoin records the matching county's row index as "index_right".
hunt_brothers_in_county = gpd.sjoin(hunt_brothers_gdf, counties_gdf)

# Step 2: count matched locations per county index.
hunt_brothers_per_county = (
    hunt_brothers_in_county.groupby("index_right").size().rename("stores")
)

# Step 3: attach the counts to the county frame by index. The left join keeps
# every county; counties without a match get NaN in "stores".
counties_hunt_brothers_count_gdf = counties_gdf.join(
    hunt_brothers_per_county, how="left"
)

In [12]:
# Counties with no matching location come out of the left join with NaN in
# "stores". Zero-fill only that column: a frame-wide fillna(0) would also
# overwrite genuine nulls in unrelated columns (and the geometry column) with 0.
# NOTE: this cell drops "stores", so re-running it without first re-running
# the join cell above raises KeyError.
counties_hunt_brothers_count_gdf = counties_hunt_brothers_count_gdf.assign(
    hb=counties_hunt_brothers_count_gdf["stores"].fillna(0).astype(int)
)[["GEOID", "geometry", "hb", "NAME"]]

## Merge Data

In [13]:
# Only the key and the count are needed from the Hunt Brothers frame; the
# geometry and NAME columns already come from the Dollar General frame.
hunt_brothers_counts = counties_hunt_brothers_count_gdf[["GEOID", "hb"]]

# Left-merge on GEOID so every county row of the Dollar General frame is kept.
dg_hb_gdf = counties_dollar_general_count_gdf.merge(
    hunt_brothers_counts, how="left", on="GEOID"
)

In [14]:
def dg_hb_classification(row) -> str:
    """Label a row by comparing its "dg" and "hb" counts.

    Args:
        row: Any object indexable by the keys "dg" and "hb".

    Returns:
        "no stores" when both counts are zero, "same amount" when they are
        equal and non-zero, "more dollar generals" when "dg" is larger, and
        "more hunt brothers" when "hb" is larger. If the two values do not
        compare as equal, greater, or less (e.g. a NaN), the function returns
        None despite the ``str`` annotation.
    """
    dollar_generals = row["dg"]
    hunt_brothers = row["hb"]

    if dollar_generals == hunt_brothers:
        # Equal counts: distinguish "nothing here" from a genuine tie.
        return "no stores" if dollar_generals == 0 else "same amount"
    if dollar_generals > hunt_brothers:
        return "more dollar generals"
    if dollar_generals < hunt_brothers:
        return "more hunt brothers"
    # Non-comparable values reach this point.
    return None

In [15]:
# Classify every county row (axis=1 hands each row to the function). The
# classifier is passed directly; wrapping it in a lambda added nothing.
dg_hb_gdf["dg_or_hb"] = dg_hb_gdf.apply(dg_hb_classification, axis=1)

In [16]:
# Make sure the merged frame is in EPSG:9311 before it is written out.
# NOTE(review): the county geometries were already reprojected to 9311
# earlier in the notebook, so this looks like a no-op safeguard — confirm.
dg_hb_gdf = dg_hb_gdf.to_crs(epsg=9311)

In [17]:
# Persist the classified county layer for use outside this notebook.
output_path = "data/dg_hb_gdf.gpkg"
dg_hb_gdf.to_file(output_path)