In [1]:
import json
import os
import random
import time

In [2]:
import h3
import geopandas as gpd
import pandas as pd

In [3]:
from kroger_api import KrogerAPI
from kroger_api.utils.env import load_and_validate_env, get_zip_code
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [4]:
# Location of the state-boundary shapefile, relative to the notebook.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"

# Read the boundaries into a GeoDataFrame (one row per feature in the file).
states_df = gpd.read_file(filename=file_path)

In [5]:
# Keep only the postal code, the full name and the outline of each feature.
states_df = states_df.loc[:, ["STUSPS", "NAME", "geometry"]]

## Get Population Data

In [6]:
# sheet_name=None loads every worksheet and returns a dict keyed by sheet name.
state_populations = pd.read_excel(
    io="data/NST-EST2024-POP.xlsx",
    engine="openpyxl",
    sheet_name=None,
)

In [7]:
# Pull the area-name column and one population column out of the worksheet.
# The sheet's first row is a long caption, so pandas uses that caption as the
# header of the first column and labels the others "Unnamed: N".
# NOTE(review): "Unnamed: 5" is the sixth spreadsheet column. If the layout is
# area, estimates base, 2020, 2021, 2022, 2023, 2024, this selects the 2023
# estimate rather than the latest one - confirm which year is intended.
state_populations_df = state_populations["NST-EST2024-POP"][
    [
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)",
        "Unnamed: 5",
    ]
]
state_populations_df = state_populations_df.rename(
    columns={
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)": "NAME",
        "Unnamed: 5": "POPULATION",
    }
)
# Per the caption, sub-part rows carry leading dots (e.g. ".Alabama"). Strip
# only those dots: slicing off the first character unconditionally would also
# corrupt any name that has no dot, so it could never match in a later join.
state_populations_df["NAME"] = state_populations_df["NAME"].str.lstrip(".")

In [8]:
# Left-join populations onto the state outlines by full name, so every outline
# is kept even when no population row matches, then fix the column order.
states_with_population_df = states_df.merge(
    state_populations_df,
    how="left",
    on="NAME",
)[["STUSPS", "NAME", "POPULATION", "geometry"]]

## Get Store Data

In [9]:
# Check for the two Kroger API credentials before building a client.
# NOTE(review): presumably this loads them from the environment / a .env file
# and fails when either is missing - confirm against kroger_api.utils.env.
load_and_validate_env(["KROGER_CLIENT_ID", "KROGER_CLIENT_SECRET"])
# NOTE(review): zip_code is not referenced by any later cell shown here; the
# store search below is driven by H3 cell centres instead.
zip_code = get_zip_code(default="10001")

In [10]:
# Create the API client and request a client-credentials token, passing
# "product.compact" (presumably the OAuth scope - confirm in kroger_api docs).
kroger = KrogerAPI()
token_info = kroger.authorization.get_token_with_client_credentials("product.compact")

Found saved token, will test if it's still valid...
Token appears invalid, requesting a new one


In [11]:
# H3 resolution of the sampling grid, and how far to grow the outline before
# filling it with cells (expressed in the units of the geometry coordinates).
res = 3
distance = 0.5

# Merge every outline except PR, VI, GU, MP and AS into a single geometry.
usa_geom = states_df[
    ~states_df["STUSPS"].isin(["PR", "VI", "GU", "MP", "AS"])
].geometry.union_all()

# Buffer the merged outline, convert it to an H3 shape, and list the cells that
# cover it at the chosen resolution.
h3_shape = h3.geo_to_h3shape(usa_geom.buffer(distance))
h3_cells = h3.h3shape_to_cells(h3_shape, res)
geojson = h3.cells_to_geo(h3_cells)

In [12]:
# Query the location search once per H3 cell centre and collect every store
# returned, tagging each record with a point geometry and its state code.
# Neighbouring searches overlap, so the same store can appear more than once
# in `stores`.
# NOTE(review): limit=200 caps the results of a single query; a very dense
# area could be truncated - verify against the API's paging behaviour.
stores = []
for cell in tqdm(h3_cells, desc="Parsing Stores"):
    lat, lng = h3.cell_to_latlng(cell)
    locations = kroger.location.search_locations(
        lat=lat, lon=lng, radius_in_miles=100, limit=200
    )
    # Iterating an empty result is a no-op, so no emptiness check is needed.
    for location in locations["data"]:
        # Point takes (x, y), i.e. (longitude, latitude).
        location["geometry"] = Point(
            location["geolocation"]["longitude"],
            location["geolocation"]["latitude"],
        )
        location["STUSPS"] = location["address"]["state"]
        stores.append(location)
    # Pause after every request, including ones that returned no stores, so
    # runs of empty cells do not bypass the throttle.
    time.sleep(random.uniform(0.01, 0.5))

Parsing Stores:   0%|          | 0/911 [00:00<?, ?it/s]

In [45]:
# Build a GeoDataFrame (EPSG:4326) from the collected records, keep the first
# row per locationId, and persist the result.
stores_gdf = gpd.GeoDataFrame(stores, crs=4326).drop_duplicates(subset="locationId")
stores_gdf.to_file("data/all_stores.gpkg")

In [100]:
# Reload the saved stores from disk; the cells below work from this file.
stores_gdf = gpd.read_file(filename="data/all_stores.gpkg")

In [101]:
# Keep only rows that satisfy all four conditions, then renumber the index:
#   1. the chain is exactly "KROGER";
#   2. the part of the name before " - " is "Kroger" or "Kroger Marketplace";
#   3. the last word of the name is not "Spoke", "Fuel" or "Shed";
#   4. the departments field is present.
kroger_gdf = stores_gdf[
    (stores_gdf["chain"] == "KROGER")
    & stores_gdf["name"].str.split(" - ").str[0].isin(["Kroger", "Kroger Marketplace"])
    & ~stores_gdf["name"].str.split(" ").str[-1].isin(["Spoke", "Fuel", "Shed"])
    & stores_gdf["departments"].notna()
].reset_index(drop=True)

In [102]:
# Reproject the filtered stores to EPSG:2274 and save them.
# NOTE(review): EPSG:2274 looks like a single-state projected CRS - confirm it
# is the intended projection for nationwide data.
kroger_gdf = kroger_gdf.to_crs(epsg=2274)
kroger_gdf.to_file("data/stores.gpkg")

In [103]:
# Number of retained stores per state code, as a two-column frame
# (STUSPS, COUNT).
kroger_count_df = (
    kroger_gdf.groupby("STUSPS", as_index=False)
    .size()
    .rename(columns={"size": "COUNT"})
)

## Combine With States

In [104]:
# Attach the per-state store counts to the state/population frame. It is a
# left join, so states without any counted store keep a missing COUNT.
store_count_gdf = states_with_population_df.merge(
    right=kroger_count_df,
    how="left",
    on="STUSPS",
)

In [105]:
# Discard every row that has a missing value in any column (for example a
# state with no matched store count or no matched population).
store_count_gdf = store_count_gdf.dropna(axis=0, how="any")

In [106]:
# Store density: stores per 100,000 and per 1,000,000 residents, each rounded
# to two decimals.
store_count_gdf["per_100k"] = (
    store_count_gdf["COUNT"]
    .div(store_count_gdf["POPULATION"].div(100_000))
    .round(2)
)
store_count_gdf["per_1m"] = (
    store_count_gdf["COUNT"]
    .div(store_count_gdf["POPULATION"].div(1_000_000))
    .round(2)
)

In [107]:
# Reproject the per-state table to EPSG:2274 (the same CRS the store layer is
# saved in) and write it out.
store_count_gdf = store_count_gdf.to_crs(2274)
# Plain string literal: the path has no placeholders, so no f-string is needed.
store_count_gdf.to_file("data/Krogers_Per_State.gpkg")

In [108]:
# Show the states ranked by stores per million residents, highest first.
store_count_gdf.sort_values(by="per_1m", ascending=False)

Unnamed: 0,STUSPS,NAME,POPULATION,geometry,COUNT,per_100k,per_1m
47,KY,Kentucky,4550595.0,"MULTIPOLYGON (((968007.228 816363.1, 970232.76...",107.0,2.35,23.51
4,WV,West Virginia,1770495.0,"POLYGON ((2934150.498 1413209.602, 2934202.45 ...",37.0,2.09,20.9
48,OH,Ohio,11824034.0,"MULTIPOLYGON (((2865718.708 2665958.06, 286619...",196.0,1.66,16.58
23,TN,Tennessee,7148304.0,"POLYGON ((677977.337 272663.652, 678190.718 27...",115.0,1.61,16.09
18,GA,Georgia,11064432.0,"MULTIPOLYGON (((3446707.799 -1066701.233, 3447...",165.0,1.49,14.91
53,IN,Indiana,6880131.0,"POLYGON ((1362895.418 1306544.785, 1363858.43 ...",101.0,1.47,14.68
6,MI,Michigan,10083356.0,"MULTIPOLYGON (((2735910.388 2820304.362, 27363...",116.0,1.15,11.5
0,MS,Mississippi,2943172.0,"MULTIPOLYGON (((1174302.608 -1491153.928, 1177...",29.0,0.99,9.85
52,AR,Arkansas,3069463.0,"POLYGON ((-561356.152 899942.221, -561302.258 ...",26.0,0.85,8.47
3,VA,Virginia,8734685.0,"MULTIPOLYGON (((4928230.479 1420441.13, 492842...",67.0,0.77,7.67
