In [1]:
import cloudscraper
import geopandas as gpd
import pandas as pd

In [23]:
from shapely.geometry import Point

## Get State Data

In [24]:
# Path to the state boundary shapefile (presumably the Census "cb_2018" cartographic
# boundary file, judging by its name — confirm the vintage if it matters).
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
# Read the state polygons and their attribute columns with geopandas.
states_df = gpd.read_file(file_path)

In [25]:
# Keep only the columns used later: "STUSPS" (join key for store counts),
# "NAME" (join key for population) and the geometry.
states_df = states_df[["STUSPS", "NAME", "geometry"]]

## Get Population Data

In [26]:
# sheet_name=None makes read_excel return a dict of {sheet name: DataFrame}
# covering every sheet in the workbook; the sheet of interest is picked later.
state_populations = pd.read_excel(
    io="data/NST-EST2024-POP.xlsx",
    engine="openpyxl",
    sheet_name=None,
)

In [27]:
# The sheet's title text ended up as the header of the first column, and the
# remaining columns were auto-named "Unnamed: N" by pandas.
name_header = "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)"
# NOTE(review): "Unnamed: 5" is the sixth spreadsheet column — confirm it is the
# estimate year this analysis is meant to use.
population_header = "Unnamed: 5"

# Select the geography name and one population column, then give them the
# names the later merge expects.
state_populations_df = state_populations["NST-EST2024-POP"][
    [name_header, population_header]
].rename(columns={name_header: "NAME", population_header: "POPULATION"})

# State rows are prefixed with a dot (".Alabama"). Strip leading dots only:
# slicing off the first character unconditionally would also truncate any
# name that has no dot, and that row could then never match states_df["NAME"].
state_populations_df["NAME"] = state_populations_df["NAME"].str.lstrip(".")

In [28]:
# Attach the population to each state by name. The left join keeps every
# geometry row; names with no match in the population table get NaN in
# "POPULATION". The trailing selection fixes the column order.
states_with_population_df = states_df.merge(
    state_populations_df, how="left", on="NAME"
)[["STUSPS", "NAME", "POPULATION", "geometry"]]

## Get Sheetz Data

In [29]:
# HTTP session object from cloudscraper; the scraping loop below uses it to
# POST to the store-search endpoint.
scraper = cloudscraper.create_scraper()

In [30]:
# Store-search endpoint; the scraping loop POSTs to it once per result page.
url = "https://orders.sheetz.com/anybff/api/stores/search"
# Query parameters for one request: search origin plus paging.
# NOTE(review): the scraping loop rebuilds `params` for every page, so this
# assignment only documents the shape of the query.
params = {"latitude": 40.47275, "longitude": -78.42507, "page": 1, "size": 100}

# Request headers sent with every search call. They look like a copy of what a
# Chrome browser sends from the site's store finder — confirm they are still
# accepted if requests start failing.
headers = {
    "accept": "application/json, text/plain, */*",
    "accept-language": "en-US,en;q=0.9,ru-RU;q=0.8,ru;q=0.7",
    "client-version": "2.48.0-4570",
    "content-type": "application/json",
    "priority": "u=1, i",
    "sec-ch-ua": '"Google Chrome";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    "cookie": "",  # Empty here; get the value from https://orders.sheetz.com/findASheetz
    "Referer": "https://orders.sheetz.com/findASheetz",
    "Referrer-Policy": "strict-origin-when-cross-origin",
}

In [38]:
# Collect one record per store: its location as a Point, its state code
# (stored under "STUSPS" so it can be joined to the state table) and its
# store number.
store_dicts = []
page_size = 100
# Upper bound on requests so that a server which never returns a short page
# cannot keep this loop running forever.
max_pages = 50
for i in range(max_pages):
    params = {"latitude": 40.47275, "longitude": -78.42507, "page": i, "size": page_size}
    r = scraper.post(url, headers=headers, data="{}", params=params)
    # Fail loudly on an HTTP error (blocked request, expired cookie, ...)
    # instead of trying to parse an error page as JSON.
    r.raise_for_status()
    stores_json = r.json()["stores"]
    for store in stores_json:
        store_id = store["storeNumber"]
        state_code = store["state"]
        # shapely Points are (x, y), i.e. (longitude, latitude).
        point = Point(store["longitude"], store["latitude"])
        store_dicts.append({"geometry": point, "STUSPS": state_code, "ID": store_id})
    print(i, len(stores_json))
    # A page with fewer than page_size stores is the last one; stopping here
    # means new stores are picked up without editing a hard-coded page count.
    if len(stores_json) < page_size:
        break
else:
    print(f"Stopped after {max_pages} pages without reaching a short page; results may be incomplete")

0 100
1 100
2 100
3 100
4 100
5 100
6 100
7 75


In [47]:
# Turn the scraped records into a point layer. crs=4326 (WGS 84) matches the
# longitude/latitude values the Points were built from.
sheetz_locations_gdf = gpd.GeoDataFrame(store_dicts, crs=4326)

In [48]:
# Write the store points to a GeoPackage file.
sheetz_locations_gdf.to_file("data/sheetz.gpkg")

In [49]:
# Number of stores per state code, as a two-column frame: "STUSPS", "sheetz".
sheetz_state_counts_df = (
    sheetz_locations_gdf.groupby("STUSPS").size().reset_index(name="sheetz")
)

## Combine With States

In [50]:
# Left join on the state code keeps every state polygon; states with no store
# get NaN in "sheetz".
sheetz_state_counts_gdf = states_with_population_df.merge(
    right=sheetz_state_counts_df,
    how="left",
    on="STUSPS",
)

In [51]:
# The two left joins leave NaN in "sheetz" (state has no store) and in
# "POPULATION" (state name had no match in the population table). Fill only
# those two numeric columns: a blanket fillna(0) would also target the geometry
# and text columns, where 0 is not a meaningful value.
sheetz_state_counts_gdf = sheetz_state_counts_gdf.fillna(
    {"sheetz": 0, "POPULATION": 0}
)
# Counts became float because of the NaN placeholders; restore integers.
sheetz_state_counts_gdf["sheetz"] = sheetz_state_counts_gdf["sheetz"].astype(int)

In [52]:
# The population column comes from a spreadsheet column that also held header
# text, so it may not have a numeric dtype; coerce it before doing arithmetic.
population = pd.to_numeric(sheetz_state_counts_gdf["POPULATION"], errors="coerce")
# A population of 0 is the placeholder for "no estimate available". Treat it as
# missing so the rate is NaN instead of inf (or a ZeroDivisionError when the
# column is object dtype).
population = population.where(population > 0)

# Stores per 100,000 and per 1,000,000 residents.
sheetz_state_counts_gdf["per_100k"] = sheetz_state_counts_gdf["sheetz"] / (
    population / 100_000
)
sheetz_state_counts_gdf["per_1m"] = sheetz_state_counts_gdf["sheetz"] / (
    population / 1_000_000
)

In [53]:
# The per-capita rates are NaN for rows without a usable population (zero
# stores divided by a zero population); report those rows as 0.
sheetz_state_counts_gdf = sheetz_state_counts_gdf.fillna(0)

In [54]:
# Reproject the state layer to EPSG:3968, then write it to a GeoPackage file.
sheetz_state_counts_gdf = sheetz_state_counts_gdf.to_crs(epsg=3968)
sheetz_state_counts_gdf.to_file("data/sheetz_states.gpkg")