In [1]:
import json
import os
import random
import time

In [2]:
import cloudscraper
import geojson
import geopandas as gpd
import h3
import pandas as pd

In [3]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [4]:
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(file_path)

In [5]:
states_df = states_df[["STUSPS", "NAME", "geometry"]]

## Get Population data

In [6]:
state_populations = pd.read_excel(
    "data/NST-EST2024-POP.xlsx", sheet_name=None, engine="openpyxl"
)

In [7]:
state_populations_df = state_populations["NST-EST2024-POP"][
    [
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)",
        "Unnamed: 5",
    ]
]
state_populations_df = state_populations_df.rename(
    columns={
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)": "NAME",
        "Unnamed: 5": "POPULATION",
    }
)
state_populations_df["NAME"] = state_populations_df["NAME"].str[1:]

In [8]:
states_with_population_df = states_df.merge(state_populations_df, on="NAME", how="left")
states_with_population_df = states_with_population_df[
    ["STUSPS", "NAME", "POPULATION", "geometry"]
]

## Get Sheetz Data

In [10]:
scraper = cloudscraper.create_scraper()

In [28]:
url = "https://royalfarms.com/wp-admin/admin-ajax.php"
params = {
    "action": "store_search",
    "lat": 38.91207,
    "lng": -77.01902,
    "max_results": 50,
    "search_radius": 500,
}

In [29]:
r = scraper.get(url=url, params=params)

In [30]:
len(r.json())

50

In [10]:
def format_dictionary(store_json: dict) -> dict:
    try:
        store_dict = {
            "ID": store_json["storeNumber"],
            "ADDRESS": store_json["address"]["address"],
            "CITY": store_json["address"]["city"],
            "STUSPS": store_json["address"]["state"],
            "ZIP": store_json["address"]["zip"],
            "geometry": Point(
                store_json["coordinates"]["longitude"],
                store_json["coordinates"]["latitude"],
            ),
        }
        return store_dict
    except Exception as e:
        print(e)

In [11]:
url = "https://www.wawa.com/api/bff"
headers = {
    "accept": "application/json",
    "accept-language": "en-US,en;q=0.9,ru-RU;q=0.8,ru;q=0.7",
    "content-type": "application/json",
    "priority": "u=1, i",
    "sec-ch-ua": '"Google Chrome";v="137", "Chromium";v="137", "Not/A)Brand";v="24"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin",
    "cookie": "goose_media_id=a3a4a09d-6606-45d7-bcf7-2a277780c9fd; visid_incap_2023331=uFP4rvVcQySotU5k9QYt+ucfQ2gAAAAAQUIPAAAAAACYUnnVYJv6xY4Wfj9mOjZ2; nlbi_2023331=D/KaUQDN9CNkq7qTeuoTvQAAAACoe94tN3ArQ0JqGcKFYKHj; _gcl_au=1.1.1616234870.1749229544; dtm_token_sc=AQAIhIq0ZrNS3AFWmw-DAQBCagABAQCWRzWnqQEBAJZHNaep; _ga=GA1.1.469775265.1749229544; dtm_token=AQAIhIq0ZrNS3AFWmw-DAQBCagABAQCWRzWnqQEBAJZHNaep; _pin_unauth=dWlkPU1EZGtPVFEwTkdVdE5XUTFOaTAwWlRJNExUbGpaamt0Wm1JMlpqSXhPV1UwWmpjeQ; __adroll_fpc=8644c1d590b818b99a9937fe8eb7c547-1749229544370; _fbp=fb.1.1749229544647.547007493338514311; incap_ses_543_2023331=t++zBjYhPTQ1Irlf8x+JB/FTQ2gAAAAAX7p2jD1rxnxJV9T3ULsyrQ==; reese84=3:U2WnShWOBEusi6oiSkI92Q==:WtaBRDDGmy/Hibp1umWuBXCkXW8TfaVHCXPO+r7pCrZdVaj9UP7vM4RI3/ngdq1UaNmOw7CtfQ+F5UG3t+CQfsZSxGSPtSFXZeo4VYtZ4Q+NoJ5MlGNwrDNkhgmTB9RMUrrmYwama19GBpodGmI2sowwLd+py/wt2V+is6/I2dxZz8ckZpc/uqJDzLXDRGY6LHmjeA83EgNpA6mqsZXMGxEAJGnZgoCYrBlPzm9elbwoFtlUk/IFo+Z7lSugtausV+AxMrcSGWJUzCnuA78leF2kZRId95/wUod7H3Unse+0fFboAR3iwCUFJPecvmNeFzv1f1tVs7sHR7uIiwpzCWeDV93LqgDN9I5lWtZPPz/9t/9iO3nRQIRcynSa5kpSI5l/sWBEVErpghKVTbMD9MRmMKAd00xgkTe49ncNJyc6hjv9JxDeNDo/fRmxYfDwMek1rrv+3rnrb48CKEzr2g==:wZ5bK+b9YTcVgQQZTmUofqbUBfX/H3u/iRBBlxoWhIg=; visid_incap_2960641=BeBQ73zCQy+ILXNxDvFMMSFZQ2gAAAAAQUIPAAAAAADThK1W4BG0hRm7q4D5+Aah; nlbi_2960641=1ISyc8DX6Bb5M/pWdg1VMgAAAABFiibk5eLfSUw//KcQbTyV; incap_ses_543_2960641=Q6XcdUlpGQXnxLtf8x+JByJZQ2gAAAAAxD4aouhH+KC6ZMeeiupuwQ==; nlbi_2960641_2147483392=32rwfWj6DUqqERiBdg1VMgAAAADYt+w3a8zWL0KlrzP4Uk0s; ajs_anonymous_id=5c3374b3-f431-487b-8362-bf01255882d5; locationData=city%3DPuyallup%26zip%3D98374%26state%3DWashington; incap_ses_731_2023331=SYH0V76F3XG0FU7e9gglCtRaQ2gAAAAAtY7XIUMoHNF0inqCtSFqAA==; _uetsid=7b94f24042f811f0b8faa926e9883e98; _uetvid=7b94ed0042f811f0b03159b2bf32b2aa; __ar_v4=UXVY5L7DHBCKFFRG7O4SIP%3A20250606%3A21%7CYCCJXQGXA5A3XDTUGOEF2R%3A20250606%3A21; nlbi_2023331_2888890=/5qHQFeiHw5NQjrveuoTvQAAAACmBblv5Ea82CxMjoQJ5uoo; nlbi_2023331_2147483392=CuLLeOmlTlDdS1RteuoTvQAAAABJ+n8O4Sf9K8y+KsTJP442; _ga_KE0Z84815N=GS2.1.s1749242866$o2$g1$t1749244635$j52$l0$h0; _ga_QT0F48BMBE=GS2.1.s1749242866$o2$g1$t1749244635$j52$l0$h0",
    "Referer": "https://www.wawa.com/locations/store-locator",
    "Referrer-Policy": "strict-origin-when-cross-origin",
}

In [17]:
r.url

'https://www.wawa.com/api/bff'

In [14]:
store_list = []
for i in tqdm(range(10000), desc="IDs"):
    body = json.loads(
        '{"query":"query StoreSearchById($storeId: String!) {\\n  storeLocation(storeId: $storeId) {\\n    name\\n    isActive\\n    isStoreOpen\\n    storeOpen\\n    storeClose\\n    scheduleType\\n    storeNumber\\n    coordinates {\\n      latitude\\n      longitude\\n    }\\n    address {\\n      address\\n      city\\n      state\\n      zip\\n    }\\n  }\\n}\\n","variables":{"storeId":"8060"}}'
    )
    body["variables"] = {"storeId": "{:04d}".format(i)}
    r = scraper.post(url=url, data=json.dumps(body), headers=headers)
    resp_json = r.json()
    if "errors" not in resp_json:
        store_json = resp_json["data"]["storeLocation"]
        store_dict = format_dictionary(store_json)
        store_list.append(store_dict)

    time.sleep(random.uniform(0.01, 0.5))

IDs:   0%|          | 0/1000 [00:00<?, ?it/s]

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [47]:
sheetz_locations_gdf = gpd.GeoDataFrame(store_dicts, crs=4326)

In [48]:
sheetz_locations_gdf.to_file("data/sheetz.gpkg")

In [49]:
sheetz_state_counts_df = (
    sheetz_locations_gdf.groupby("STUSPS")
    .size()
    .reset_index()
    .rename(columns={0: "sheetz"})
)

## Combine With States

In [50]:
sheetz_state_counts_gdf = states_with_population_df.merge(
    sheetz_state_counts_df, on="STUSPS", how="left"
)

In [51]:
sheetz_state_counts_gdf = sheetz_state_counts_gdf.fillna(0)
sheetz_state_counts_gdf["sheetz"] = sheetz_state_counts_gdf["sheetz"].astype(int)

In [52]:
sheetz_state_counts_gdf["per_100k"] = sheetz_state_counts_gdf["sheetz"] / (
    sheetz_state_counts_gdf["POPULATION"] / 100_000
)
sheetz_state_counts_gdf["per_1m"] = sheetz_state_counts_gdf["sheetz"] / (
    sheetz_state_counts_gdf["POPULATION"] / 1_000_000
)

In [53]:
sheetz_state_counts_gdf = sheetz_state_counts_gdf.fillna(0)

In [54]:
sheetz_state_counts_gdf = sheetz_state_counts_gdf.to_crs(3968)
sheetz_state_counts_gdf.to_file(f"data/sheetz_states.gpkg")