In [92]:
import json
import os
import random
import time

In [93]:
import cloudscraper
import geojson
import geopandas as gpd
import h3
import pandas as pd

In [94]:
from geojson import Feature, FeatureCollection, Polygon
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [95]:
# State boundary shapefile. The file name suggests the Census 2018 1:500k
# cartographic-boundary release -- TODO confirm provenance.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"

# One row per state-level area, geometry included.
states_df = gpd.read_file(file_path)

In [96]:
# Keep only the postal code, the full name and the shape of each area.
state_columns = ["STUSPS", "NAME", "geometry"]
states_df = states_df[state_columns]

## Get Population Data

In [97]:
# sheet_name=None makes pandas load every sheet and return a
# {sheet name: DataFrame} mapping instead of a single frame.
population_workbook = "data/NST-EST2024-POP.xlsx"
state_populations = pd.read_excel(
    population_workbook,
    sheet_name=None,
    engine="openpyxl",
)

In [98]:
# The workbook's title text is what pandas picked up as the header of the
# first column, so the column holding the area names carries this long label.
raw_name_column = "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)"
# Column used as the population figure.
# NOTE(review): which estimate year "Unnamed: 5" holds is not visible here -- confirm.
raw_population_column = "Unnamed: 5"

state_populations_df = state_populations["NST-EST2024-POP"][
    [raw_name_column, raw_population_column]
].rename(
    columns={
        raw_name_column: "NAME",
        raw_population_column: "POPULATION",
    }
)

# Sub-part rows are written with leading dots (".Alabama"). Strip only real
# leading dots: slicing off the first character unconditionally also mangles
# rows that have none ("United States" -> "nited States").
state_populations_df["NAME"] = state_populations_df["NAME"].str.lstrip(".")

In [99]:
# Left join on the full state name: every boundary row is kept, and areas
# without a matching population row get NaN in POPULATION.
states_with_population_df = states_df.merge(
    state_populations_df,
    how="left",
    on="NAME",
)[["STUSPS", "NAME", "POPULATION", "geometry"]]

## Get 7-Eleven Stores

In [100]:
# HTTP client used for the store-locator requests in this notebook.
scraper = cloudscraper.create_scraper()

In [101]:
# GraphQL endpoint of the store locator.
url = "https://apis.7-eleven.com//v5/stores/graphql"

# The bearer token is a short-lived JWT, so a copy pasted into the notebook is
# both a committed credential and guaranteed to go stale. Read it from the
# environment instead: grab a fresh token from a browser session on
# www.7-eleven.com and export it as SEVEN_ELEVEN_TOKEN before starting Jupyter.
seven_eleven_token = os.environ.get("SEVEN_ELEVEN_TOKEN", "")

# The captured browser request sent the same JWT as its tag id; a separate
# variable allows overriding it should the two ever differ.
seven_eleven_tag_id = os.environ.get("SEVEN_ELEVEN_TAG_ID", seven_eleven_token)

if not seven_eleven_token:
    print(
        "WARNING: SEVEN_ELEVEN_TOKEN is not set; "
        "store requests will be sent without a valid bearer token."
    )

# Headers replayed from a browser request to the store locator.
headers = {
    "accept": "*/*",
    "accept-language": "en-US,en;q=0.9,ru-RU;q=0.8,ru;q=0.7",
    "authorization": f"Bearer {seven_eleven_token}",
    "content-type": "application/json",
    "sec-ch-ua": '"Chromium";v="136", "Google Chrome";v="136", "Not.A/Brand";v="99"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-site",
    "x-sei-device-id": "e9d07f02-6251-47b4-9f35-4426ae736397",
    "x-sei-platform": "us_web",
    "x-sei-tag-id": seven_eleven_tag_id,
    "x-sei-trip-id": "MDUwMzIyNmEzZTkwMDEwYWU3YzJkOTE4YWY2NDFjOGE=",
    "x-sei-tz": "-07:00",
    "x-sei-version": "3.6.0",
    "Referer": "https://www.7-eleven.com/",
    "Referrer-Policy": "strict-origin-when-cross-origin",
}

In [102]:
def h3_to_geojson_feature(h3_cell):
    """Build a GeoJSON polygon Feature outlining one H3 cell.

    Args:
        h3_cell: H3 cell index; it is also stored on the feature as the
            ``h3_index`` property.

    Returns:
        A ``Feature`` whose geometry is a single-ring ``Polygon`` and whose
        properties are ``{"h3_index": h3_cell}``.
    """
    # The boundary vertices are unpacked as (lat, lng); GeoJSON wants [lng, lat].
    ring = [[lng, lat] for (lat, lng) in h3.cell_to_boundary(h3_cell)]

    # A GeoJSON linear ring has to end on the vertex it started with.
    ring.append(ring[0])

    outline = Polygon([ring])
    return Feature(geometry=outline, properties={"h3_index": h3_cell})

In [113]:
# Not filled anywhere in this cell; kept because other cells may reference it.
national_restaurant_list = []
# H3 resolution of the grid whose cell centres are used as search points.
res = 3

states_dir = "data/states"
# Postal codes that are skipped (territories).
territory_codes = {"PR", "VI", "GU", "MP", "AS"}
# Seconds to wait for a single response before giving up on the request.
request_timeout_s = 60

stores_query = "query stores($lat: String, $lon: String, $radius: Float, $limit: Int, $curr_lat: String, $curr_lon: String, $filters: [String]) {\n  stores(\n    lat: $lat\n    lon: $lon\n    radius: $radius\n    limit: $limit\n    curr_lat: $curr_lat\n    curr_lon: $curr_lon\n    filters: $filters\n  ) {\n    address\n    brand {\n      slug\n      logo\n    }\n    distance_label\n    distance\n    lat\n    lon\n    hours\n    id\n    name\n    city\n    phone\n    state\n    country\n    postal_code\n    franchise\n    features {\n      slug\n      title\n    }\n    services {\n      slug\n      title\n    }\n    local_content\n    fuel_data\n    local_images\n  }\n}"


def fetch_stores(lat, lng):
    """Return the store records the locator API reports around one point.

    Args:
        lat: Latitude of the search centre.
        lng: Longitude of the search centre.

    Returns:
        A list of store dicts (possibly empty).

    Raises:
        requests.HTTPError: The API answered with an HTTP error status.
        RuntimeError: The response carried a GraphQL ``errors`` member.
    """
    params = {
        "operationName": "stores",
        "variables": {
            "lat": str(lat),
            "lon": str(lng),
            # NOTE(review): the unit of the radius is not visible here -- confirm.
            "radius": 150,
            "limit": 10000,
            "curr_lat": str(lat),
            "curr_lon": str(lng),
            "filters": [],
        },
        "query": stores_query,
    }
    # NOTE(review): the query travels as the body of a GET request; GraphQL
    # endpoints conventionally take POST -- confirm before changing.
    r = scraper.get(
        url=url,
        data=json.dumps(params),
        headers=headers,
        timeout=request_timeout_s,
    )
    # Fail with the real cause (expired token, block page, ...) instead of an
    # opaque KeyError/JSON error a few lines later.
    r.raise_for_status()
    payload = r.json()
    if payload.get("errors"):
        raise RuntimeError(f"Store query failed at ({lat}, {lng}): {payload['errors']}")
    return (payload.get("data") or {}).get("stores") or []


# Make sure the output folder exists before any time is spent scraping.
os.makedirs(states_dir, exist_ok=True)

for state_code, state_shape in tqdm(
    zip(states_df["STUSPS"], states_df["geometry"]),
    total=len(states_df),
    desc="States",
):
    # Skip territories
    if state_code in territory_codes:
        continue

    # A file written by an earlier run means this state is already done.
    state_file = f"{states_dir}/{state_code}_{res}.gpkg"
    if os.path.isfile(state_file):
        continue

    # Grow the shape before gridding it; the distance is in the units of the
    # geometry's CRS. DC gets a larger buffer, presumably because it is too
    # small to contain any cell at this resolution otherwise -- TODO confirm.
    distance = 1 if state_code == "DC" else 0.2
    h3_shape = h3.geo_to_h3shape(state_shape.buffer(distance=distance))
    h3_cells = h3.h3shape_to_cells(h3_shape, res=res)

    state_store_list = []
    for h3_cell in tqdm(
        h3_cells, desc=f"Parsing state: {state_code} in Resolution: {res}"
    ):
        lat, lng = h3.cell_to_latlng(h3_cell)
        for store_json in fetch_stores(lat, lng):
            # Searches near a border also return the neighbours' stores.
            if store_json["state"] != state_code:
                continue
            # A store without coordinates cannot be placed on the map.
            if store_json.get("lat") is None or store_json.get("lon") is None:
                continue
            store_json["geometry"] = Point(
                float(store_json["lon"]), float(store_json["lat"])
            )
            state_store_list.append(store_json)
        # Random pause between requests.
        time.sleep(random.uniform(0.01, 0.5))

    unique_store_count = 0
    if state_store_list:
        # Overlapping searches return the same store more than once.
        state_gdf = gpd.GeoDataFrame(state_store_list, crs=4326).drop_duplicates("id")
        state_gdf.to_file(state_file)
        unique_store_count = len(state_gdf)

    print(state_code, unique_store_count)

States:   0%|          | 0/56 [00:00<?, ?it/s]

In [114]:
states_path = "data/states"

# Read only the files written for the current grid resolution: the folder may
# also hold files from runs at another `res`, and stacking those would count
# the same stores twice. Sorting makes the row order reproducible, because
# os.listdir returns entries in arbitrary order.
state_file_suffix = f"_{res}.gpkg"
state_files = sorted(
    file for file in os.listdir(states_path) if file.endswith(state_file_suffix)
)
if not state_files:
    raise FileNotFoundError(
        f"No '*{state_file_suffix}' files found in {states_path}; run the scraping cell first."
    )

store_gdfs = [
    gpd.read_file(os.path.join(states_path, file))
    for file in tqdm(state_files, desc="Parsing State gdfs")
]

Parsing State gdfs:   0%|          | 0/45 [00:00<?, ?it/s]

In [129]:
# Stack the per-state frames into one table with a fresh 0..n-1 index.
all_states_stores = pd.concat(store_gdfs, ignore_index=True)
store_gdf = gpd.GeoDataFrame(all_states_stores)

In [130]:
def get_brand(row):
    """Return the brand slug of one store row.

    The ``brand`` value may be a dict, a JSON string, or the Python ``repr``
    of a dict (single-quoted keys, ``None`` for missing values).

    Args:
        row: Mapping-like row with a ``"brand"`` entry.

    Returns:
        The value stored under ``"slug"``, or ``None`` when the brand is
        missing (``None``, empty, or NaN).

    Raises:
        KeyError: The brand record has no ``"slug"`` entry.
        ValueError: The brand text cannot be parsed at all.
    """
    import ast  # local import: only this function needs it

    brand = row["brand"]
    # NaN is truthy, so it has to be tested explicitly (NaN != NaN).
    if not brand or (isinstance(brand, float) and brand != brand):
        return None

    if isinstance(brand, dict):
        data = brand
    else:
        try:
            # Proper JSON, including values that contain apostrophes.
            data = json.loads(brand)
        except ValueError:
            try:
                # Python repr of a dict: handles None and quoted apostrophes,
                # which a blind quote swap turns into invalid JSON.
                data = ast.literal_eval(brand)
            except (ValueError, SyntaxError):
                # Last resort: single-quoted JSON, e.g. "{'logo': null}".
                data = json.loads(brand.replace("'", '"'))
    return data["slug"]

In [131]:
# Replace each store's serialised brand record by its slug.
brand_slugs = store_gdf.apply(get_brand, axis=1)
store_gdf["brand"] = brand_slugs

In [132]:
# Reproject to EPSG:9311 before saving the national store layer.
# NOTE(review): presumably chosen as an equal-area CRS for US-wide maps -- confirm.
national_output_path = "data/711_national.gpkg"
store_gdf = store_gdf.to_crs(crs=9311)
store_gdf.to_file(national_output_path)

In [133]:
# Save the stores for which no brand slug was found in a file of their own.
missing_brand_mask = store_gdf["brand"].isnull()
store_gdf[missing_brand_mask].to_file("data/no_brand.gpkg")

In [134]:
# Keep only the stores whose brand slug is exactly "7-eleven".
is_seven_eleven = store_gdf["brand"] == "7-eleven"
store_711_gdf = store_gdf[is_seven_eleven]

In [138]:
# Number of 7-Eleven stores per state, keyed by postal code so the result can
# be joined to the state table on STUSPS.
store_state_df = (
    store_711_gdf.groupby("state")
    .size()
    .reset_index(name="7-elevens")
    .rename(columns={"state": "STUSPS"})
)

## Combine With States

In [139]:
# Left join: every state row is kept; states without a store count get NaN.
state_counts_gdf = states_with_population_df.merge(
    store_state_df,
    how="left",
    on="STUSPS",
)

In [140]:
# Missing values left by the left joins become 0, after which the store
# counts can be held as whole numbers.
filled_counts_gdf = state_counts_gdf.fillna(0)
filled_counts_gdf["7-elevens"] = filled_counts_gdf["7-elevens"].astype(int)
state_counts_gdf = filled_counts_gdf

In [141]:
# Store density: number of stores per 100,000 and per 1,000,000 inhabitants.
for rate_column, people_per_unit in (("per_100k", 100_000), ("per_1m", 1_000_000)):
    population_units = state_counts_gdf["POPULATION"] / people_per_unit
    state_counts_gdf[rate_column] = state_counts_gdf["7-elevens"] / population_units

In [142]:
# Replace any NaN left by the ratio computation with 0 (e.g. rows where both
# the store count and the population are 0).
state_counts_gdf = state_counts_gdf.fillna(0)

In [144]:
# Columns written to the per-state output layer, in this order.
output_columns = ["POPULATION", "geometry", "NAME", "per_100k", "per_1m", "7-elevens"]
state_counts_gdf = state_counts_gdf[output_columns]

In [145]:
# Reproject to EPSG:9311 before saving the per-state layer.
per_state_output_path = "data/7-elevens_per_state.gpkg"
state_counts_gdf = state_counts_gdf.to_crs(crs=9311)
state_counts_gdf.to_file(per_state_output_path)