In [1]:
import json
import os
import random
import time
from urllib.parse import urljoin

In [2]:
import cloudscraper
from bs4 import BeautifulSoup
import geopandas as gpd
import pandas as pd

In [3]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [4]:
# Load the state boundary polygons from the local shapefile.
# NOTE(review): the file name suggests the 2018 Census cartographic boundary
# set at 1:500k — confirm the provenance and record it in a markdown cell.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(file_path)

In [5]:
# Keep only what is used later: STUSPS is the key joined against the scraped
# store state codes, NAME is the key joined against the population table, and
# geometry carries the state shapes.
states_df = states_df[["STUSPS", "NAME", "geometry"]]

## Get D&B Data

In [138]:
# Single shared scraper session, reused for the listing page, every store page
# and every store-details JSON request.
scraper = cloudscraper.create_scraper()

In [139]:
# Download the page that lists every location.
stores_r = scraper.get("https://www.daveandbusters.com/us/en/about/locations")
# Fail here on a blocked or errored response instead of silently parsing an
# error page into zero accordion items further down.
stores_r.raise_for_status()

In [140]:
# Parse the listing page; each "accordion-item" div is iterated later as one
# group of store links.
soup = BeautifulSoup(stores_r.text, "html.parser")
# NOTE(review): the last two accordion items are discarded — presumably they
# are not state sections; confirm against the live page, since a layout change
# would silently drop real locations.
divs = soup.find_all("div", {"class": "accordion-item"})[:-2]

In [141]:
def parse_store(url: str) -> dict:
    """Fetch one store page and return its state code and point location.

    The store page's ``<body class="page basicpage">`` element carries an
    ``id`` attribute; its second dash-separated token is used as the
    ``entityId`` for the ``datadetails.json`` endpoint, which supplies the
    address and coordinates. Both requests go through the module-level
    ``scraper`` session.

    Args:
        url: Absolute URL of a store's location page.

    Returns:
        A dict ``{"STUSPS": <state code from the JSON address>,
        "geometry": Point(longitude, latitude)}``.

    Raises:
        ValueError: If the page has no matching ``<body>`` element or its
            ``id`` does not contain a dash-separated store id.
        Exception: Whatever ``raise_for_status()`` raises when either request
            returns an HTTP error status, or the JSON decoding/lookup errors
            if the details payload is not in the expected shape.
    """
    store_r = scraper.get(url)
    # Surface HTTP failures directly rather than as a confusing parse error.
    store_r.raise_for_status()

    page = BeautifulSoup(store_r.text, "html.parser")
    body = page.find("body", {"class": "page basicpage"})
    if body is None or "id" not in body.attrs:
        raise ValueError(f"No store id found on page: {url}")

    id_parts = body.attrs["id"].split("-")
    if len(id_parts) < 2:
        raise ValueError(f"Unexpected body id {body.attrs['id']!r} on page: {url}")
    store_id = id_parts[1]

    json_url = f"https://www.daveandbusters.com/content/dnb-request/datadetails.json?mode=location&entityId={store_id}"
    json_r = scraper.get(json_url)
    json_r.raise_for_status()
    store_json = json_r.json()

    state_code = store_json["address"]["state"]
    # Point takes (x, y), i.e. (longitude, latitude).
    p = Point(store_json["longitude"], store_json["latitude"])
    return {"STUSPS": state_code, "geometry": p}

In [142]:
# Visit every store link under every accordion item and collect one record per
# store.
#
# The previous version built each URL by prefixing the host to the link's href.
# Its recorded output showed the two failing hrefs (rancho-mirage, orlando)
# were already absolute URLs, so the prefix produced an invalid address and
# those stores had to be parsed by hand in follow-up cells. urljoin handles
# both site-relative and absolute hrefs, which makes the manual re-parse (and
# the risk of appending those stores twice) unnecessary.
BASE_URL = "https://www.daveandbusters.com"

stores_list = []
failed_urls = []  # kept so the reader can inspect or retry anything that failed
for div in tqdm(divs, desc="Parsing States"):
    for state_a in div.find_all("a"):
        store_url = urljoin(BASE_URL, state_a.attrs["href"])
        try:
            stores_list.append(parse_store(store_url))
        except Exception as exc:
            # Best-effort scrape: keep going, but report what failed and why so
            # that missing stores do not silently skew the per-state counts.
            failed_urls.append(store_url)
            print(f"{store_url}: {exc!r}")

In [145]:
# Turn the list of {"STUSPS", "geometry"} records into a GeoDataFrame. The
# points were built from longitude/latitude, so they are tagged as EPSG:4326.
dave_and_busters_gdf = gpd.GeoDataFrame(stores_list, crs=4326)

In [147]:
# Persist the scraped store points so later work does not need to re-scrape.
dave_and_busters_gdf.to_file("data/d_b_locations.gpkg")

In [148]:
# Number of stores per state code, as a two-column frame: STUSPS and DBs.
stores_per_state = dave_and_busters_gdf.groupby("STUSPS").size()
db_counts_df = stores_per_state.reset_index(name="DBs")

## Get Population Data

In [149]:
# Read the population workbook. sheet_name=None loads every sheet and returns a
# dict keyed by sheet name; the sheet of interest is picked out of it later.
state_populations = pd.read_excel(
    "data/NST-EST2024-POP.xlsx", sheet_name=None, engine="openpyxl"
)

In [150]:
# Raw header label -> column name used downstream. The sheet's first row is
# taken as the header, so column A is labelled with the sheet's descriptive
# blurb and the other columns come through as "Unnamed: N".
# NOTE(review): "Unnamed: 5" is the sixth column of the sheet — confirm it is
# the estimate year this analysis intends to use.
population_columns = {
    "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)": "NAME",
    "Unnamed: 5": "POPULATION",
}
state_populations_df = (
    state_populations["NST-EST2024-POP"][list(population_columns)]
    .rename(columns=population_columns)
    # Drop the first character of every name; per the sheet's header text,
    # leading dots mark sub-parts.
    # NOTE(review): this is unconditional, so any name without a leading dot
    # is truncated too and will not match in the later join on NAME.
    .assign(NAME=lambda sheet: sheet["NAME"].str[1:])
)

In [151]:
# Attach each state's population by name. The left join keeps every state
# geometry; states with no match in the population table get NaN in POPULATION.
states_with_population_df = states_df.merge(
    state_populations_df, how="left", on="NAME"
)[["STUSPS", "NAME", "POPULATION", "geometry"]]

In [152]:
# Drop every row with a missing value in any column — in practice the states
# whose NAME found no match in the population table during the left join.
states_with_population_df = states_with_population_df.dropna()

## Merge Data

In [131]:
# Add the store count to every state. States with no stores have no row in
# db_counts_df, so the left join leaves them as NaN, which is then set to 0.
dave_and_busters_counts_gdf = states_with_population_df.merge(
    db_counts_df, how="left", on="STUSPS"
).fillna(0)

In [132]:
# Stores per 100,000 and per 1,000,000 residents: the store count divided by
# the population expressed in units of that size.
for rate_column, people_per_unit in (("per_100k", 100000), ("per_1m", 1_000_000)):
    dave_and_busters_counts_gdf[rate_column] = dave_and_busters_counts_gdf["DBs"] / (
        dave_and_busters_counts_gdf["POPULATION"] / people_per_unit
    )

In [133]:
# Reproject the state polygons to EPSG:9311 before export.
# NOTE(review): 9311 looks like the US National Atlas equal-area projection —
# confirm this is the CRS the downstream map expects.
dave_and_busters_counts_gdf = dave_and_busters_counts_gdf.to_crs(9311)

In [134]:
# Write the per-state result (population, store count, per-capita rates and
# reprojected geometry) to disk.
dave_and_busters_counts_gdf.to_file("data/dbs_per_state.gpkg")