In [43]:
import re
import json
import time

In [44]:
from bs4 import BeautifulSoup
import geopandas as gpd
import pandas as pd

In [45]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

In [46]:
import requests

## Get Chuck E. Cheese Data

In [47]:
# Request headers passed to every requests.get call in this notebook. The
# User-Agent is a desktop Chrome-on-macOS string.
# NOTE(review): presumably needed because the site rejects the default
# python-requests User-Agent — unverified.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36"
}

In [48]:
# Index page of the store locator; its entries supply the per-state store
# counts and the links that are scraped for coordinates.
url = "https://locations.chuckecheese.com/browse/"

In [49]:
# Download the index page. The timeout stops the cell from hanging forever on
# an unresponsive server, and raise_for_status() surfaces a 4xx/5xx response
# here instead of letting an error page be parsed as if it were the listing.
r = requests.get(url, headers=headers, timeout=30)
r.raise_for_status()

In [50]:
# Parse the downloaded HTML with the standard-library "html.parser" backend.
soup = BeautifulSoup(r.text, "html.parser")

In [51]:
# Collect the list items of the index page. Each one is later read for its
# text ("<name> (<count>)") and for the href of its first <a> element.
# NOTE(review): the class value is given as one two-word string, so this
# presumably depends on the exact class attribute used by the site — confirm
# if the page markup changes.
divs = soup.find_all("div", {"class": "map-list-item is-single"})

In [52]:
def add_locations(locations_list: list, link: str, timeout: float = 30.0) -> None:
    """Scrape one listing page and append its store coordinates to ``locations_list``.

    The page at ``link`` is downloaded, the first ``<script>`` inside the
    ``<div id="map-data-wrapper">`` element is read, and every ``"lat"`` / ``lng``
    number pair found in its text is appended as ``Point(lng, lat)``
    (x = longitude, y = latitude).

    Entries whose coordinates cannot be parsed are printed and skipped, so one
    malformed entry does not discard the remaining entries of the page.

    Args:
        locations_list: List extended in place with one ``Point`` per
            coordinate pair.
        link: URL of the page to scrape.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On connection failure, timeout or an HTTP
            error status.
        IndexError: If the page lacks the map-data wrapper or its script tag.
    """
    r = requests.get(link, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    r.raise_for_status()
    location_soup = BeautifulSoup(r.text, "html.parser")
    map_script = location_soup.find_all("div", {"id": "map-data-wrapper"})[0].find_all(
        "script"
    )[0]
    script_text = map_script.contents[0].string

    # Raw string: "\d" inside a normal string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    number = re.compile(r"-?\d+\.?\d*")

    # The text before the first '"lat"' key is preamble; every later chunk
    # starts right after a latitude key.
    for coord in script_text.split('"lat"')[1:]:
        try:
            parts = coord.split("lng")
            lat = float(number.findall(parts[0])[0])
            # Cut at "title" so digits in later fields are never picked up.
            lng = float(number.findall(parts[1].split("title")[0])[0])
        except (IndexError, ValueError) as e:
            # Report the offending chunk only, then carry on with the next one.
            print(e)
            print(coord)
            continue
        locations_list.append(Point(lng, lat))

In [53]:
# Store count per listed area, keyed by the name shown on the index page.
location_dict = {}
# Store coordinates from every scraped page; add_locations fills it in place.
locations = []
for div in tqdm(divs, desc="Parsing Locations"):
    # The item text is split on "(": the left part is the name, the right part
    # is the count with its trailing character dropped before int().
    location_split = div.text.strip().split("(")
    location_dict[location_split[0].strip()] = int(location_split[1][:-1])

    # Bind the URL to a name before using it so the error handler can report
    # it. Printing a name that was never assigned would raise NameError inside
    # the handler and abort the whole loop on the first failed page.
    location_url = None
    try:
        location_url = div.find("a").attrs["href"]
        add_locations(locations_list=locations, link=location_url)
    except Exception as e:
        print(e)
        print(location_url)
    # Pause between requests to avoid hammering the site.
    time.sleep(1)

Parsing Locations:   0%|          | 0/49 [00:00<?, ?it/s]

In [54]:
# Wrap the scraped points in a GeoDataFrame declared as EPSG:4326, reproject
# it to EPSG:9311 in the same expression, then save the result.
locations_gdf = gpd.GeoDataFrame(geometry=locations, crs=4326).to_crs(9311)
locations_gdf.to_file("data/locations.gpkg")

In [55]:
# Turn the {name: store count} mapping into a two-column table. "NAME" is the
# key used for the join against the state/population table further down.
cec_df = pd.DataFrame(list(location_dict.items()), columns=["NAME", "stores"])

## Get State Data

In [56]:
# State boundary shapefile read from the local data directory.
# NOTE(review): judging by the file name this is the 2018 Census cartographic
# boundary file at 1:500k — confirm the provenance.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(file_path)

In [66]:
# Keep only the two-letter code (STUSPS), the state name and the geometry.
states_df = states_df[["STUSPS", "NAME", "geometry"]]

## Get Population Data

In [67]:
# sheet_name=None loads every sheet of the workbook into a dict keyed by sheet
# name; the sheet actually used is picked out by name in the next step.
state_populations = pd.read_excel(
    "data/NST-EST2023-POP.xlsx", sheet_name=None, engine="openpyxl"
)

In [68]:
# The sheet's first column is labelled with this caption rather than a real
# column name, so it is kept in one place and reused below.
_ROW_HEADER_CAPTION = "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)"

# Select the name and population columns, give them usable names, then drop
# the first character of every name (the caption says sub-parts carry a
# leading dot). Note that rows without a leading dot lose their first letter.
# NOTE(review): "Unnamed: 5" is presumably the most recent estimate column —
# confirm against the workbook.
state_populations_df = (
    state_populations["NST-EST2023-POP"][[_ROW_HEADER_CAPTION, "Unnamed: 5"]]
    .rename(columns={_ROW_HEADER_CAPTION: "NAME", "Unnamed: 5": "POPULATION"})
    .assign(NAME=lambda frame: frame["NAME"].str[1:])
)

In [69]:
# Attach the population to each state by name (left join keeps every state
# row) and fix the column order in the same expression.
states_with_population_df = states_df.merge(
    state_populations_df, on="NAME", how="left"
)[["STUSPS", "POPULATION", "geometry", "NAME"]]

In [70]:
# Drop every row that has a missing value in any column; after the left join
# above this removes the rows for which no population figure was found.
states_with_population_df = states_with_population_df.dropna()

## Merge Data

In [110]:
# Left join on the state name: every state with population data is kept, and a
# state missing from the scraped counts gets NaN in "stores".
cec_with_population_info_gdf = states_with_population_df.merge(
    cec_df, on="NAME", how="left"
)

In [111]:
# Display the merged table as a visual check of the join.
cec_with_population_info_gdf

Unnamed: 0,STUSPS,POPULATION,geometry,NAME,stores
0,MS,2939690.0,"MULTIPOLYGON (((-88.50297 30.21524, -88.49176 ...",Mississippi,5.0
1,NC,10835491.0,"MULTIPOLYGON (((-75.72681 35.93584, -75.71827 ...",North Carolina,13.0
2,OK,4053824.0,"POLYGON ((-103.00256 36.52659, -103.00219 36.6...",Oklahoma,6.0
3,VA,8715698.0,"MULTIPOLYGON (((-75.74241 37.80835, -75.74151 ...",Virginia,15.0
4,WV,1770071.0,"POLYGON ((-82.6432 38.16909, -82.643 38.16956,...",West Virginia,1.0
5,LA,4573749.0,"MULTIPOLYGON (((-88.8677 29.86155, -88.86566 2...",Louisiana,9.0
6,MI,10037261.0,"MULTIPOLYGON (((-83.19159 42.03537, -83.18993 ...",Michigan,12.0
7,MA,7001399.0,"MULTIPOLYGON (((-70.23405 41.28565, -70.22361 ...",Massachusetts,6.0
8,ID,1964726.0,"POLYGON ((-117.24267 44.39655, -117.23484 44.3...",Idaho,1.0
9,FL,22610726.0,"MULTIPOLYGON (((-80.17628 25.52505, -80.17395 ...",Florida,31.0


In [112]:
# Stores per N residents, one column per denominator. The pairs are listed in
# the order the columns are meant to appear in the table.
_stores = cec_with_population_info_gdf["stores"]
_population = cec_with_population_info_gdf["POPULATION"]
for _column, _residents in (
    ("per_1000", 1000),
    ("per_10k", 10_000),
    ("per_100k", 100000),
    ("per_500k", 500_000),
    ("per_1m", 1_000_000),
):
    cec_with_population_info_gdf[_column] = _stores / (_population / _residents)

# Despite its name this column is the inverse ratio: residents per store.
cec_with_population_info_gdf["per_capita"] = _population / _stores

In [113]:
# Reproject to EPSG:9311, the same CRS the store-location layer is saved in.
cec_with_population_info_gdf = cec_with_population_info_gdf.to_crs(9311)

In [109]:
# Save the per-state layer as a GeoPackage.
# NOTE(review): this cell's recorded execution count (109) is lower than those
# of the merge, rate and reprojection cells (110-113), so the file on disk may
# predate them — restart the kernel and run all cells before relying on it.
cec_with_population_info_gdf.to_file("data/cec.gpkg")

In [57]:
# NOTE(review): looks like a leftover scratch check of the "per_1m" formula
# (3 stores for a population of 3,206,000) — presumably safe to delete.
3 / (3206000 / 1_000_000)

0.9357454772301934