In [90]:
import json
import os

In [124]:
from bs4 import BeautifulSoup
import cloudscraper
import geopandas as gpd
from geopy.geocoders import Nominatim
import pandas as pd
import shapely

In [125]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [93]:
# Load the state boundary shapefile into a GeoDataFrame.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(filename=file_path)

In [94]:
# Keep only the columns used later: state code, state name and geometry.
state_columns = ["STUSPS", "NAME", "geometry"]
states_df = states_df[state_columns]

## Get Population data

In [95]:
# Read the population workbook. With sheet_name=None the result is indexed
# by sheet name (see the lookup in the next cell), not a single DataFrame.
population_workbook_path = "data/NST-EST2024-POP.xlsx"
state_populations = pd.read_excel(
    population_workbook_path,
    sheet_name=None,
    engine="openpyxl",
)

In [96]:
# Column labels of the loaded sheet. The first column's label is the table's
# descriptive caption; the population column only has a positional label.
area_column = "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)"
# NOTE(review): "Unnamed: 5" is positional — confirm it is the intended
# estimate year of the workbook.
population_column = "Unnamed: 5"

# Reduce the sheet to (NAME, POPULATION) so it can be merged on NAME.
state_populations_df = state_populations["NST-EST2024-POP"][
    [area_column, population_column]
].rename(columns={area_column: "NAME", population_column: "POPULATION"})

# The caption says leading dots mark sub-parts, so strip only those dots.
# Slicing off the first character unconditionally would also mangle any row
# whose name has no leading dot.
state_populations_df["NAME"] = state_populations_df["NAME"].str.lstrip(".")

In [97]:
# Left-join the population figures onto the state geometries by state name,
# then put the columns in a fixed order.
states_with_population_df = states_df.merge(
    state_populations_df,
    how="left",
    on="NAME",
)[["STUSPS", "NAME", "POPULATION", "geometry"]]

## Geocoder

In [98]:
# Nominatim geocoder used to turn store addresses into coordinates.
geocoder_user_agent = "vinemapper_geocoder"
geolocator = Nominatim(user_agent=geocoder_user_agent)

## Get Buc-ee's Data

In [99]:
# HTTP client created by cloudscraper; used below to request the locations page.
scraper = cloudscraper.create_scraper()

In [100]:
# Fetch the store locations page.
url = "https://buc-ees.com/locations/"
r = scraper.get(url)
# Fail loudly on an HTTP error status (e.g. a blocked request) instead of
# letting an error page be parsed into an empty list of stores.
r.raise_for_status()

In [101]:
# Parse the page and collect one <div class="bucees-location"> per store.
soup = BeautifulSoup(r.text, features="html.parser")
store_divs = soup.find_all("div", class_="bucees-location")

In [111]:
# Turn each scraped store card into a {"STUSPS", "geometry"} record.
# Stores that cannot be parsed or geocoded are reported and skipped so one
# bad card does not abort the whole run.
stores_dicts = []
for store_div in tqdm(store_divs, desc="Parsing Stores"):
    # Reset per store: if parsing fails before the address is extracted, the
    # handler must not report the previous store's address (or hit a
    # NameError on the very first store).
    address = None
    try:
        address = store_div.find("strong").get_text(separator=",").strip()
        location = geolocator.geocode(address)
        if location is None:
            # The geocoder found no match. Report just the address, which is
            # what the catch-all handler printed for this case before.
            print(address)
            continue
        # location[-1] is unpacked into Point as-is, so x/y follow the
        # geocoder's coordinate order; a later cell swaps the two axes.
        p = Point(*location[-1])
        # The <h4> text is split on ", " and the second piece is the state code.
        state_code = store_div.find("h4").text.split(", ")[1].strip()
        store_dict = {"STUSPS": state_code, "geometry": p}
        stores_dicts.append(store_dict)
    except Exception as e:
        # Best-effort scrape: keep going, but say which store failed and why.
        print(f"{address or '<address not parsed>'}: {type(e).__name__}: {e}")

Parsing Stores:   0%|          | 0/51 [00:00<?, ?it/s]

20403 County Rd. 68,Robertsdale, Alabama 36567
5201 Nugget Road,Berthoud, CO 80513
200 World Commerce Pkwy,Saint Augustine, Florida 32092
601 Union Grove Rd. SE,Adairsville, Georgia 30103
1013 Buc-ee's Boulevard,Richmond, Kentucky 40475
3284 N Beaver Rd,Springfield, Missouri 65803
170 Buc-ee’s Blvd,Kodak, Tennessee 37764
780 Hwy-35 N Byp,Alvin, Texas 77511
9900 East Interstate 40,Amarillo, Texas 79118
801 N Brooks,Brazoria, Texas 77422
2800 S Interstate 35 E,Denton, Texas 76210
505 E Main St,Eagle Lake, Texas 77434
1402 South IH- 45,Ennis, Texas 75119
4231 E. Hwy 332,Freeport, Texas 77541
165 State Highway 77,Hillsboro, Texas 76645
101 N Hwy 2004,Lake Jackson, Texas 77566
598 Hwy 332,Lake Jackson, Texas 77566
10070 West IH 10,Luling, Texas 78648
205 IH-45 South,Madisonville, Texas 77864
1550 Central Texas Expressway,Melissa, Texas 75454
2541 S Main St,Pearland, Texas 77584
506 W. IH 20,Terrell, Texas 75160
6201 Gulf Fwy (IH 45),Texas City, Texas 77591
40900 US Hwy 290 Bypass,Waller, Te

#### Manually Add Buc-ee's Stores That Failed to Parse Above

In [140]:
# Swap the axes of every geocoded point in place: Point(x, y) -> Point(y, x).
# Not idempotent: running this cell a second time swaps them back.
for geocoded_store in stores_dicts:
    unswapped = geocoded_store["geometry"]
    geocoded_store["geometry"] = Point(unswapped.y, unswapped.x)

In [142]:
# Hand-entered store locations as (state code, x, y). The two numbers are
# passed to Point in exactly this order.
_manual_store_coords = [
    ("AL", -87.676603, 30.634139),
    ("CO", -104.983475, 40.333570),
    ("FL", -81.464112, 29.983560),
    ("GA", -84.917040, 34.440284),
    ("KY", -84.3079444426352, 37.67373894356118),
    ("MO", -93.18206519487988, 37.254000259704426),
    ("TN", -83.6048194795315, 35.98108931962975),
    ("TX", -95.22620368527583, 29.43020320482343),
    ("TX", -101.7230606029954, 35.190679603560284),
    ("TX", -95.57202854752714, 29.055519738817992),
    ("TX", -97.10261671955992, 33.180736430307284),
    ("TX", -96.33162684583283, 29.58587524461079),
    ("TX", -96.60618997900579, 32.32325808964718),
    ("TX", -95.3367248158416, 28.981127282053727),
    ("TX", -97.09249792352892, 32.04676763777681),
    ("TX", -95.42731628989688, 29.063725839343086),
    ("TX", -95.43678878962275, 29.021594461711235),
    ("TX", -97.59244326097998, 29.651147356335326),
    ("TX", -95.8800618784126, 30.965351743094768),
    ("TX", -96.59196650492181, 33.27145714673755),
    ("TX", -95.28517747359352, 29.560231359035207),
    ("TX", -96.32107210037005, 32.71661804306466),
    ("TX", -95.06337319005782, 29.428478230034607),
    ("TX", -95.93185082940926, 30.07151056527443),
    ("TX", -96.12354283881716, 29.325634226366244),
]

# Same record shape as the geocoded stores: {"STUSPS": ..., "geometry": Point}.
bucees_added_stores = [
    {"STUSPS": state_code, "geometry": Point(x, y)}
    for state_code, x, y in _manual_store_coords
]

In [147]:
# Combine the geocoded and hand-entered records into one GeoDataFrame,
# created with crs=4326.
all_store_records = stores_dicts + bucees_added_stores
bucees_stores_gdf = gpd.GeoDataFrame(all_store_records, crs=4326)

In [148]:
# Persist the combined store points to disk.
store_locations_path = "data/bucees_locations.gpkg"
bucees_stores_gdf.to_file(filename=store_locations_path)

In [157]:
# Count stores per state code; the result has the columns "STUSPS" and "stores".
bucees_counts_df = (
    bucees_stores_gdf.groupby("STUSPS").size().reset_index(name="stores")
)

## Combine With States

In [158]:
# Attach the per-state store counts to the state/population frame. A left
# join keeps every state, including those with no stores.
bucees_counts_gdf = states_with_population_df.merge(
    right=bucees_counts_df,
    how="left",
    on="STUSPS",
)

In [159]:
# Replace every NaN in the merged frame with 0. This covers "stores" for
# states that had no match in the left merge on STUSPS, but it applies to all
# columns, e.g. POPULATION for rows that had no match in the population table.
bucees_counts_gdf = bucees_counts_gdf.fillna(0)
# Must run after the fill: casting to int fails while NaN is still present.
bucees_counts_gdf["stores"] = bucees_counts_gdf["stores"].astype(int)

In [160]:
# Stores per N residents for several values of N. Each rate is computed as
# stores / (POPULATION / N); columns are added in the order listed here.
rate_denominators = {
    "per_1000": 1000,
    "per_10k": 10_000,
    "per_100k": 100_000,
    "per_500k": 500_000,
    "per_1m": 1_000_000,
}
for rate_column, residents in rate_denominators.items():
    bucees_counts_gdf[rate_column] = bucees_counts_gdf["stores"] / (
        bucees_counts_gdf["POPULATION"] / residents
    )

In [161]:
# Zero out any NaN left in the frame after the rate columns were computed
# (e.g. a 0 / 0 rate where both stores and POPULATION are 0).
bucees_counts_gdf = bucees_counts_gdf.fillna(value=0)

In [162]:
# Reproject to EPSG:9311 before export.
# NOTE(review): presumably an equal-area projection chosen for mapping US
# states — confirm.
bucees_counts_gdf = bucees_counts_gdf.to_crs(9311)
# Write the per-state counts and rates next to the other outputs in data/.
bucees_counts_gdf.to_file("data/bucees_per_states.gpkg")