In [1]:
import json
import random
import time
import re

In [2]:
import cloudscraper

In [3]:
import geopandas as gpd
import pandas as pd

In [4]:
from bs4 import BeautifulSoup
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [5]:
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(file_path)

In [6]:
states_df = states_df[["STUSPS", "NAME", "geometry"]]

## Get Population data

In [7]:
state_populations = pd.read_excel(
    "data/NST-EST2024-POP.xlsx", sheet_name=None, engine="openpyxl"
)

In [8]:
state_populations_df = state_populations["NST-EST2024-POP"][
    [
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)",
        "Unnamed: 5",
    ]
]
state_populations_df = state_populations_df.rename(
    columns={
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)": "NAME",
        "Unnamed: 5": "POPULATION",
    }
)
state_populations_df["NAME"] = state_populations_df["NAME"].str[1:]

In [9]:
states_with_population_df = states_df.merge(state_populations_df, on="NAME", how="left")
states_with_population_df = states_with_population_df[
    ["STUSPS", "NAME", "POPULATION", "geometry"]
]

## Get Store Data

In [10]:
scraper = cloudscraper.create_scraper()

In [11]:
headers = {
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "accept-language": "en-US,en;q=0.9,ru;q=0.8,el;q=0.7",
    "cache-control": "max-age=0",
    "content-type": "application/x-www-form-urlencoded",
    "priority": "u=0, i",
    "sec-ch-ua": '"Chromium";v="140", "Not=A?Brand";v="24", "Google Chrome";v="140"',
    "sec-ch-ua-arch": '"x86"',
    "sec-ch-ua-bitness": '"64"',
    "sec-ch-ua-full-version": '"140.0.7339.208"',
    "sec-ch-ua-full-version-list": '"Chromium";v="140.0.7339.208", "Not=A?Brand";v="24.0.0.0", "Google Chrome";v="140.0.7339.208"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-model": '""',
    "sec-ch-ua-platform": '"Windows"',
    "sec-ch-ua-platform-version": '"10.0.0"',
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "same-origin",
    "upgrade-insecure-requests": "1",
    "Referer": "https://rv.campingworld.com/dealer/dothan-alabama?__cf_chl_tk=earasXPYGE9NFWGtmRtWbwWUbd8_yb3VFvfykJfFquY-1759510502-1.0.1.1-lqrxk4LibCxXX1ncfcnflOBcS_vwDSr7JEqwRV86Wec",
}

In [12]:
url = "https://rv.campingworld.com/state-directory"
r = scraper.get(url, headers=headers)

In [13]:
soup = BeautifulSoup(r.text, "html.parser")

In [14]:
store_divs = soup.find_all(
    "div",
    {
        "class": "MuiGrid-root MuiGrid-item MuiGrid-grid-xs-12 MuiGrid-grid-sm-6 MuiGrid-grid-md-4 MuiGrid-grid-lg-3 css-1ybf5in"
    },
)

In [16]:
store_divs

[<div class="MuiGrid-root MuiGrid-item MuiGrid-grid-xs-12 MuiGrid-grid-sm-6 MuiGrid-grid-md-4 MuiGrid-grid-lg-3 css-1ybf5in"><style data-emotion="css 16m0xvc">.css-16m0xvc{-webkit-text-decoration:underline;text-decoration:underline;text-decoration-color:rgba(18, 25, 26, 0.4);}.css-16m0xvc:hover{text-decoration-color:inherit;}</style><style data-emotion="css 1nlm80j">.css-1nlm80j{margin:0;font:inherit;color:#12191A;-webkit-text-decoration:underline;text-decoration:underline;text-decoration-color:rgba(18, 25, 26, 0.4);}.css-1nlm80j:hover{text-decoration-color:inherit;}</style><a class="MuiTypography-root MuiTypography-inherit MuiLink-root MuiLink-underlineAlways css-1nlm80j" href="/dealer/birmingham-alabama-calera"><style data-emotion="css 1r7tq8b">.css-1r7tq8b{margin:0;font-family:"Overpass",sans-serif;font-weight:bold;font-size:1.25rem;line-height:24px;letter-spacing:-0.25px;}</style><div class="MuiTypography-root MuiTypography-subtitle2 css-1r7tq8b">Calera</div></a><style data-emotion

In [17]:
store_urls = []
for i, store_div in enumerate(store_divs):
    store_a = store_div.find("a")
    if "href" in store_a.attrs:
        store_urls.append("https://rv.campingworld.com" + store_a.attrs["href"])

In [18]:
def create_store_dictionary(url: str):
    r_store = scraper.post(store_url, headers=headers)
    if "Just a moment" in r_store.text:
        time.sleep(5)
        r_store = scraper.get(store_url, headers=headers)
        if r_store.status_code != 200:
            raise Exception(f"Error: {r_store.status_code}, {r_store.reason}")

    soup_store = BeautifulSoup(r_store.text, "html.parser")
    script_tag = soup_store.find("script", {"type": "application/ld+json"})

    store_dict = json.loads(script_tag.text)
    state_code = store_dict["address"]["addressRegion"]
    if store_dict["geo"]["latitude"] != None:
        lat = float(store_dict["geo"]["latitude"])
        lon = float(store_dict["geo"]["longitude"])
        point = Point(lon, lat)
    else:
        pattern = r'\\"latitude\\":\s*(-?\d+(?:\.\d+)?)\s*,\\r\\n\\"longitude\\":\s*(-?\d+(?:\.\d+)?)'
        matches = re.findall(pattern, str(soup_store), re.S)
        if matches:
            valid_points = []
            for lat, lon in matches:
                if lat != "null" and lon != "null":
                    valid_points.append(Point(float(lon), float(lat)))

            if valid_points:
                point = valid_points[0]
            else:
                print(valid_points)
                print(f"No lat/lon found for {store_url}")
        else:
            return str(soup_store)
    return {"STUSPS": state_code, "geometry": point, "url": store_url}

In [20]:
stores = []
texts = []
failed_store_urls = []
for store_url in tqdm(store_urls, desc="Parsing Stores"):
    try:
        store_dict = create_store_dictionary(store_url)
        if isinstance(store_dict, str):
            texts.append(store_dict)
            continue
        stores.append(store_dict)
        time.sleep(random.randint(1, 5))
    except Exception as e:
        print(store_url, e)
        failed_store_urls.append(store_url)

Parsing Stores:   0%|          | 0/199 [00:00<?, ?it/s]

https://rv.campingworld.com/dealer/atlanta-georgia-oakwood Error: 429, Too Many Requests
https://rv.campingworld.com/dealer/west-hatfield-massachusetts Error: 429, Too Many Requests
https://rv.campingworld.com/dealer/forest-lake-minnesota Error: 429, Too Many Requests
https://rv.campingworld.com/dealer/little-falls-minnesota Error: 429, Too Many Requests
https://rv.campingworld.com/dealer/forestriver-berlin-new-jersey Error: 429, Too Many Requests
https://rv.campingworld.com/dealer/newport news-virginia Error: 429, Too Many Requests
https://rv.campingworld.com/dealer/roanoke-virginia Error: 429, Too Many Requests
https://rv.campingworld.com/dealer/greenbay-wisconsin Error: 429, Too Many Requests
https://rv.campingworld.com/dealer/onalaska-wisconsin Error: 429, Too Many Requests


In [21]:
len(failed_store_urls)

9

In [22]:
len(stores)

183

In [23]:
stores_v2 = []
failed_store_urls_v2 = []
texts = []
for store_url in tqdm(failed_store_urls, desc="Parsing Stores"):
    try:
        store_dict = create_store_dictionary(store_url)
        if isinstance(store_dict, str):
            texts.append(store_dict)
            continue
        stores_v2.append(store_dict)
        time.sleep(random.randint(1, 5))
    except Exception as e:
        print(store_url, e)
        failed_store_urls_v2.append(store_url)

Parsing Stores:   0%|          | 0/9 [00:00<?, ?it/s]

In [27]:
stores.extend(stores_v2)

In [40]:
store_gdf = gpd.GeoDataFrame(stores, crs=4326)
store_gdf.to_file("data/stores.gpkg")

In [34]:
store_counts_df = store_gdf.groupby("STUSPS").size().reset_index(name="COUNT")

## Combine With States

In [36]:
store_count_gdf = states_with_population_df.merge(
    store_counts_df, on="STUSPS", how="left"
)

In [37]:
store_count_gdf["per_100k"] = (
    store_count_gdf["COUNT"] / (store_count_gdf["POPULATION"] / 100_000)
).round(decimals=2)
store_count_gdf["per_1m"] = (
    store_count_gdf["COUNT"] / (store_count_gdf["POPULATION"] / 1_000_000)
).round(decimals=2)

In [38]:
store_count_gdf = store_count_gdf.to_crs(9311)
store_count_gdf.to_file(f"data/Camping_World_Locations_Per_State.gpkg")

In [39]:
store_count_gdf.sort_values("per_1m", ascending=False)

Unnamed: 0,STUSPS,NAME,POPULATION,geometry,COUNT,per_100k,per_1m
14,SD,South Dakota,918305.0,"POLYGON ((-318857.556 7726.411, -318299.501 77...",2.0,0.22,2.18
24,WY,Wyoming,585067.0,"POLYGON ((-865730.986 59352.206, -864993.905 5...",1.0,0.17,1.71
8,ID,Idaho,1971122.0,"POLYGON ((-1357016.205 78008.02, -1356337.908 ...",3.0,0.15,1.52
34,NH,New Hampshire,1402199.0,"MULTIPOLYGON (((2326343.717 202530.281, 232642...",2.0,0.14,1.43
52,AR,Arkansas,3069463.0,"POLYGON ((482029.856 -928936.291, 482046.307 -...",4.0,0.13,1.3
51,ND,North Dakota,789047.0,"POLYGON ((-296121.109 437082.483, -296107.817 ...",1.0,0.13,1.27
50,OR,Oregon,4253653.0,"MULTIPOLYGON (((-1781988.816 403834.726, -1781...",5.0,0.12,1.18
20,MO,Missouri,6208038.0,"POLYGON ((357039.741 -482422.694, 357435.659 -...",7.0,0.11,1.13
54,MN,Minnesota,5753048.0,"MULTIPOLYGON (((772408.659 380232.264, 772444....",6.0,0.1,1.04
0,MS,Mississippi,2943172.0,"MULTIPOLYGON (((1109997.773 -1565578.875, 1111...",3.0,0.1,1.02
