In [96]:
import os
import random
import re
import time

In [97]:
from bs4 import BeautifulSoup
import cloudscraper
import geopandas as gpd
import pandas as pd
import shapely

In [98]:
from shapely.geometry import Point
from tqdm.notebook import tqdm

## Get State Data

In [99]:
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(file_path)

In [100]:
states_df = states_df[["STUSPS", "NAME", "geometry"]]

## Get Population data

In [101]:
state_populations = pd.read_excel(
    "data/NST-EST2024-POP.xlsx", sheet_name=None, engine="openpyxl"
)

In [102]:
state_populations_df = state_populations["NST-EST2024-POP"][
    [
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)",
        "Unnamed: 5",
    ]
]
state_populations_df = state_populations_df.rename(
    columns={
        "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)": "NAME",
        "Unnamed: 5": "POPULATION",
    }
)
state_populations_df["NAME"] = state_populations_df["NAME"].str[1:]

In [103]:
states_with_population_df = states_df.merge(state_populations_df, on="NAME", how="left")
states_with_population_df = states_with_population_df[
    ["STUSPS", "NAME", "POPULATION", "geometry"]
]

## Get Campground Data

In [104]:
scraper = cloudscraper.create_scraper()

In [105]:
url = "https://camping-usa.com/campgrounds/"
r = scraper.get(url)

In [106]:
soup = BeautifulSoup(r.text, "html.parser")
ps = soup.find_all("p")

In [107]:
def build_dict(url: str, state_name: str) -> dict:
    lat_pattern = r"var\s+lat\s*=\s*([-\d.]+)"
    lon_pattern = r"var\s+lon\s*=\s*([-\d.]+)"

    r_location = scraper.get(url)
    soup_location = BeautifulSoup(r_location.text, "html.parser")

    lat_match = re.search(lat_pattern, str(soup_location))
    lon_match = re.search(lon_pattern, str(soup_location))

    if not lat_match or not lon_pattern:
        return
    try:
        lat = float(lat_match.group(1))
        lon = float(lon_match.group(1))
    except Exception as e:
        print(url, e)
        return
    return {"geometry": Point(lon, lat), "name": state_name, "url": url}

In [108]:
stores_dicts = []
error_list = []
for p in tqdm(ps, desc="Parsing States"):
    # Get State code
    state_list = []

    a = p.find("a")
    state_name = a.text.lower()

    # If File exists continue
    if os.path.isfile(f'data/states/{state_name.replace(" ", "-")}.gpkg'):
        continue

    href = a.attrs["href"]
    r_state = scraper.get(href)
    soup_state = BeautifulSoup(r_state.text, "html.parser")
    location_ps = soup_state.find_all("p")

    for location_p in tqdm(location_ps, desc=f"Parsing locations in {state_name}"):
        location_url = location_p.find("a").attrs["href"]
        if not location_url.endswith("/"):
            location_url = location_url + "/"
        location_dict = build_dict(location_url, state_name)
        if location_dict:
            state_list.append(location_dict)
        else:
            # Try again
            time.sleep(1)
            location_dict = build_dict(location_url, state_name)
            if location_dict:
                state_list.append(location_dict)
            else:
                print(location_url, state_name)
                error_list.append({"url": location_url, "state_name": state_name})
        time.sleep(random.uniform(0.01, 0.5))

    if state_list:
        state_campground_gdf = gpd.GeoDataFrame(state_list, crs=4326)
        state_campground_gdf.to_file(f'data/states/{state_name.replace(" ", "-")}.gpkg')

Parsing States:   0%|          | 0/52 [00:00<?, ?it/s]

Parsing locations in alabama:   0%|          | 0/132 [00:00<?, ?it/s]

Parsing locations in alaska:   0%|          | 0/198 [00:00<?, ?it/s]

https://camping-usa.com/campdetails/12561/ alaska


Parsing locations in arizona:   0%|          | 0/495 [00:00<?, ?it/s]

Parsing locations in arkansas:   0%|          | 0/216 [00:00<?, ?it/s]

https://camping-usa.com/campdetails/45512/ arkansas
https://camping-usa.com/campdetails/45704/ arkansas


Parsing locations in colorado:   0%|          | 0/602 [00:00<?, ?it/s]

https://camping-usa.com/campdetails/45512/ colorado
https://camping-usa.com/campdetails/45704/ colorado


Parsing locations in connecticut:   0%|          | 0/77 [00:00<?, ?it/s]

https://camping-usa.com/campdetails/45512/ connecticut
https://camping-usa.com/campdetails/45704/ connecticut


Parsing locations in delaware:   0%|          | 0/20 [00:00<?, ?it/s]

Parsing locations in florida:   0%|          | 0/642 [00:00<?, ?it/s]

Parsing locations in georgia:   0%|          | 0/221 [00:00<?, ?it/s]

Parsing locations in hawaii:   0%|          | 0/12 [00:00<?, ?it/s]

Parsing locations in idaho:   0%|          | 0/484 [00:00<?, ?it/s]

Parsing locations in illinois:   0%|          | 0/214 [00:00<?, ?it/s]

Parsing locations in indiana:   0%|          | 0/180 [00:00<?, ?it/s]

Parsing locations in iowa:   0%|          | 0/230 [00:00<?, ?it/s]

Parsing locations in kansas:   0%|          | 0/109 [00:00<?, ?it/s]

Parsing locations in kentucky:   0%|          | 0/132 [00:00<?, ?it/s]

Parsing locations in louisiana:   0%|          | 0/143 [00:00<?, ?it/s]

Parsing locations in maine:   0%|          | 0/224 [00:00<?, ?it/s]

Parsing locations in maryland:   0%|          | 0/66 [00:00<?, ?it/s]

Parsing locations in massachusetts:   0%|          | 0/100 [00:00<?, ?it/s]

Parsing locations in michigan:   0%|          | 0/589 [00:00<?, ?it/s]

Parsing locations in minnesota:   0%|          | 0/369 [00:00<?, ?it/s]

Parsing locations in mississippi:   0%|          | 0/131 [00:00<?, ?it/s]

Parsing locations in missouri:   0%|          | 0/271 [00:00<?, ?it/s]

Parsing locations in montana:   0%|          | 0/453 [00:00<?, ?it/s]

Parsing locations in nebraska:   0%|          | 0/135 [00:00<?, ?it/s]

Parsing locations in nevada:   0%|          | 0/155 [00:00<?, ?it/s]

Parsing locations in new hampshire:   0%|          | 0/162 [00:00<?, ?it/s]

Parsing locations in new jersey:   0%|          | 0/112 [00:00<?, ?it/s]

Parsing locations in new mexico:   0%|          | 0/317 [00:00<?, ?it/s]

Parsing locations in new york:   0%|          | 0/326 [00:00<?, ?it/s]

Parsing locations in north carolina:   0%|          | 0/256 [00:00<?, ?it/s]

Parsing locations in north dakota:   0%|          | 0/70 [00:00<?, ?it/s]

Parsing locations in northern california:   0%|          | 0/974 [00:00<?, ?it/s]

https://camping-usa.com/campdetails/47141/ northern california


Parsing locations in ohio:   0%|          | 0/268 [00:00<?, ?it/s]

Parsing locations in oklahoma:   0%|          | 0/213 [00:00<?, ?it/s]

Parsing locations in oregon:   0%|          | 0/896 [00:00<?, ?it/s]

Parsing locations in pennsylvania:   0%|          | 0/323 [00:00<?, ?it/s]

Parsing locations in rhode island:   0%|          | 0/23 [00:00<?, ?it/s]

Parsing locations in south carolina:   0%|          | 0/124 [00:00<?, ?it/s]

https://camping-usa.com/campdetails/20006/ could not convert string to float: '-'
https://camping-usa.com/campdetails/20006/ could not convert string to float: '-'
https://camping-usa.com/campdetails/20006/ south carolina


Parsing locations in south dakota:   0%|          | 0/184 [00:00<?, ?it/s]

Parsing locations in southern california:   0%|          | 0/603 [00:00<?, ?it/s]

Parsing locations in tennessee:   0%|          | 0/200 [00:00<?, ?it/s]

https://camping-usa.com/campdetails/20184/ could not convert string to float: '-'
https://camping-usa.com/campdetails/20184/ could not convert string to float: '-'
https://camping-usa.com/campdetails/20184/ tennessee
https://camping-usa.com/campdetails/12798/ could not convert string to float: '-'
https://camping-usa.com/campdetails/12798/ could not convert string to float: '-'
https://camping-usa.com/campdetails/12798/ tennessee


Parsing locations in texas:   0%|          | 0/714 [00:00<?, ?it/s]

Parsing locations in utah:   0%|          | 0/408 [00:00<?, ?it/s]

Parsing locations in vermont:   0%|          | 0/97 [00:00<?, ?it/s]

Parsing locations in virgin islands:   0%|          | 0/1 [00:00<?, ?it/s]

Parsing locations in virginia:   0%|          | 0/152 [00:00<?, ?it/s]

Parsing locations in washington:   0%|          | 0/675 [00:00<?, ?it/s]

Parsing locations in west virginia:   0%|          | 0/91 [00:00<?, ?it/s]

Parsing locations in wisconsin:   0%|          | 0/404 [00:00<?, ?it/s]

Parsing locations in wyoming:   0%|          | 0/256 [00:00<?, ?it/s]

## Get missed sites

In [126]:
missed_campsites_df = pd.DataFrame(error_list)
missed_campsites_df.to_csv("data/missed_sites_05052025_1745.csv")

In [127]:
missed_campsites_v2_df.drop_duplicates("url")

Unnamed: 0,url,name
0,https://camping-usa.com/campdetails/12561/,alaska
1,https://camping-usa.com/campdetails/45512/,arkansas
2,https://camping-usa.com/campdetails/45704/,arkansas
7,https://camping-usa.com/campdetails/47141/,northern california
8,https://camping-usa.com/campdetails/20006/,south carolina
9,https://camping-usa.com/campdetails/20184/,tennessee
10,https://camping-usa.com/campdetails/12798/,tennessee


## Manually Add the rest

In [135]:
found_locations = [
    {
        "url": "https://camping-usa.com/campdetails/12561/",
        "name": "alaska",
        "geometry": Point(-150.107112, 61.662040),
    },
    {
        "url": "https://camping-usa.com/campdetails/45512/",
        "name": "arkansas",
        "geometry": Point(-92.584497, 36.029788),
    },
    {
        "url": "https://camping-usa.com/campdetails/45704/",
        "name": "northern california",
        "geometry": Point(-120.17670134417756, 39.895805384351014),
    },
    {
        "url": "https://camping-usa.com/campdetails/47141/",
        "name": "northern california",
        "geometry": Point(-122.69855331916162, 38.425356523133495),
    },
    {
        "url": "https://camping-usa.com/campdetails/20006/",
        "name": "south carolina",
        "geometry": Point(-83.10613992891594, 34.8687156236974),
    },
    {
        "url": "https://camping-usa.com/campdetails/20184/",
        "name": "tennessee",
        "geometry": Point(-85.12106129806321, 35.168486299082524),
    },
    {
        "url": "https://camping-usa.com/campdetails/12798/",
        "name": "tennessee",
        "geometry": Point(-86.6932979038491, 35.58926259794271),
    },
]

In [136]:
found_locations

[{'url': 'https://camping-usa.com/campdetails/12561/',
  'name': 'alaska',
  'geometry': <POINT (-150.107 61.662)>},
 {'url': 'https://camping-usa.com/campdetails/45512/',
  'name': 'arkansas',
  'geometry': <POINT (-92.584 36.03)>},
 {'url': 'https://camping-usa.com/campdetails/45704/',
  'name': 'northern california',
  'geometry': <POINT (-120.177 39.896)>},
 {'url': 'https://camping-usa.com/campdetails/47141/',
  'name': 'northern california',
  'geometry': <POINT (-122.699 38.425)>},
 {'url': 'https://camping-usa.com/campdetails/20006/',
  'name': 'south carolina',
  'geometry': <POINT (-83.106 34.869)>},
 {'url': 'https://camping-usa.com/campdetails/20184/',
  'name': 'tennessee',
  'geometry': <POINT (-85.121 35.168)>},
 {'url': 'https://camping-usa.com/campdetails/12798/',
  'name': 'tennessee',
  'geometry': <POINT (-86.693 35.589)>}]

## Merge all

In [137]:
campsites_gdfs = []
states_path = "data/states"
for file in os.listdir("data/states"):
    constructed_path = os.path.join(states_path, file)
    if constructed_path.endswith(".gpkg"):
        campsites_gdf = gpd.read_file(constructed_path)
        campsites_gdfs.append(campsites_gdf)

In [142]:
campsites_gdf = pd.concat(campsites_gdfs, ignore_index=True)

In [144]:
added_campsites_df = pd.DataFrame(found_locations)

In [145]:
campsites_gdf = pd.concat([campsites_gdf, added_campsites_df], ignore_index=True)

In [163]:
campsites_gdf = gpd.GeoDataFrame(
    campsites_gdf, geometry=campsites_gdf["geometry"], crs=4326
)

In [164]:
campsites_gdf.to_file("data/campsites.gpkg")

In [159]:
counts_raw_df = (
    pd.DataFrame(campsites_gdf.groupby("name").size())
    .reset_index()
    .rename(columns={0: "campsites"})
)

In [173]:
california_cs = counts_raw_df[
    (counts_raw_df["name"] == "northern california")
    | (counts_raw_df["name"] == "southern california")
]["campsites"].sum()
counts_raw_df.loc[-1] = ["california", california_cs]  # adding a row
counts_raw_df.index = counts_raw_df.index + 1  # shifting index
counts_raw_df = counts_raw_df.sort_index()  # sorting by index

## Combine With States

In [191]:
states_with_population_df["name"] = states_with_population_df["NAME"].str.lower()

In [192]:
campground_counts_gdf = states_with_population_df.merge(
    counts_raw_df, on="name", how="left"
)

In [193]:
campground_counts_gdf = campground_counts_gdf.fillna(0)
campground_counts_gdf["campsites"] = campground_counts_gdf["campsites"].astype(int)

In [194]:
campground_counts_gdf["per_100k"] = campground_counts_gdf["campsites"] / (
    campground_counts_gdf["POPULATION"] / 100000
)
campground_counts_gdf["per_1m"] = campground_counts_gdf["campsites"] / (
    campground_counts_gdf["POPULATION"] / 1_000_000
)

In [195]:
campground_counts_gdf = campground_counts_gdf.fillna(0)

In [196]:
campground_counts_gdf = campground_counts_gdf[
    ["POPULATION", "geometry", "NAME", "per_100k", "per_1m", "campsites"]
]

In [197]:
campground_counts_gdf = campground_counts_gdf.to_crs(9311)
campground_counts_gdf.to_file(f"data/campground_per_states.gpkg")