In [1]:
import requests

In [2]:
from bs4 import BeautifulSoup
import geopandas as gpd
import pandas as pd

## Open 2023 Population Estimates

In [4]:
# Load the 2023 county population estimates CSV. The file is decoded as
# latin-1; the separator is pandas' default comma.
county_populations = pd.read_csv(
    filepath_or_buffer="data/co-est2023-alldata.csv",
    encoding="latin-1",
)

In [5]:
# Build the 5-character FIPS join key: STATE zero-padded to 2 characters
# followed by COUNTY zero-padded to 3 characters.
# Vectorised string operations replace a row-wise ``apply(axis=1)``, which
# built a Series and ran a Python f-string for every row of the frame.
# Assumes STATE and COUNTY are non-negative integer columns (the previous
# ``:02d`` / ``:03d`` formatting required integers as well).
county_populations["FIPS"] = (
    county_populations["STATE"].astype(str).str.zfill(2)
    + county_populations["COUNTY"].astype(str).str.zfill(3)
)

In [6]:
# Keep only the 2023 estimate and the FIPS join key, discarding any row that
# is missing either value.
county_populations_2023_df = county_populations.loc[
    :, ["POPESTIMATE2023", "FIPS"]
].dropna(how="any")

## Get 1790 Populations

In [7]:
# Page holding the 1790 county population table.
url = "http://www.virginiaplaces.org/population/pop1790numbers.html"

# Request headers sent with the fetch; only a browser-style User-Agent is set.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/50.0.2661.102 Safari/537.36"
    )
}

In [8]:
# Fetch the 1790 population page.
# The timeout keeps the cell from hanging indefinitely on an unresponsive
# server, and raise_for_status() fails here on a 4xx/5xx response instead of
# letting an error page be parsed as if it were the population table.
r = requests.get(url, headers=headers, timeout=30)
r.raise_for_status()

In [9]:
# Parse the response body with Python's built-in HTML parser.
soup = BeautifulSoup(markup=r.text, features="html.parser")

In [10]:
# Take the first <table> on the page; limit=1 stops the search after the
# first match. Raises IndexError if the page contains no table.
population_table = soup.find_all("table", limit=1)[0]

In [11]:
# Build {lower-cased county name: 1790 population} from the scraped table.
counties_1790_dict = {}
# The first and last <tr> are skipped — presumably a header row and a totals
# row; confirm against the page if its layout changes.
table_records = population_table.find_all("tr")[1:-1]
for table_record in table_records:
    tds = table_record.find_all("td")
    # Collapse every run of whitespace (leading/trailing padding, newlines,
    # non-breaking spaces) to a single space before lower-casing, so a padded
    # cell still produces a key that can match a county name exactly.
    county_name = " ".join(tds[0].get_text().split()).lower()
    # Populations are written with thousands separators, e.g. "12,345".
    counties_1790_dict[county_name] = int(tds[1].get_text().replace(",", ""))

In [12]:
# Turn the name -> population mapping into a two-column frame, one row per
# scraped county.
county_populations_1790_df = pd.DataFrame(
    data=list(counties_1790_dict.items()),
    columns=["COUNTY_NAME", "POPULATION_1790"],
)

## Open GIS Data

In [13]:
# County boundaries shapefile.
file_path = "data/cb_2018_us_county_500k/cb_2018_us_county_500k.shp"
counties_df = gpd.read_file(filename=file_path)

In [14]:
# State boundaries shapefile; only used to look up state names.
# Note: this rebinds ``file_path`` to the state file.
file_path = "data/cb_2018_us_state_500k/cb_2018_us_state_500k.shp"
states_df = gpd.read_file(filename=file_path)

In [15]:
# Attach each county's state name via STATEFP, then derive the combined FIPS
# key (STATEFP followed by COUNTYFP). Both frames carry a NAME column, so the
# merge yields NAME_x (county) and NAME_y (state).
state_names = states_df[["STATEFP", "NAME"]]
counties_gdf = counties_df.merge(state_names, how="left", on="STATEFP").assign(
    FIPS=lambda frame: frame["STATEFP"] + frame["COUNTYFP"]
)

In [16]:
# Trim to the columns used downstream and give the suffixed NAME columns
# meaningful names: NAME_x is the county name, NAME_y the state name.
counties_gdf = counties_gdf[
    ["STATEFP", "geometry", "NAME_x", "NAME_y", "FIPS"]
].rename(columns={"NAME_x": "COUNTY_NAME", "NAME_y": "STATE_NAME"})

In [17]:
# Keep the counties of Virginia and West Virginia, renumber the rows from 0,
# and retain only the geometry, county name and FIPS key.
in_virginias = counties_gdf["STATE_NAME"].isin(["Virginia", "West Virginia"])
virginia_counties_gdf = counties_gdf[in_virginias].reset_index(drop=True)[
    ["geometry", "COUNTY_NAME", "FIPS"]
]

In [18]:
# Attach the 2023 estimate to every county geometry by FIPS. A left join
# keeps counties without a matching estimate (their POPESTIMATE2023 is NaN).
virginia_counties_with_population_df = virginia_counties_gdf.merge(
    right=county_populations_2023_df,
    how="left",
    on="FIPS",
)

## Merge Data

In [21]:
# Rewrite four 1790 names to the names they are matched against in the
# county-name join: each key below is replaced by its value wherever it
# appears as a whole cell value.
modern_name_by_1790_name = {
    "nansemond": "suffolk",
    "elizabeth city": "hampton",
    "princess anne": "virginia beach",
    "warwick": "newport news",
}
county_populations_1790_df = county_populations_1790_df.replace(
    to_replace=modern_name_by_1790_name
)

In [25]:
# Lower-case the county names so they compare equal to the lower-cased names
# in the 1790 table.
virginia_counties_with_population_df["COUNTY_NAME"] = (
    virginia_counties_with_population_df["COUNTY_NAME"].str.lower()
)

In [28]:
# Join the 1790 counts to the present-day geometry/2023 estimates by name.
# A left join keeps every 1790 county; unmatched ones get NaN/None for the
# geometry, FIPS and POPESTIMATE2023 columns.
# NOTE(review): COUNTY_NAME is not guaranteed unique on the right-hand side
# (two states are included, and a city may share a name with a county), so a
# 1790 row could fan out into several rows — confirm the row count.
virginia_counties_with_population_1790_2023_df = county_populations_1790_df.merge(
    right=virginia_counties_with_population_df,
    how="left",
    on="COUNTY_NAME",
)

In [30]:
# Flag counties whose 2023 estimate exceeds their 1790 count.
# NOTE(review): a comparison against NaN is False, so any row lacking a 2023
# estimate is flagged as "no growth" rather than as missing.
virginia_counties_with_population_1790_2023_df["growth"] = (
    virginia_counties_with_population_1790_2023_df["POPESTIMATE2023"].gt(
        virginia_counties_with_population_1790_2023_df["POPULATION_1790"]
    )
)

In [31]:
# Absolute population change: 2023 estimate minus the 1790 count
# (NaN where the 2023 estimate is missing).
virginia_counties_with_population_1790_2023_df["difference"] = (
    virginia_counties_with_population_1790_2023_df["POPESTIMATE2023"].sub(
        virginia_counties_with_population_1790_2023_df["POPULATION_1790"]
    )
)

In [38]:
# Wrap the merged frame as a GeoDataFrame so the spatial methods used next
# (reprojection, file export) are available; its "geometry" column is used
# as the geometry.
virginia_counties_with_population_1790_2023_gdf = gpd.GeoDataFrame(
    data=virginia_counties_with_population_1790_2023_df
)

In [40]:
# Reproject to EPSG:3968 (NAD83 / Virginia Lambert in the EPSG registry).
virginia_counties_with_population_1790_2023_gdf = (
    virginia_counties_with_population_1790_2023_gdf.to_crs(epsg=3968)
)

In [41]:
# Export the result as a GeoPackage.
# The driver is named explicitly rather than inferred from the ".gpkg"
# extension, so the output format does not depend on the installed geopandas
# version's inference rules.
virginia_counties_with_population_1790_2023_gdf.to_file(
    "data/virginia_populations_diff.gpkg", driver="GPKG"
)