In [104]:
import geopandas as gpd
import pandas as pd

## Open GIS Data

In [105]:
# Path to the county-boundary shapefile, relative to the working directory.
file_path = "data/cb_2018_us_county_500k/cb_2018_us_county_500k.shp"
# Read the shapefile into a GeoDataFrame of county geometries.
counties_df = gpd.read_file(filename=file_path)

## Open 1900-1990 Census Data

In [106]:
# Load the historical census counts table; all column types are left to
# pandas' default inference.
cencounts_df = pd.read_csv(filepath_or_buffer="data/cencounts.csv")

In [107]:
# Derive two columns from "name" by splitting it on single spaces:
#   STATE      -> the first token
#   COUNTYNAME -> every remaining token, re-joined with single spaces
# Both use the vectorised .str accessor, so a missing name propagates as NaN
# in both columns rather than raising inside a row-wise lambda.
# NOTE(review): assumes the state portion of "name" is always exactly one
# token (e.g. a postal abbreviation) -- confirm against data/cencounts.csv.
cencounts_df["STATE"] = cencounts_df["name"].str.split(" ").str[0]
cencounts_df["COUNTYNAME"] = (
    cencounts_df["name"].str.split(" ").str[1:].str.join(" ")
)

In [108]:
# Build the join key: cast the raw "fips" column to int, then render it as a
# five-character string left-padded with zeros (e.g. 1001 -> "01001").
# Done with vectorised string ops instead of a per-row apply over the frame.
cencounts_df["FIPS"] = cencounts_df["fips"].astype(int).astype(str).str.zfill(5)

## Open 2023 Population Estimates

In [109]:
# Load the 2023 population estimates. The file is decoded as latin-1 and is
# comma-separated, which is read_csv's default delimiter.
county_populations = pd.read_csv("data/co-est2023-alldata.csv", encoding="latin-1")

In [110]:
# Build a five-character FIPS key: two-digit zero-padded STATE code followed
# by three-digit zero-padded COUNTY code. Vectorised string ops replace a
# per-row apply across the whole (wide) frame. The explicit integer cast
# mirrors how the census FIPS key is built and fails loudly if either code
# column contains missing values.
county_populations["FIPS"] = (
    county_populations["STATE"].astype(int).astype(str).str.zfill(2)
    + county_populations["COUNTY"].astype(int).astype(str).str.zfill(3)
)

In [111]:
# Keep only the 2023 estimate and the FIPS key, then discard any row in which
# either of those two values is missing.
county_populations = county_populations.loc[:, ["POPESTIMATE2023", "FIPS"]].dropna()

## Merge Data

In [112]:
# Left-join the 2023 estimates onto the 1900 counts by FIPS: every census row
# is kept, and rows without a matching estimate get NaN in POPESTIMATE2023.
population_df = pd.merge(
    cencounts_df[["FIPS", "pop1900"]],
    county_populations,
    how="left",
    on="FIPS",
)

In [113]:
# Drop every row that has a missing value in any column.
population_df = population_df.dropna(axis=0, how="any")

In [114]:
# Swap cells whose entire value is "." for the string "0"; other values are
# left untouched.
# NOTE(review): presumably "." marks a missing 1900 count in the source CSV --
# confirm. Mapping it to zero makes any later difference against pop1900
# equal to the full later population for those rows.
population_df["pop1900"] = population_df["pop1900"].replace(to_replace=".", value="0")

In [115]:
# Population change: 2023 estimate minus 1900 count, with both columns cast to
# int before subtracting.
population_df["diff"] = (
    population_df["POPESTIMATE2023"]
    .astype(int)
    .sub(population_df["pop1900"].astype(int))
)

In [120]:
# Rename the key column from FIPS to GEOID.
population_df = population_df.rename({"FIPS": "GEOID"}, axis="columns")

In [123]:
# Left-join the population table onto the county geometries by GEOID, so
# every county is kept; counties with no match get NaN in "diff".
population_gdf = counties_df.merge(population_df, how="left", on="GEOID")
# Keep only the population change and the geometry column.
population_gdf = population_gdf[["diff", "geometry"]]

In [129]:
# Boolean flag: True where "diff" is negative. A NaN "diff" compares as
# False, so rows without a value are flagged False as well.
population_gdf["decreased"] = population_gdf["diff"].lt(0)

In [130]:
# Reproject the geometries to EPSG:5070.
# NOTE(review): EPSG:5070 should be NAD83 / Conus Albers per the EPSG
# registry -- confirm this is the intended projection.
population_gdf = population_gdf.to_crs(epsg=5070)

In [131]:
# Write the result to disk as a GeoPackage. The driver is named explicitly so
# the output format does not depend on geopandas inferring it from the
# ".gpkg" file extension.
population_gdf.to_file("data/population.gpkg", driver="GPKG")