In [1]:
import requests

In [2]:
import geopandas as gpd
import pandas as pd

## Open County Data

In [3]:
# County boundary shapefile, read into a GeoDataFrame that later serves as the
# geometry side of the merge.
file_path = "data/cb_2024_us_county_500k/cb_2024_us_county_500k.shp"
counties_gdf = gpd.read_file(filename=file_path)

In [4]:
# STATEFP codes removed from the county layer.
# NOTE(review): presumably the U.S. territories / outlying areas — confirm
# against the FIPS state code list.
excluded_statefp = ["60", "66", "69", "72", "74", "78"]
is_excluded = counties_gdf["STATEFP"].isin(excluded_statefp)

# Keep everything that is not excluded and renumber the rows from zero.
counties_gdf = counties_gdf[~is_excluded].reset_index(drop=True)

## Get Labor Data

In [17]:
# Table to download from the ACS 5-year subject endpoint; the same identifier
# is used further down as the prefix for looking up column labels.
table = "S2402"

url = "https://api.census.gov/data/2023/acs/acs5/subject"
params = {
    # Ask for the whole variable group of the table in one call.
    "get": f"group({table})",
    # NOTE(review): presumably "every county (0500000) within the United
    # States (0100000US)" — confirm against the Census API ucgid documentation.
    "ucgid": "pseudo(0100000US$0500000)",
}
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(url, params=params, timeout=60)
# Fail here on an HTTP error rather than later with an opaque parsing error.
response.raise_for_status()

In [18]:
# The payload is a list of rows whose first entry holds the column names;
# the remaining entries are the records.
payload = response.json()
labor_df = pd.DataFrame(data=payload[1:], columns=payload[0])

In [19]:
# Variable metadata for the same dataset: maps variable codes to their labels.
# HTTPS is used to match the data request made to the same host.
# NOTE: this rebinds `url` and `response`; re-run the data request cell before
# re-parsing the table payload.
url = "https://api.census.gov/data/2023/acs/acs5/subject/variables.json"
response = requests.get(url, timeout=60)
# Surface HTTP errors here instead of as a KeyError/JSON error below.
response.raise_for_status()
variables = response.json()["variables"]

In [20]:
# Map each variable code belonging to the requested table to its label.
col_vars = {}
for var_code, var_meta in variables.items():
    if var_code.startswith(table):
        col_vars[var_code] = var_meta["label"]

In [21]:
# Swap variable codes for their labels, then keep only the labelled columns
# plus the geography identifier.
label_columns = list(col_vars.values())
labor_df = labor_df.rename(columns=col_vars)[label_columns + ["ucgid"]]

In [22]:
# Columns whose label starts with this prefix are shortened to the last
# "!!"-separated segment of the label.
label_prefix = (
    "Estimate!!Female!!Full-time, year-round civilian employed population 16 years and over"
)

rename_dict = {}
for full_label in labor_df.columns:
    if full_label.startswith(label_prefix):
        rename_dict[full_label] = full_label.rsplit("!!", 1)[-1]

In [23]:
# Apply the shortened column names.
labor_df = labor_df.rename(columns=rename_dict)

In [24]:
# Occupation-group columns compared per county.
# NOTE(review): presumably the top-level groups of the table (the labels that
# end in ":") — confirm against the table's variable list.
labor_cols = [
    "Management, business, science, and arts occupations:",
    "Service occupations:",
    "Sales and office occupations:",
    "Natural resources, construction, and maintenance occupations:",
    "Production, transportation, and material moving occupations:",
]

# Cast the selected columns to integers so they can be compared numerically.
labor_df = labor_df.astype({sector_col: int for sector_col in labor_cols})

In [25]:
# Name the geography key "GEOIDFQ" so it matches the key used when merging
# with the county layer.
labor_df = labor_df.rename({"ucgid": "GEOIDFQ"}, axis="columns")

In [26]:
# Reduce the table to the merge key plus the occupation-group counts.
kept_columns = ["GEOIDFQ"] + labor_cols
labor_df = labor_df[kept_columns]

In [27]:
# For every row, record the name of the occupation-group column holding the
# largest count (on ties, idxmax reports the first such column).
labor_df = labor_df.assign(
    LARGEST_SECTOR=labor_df[labor_cols].idxmax(axis="columns")
)

In [28]:
# Number of rows per value of LARGEST_SECTOR, shown as a two-column table.
(
    labor_df.groupby("LARGEST_SECTOR")
    .size()
    .rename("COUNT")
    .reset_index()
)

Unnamed: 0,LARGEST_SECTOR,COUNT
0,"Management, business, science, and arts occupa...",3032
1,"Production, transportation, and material movin...",4
2,Sales and office occupations:,168
3,Service occupations:,18


## Merge Data

In [29]:
# Attach the labor columns to the county geometries. The inner join keeps only
# rows whose GEOIDFQ appears in both tables.
labor_gdf = counties_gdf.merge(
    right=labor_df,
    how="inner",
    on="GEOIDFQ",
)

In [30]:
# Reproject to EPSG:9311 before writing.
# NOTE(review): presumably chosen as a U.S.-wide projection for mapping —
# confirm the intended CRS.
labor_gdf = labor_gdf.to_crs(epsg=9311)

# Persist the merged layer as a GeoPackage file.
labor_gdf.to_file("data/labor_sector_per_county.gpkg")