In [19]:
import requests

In [20]:
import geopandas as gpd
import pandas as pd

## Open County Data

In [21]:
# Cartographic boundary shapefile holding the county geometries used for the final merge.
file_path = "data/cb_2024_us_county_500k/cb_2024_us_county_500k.shp"
counties_gdf = gpd.read_file(filename=file_path)

## Get Column Data

In [22]:
# Download the variable dictionary for the 2023 ACS 5-year subject tables.
# The timeout stops the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() reports an HTTP error here rather than as a confusing
# JSON-decoding or KeyError failure in a later cell.
r = requests.get(
    "https://api.census.gov/data/2023/acs/acs5/subject/variables.json",
    timeout=60,
)
r.raise_for_status()
columns_obj = r.json()

In [23]:
# Map every S0802 variable code to its human-readable label so the
# coded column names can be swapped for descriptive ones later on.
columns_to_replace = dict(
    (variable_id, metadata["label"])
    for variable_id, metadata in columns_obj["variables"].items()
    if variable_id.startswith("S0802")
)

In [24]:
# Show the metadata record (label, concept, related attribute codes) for S0802_C01_089E.
columns_obj["variables"]["S0802_C01_089E"]

{'label': 'Estimate!!Total!!Workers 16 years and over who did not work from home!!TRAVEL TIME TO WORK!!60 or more minutes',
 'concept': 'Means of Transportation to Work by Selected Characteristics',
 'predicateType': 'float',
 'group': 'S0802',
 'limit': 0,
 'attributes': 'S0802_C01_089EA,S0802_C01_089M,S0802_C01_089MA'}

In [25]:
# NOTE(review): this looks up the same S0802_C01_089E record as the preceding cell
# and displays the same output; one of the two cells is probably redundant.
columns_obj["variables"]["S0802_C01_089E"]

{'label': 'Estimate!!Total!!Workers 16 years and over who did not work from home!!TRAVEL TIME TO WORK!!60 or more minutes',
 'concept': 'Means of Transportation to Work by Selected Characteristics',
 'predicateType': 'float',
 'group': 'S0802',
 'limit': 0,
 'attributes': 'S0802_C01_089EA,S0802_C01_089M,S0802_C01_089MA'}

### Get columns to query and rename for later

In [26]:
# Fields requested from the API: the geography identifier plus the
# S0802_C01_089 estimate (E) and its companion M field.
columns = [
    "GEO_ID",
    "S0802_C01_089E",
    "S0802_C01_089M",
]
# The query string expects the field names as one comma-separated value.
columns_formatted = str.join(",", columns)

In [27]:
# Request the selected fields for every county. The timeout bounds how long the
# cell can block, and raise_for_status() fails fast on an HTTP error instead of
# letting an error page reach the JSON parsing step.
response = requests.get(
    f"https://api.census.gov/data/2023/acs/acs5/subject?get={columns_formatted}&for=county:*",
    timeout=60,
)
response.raise_for_status()

In [28]:
# The payload is a list of lists: the first entry is the header row,
# everything after it is a data row.
data = response.json()
columns, rows = data[0], data[1:]
commuting_df = pd.DataFrame(data=rows, columns=columns)

In [29]:
# Cast only the S0802 measurement columns to float. The response header also
# carries "state" and "county", which are identifier codes rather than
# quantities; casting them to float (as slicing columns[1:] did) would turn
# them into numbers, so they are left untouched.
value_columns = [name for name in columns if name.startswith("S0802")]
commuting_df = commuting_df.astype({name: float for name in value_columns})

In [30]:
# Display the header row returned by the API (reassigned to `columns` when the response was parsed).
columns

['GEO_ID', 'S0802_C01_089E', 'S0802_C01_089M', 'state', 'county']

In [31]:
def check_margin_error(row, max_rmoe: float = 0.50) -> float:
    """Return the estimate when its relative margin of error is acceptable.

    The relative margin of error is ``abs(margin / estimate)``. Estimates whose
    ratio is at or above ``max_rmoe`` are replaced with the flag value ``-99.0``.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing the
            ``"S0802_C01_089E"`` estimate and ``"S0802_C01_089M"`` margin.
        max_rmoe: Exclusive upper bound on the relative margin of error.
            Defaults to 0.50, the threshold that was previously hard-coded.

    Returns:
        ``0.0`` for a zero estimate (the ratio is undefined there), the
        estimate itself when it passes the check, otherwise ``-99.0``.
    """
    value = row["S0802_C01_089E"]
    value_moe = row["S0802_C01_089M"]

    # A zero estimate cannot be divided by; pass it through unchanged.
    if value == 0.0:
        return 0.0

    # Missing inputs cannot be assessed, so flag them explicitly rather than
    # relying on NaN comparisons happening to evaluate to False.
    if pd.isna(value) or pd.isna(value_moe):
        return -99.0

    # A negative estimate is not a usable value, and when the margin is also a
    # large negative number the ratio test below could pass and return it as
    # data. NOTE(review): the Census API is documented to use large negative
    # placeholder values (e.g. -666666666) for unavailable estimates; confirm.
    if value < 0:
        return -99.0

    rmoe_val = abs(value_moe / value)
    return value if rmoe_val < max_rmoe else -99.0

In [32]:
# Screen each row's estimate with check_margin_error and keep the result in a new PERCENT column.
commuting_df["PERCENT"] = commuting_df.apply(check_margin_error, axis=1)

In [33]:
# Add the GEO_ID -> GEOIDFQ entry to the rename map (GEOIDFQ is the key used
# for the merge with the county geometries), then apply all renames at once.
columns_to_replace.update({"GEO_ID": "GEOIDFQ"})
commuting_df = commuting_df.rename(columns=columns_to_replace)

In [34]:
# Check the column names after the rename.
commuting_df.columns

Index(['GEOIDFQ',
       'Estimate!!Total!!Workers 16 years and over who did not work from home!!TRAVEL TIME TO WORK!!60 or more minutes',
       'S0802_C01_089M', 'state', 'county', 'PERCENT'],
      dtype='object')

## Merge Data

In [35]:
# Attach the commute figures to the county geometries. The inner join keeps
# only the GEOIDFQ values that appear in both tables.
commuting_gdf = counties_gdf.merge(
    right=commuting_df,
    how="inner",
    on="GEOIDFQ",
)

In [36]:
# Reproject to EPSG:9311, then write the merged layer to a .gpkg file.
output_path = "data/Commute_More_60_Minutes_Per_County.gpkg"
commuting_gdf = commuting_gdf.to_crs(9311)
commuting_gdf.to_file(output_path)