In [18]:
import geopandas as gpd
import pandas as pd

## Open County data

In [19]:
# Read the county boundary shapefile, keep every county outside STATEFP "09"
# (those rows are supplied separately by the CT planning-region file), retain
# only the geometry and the AFFGEOID join key, and reproject to EPSG:4326.
file_path = "data/cb_2018_us_county_500k/cb_2018_us_county_500k.shp"
all_counties = gpd.read_file(file_path)
outside_ct = all_counties["STATEFP"] != "09"
counties_gdf = all_counties.loc[outside_ct, ["geometry", "AFFGEOID"]].to_crs(4326)

#### Format for CT (planning regions in place of counties)

In [20]:
# Load the CT planning regions and build an AFFGEOID key for them by
# prefixing the region GEOID with "0500000US", so they can be joined on the
# same column as the county rows.
ct_gdf = gpd.read_file("data/CT_Planning_Regions.geojson").assign(
    AFFGEOID=lambda regions: "0500000US" + regions["PlanningRegionFIPS_GEOID"]
)

In [21]:
# Stack the CT planning regions on top of the remaining counties.
# The county frame was explicitly reprojected before this point, but the CT
# frame keeps whatever CRS its GeoJSON declared. Reproject it to the county
# CRS first so both inputs are guaranteed to share one coordinate system
# (this is a no-op when the CRS already matches).
ct_regions_aligned = ct_gdf[["geometry", "AFFGEOID"]].to_crs(counties_gdf.crs)
counties_gdf = pd.concat([ct_regions_aligned, counties_gdf], ignore_index=True)

## Get Internet Percents

In [31]:
# Raw ACS S2801 data export. low_memory=False makes pandas read the file in
# one pass instead of chunk-by-chunk dtype inference.
acs_data_path = "data/ACSST5Y2023.S2801_2025-04-30T190050/ACSST5Y2023.S2801-Data.csv"
acs_internet = pd.read_csv(acs_data_path, low_memory=False)

#### Get Column names

In [32]:
# Column metadata that ships alongside the ACS S2801 data export.
acs_metadata_path = (
    "data/ACSST5Y2023.S2801_2025-04-30T190050/ACSST5Y2023.S2801-Column-Metadata.csv"
)
acs_column_names = pd.read_csv(acs_metadata_path)

In [33]:
# Pick out the two S2801 column codes of interest, in the order in which they
# appear in the metadata file.
wanted_codes = ["S2801_C01_019E", "S2801_C02_019E"]
is_wanted = acs_column_names["Column Name"].isin(wanted_codes)
internet_cols = acs_column_names.loc[is_wanted, "Column Name"].tolist()

#### Back to Data

In [42]:
# Keep the geography id plus the selected ACS columns.
# NOTE: this previously referenced `home_cols`, which is not defined anywhere
# in the notebook and raised NameError on a fresh kernel; the list built from
# the column metadata is `internet_cols`.
# `.copy()` detaches the selection from `acs_internet` before its header is
# replaced below.
acs_internet_per_county_df = acs_internet[["GEO_ID", *internet_cols]].copy()

# Promote the first row (the descriptive labels) to the header, then drop that
# row from the data.
acs_internet_per_county_df.columns = acs_internet_per_county_df.iloc[0]
acs_internet_per_county_df = acs_internet_per_county_df.iloc[1:]

# Shorten the descriptive labels and name the geography column AFFGEOID so it
# matches the join key used by the county geometries.
acs_internet_per_county_df = acs_internet_per_county_df.rename(
    columns={
        "Estimate!!Total!!Total households!!TYPE OF INTERNET SUBSCRIPTIONS!!Without an Internet subscription": "NO_INTERNET",
        "Estimate!!Percent!!Total households!!TYPE OF INTERNET SUBSCRIPTIONS!!Without an Internet subscription": "PERCENT",
        "Geography": "AFFGEOID",
    }
)

## Merge Data

In [43]:
# Attach the ACS figures to every geometry. A left join keeps geometries that
# have no ACS row; their ACS columns are left missing.
acs_internet_per_county_gdf = counties_gdf.merge(
    right=acs_internet_per_county_df,
    how="left",
    on="AFFGEOID",
)

In [44]:
# Derive ids from AFFGEOID: everything after the first nine characters is the
# county id, and the first two characters of that remainder are the state id.
affgeoid = acs_internet_per_county_gdf["AFFGEOID"]
acs_internet_per_county_gdf["county_id"] = affgeoid.str.slice(9)
acs_internet_per_county_gdf["state_id"] = affgeoid.str.slice(9, 11)

In [45]:
# Geometries without a matching ACS row come out of the left merge with
# missing NO_INTERNET / PERCENT values. Fill only those two columns: a blanket
# fillna(0) would also be applied to the geometry and id columns, where 0 is
# not a meaningful value.
# NOTE: after this step a filled-in 0 cannot be told apart from a genuine 0.
acs_internet_per_county_gdf = acs_internet_per_county_gdf.fillna(
    {"NO_INTERNET": 0, "PERCENT": 0}
)

# The ACS values are cast to numeric types here so they can be sorted and
# written out as numbers.
acs_internet_per_county_gdf["NO_INTERNET"] = acs_internet_per_county_gdf[
    "NO_INTERNET"
].astype(int)
acs_internet_per_county_gdf["PERCENT"] = acs_internet_per_county_gdf["PERCENT"].astype(
    float
)

In [55]:
# Drop every row whose state id is "72" and renumber the index from zero.
in_state_72 = acs_internet_per_county_gdf["state_id"].eq("72")
acs_internet_per_county_gdf = acs_internet_per_county_gdf[~in_state_72].reset_index(
    drop=True
)

In [56]:
# Reproject to EPSG:9311 and write the result out as a GeoPackage.
# The variable is rebound, so later cells see the projected coordinates.
acs_internet_per_county_gdf = acs_internet_per_county_gdf.to_crs(epsg=9311)
acs_internet_per_county_gdf.to_file("data/internet_subscription_per_county.gpkg")

In [58]:
# Show the rows ordered from the highest to the lowest PERCENT value.
acs_internet_per_county_gdf.sort_values(by="PERCENT", ascending=False)

Unnamed: 0,geometry,AFFGEOID,NO_INTERNET,PERCENT,county_id,state_id
362,"POLYGON ((822794.555 -1297291.23, 822849.068 -...",0500000US28055,195,51.9,28055,28
2723,"POLYGON ((633081.051 -1304870.626, 634513.324 ...",0500000US22027,2469,49.2,22027,22
216,"MULTIPOLYGON (((261305.708 -2003968.269, 26196...",0500000US48261,8,47.1,48261,48
841,"POLYGON ((-887635.984 -836071.585, -881761.2 -...",0500000US04001,9475,46.4,04001,04
1068,"POLYGON ((-2719625.094 3167492.219, -2718060.9...",0500000US02290,1074,46.4,02290,02
...,...,...,...,...,...,...
507,"POLYGON ((-8430822.962 4417673.222, -8430766.3...",0500000US69100,0,0.0,69100,69
1061,"MULTIPOLYGON (((3702731.704 -2212495.471, 3702...",0500000US78030,0,0.0,78030,78
1460,"MULTIPOLYGON (((-7799029.401 -3753736.013, -77...",0500000US60030,0,0.0,60030,60
505,"MULTIPOLYGON (((-7923986.54 -3608577.292, -792...",0500000US60020,0,0.0,60020,60
