In [None]:
import pandas as pd
import censusdata
import csv
import requests
import zipfile

from pathlib import Path

# --- Scalar settings -------------------------------------------------------
ACS_YEAR = 2019
# Column name given to the block-group identifier in the output dataset.
GEOID_FIELD_NAME = "GEOID10"

# --- Filesystem locations --------------------------------------------------
# All paths hang off a "data" directory that sits next to the current working directory.
DATA_PATH = Path(Path.cwd().parent, "data")
# CSV whose first column holds the FIPS code of each state / territory.
FIPS_CSV_PATH = Path(DATA_PATH, "fips_states_2010.csv")
# Directory that receives the combined output file.
OUTPUT_PATH = Path(DATA_PATH, "dataset", "housing_and_transportation_index")

In [None]:
# https://htaindex.cnt.org/download/download.php?focus=blkgrp&geoid=01

# Seconds `requests` waits for the connection / for the next chunk of bytes
# before giving up, so a stalled server cannot hang the cell forever.
DOWNLOAD_TIMEOUT_SECONDS = 120


def _download_block_group_data(fips, work_dir):
    """Download, unzip and load the block-group CSV for one state/territory.

    Args:
        fips: FIPS code string used as the `geoid` query parameter and in the
            downloaded / extracted file names.
        work_dir: Existing directory (a `pathlib.Path`) where the ZIP archive
            is saved and extracted.

    Returns:
        A DataFrame read from `htaindex_data_blkgrps_{fips}.csv` inside `work_dir`.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        zipfile.BadZipFile: If the downloaded payload is not a valid ZIP archive.
    """
    print(f"Downloading data for state/territory with FIPS code {fips}")

    # NOTE(review): verify=False disables TLS certificate verification. It is kept
    # from the original because it cannot be confirmed from here that the server's
    # certificate validates -- remove it if the host serves a valid certificate.
    download = requests.get(
        f"https://htaindex.cnt.org/download/download.php?focus=blkgrp&geoid={fips}",
        verify=False,
        timeout=DOWNLOAD_TIMEOUT_SECONDS,
    )
    # Fail here with a clear HTTP error instead of writing an error page to disk
    # and failing later with a confusing "not a zip file" message.
    download.raise_for_status()

    zip_file_path = work_dir / f"{fips}-downloaded.zip"
    # The context manager closes the handle even if the write raises.
    with open(zip_file_path, "wb") as zip_file:
        zip_file.write(download.content)

    with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
        zip_ref.extractall(work_dir)

    # Name of the CSV file expected inside the extracted archive.
    tmp_csv_file_path = work_dir / f"htaindex_data_blkgrps_{fips}.csv"
    return pd.read_csv(filepath_or_buffer=tmp_csv_file_path)


# Download each state / territory individually
dfs = []
# newline="" is what the csv module documentation recommends for files passed to csv.reader.
with open(FIPS_CSV_PATH, newline="") as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=",")
    # Skip the header row (a no-op on an empty file).
    next(csv_reader, None)

    # Make the working directory once, if it doesn't exist
    zip_file_dir = DATA_PATH / "tmp" / "housing_and_transportation_index"
    zip_file_dir.mkdir(parents=True, exist_ok=True)

    for row in csv_reader:
        # csv.reader yields [] for blank lines; skip those and rows with an empty code.
        if not row or not row[0].strip():
            continue

        dfs.append(_download_block_group_data(row[0].strip(), zip_file_dir))

# ignore_index=True gives the combined frame one continuous index instead of
# repeating each per-state 0..n-1 index.
df = pd.concat(dfs, ignore_index=True)

df.head()

In [None]:
# Give the block-group ID column its output name, then drop every double-quote
# character from its values.
df = df.rename(columns={"blkgrp": GEOID_FIELD_NAME})
geoid_without_quotes = df[GEOID_FIELD_NAME].str.replace('"', "")
df[GEOID_FIELD_NAME] = geoid_without_quotes

In [None]:
# Write the combined frame to "usa.csv" under OUTPUT_PATH, creating the
# directory (and any missing parents) first; the index is not written.
output_csv_path = OUTPUT_PATH / "usa.csv"
output_csv_path.parent.mkdir(parents=True, exist_ok=True)

df.to_csv(path_or_buf=output_csv_path, index=False)