In [1]:
import pandas as pd
from utilities.functions import *
from utilities.luts import *

In [2]:
# Load the table of Anchorage census tracts (the neighborhoods to get data for).
anc = pd.read_csv("tbl/Anchorage_CensusTracts_2020.csv")

In [3]:
# Reshape the tract list into the standard input-table layout:
# id,name,alt_name,region,country,latitude,longitude,type,GEOIDFQ,PLACENAME,AREATYPE,COMMENT
# GEOIDFQ is kept under its own name, TractNo becomes PLACENAME,
# and TractName becomes name.
anc = (
    anc.rename(columns={"TractNo": "PLACENAME", "TractName": "name"})
    .assign(
        # made-up sequential IDs starting with "ANC" (ANC001, ANC002, ...);
        # per the original note, the data fetching functions need an id
        id=["ANC" + str(i).zfill(3) for i in range(1, len(anc) + 1)],
        # fields with no value for these rows
        alt_name=None,
        latitude=None,
        longitude=None,
        COMMENT=None,
        # constant fields shared by every row
        region="Alaska",
        country="US",
        type="neighborhood",
        AREATYPE="Census tract",
    )
    # keep only the standard columns, in the standard order
    [
        [
            "id",
            "name",
            "alt_name",
            "region",
            "country",
            "latitude",
            "longitude",
            "type",
            "GEOIDFQ",
            "PLACENAME",
            "AREATYPE",
            "COMMENT",
        ]
    ]
)

In [4]:
# preview the first rows to check the standardized column layout
anc.head()

Unnamed: 0,id,name,alt_name,region,country,latitude,longitude,type,GEOIDFQ,PLACENAME,AREATYPE,COMMENT
0,ANC001,Government Hill,,Alaska,US,,,neighborhood,1400000US02020000500,Census Tract 5,Census tract,
1,ANC002,West Mountain View/Ship Creek,,Alaska,US,,,neighborhood,1400000US02020000601,Census Tract 6.01,Census tract,
2,ANC003,East Mountain View,,Alaska,US,,,neighborhood,1400000US02020000602,Census Tract 6.02,Census tract,
3,ANC004,Ptarmigan Area,,Alaska,US,,,neighborhood,1400000US02020000701,Census Tract 7.01,Census tract,
4,ANC005,Northwest Muldoon,,Alaska,US,,,neighborhood,1400000US02020000702,Census Tract 7.02,Census tract,


In [5]:
# run the fetch and merge function to get the data for these neighborhoods
# even though there are no "combined places",
# we still need to run aggregate_results() in order to do the MOE > CI conversion
# run_fetch_and_merge and aggregate_results are not defined in this notebook;
# presumably they come from the `utilities` star imports in the first cell -- TODO confirm
# NOTE(review): the saved output of this cell shows some *_high bounds above 100
# (e.g. pct_minority_high of 100.8 and 105.8) -- confirm whether upper bounds
# for percentages should be capped at 100

anc_results_df = run_fetch_and_merge(anc)
results = aggregate_results(anc_results_df)
results.head()

  return pd.concat(results)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[measure_col + "_adult_population_variance"].iloc[
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[measure_col + "_adult_population_variance"].iloc[
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[measure_col + "_adult_population_variance"].iloc[
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-vi

Unnamed: 0,id,name,areatype,placename,GEOID,total_population,pct_65_plus,pct_under_18,pct_under_5,pct_hispanic_latino,...,pct_no_hsdiploma_high,pct_no_hsdiploma_low,pct_below_150pov_high,pct_below_150pov_low,pct_minority_high,pct_minority_low,pct_single_parent_high,pct_single_parent_low,pct_unemployed_high,pct_unemployed_low
0,ANC001,Government Hill,Census tract,Census Tract 5,20000500,2124.0,11.53,20.9,6.36,10.55,...,9.2,1.4,46.1,19.1,66.2,36.6,15.0,1.6,19.9,4.1
1,ANC002,West Mountain View/Ship Creek,Census tract,Census Tract 6.01,20000601,4328.0,8.27,28.6,8.78,13.84,...,26.6,7.8,70.5,42.3,100.8,62.0,41.6,13.4,10.4,0.4
2,ANC003,East Mountain View,Census tract,Census Tract 6.02,20000602,2716.0,8.91,31.3,10.01,12.41,...,23.6,6.6,65.5,37.5,105.8,50.8,33.9,7.9,22.4,0.8
3,ANC004,Ptarmigan Area,Census tract,Census Tract 7.01,20000701,6986.0,9.98,26.15,8.56,11.95,...,18.9,3.9,37.3,18.1,75.1,48.3,2.6,0.0,11.4,4.2
4,ANC005,Northwest Muldoon,Census tract,Census Tract 7.02,20000702,5435.0,8.72,27.34,7.73,9.71,...,21.2,2.8,28.4,15.0,80.1,54.1,18.4,5.2,10.0,3.6


In [6]:
# Remove the state ("AK0") and national ("US0") comparison rows;
# they are not needed for this table.
results = results[~results["id"].isin(["AK0", "US0"])]

In [7]:
# Keep a data-only copy on disk, so the CSV formatting below can be revised
# without fetching the data again.
filepath = "tbl/anc_neighborhood_data.csv"
results.to_csv(filepath, index=False)

In [8]:
# Read the saved data-only CSV back in and preview the first rows.
filepath = "tbl/anc_neighborhood_data.csv"
results = pd.read_csv(filepath)
results.head(5)

Unnamed: 0,id,name,areatype,placename,GEOID,total_population,pct_65_plus,pct_under_18,pct_under_5,pct_hispanic_latino,...,pct_no_hsdiploma_high,pct_no_hsdiploma_low,pct_below_150pov_high,pct_below_150pov_low,pct_minority_high,pct_minority_low,pct_single_parent_high,pct_single_parent_low,pct_unemployed_high,pct_unemployed_low
0,ANC001,Government Hill,Census tract,Census Tract 5,20000500,2124.0,11.53,20.9,6.36,10.55,...,9.2,1.4,46.1,19.1,66.2,36.6,15.0,1.6,19.9,4.1
1,ANC002,West Mountain View/Ship Creek,Census tract,Census Tract 6.01,20000601,4328.0,8.27,28.6,8.78,13.84,...,26.6,7.8,70.5,42.3,100.8,62.0,41.6,13.4,10.4,0.4
2,ANC003,East Mountain View,Census tract,Census Tract 6.02,20000602,2716.0,8.91,31.3,10.01,12.41,...,23.6,6.6,65.5,37.5,105.8,50.8,33.9,7.9,22.4,0.8
3,ANC004,Ptarmigan Area,Census tract,Census Tract 7.01,20000701,6986.0,9.98,26.15,8.56,11.95,...,18.9,3.9,37.3,18.1,75.1,48.3,2.6,0.0,11.4,4.2
4,ANC005,Northwest Muldoon,Census tract,Census Tract 7.02,20000702,5435.0,8.72,27.34,7.73,9.71,...,21.2,2.8,28.4,15.0,80.1,54.1,18.4,5.2,10.0,3.6


In [9]:
# Lookup table used to reformat the dataframe so it matches the CSV output
# from the API: for every output variable it holds a human-readable
# "description" and the "source" of the data. The table is defined in this
# notebook (rather than imported) and is assembled from templates because each
# *_low / *_high entry repeats the wording of its base measure.


def _build_demographics_descriptions():
    """Build the variable -> {"description", "source"} lookup table.

    Keys are inserted in this order: name, comment, total population,
    age groups, race/ethnicity, health conditions, social determinants of
    health. Every measure that has a confidence interval contributes three
    consecutive keys: <key>, <key>_low, <key>_high.
    """
    dhc_2020 = "U.S. Census Demographic and Housing Characteristics Survey for 2020"
    places_2024 = "CDC PLACES dataset for 2024"
    places_sdoh_2024 = "CDC PLACES Social Determinants of Health dataset for 2024 (originally derived from ACS estimates 2017-2021)"
    acs_2019_2023 = "U.S. Census American Community Survey 5-year estimates for years 2019-2023"

    def entry(description, source):
        # every call returns a fresh dict, so no two variables share an object
        return {"description": description, "source": source}

    table = {
        "name": entry("", ""),
        "comment": entry("", ""),
        "total_population": entry(
            "total_population is the total population of the community",
            dhc_2020,
        ),
    }

    # population by age group
    age_groups = (
        ("pct_under_18", "under age 18"),
        ("pct_under_5", "under age 5"),
        ("pct_65_plus", "age 65 and older"),
    )
    for key, group in age_groups:
        table[key] = entry(
            f"{key} is the percentage of the population {group}; this value was calculated by summing the population count of multiple sex by age categories and expressing that sum as a percentage of the total population",
            dhc_2020,
        )

    # population by race/ethnicity: (key, singular label, label used for the count)
    race_groups = (
        ("pct_african_american", "African American", "African Americans"),
        (
            "pct_amer_indian_ak_native",
            "American Indian or Alaska Native",
            "American Indians or Alaska Natives",
        ),
        ("pct_asian", "Asian", "Asians"),
        (
            "pct_hawaiian_pacislander",
            "Native Hawaiian and Pacific Islander",
            "Native Hawaiians and Pacific Islanders",
        ),
        ("pct_hispanic_latino", "Hispanic or Latino", "Hispanics or Latinos"),
        ("pct_white", "White", "Whites"),
        ("pct_multi", "two or more races", "two or more races"),
        ("pct_other", "other race", "other races"),
    )
    for key, label, counted in race_groups:
        table[key] = entry(
            f"{key} is the percentage of the population that is {label}; this value was calculated by taking the population count of {counted} and expressing that count as a percentage of the total population",
            dhc_2020,
        )

    def add_with_bounds(key, measure, source, crude=False):
        # adds <key>, <key>_low and <key>_high (in that order); only the base
        # description may carry the crude-prevalence note
        note = "; this value is a crude prevalence rate" if crude else ""
        table[key] = entry(f"{key} is the {measure}{note}", source)
        for bound, side in (("low", "lower"), ("high", "upper")):
            table[f"{key}_{bound}"] = entry(
                f"{key}_{bound} is the {side} bound of the 90% confidence interval for {measure}",
                source,
            )

    # health conditions
    add_with_bounds(
        "pct_asthma",
        "percentage of adults aged >=18 years who report being diagnosed with and currently having asthma",
        places_2024,
        crude=True,
    )
    add_with_bounds(
        "pct_copd",
        "percentage of adults aged >=18 years who report being diagnosed with chronic obstructive pulmonary disease (COPD), emphysema, or chronic bronchitis",
        places_2024,
    )
    add_with_bounds(
        "pct_diabetes",
        "percentage of adults aged >=18 years who report being diagnosed with diabetes (excluding diabetes during pregnancy/gestational diabetes)",
        places_2024,
        crude=True,
    )
    add_with_bounds(
        "pct_hd",
        "percentage of adults aged >=18 years who report being diagnosed with coronary heart disease",
        places_2024,
        crude=True,
    )
    add_with_bounds(
        "pct_mh",
        "percentage of adults aged >=18 years who report having 'frequent mental distress' (mental health including stress, depression, and problems with emotions, was not good for 14 or more days during the past 30 days)",
        places_2024,
        crude=True,
    )
    add_with_bounds(
        "pct_stroke",
        "percentage of adults aged >=18 years who report having ever been told by a doctor, nurse, or other health professional that they have had a stroke",
        places_2024,
        crude=True,
    )
    add_with_bounds(
        "pct_emospt",
        "percentage of adults aged >=18 years who report 'lack of social and emotional support' (self-report sometimes, rarely, or never getting the social and emotional support needed)",
        places_2024,
        crude=True,
    )

    # social determinants of health
    add_with_bounds(
        "pct_minority",
        "percentage of the population of racial or ethnic minority status (including individuals who identified as any of the following: Hispanic or Latino (any race); Black and African American, non-Hispanic; American Indian and Alaska Native, non-Hispanic; Asian, non-Hispanic; Native Hawaiian and Other Pacific Islander, non-Hispanic; Two or More Races, non-Hispanic; Other Races, non-Hispanic)",
        places_sdoh_2024,
    )
    add_with_bounds(
        "pct_foodstamps",
        "percentage of adults aged >=18 years that received food stamps in the past 12 months",
        places_2024,
    )
    add_with_bounds(
        "pct_w_disability",
        "percentage of the population with a reported disability (presence of six types of disability related to serious difficulty including: hearing, vision, concentrating, remembering or making decisions (i.e. cognition), walking or climbing stairs (i.e. mobility), dressing or bathing (i.e., self-care), and doing errands alone (i.e., independent living))",
        acs_2019_2023,
    )
    add_with_bounds(
        "pct_insured",
        "percentage of the population with health insurance",
        acs_2019_2023,
    )
    add_with_bounds(
        "pct_uninsured",
        "percentage of the population without health insurance",
        acs_2019_2023,
    )
    add_with_bounds(
        "pct_no_bband",
        "percentage of households with no broadband internet subscription",
        places_sdoh_2024,
    )
    add_with_bounds(
        "pct_no_hsdiploma",
        "percentage of adults aged >=25 years with no high school diploma",
        places_sdoh_2024,
    )
    add_with_bounds(
        "pct_below_150pov",
        "percentage of population living below 150% of the federal poverty threshold",
        places_sdoh_2024,
    )
    add_with_bounds(
        "pct_crowding",
        "percentage of households with 'crowding' (occupied housing units with 1.01 to 1.50 and 1.51 or more occupants per room)",
        places_sdoh_2024,
    )
    add_with_bounds(
        "pct_hcost",
        "percentage of households with 'housing cost burden' (households with annual income less than $75,000 that spend 30% or more of their household income on housing)",
        places_sdoh_2024,
    )
    add_with_bounds(
        "pct_unemployed",
        "percentage of the population >= 16 years in the civilian labor force who are unemployed (jobless but are available to work and have actively looked for work in the past 4 weeks)",
        places_sdoh_2024,
    )
    add_with_bounds(
        "pct_single_parent",
        "percentage of single parent households (households with a male or female householder with no spouse or partner present with children of the householder)",
        places_sdoh_2024,
    )

    return table


demographics_descriptions = _build_demographics_descriptions()


# Field order for the demographics CSV (per the original note, this should
# match the presentation of fields in NCR).
demographics_order = [
    # etc
    "comment",
    # population
    "total_population",
    # age by category
    "pct_under_5",
    "pct_under_18",
    "pct_65_plus",
    # race/ethnicity
    "pct_hispanic_latino",
    "pct_white",
    "pct_african_american",
    "pct_amer_indian_ak_native",
    "pct_asian",
    "pct_hawaiian_pacislander",
    "pct_other",
    "pct_multi",
    # every measure below is immediately followed by its _low and _high bounds
    *(
        measure + bound
        for measure in (
            # health conditions
            "pct_asthma",
            "pct_copd",
            "pct_hd",
            "pct_stroke",
            "pct_diabetes",
            "pct_mh",
            # social determinants of health
            "pct_minority",
            "pct_no_hsdiploma",
            "pct_below_150pov",
            "pct_unemployed",
            "pct_foodstamps",
            "pct_single_parent",
            "pct_no_bband",
            "pct_crowding",
            "pct_hcost",
            "pct_emospt",
            "pct_w_disability",
            "pct_insured",
            "pct_uninsured",
        )
        for bound in ("", "_low", "_high")
    ),
]

In [10]:
# Tidy the identifier columns: "name" becomes "variable", and the id,
# areatype, placename, and GEOID columns are removed.
results = results.rename(columns={"name": "variable"}).drop(
    columns=["id", "areatype", "placename", "GEOID"]
)

# Put the columns in demographics_order, led by "variable". Fields that are
# absent from the table are skipped, so dropping a field later won't break this.
ordered_fields = [
    field
    for field in demographics_order
    if field != "variable" and field in results.columns
]
results = results[["variable", *ordered_fields]]

# Flip the table so that each field becomes a row. After transposing, the
# first row holds the former "variable" column: promote it to the column
# headers, then remove it from the data rows.
results = results.T
results.columns = results.iloc[0]
results = results.iloc[1:]

In [11]:
# Attach "description" and "source" columns. Each row's index label is
# looked up in demographics_descriptions, and the entry's value for the
# matching key fills the new column.
for field_name in ("description", "source"):
    results[field_name] = results.index.map(
        lambda key, field_name=field_name: demographics_descriptions[key][field_name]
    )

In [12]:
# Move the row labels out of the index into a regular column named
# "variable", leaving a default integer index behind.
# (Original note: the "variable" index column is to be dropped later.)
results = results.reset_index(names="variable")
results

variable,variable.1,Government Hill,West Mountain View/Ship Creek,East Mountain View,Ptarmigan Area,Northwest Muldoon,Northeast Muldoon,Wonder Park,Russian Jack,Merrill Field Vicinity,...,Klatt,Independence Park,Lower Hillside,Hillside East,Huffman,Rabbit Creek,Rabbit Creek/Bear Valley,Science Center/Bicentennial Park,description,source
0,comment,Data represent information from nearest census...,Data represent information from nearest census...,Data represent information from nearest census...,Data represent information from nearest census...,Data represent information from nearest census...,Data represent information from nearest census...,Data represent information from nearest census...,Data represent information from nearest census...,Data represent information from nearest census...,...,Data represent information from nearest census...,Data represent information from nearest census...,Data represent information from nearest census...,Data represent information from nearest census...,Data represent information from nearest census...,Data represent information from nearest census...,Data represent information from nearest census...,Data represent information from nearest census...,,
1,total_population,2124.0,4328.0,2716.0,6986.0,5435.0,5555.0,6814.0,4759.0,5090.0,...,4593.0,6519.0,8359.0,4796.0,5080.0,4135.0,5720.0,5.0,total_population is the total population of th...,U.S. Census Demographic and Housing Characteri...
2,pct_under_5,6.36,8.78,10.01,8.56,7.73,7.78,7.65,6.6,7.41,...,5.83,6.12,4.93,4.27,5.61,4.55,4.84,0.0,pct_under_5 is the percentage of the populatio...,U.S. Census Demographic and Housing Characteri...
3,pct_under_18,20.9,28.6,31.3,26.15,27.34,26.95,27.31,23.28,26.4,...,24.25,23.24,26.55,21.62,27.95,21.57,23.99,0.0,pct_under_18 is the percentage of the populati...,U.S. Census Demographic and Housing Characteri...
4,pct_65_plus,11.53,8.27,8.91,9.98,8.72,12.64,10.74,9.33,7.11,...,12.67,10.2,13.59,19.95,14.53,19.52,15.19,40.0,pct_65_plus is the percentage of the populatio...,U.S. Census Demographic and Housing Characteri...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,pct_insured_low,78.5,83.4,81.6,73.0,86.1,85.5,77.5,75.5,81.1,...,79.4,83.1,89.1,93.0,83.1,88.4,93.0,,pct_insured_low is the lower bound of the 90% ...,U.S. Census American Community Survey 5-year e...
66,pct_insured_high,92.1,97.4,95.0,86.4,93.5,95.1,90.1,89.9,92.1,...,95.2,94.1,95.9,97.8,95.5,99.6,99.0,,pct_insured_high is the upper bound of the 90%...,U.S. Census American Community Survey 5-year e...
67,pct_uninsured,14.7,9.6,11.7,20.3,10.2,9.7,16.2,17.3,13.4,...,12.7,11.4,7.5,4.6,10.7,6.0,4.0,,pct_uninsured is the percentage of the populat...,U.S. Census American Community Survey 5-year e...
68,pct_uninsured_low,7.9,2.6,5.0,13.6,6.5,4.9,9.9,10.1,7.9,...,4.8,5.9,4.1,2.2,4.5,0.4,1.0,,pct_uninsured_low is the lower bound of the 90...,U.S. Census American Community Survey 5-year e...


In [13]:
# Write the export CSV: a two-line "#" metadata header followed by the table.
#
# The output path is relative, like the input CSV read at the top of the
# notebook ("tbl/..."), so the notebook is not tied to one user's home
# directory. NOTE(review): this assumes the notebook is run from the
# repository root, as the input path already does. Confirm.
out_filepath = "tbl/anc_area_data_to_export.csv"

header = "# Location: Anchorage Area\n# Demographic and health data for individual neighborhoods of Anchorage."

# newline="" hands control of line endings to the CSV writer (pandas asks for
# this when given a text file object; otherwise rows can gain an extra "\r" on
# Windows). lineterminator="\n" keeps the data rows consistent with the "\n"
# used for the header; it needs pandas >= 1.5, which the reset_index(names=...)
# call above already requires. An explicit encoding avoids depending on the
# platform default.
with open(out_filepath, "w", newline="", encoding="utf-8") as file:
    file.write(header + "\n")
    results.to_csv(file, index=False, lineterminator="\n")