## Imports

In [1]:
import numpy as np
import pandas as pd
import requests
from collections import OrderedDict

import plotly.figure_factory as ff
from plotly import offline
from plotly.graph_objs import *

pd.options.display.max_columns = 100

# Key is required to access census data.
# Surrounding whitespace is stripped so that a trailing newline in the key
# file does not end up inside the request URL built later in the notebook.
with open("./Census Key.txt", "r") as f:
    key = f.read().strip()

## Functions

In [2]:
# Smoke test: build a choropleth for a handful of counties that share the
# "06" state prefix, using dummy values, to confirm the figure factory works.
county_suffixes = (21, 23, 27, 29, 33, 59, 47, 49, 51, 55, 61)
fips = [f"060{suffix}" for suffix in county_suffixes]
values = range(len(fips))

fig = ff.create_choropleth(fips=fips, values=values)

# Drop the default template and paint both backgrounds white.
fig.layout.template = None
for background in ("plot_bgcolor", "paper_bgcolor"):
    setattr(fig.layout, background, "#fff")

# The figure is deliberately not rendered here; uncomment one line to view it.
# offline.plot(fig, filename="temp_out.html")
# fig.show()

In [3]:
def json_to_dataframe(response):
    """
        Convert json response to dataframe.

        The decoded payload is treated as a list of rows whose first row
        holds the column names and whose remaining rows hold the data.

        Parameters
        ----------
        response : object exposing a ``json()`` method
            For example a ``requests.Response``.

        Returns
        -------
        pandas.DataFrame
            One row per data row, cells left exactly as decoded.
            An empty DataFrame is returned when the payload has no rows.
    """
    # Decode the body once and reuse it for both the header and the rows.
    payload = response.json()
    if not payload:
        return pd.DataFrame()

    header, *rows = payload
    return pd.DataFrame(rows, columns=header)

In [4]:
def column_to_int(df, *column_names):
    """
        Convert data type to int.

        Columns are cast to a fixed 64-bit integer so the result does not
        depend on the platform's default integer width.

        Parameters
        ----------
        df : pandas.DataFrame
            Frame to convert. It is modified in place.
        *column_names
            Column labels. Each argument may be a single label or a
            list-like of labels (list, ``pandas.Index``, array, ...).
            Tuples are treated as a single label.

        Returns
        -------
        pandas.DataFrame
            The same ``df`` object, returned for convenience.
    """
    # Flatten the arguments so that both column_to_int(df, "a", "b") and
    # column_to_int(df, df.columns[1:3]) are handled one column at a time.
    targets = []
    for entry in column_names:
        if pd.api.types.is_list_like(entry) and not isinstance(entry, tuple):
            targets.extend(entry)
        else:
            targets.append(entry)

    for name in targets:
        df[name] = df[name].astype("int64")

    return df

In [5]:
def get_endpoints(df, var, num_endpoints=5):
    """ Automatically determine bins for plot legend.

    The endpoints are quantiles of ``df[var]`` taken at ``step``,
    ``2 * step``, ... where ``step = round(1 / (num_endpoints + 2), 3)``.
    Each endpoint is rounded to three decimals.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column to bin.
    var : str
        Name of the column whose distribution drives the bins.
    num_endpoints : int, default 5
        Number of endpoints to produce.

    Returns
    -------
    list
        ``num_endpoints`` quantile values, in increasing quantile order.
    """
    # NOTE(review): the divisor is num_endpoints + 2, so the highest endpoint
    # sits near the 5/7 quantile for the default. If equally populated bins
    # were intended the divisor would be num_endpoints + 1 -- confirm.
    step = round(1 / (num_endpoints + 2), 3)

    endpoints = []
    quant = step
    for _ in range(num_endpoints):
        endpoints.append(round(df[var].quantile(quant), 3))
        quant += step

    return endpoints

In [6]:
def county_map(df, var, endpoints=None, year=None, colorscale=None, filename="temp_out.html"):
    """ Generate county map.

    Builds a county-level choropleth of ``df[var]`` and writes it to an
    HTML file with ``plotly.offline.plot``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index holds the county FIPS codes.
    var : str
        Column to plot; also used in the legend and figure titles.
    endpoints : list of numbers, optional
        Bin edges for the legend. Computed with ``get_endpoints(df, var)``
        when omitted.
    year : str or int, optional
        Year shown in the titles. When omitted, the notebook-level ``year``
        variable is used, which keeps existing calls working.
    colorscale : list of str, optional
        Colours for the bins. Defaults to a six-step blue ramp.
        Presumably needs one more entry than ``endpoints`` -- verify against
        the ``create_choropleth`` documentation before passing a custom list.
    filename : str, default "temp_out.html"
        Path of the HTML file that is written.

    Raises
    ------
    NameError
        If ``year`` is not passed and no notebook-level ``year`` exists.
    """
    if year is None:
        # Fall back to the notebook-level variable for backward compatibility.
        try:
            year = globals()["year"]
        except KeyError:
            raise NameError(
                "county_map needs a year: pass year=... or define a "
                "notebook-level `year` variable first"
            ) from None

    fips = df.index
    values = df[var]
    if endpoints is None:
        endpoints = get_endpoints(df, var)

    if colorscale is None:
        # Default blue ramp. An alternative red ramp can be passed as
        # colorscale=["rgb(220, 220, 220)", "rgb(250, 210, 216)",
        #             "rgb(245, 183, 177)", "rgb(236, 112, 99)",
        #             "rgb(203, 67, 53)", "rgb(148, 49, 38)"]
        colorscale = [
            "rgb(220, 220, 220)",
            "rgb(218,218,230)",
            "rgb(185, 185, 220)",
            "rgb(144,148,194)",
            "rgb(101,104,168)",
            "rgb(65, 53, 132)"
        ]

    # Create plot
    fig = ff.create_choropleth(
        fips=fips, values=values,
        binning_endpoints=endpoints, colorscale=colorscale,
        state_outline={"color": "rgb(0, 0, 0)", "width": 1},
        county_outline={"color": "rgb(255,255,255)", "width": 0.5}, round_legend_values=False,
        legend_title=f"{var} ({year})", title=f"{var} by County ({year})",
        show_state_data=True,
        show_hover=True,
    )

    fig.layout.template = None

    # Set background
    fig.layout.plot_bgcolor = "#fff"
    fig.layout.paper_bgcolor = "#fff"

    # Output plot (include_plotlyjs required for hover info)
    offline.plot(fig, filename=filename, include_plotlyjs="https://cdn.plot.ly/plotly-1.42.3.min.js")

## Main

In [7]:
# Pull down data from census
year = "2019"

# Census variable code -> readable column name used in the rest of the notebook.
# The B19037B_* variables keep their codes as column names.
var_dict = OrderedDict({
    "NAME": "Full Name", 
    "B01001_002E": "Total Male",
    "B01001B_002E": "Black Men",
    "B01001B_012E": "Black Men 45-54",
    "B01001B_013E": "Black Men 55-64",
    "C23002B_010E": "Black Men Over 64", 
    "B01001_001E": "Total Pop", 
    "B02001_003E": "Total Black",
    "B02009_001E": "Partial Black",
    "B19013_001E": "Median household income",
    "B19301_001E": "Per capita income",
    "B08014_002E": "No vehicle",
    "B08014_009E": "Men no vehicle",
    "B08101_025E": "Public trans",
    "B27002_018E": "45-54 Male Priv HI Total",
    "B27002_021E": "55-64 Male Priv HI Total",
    "B27002_024E": "65-74 Male Priv HI Total",
    "B27002_027E": "75+ Male Priv HI Total",
    "B27002_019E": "45-54 Male with Priv",
    "B27002_022E": "55-64 Male with Priv",
    "B27002_025E": "65-74 Male with Priv",
    "B27002_028E": "75+ Male with Priv",
    "B27003_019E": "45-54 Male with Pub",
    "B27003_022E": "55-64 Male with Pub",
    "B27003_025E": "65-74 Male with Pub",
    "B27003_028E": "75+ Male with Pub",
    "B19001B_001E": "Black Householders",
    "B19001B_002E": "Black Householders less than 10K",
    "B19001B_003E": "Black Householders 10-15K",
    "B19001B_004E": "Black Householders 15-20K",
    "B19001B_005E": "Black Householders 20-25K", 
    "B19001B_006E": "Black Householders 25-30K",
    "B19001B_007E": "Black Householders 30-35K",
    "B19001B_008E": "Black Householders 35-40K",
    "B19037B_037E": "B19037B_037E",
    "B19037B_038E": "B19037B_038E",
    "B19037B_039E": "B19037B_039E",
    "B19037B_040E": "B19037B_040E",
    "B19037B_041E": "B19037B_041E",
    "B19037B_042E": "B19037B_042E",
    "B19037B_043E": "B19037B_043E",
    "B19037B_054E": "B19037B_054E",
    "B19037B_055E": "B19037B_055E",
    "B19037B_056E": "B19037B_056E",
    "B19037B_057E": "B19037B_057E",
    "B19037B_058E": "B19037B_058E",
    "B19037B_059E": "B19037B_059E",
    "B19037B_060E": "B19037B_060E",
})

var_string = ",".join(var_dict.keys())

url = f"https://api.census.gov/data/{year}/acs/acs5/?get={var_string}&for=county:*&in=state:*&key={key}"

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors (bad key, unknown variable, ...)
# here instead of as a confusing JSON/DataFrame failure further down.
response = requests.get(url, timeout=60)
response.raise_for_status()

df = json_to_dataframe(response)
df = df.rename(columns=var_dict)


# Get "FIPS" code for state/county 
df["code"] = df["state"] + df["county"]
df = df.set_index("code")

# Split county and state names at the first comma only, so a name that
# happened to contain a further comma would still yield exactly two parts.
temp = df["Full Name"].str.split(",", n=1, expand=True)
df["County Name"] = temp[0]
df["State Name"] = temp[1].str.strip()

In [8]:
# Process data
# Convert every census variable (plus the state/county codes) to integers.
# Columns are selected by name rather than by position so this cell can be
# re-run after the derived columns below have been appended to the frame.
numeric_columns = [label for code, label in var_dict.items() if code != "NAME"]
numeric_columns += ["state", "county"]
df = column_to_int(df, *numeric_columns)

# Convert to int64 to avoid overflow error
df["Total Pop"] = df["Total Pop"].astype(np.int64)
df["Per capita income"] = df["Per capita income"].astype(np.int64)

df["Black Men Over 44"] = df["Black Men 45-54"] + df["Black Men 55-64"] + df["Black Men Over 64"]
df["Percent Black Men Over 44"] = (df["Black Men Over 44"] / df["Total Pop"]) * 100
df["Percent Black"] = (df["Total Black"] / df["Total Pop"]) * 100
df["Percent Partial Black"] = (df["Partial Black"] / df["Total Pop"]) * 100

# County total income, used later for population-weighted per capita figures.
df["Total income"] = (df["Total Pop"] * df["Per capita income"])

df["44+ Male Priv HI Total"] = df[["45-54 Male Priv HI Total", "55-64 Male Priv HI Total", "65-74 Male Priv HI Total", "75+ Male Priv HI Total"]].sum(axis=1)
df["44+ Male with Priv"] = df[["45-54 Male with Priv", "55-64 Male with Priv", "65-74 Male with Priv", "75+ Male with Priv"]].sum(axis=1)
df["44+ Male with Pub"] = df[["45-54 Male with Pub", "55-64 Male with Pub", "65-74 Male with Pub", "75+ Male with Pub",]].sum(axis=1)

# Despite the "Percent" prefix these two columns hold fractions (0-1);
# they are multiplied by 100 where they are reported.
# Both use the B27002 totals as the denominator.
df["Percent 44+ Men with Priv"] = df["44+ Male with Priv"] / df["44+ Male Priv HI Total"]
df["Percent 44+ Men with Pub"] = df["44+ Male with Pub"] / df["44+ Male Priv HI Total"]

# Householder income brackets, all ages.
brackets_under_30k = [
    "Black Householders less than 10K",
    "Black Householders 10-15K",
    "Black Householders 15-20K",
    "Black Householders 20-25K",
    "Black Householders 25-30K",
]
brackets_under_40k = brackets_under_30k + [
    "Black Householders 30-35K",
    "Black Householders 35-40K",
]
df["Black Householders under 30K"] = df[brackets_under_30k].sum(axis=1)
df["Black Householders under 40K"] = df[brackets_under_40k].sum(axis=1)

# Householder income brackets for householders over 44.
# NOTE(review): codes 037-043 and 054-060 are presumably the same income
# brackets for two older age groups -- verify against the B19037B table shell.
older_under_30k = [f"B19037B_{n:03d}E" for n in (*range(37, 42), *range(54, 59))]
older_under_40k = [f"B19037B_{n:03d}E" for n in (*range(37, 44), *range(54, 61))]
df["Black Householders over 44 under 30K"] = df[older_under_30k].sum(axis=1)
df["Black Householders over 44 under 40K"] = df[older_under_40k].sum(axis=1)

In [9]:
# Cutoff (in percent) on "Percent Black": counties strictly above it go to
# black_df, counties at or below it go to other_df.
thresh = 30
black_df = df.loc[df["Percent Black"].gt(thresh)]
other_df = df.loc[df["Percent Black"].le(thresh)]

## Population Distribution

In [130]:
# Map "Percent Black" per county with fixed legend bins at 5, 10, 15, 20 and 25.
county_map(df, "Percent Black", [5, 10, 15, 20, 25])

In [95]:
# Map "Percent Black Men Over 44" per county with fixed legend bins at 1 through 5.
county_map(df, "Percent Black Men Over 44", [1, 2, 3, 4, 5])

In [96]:
# Map the absolute "Total Black" count per county with fixed legend bins.
county_map(df, "Total Black", [1000, 5000, 10000, 15000, 20000])

In [10]:
# Count, per state, the counties above the ``thresh`` cutoff. black_df is
# reused so this table cannot drift from the split used in later sections.
print("Black county count by state")
black_df["State Name"].value_counts()

Black county count by state


Georgia                 67
Mississippi             56
Louisiana               34
North Carolina          32
Virginia                31
Alabama                 27
South Carolina          27
Arkansas                18
Puerto Rico              8
Tennessee                5
Florida                  5
Maryland                 4
New York                 2
Illinois                 2
District of Columbia     1
Texas                    1
Pennsylvania             1
Missouri                 1
New Jersey               1
Michigan                 1
Name: State Name, dtype: int64

In [11]:
# Share of the total "Total Black" count that falls in the eight states
# leading the county count above.
interest_states = [
    "Georgia", "Mississippi", "Louisiana", "North Carolina",
    "Virginia", "South Carolina", "Alabama", "Arkansas",
]
in_interest_states = df["State Name"].isin(interest_states)
interest_black_total = df.loc[in_interest_states, "Total Black"].astype(int).sum()
percent_black_in_interest_states = round(interest_black_total / df["Total Black"].sum() * 100, 2)
print(f"The top eight states above contain {percent_black_in_interest_states}% of the total U.S. Black population.")

The top eight states above contain 30.86% of the total U.S. Black population.


## Income

In [12]:
# Per capita income of each county group: summed county income divided by
# summed county population, rounded to a whole number.
black_per_cap, other_per_cap = (
    round(group["Total income"].sum() / group["Total Pop"].sum())
    for group in (black_df, other_df)
)
print(f"Black county per capita income: {black_per_cap}")
print(f"Other county per capita income: {other_per_cap}")

Black county per capita income: 29843
Other county per capita income: 34307


## Transportation

In [13]:
# Share of each county group with no vehicle access, in percent.
# NOTE(review): the denominator is "Total Pop" although the message speaks of
# workers -- confirm whether a worker total should be used instead.
black_no_vehicle = round(black_df["No vehicle"].sum() / black_df["Total Pop"].sum() * 100, 3)
other_no_vehicle = round(other_df["No vehicle"].sum() / other_df["Total Pop"].sum() * 100, 3)

print(f"Black county workers with no vehicle access: {black_no_vehicle}%")
print(f"Other county workers with no vehicle access: {other_no_vehicle}%")

Black county workers with no vehichle access: 4.859%
Other county workers with no vehichle access: 1.682%


In [14]:
# Share of males in each county group with no vehicle access, in percent.
# NOTE(review): the denominator is "Total Male" although the message speaks of
# male workers -- confirm whether a male worker total should be used instead.
black_men_no_vehicle = round(black_df["Men no vehicle"].sum() / black_df["Total Male"].sum() * 100, 3)
other_men_no_vehicle = round(other_df["Men no vehicle"].sum() / other_df["Total Male"].sum() * 100, 3)

print(f"Black county MALE workers with no vehicle access: {black_men_no_vehicle}%")
print(f"Other county MALE workers with no vehicle access: {other_men_no_vehicle}%")

Black county MALE workers with no vehichle access: 4.751%
Other county MALE workers with no vehichle access: 1.787%


In [15]:
# Public transit use per county group, in percent of "Total Pop".
# The census website claims the overall figure should be 5%, which does not
# match this finding.
# NOTE(review): possibly because the denominator here is the whole population
# rather than workers -- confirm.
black_public_trans, other_public_trans = (
    round(group["Public trans"].sum() / group["Total Pop"].sum() * 100, 3)
    for group in (black_df, other_df)
)

print(f"Black county taking public transit to work: {black_public_trans}%")
print(f"Other county taking public transit to work: {other_public_trans}%")

Black county taking public transit to work: 5.823%
Other county taking public transit to work: 1.974%


## Health Insurance

In [16]:
# Unweighted mean of the county-level coverage fractions, scaled to percent
# and rounded to three decimals.
black_older_men_with_priv, other_older_men_with_priv = (
    round(group["Percent 44+ Men with Priv"].mean() * 100, 3)
    for group in (black_df, other_df)
)
black_older_men_with_pub, other_older_men_with_pub = (
    round(group["Percent 44+ Men with Pub"].mean() * 100, 3)
    for group in (black_df, other_df)
)

print(f"Black county men over 44 with private health insurance: {black_older_men_with_priv}%")
print(f"Other county men over 44 with private health insurance: {other_older_men_with_priv}%")
print(f"Black county men over 44 with public health insurance: {black_older_men_with_pub}%")
print(f"Other county men over 44 with public health insurance: {other_older_men_with_pub}%")

Black county men over 44 with private health insurance: 61.205%
Other county men over 44 with private health insurance: 68.542%
Black county men over 44 with public health insurance: 54.152%
Other county men over 44 with public health insurance: 51.039%


## Householder Incomes

In [17]:
# Totals across all counties for each derived householder column.
householder_reports = [
    ("Black householders making under 30K", "Black Householders under 30K"),
    ("Black householders making under 40K", "Black Householders under 40K"),
    ("Black householders older than 44 making under 30K", "Black Householders over 44 under 30K"),
    ("Black householders older than 44 making under 40K", "Black Householders over 44 under 40K"),
]
for label, column in householder_reports:
    print(f"{label}: {df[column].sum()}")

Black householders making under 30K: 5649512
Black householders making under 40K: 7265827
Black householders older than 44 making under 30K: 3406973
Black householders older than 44 making under 40K: 4285536
