# US Demographics Data

## Imports

In [None]:
import pandas as pd
import seaborn as sns
import ipyleaflet as ipyl
import geopandas as gpd
import matplotlib
import ipywidgets as widgets
from ipywidgets import HTML
from matplotlib import pyplot as plt
import numpy as np
from mpl_toolkits.axes_grid1 import make_axes_locatable
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from shapely.geometry import Polygon
from matplotlib.colors import LogNorm
from shapely.geometry import mapping, MultiPolygon, Polygon
from branca.colormap import linear, LinearColormap
import folium

## Load Demographics Data

County population estimates, taken from the Census data hosted by the National Bureau of Economic Research (NBER):

https://data.nber.org/census/popest/countypopmonthasrh.csv

In [None]:
# Read the demographics CSV into a DataFrame. The path is relative, so the file
# must sit in the directory the notebook is run from.
data = pd.read_csv('countypopmonthasrh.csv')

## Load County Shapes

County boundaries, taken from the 2017 US Census Bureau TIGER county shapefile:

https://www2.census.gov/geo/tiger/TIGER2017/COUNTY/tl_2017_us_county.zip

In [None]:
# Read the county shapes and discard rows that have no GEOID.
county_shapes = gpd.read_file('tl_2017_us_county')
county_shapes = county_shapes.dropna(subset=['GEOID'])
# Cast GEOID to a 64-bit integer; it is the join key against the demographics
# table's `county` column in the merge step.
county_shapes.GEOID = county_shapes.GEOID.astype(np.int64)
# Simplify the outlines with a tolerance of 0.02 (in the units of the layer's
# coordinates) to reduce the number of vertices.
simplified_outlines = county_shapes.geometry.simplify(.02)
county_shapes.geometry = simplified_outlines

In [None]:
# Attach each county's outline to its demographic rows. This is an inner join
# (the merge default), so rows without a matching GEOID are dropped.
data = gpd.GeoDataFrame(
    pd.merge(
        data,
        county_shapes[['GEOID', 'geometry']],
        left_on='county',
        right_on='GEOID',
    )
)
# Carry the coordinate reference system of the shapes over to the merged frame.
data.crs = county_shapes.crs

## Preparing Data for 2016

In [None]:
# Keep only the 2016 rows and collapse them to one row per county.
data_2016 = data[data.year == 2016]
grouped = data_2016.groupby('county')
# numeric_only=True restricts the sum to numeric columns. The frame also holds
# text (stname, ctyname) and geometry columns, which recent pandas versions no
# longer drop silently from GroupBy.sum().
data_2016_agg = gpd.GeoDataFrame(grouped.sum(numeric_only=True))
# Restore the non-numeric columns by taking each group's first entry
# (presumably constant within a county -- verify against the data).
data_2016_agg['stname'] = grouped.stname.first()
data_2016_agg['ctyname'] = grouped.ctyname.first()
data_2016_agg['geometry'] = grouped.geometry.first()
# Turn the `county` index back into an ordinary column.
data_2016_agg.reset_index(inplace=True)
data_2016_agg.crs = data.crs

# Share of women in the total population, in percent, rounded to two decimals.
data_2016_agg['female_percentage'] = np.round(data_2016_agg.tot_female / data_2016_agg.tot_pop * 100, 2)

## Plotting Function

In [None]:
def bbox_to_polyon(min_x, min_y, max_x, max_y):
    """Return the rectangle spanned by the given bounds as a ``Polygon``.

    The corners are listed in the order ``(min_x, min_y)``, ``(max_x, min_y)``,
    ``(max_x, max_y)``, ``(min_x, max_y)``.
    """
    corners = [
        (min_x, min_y),
        (max_x, min_y),
        (max_x, max_y),
        (min_x, max_y),
    ]
    return Polygon(corners)

def plot_geo_df(geo_df, value_col, cmap='RdBu', vmin=0, vmax=1, ax=None, cax=None, legend=False,
               plot_kwargs=None):
    """Draw ``geo_df`` coloured by ``value_col`` with faint outlines on top.

    Parameters
    ----------
    geo_df
        Object exposing ``.plot(...)`` and ``.boundary.plot(...)``; the callers
        in this notebook pass a GeoDataFrame.
    value_col
        Column whose values determine the fill colour.
    cmap, vmin, vmax
        Colormap and colour limits forwarded to ``geo_df.plot``.
    ax
        Axes to draw on; a new figure and axes are created when ``None``.
    cax
        Axes for the legend. When ``legend`` is true and ``cax`` is ``None``,
        a narrow axes is appended to the right of ``ax``.
    legend
        Whether ``geo_df.plot`` should draw a legend.
    plot_kwargs
        Optional mapping of extra keyword arguments for ``geo_df.plot``.

    Returns
    -------
    None
    """
    # ``None`` replaces the former ``{}`` default so that no mutable object is
    # shared between calls.
    extra_kwargs = {} if plot_kwargs is None else plot_kwargs

    if ax is None:
        _, ax = plt.subplots()

    if legend and cax is None:
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="2%", pad=0.1)

    geo_df.plot(value_col, cmap=cmap, vmin=vmin, vmax=vmax, ax=ax, legend=legend, cax=cax, **extra_kwargs)
    # Thin, semi-transparent outlines drawn over the filled shapes.
    geo_df.boundary.plot(ax=ax, color='black', linewidth=.25, alpha=.25)
    ax.axis('off')

In [None]:
def plot_us_data(geo_df, column, title, center=50, cmap='RdBu_r', logarithmic = False):
    """Plot ``column`` as a static map with a main panel and an inset panel.

    Rows whose geometry lies within the box (-126, 23, -66, 50) go on the main
    axes together with the colour bar. Rows within the box (-179, 50, -125, 72)
    go on an inset axes in the lower-left corner. Both panels share the same
    colour limits.

    Parameters
    ----------
    geo_df
        GeoDataFrame holding ``column`` and a geometry column.
    column
        Name of the column to colour by.
    title
        Title of the main axes.
    center
        Value the colour range is centred on; the limits are placed
        symmetrically so that the furthest value still fits. ``None`` uses the
        column's minimum and maximum instead.
    cmap
        Colormap passed through to the plotting helper.
    logarithmic
        When true, a ``LogNorm`` over the colour limits is passed along.
    """
    fig, main_ax = plt.subplots(1, figsize=(20,20), dpi=200)
    background = fig.patch
    background.set_facecolor('#f5f5f5')
    background.set_alpha(1)
    main_ax.set_title(title)

    # Slim colour-bar axes to the right of the map, and an inset for the second region.
    colorbar_ax = make_axes_locatable(main_ax).append_axes("right", size="1.5%", pad=0.1)
    inset_ax = inset_axes(main_ax, width="30%", height="30%", loc=3)

    alaska_box = bbox_to_polyon(-179, 50, -125, 72)
    alaska = geo_df[geo_df.geometry.within(alaska_box)]
    mainland_box = bbox_to_polyon(-126, 23, -66,  50)
    continental_us = geo_df[geo_df.geometry.within(mainland_box)]

    if center is not None:
        half_span = np.max(np.abs(geo_df[column] - center))
        vmin, vmax = center-half_span, center+half_span
    else:
        vmin, vmax = geo_df[column].min(), geo_df[column].max()

    if logarithmic:
        plot_kwargs = dict(norm=LogNorm(vmin=vmin, vmax=vmax))
    else:
        plot_kwargs = {}

    # Settings shared by both panels so their colours are comparable.
    shared = dict(vmin=vmin, vmax=vmax, cmap=cmap, plot_kwargs=plot_kwargs)
    plot_geo_df(continental_us, column, ax=main_ax, legend=True, cax=colorbar_ax, **shared)
    plot_geo_df(alaska, column, ax=inset_ax, **shared)

## Percentage of female population

In [None]:
# Female share per county for 2016, using the plotting defaults
# (colour range centred on 50, 'RdBu_r' colormap).
plot_us_data(
    geo_df=data_2016_agg,
    column='female_percentage',
    title='Percentage of female population in US counties',
)

## Population

In [None]:
# Total population per county for 2016 on a logarithmic colour scale;
# center=None makes the colour limits span the column's minimum and maximum.
plot_us_data(
    geo_df=data_2016_agg,
    column='tot_pop',
    title='Population per US county',
    center=None,
    cmap='viridis',
    logarithmic=True,
)

## Interactive Map

In [None]:
def get_choropleth_layer(geo_df: gpd.GeoDataFrame, choro_col: str, name_col: str, 
                         colormap, name: str = '', vmin=0, vmax=1, id_col: str='county'):
    """Build a folium ``GeoJson`` layer whose features are filled by ``choro_col``.

    Parameters
    ----------
    geo_df
        GeoDataFrame to render; converted to GeoJSON once.
    choro_col
        Property whose value is passed to ``colormap`` to get the fill colour.
    name_col
        Property shown next to ``choro_col`` in the hover tooltip.
    colormap
        Callable mapping a ``choro_col`` value to a colour.
    name
        Layer name handed to ``folium.GeoJson``.
    vmin, vmax, id_col
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    The configured ``folium.GeoJson`` layer (not yet added to any map).
    """
    def get_style(feature):
        # 'opacity' is the Leaflet stroke-opacity option; the former
        # 'lineOpacity' key is not a Leaflet path option and was ignored.
        return {'fillColor': colormap(feature['properties'][choro_col]), 'color': 'grey', 'weight': .1,
                'opacity': .4, 'fillOpacity': .8}

    def get_hover_style(feature):
        return {'fillColor': colormap(feature['properties'][choro_col]),
                'opacity': 1, 'fillOpacity': 1}

    # Tooltip labels are the column names with underscores replaced by spaces.
    tooltip = folium.features.GeoJsonTooltip(
        fields=[name_col, choro_col],
        aliases=[name_col.replace('_', ' '), choro_col.replace('_', ' ')])

    # Convert once; the previous version converted twice and discarded the
    # first result (along with an unused quantile computation).
    geo_json_data = ipyl.GeoData(geo_dataframe=geo_df).data

    return folium.GeoJson(geo_json_data, highlight_function=get_hover_style,
                          name=name, smooth_factor=0, tooltip=tooltip, style_function=get_style)

def plot_us_data_map(df, choro_column = 'female_percentage', layers_column = 'year', m=None,
                    vmin = 0, vmax=1):
    """Build an interactive folium map with one choropleth layer per ``layers_column`` value.

    For every distinct value of ``layers_column`` the matching rows are summed
    per ``county``, ``female_percentage`` is recomputed from ``tot_female`` and
    ``tot_pop``, and the result is added to the map as its own layer.

    Parameters
    ----------
    df
        GeoDataFrame with ``county``, ``ctyname``, ``tot_female``, ``tot_pop``,
        a geometry column and ``layers_column``.
    choro_column
        Column used for the fill colour. Only ``female_percentage`` is derived
        here; any other choice must already exist after the per-county sum.
    layers_column
        Column whose distinct values each become a separate layer.
    m
        Existing ``folium.Map`` to add to; a new one is created when ``None``.
    vmin, vmax
        Ends of the colour scale; its white midpoint sits halfway between them.

    Returns
    -------
    The map ``m`` with all layers, the colour scale and a layer control added.
    """
    if m is None:
        m = folium.Map(location=[48, -102], zoom_start=3, control_scale=True)

    colormap = LinearColormap(colors=['darkblue', 'white', 'darkred'], index=[vmin, (vmax+vmin)/2, vmax],
                              vmin=vmin,vmax=vmax)

    # Only the layer matching the first row's value is shown initially. This is
    # looked up once instead of on every iteration; the guard keeps an empty
    # frame from raising, as before.
    layer_values = df[layers_column]
    initially_shown = layer_values.values[0] if len(layer_values) else None

    for layer_val in layer_values.unique():
        layer_df = df[layer_values == layer_val]

        grouped = layer_df.groupby('county')
        # numeric_only=True keeps the sum away from the text and geometry
        # columns, which recent pandas versions no longer drop silently.
        layer_df_agg = gpd.GeoDataFrame(grouped.sum(numeric_only=True))
        layer_df_agg['ctyname'] = grouped.ctyname.first()
        layer_df_agg['geometry'] = grouped.geometry.first()

        layer_df_agg['female_percentage'] = np.round(layer_df_agg.tot_female / layer_df_agg.tot_pop * 100, 2)

        layer_df_agg.crs = df.crs
        layer_df_agg.reset_index(inplace=True)

        layer = get_choropleth_layer(layer_df_agg, choro_col=choro_column, name_col='ctyname',
                                     name='{} {}'.format(layers_column, layer_val).replace('_', ' '),
                                     colormap=colormap, vmin=vmin, vmax=vmax)
        layer.show = layer_val == initially_shown
        layer.add_to(m)

    m.add_child(colormap)
    folium.LayerControl().add_to(m)

    return m

In [None]:
# Female share per county for 2016, used to size the colour scale.
# numeric_only=True keeps the sum away from the text and geometry columns,
# which recent pandas versions no longer drop silently.
data_agg_year = data_2016.groupby('county').sum(numeric_only=True)
values = data_agg_year.tot_female / data_agg_year.tot_pop * 100
# String aggregation names replace the np.min / np.max callables, which newer
# pandas deprecates. vmin / vmax are not used for the map below; they are kept
# as notebook variables.
vmin, vmax = values.agg(['min', 'max']).values
# Largest deviation from 50 %, so the colour scale is symmetric around parity.
rng = np.max(np.abs(values-50))
m_2016 = plot_us_data_map(data_2016, vmin=50-rng, vmax=50+rng)

In [None]:
# Write the 2016 map to an HTML file (relative path).
m_2016.save('gender_balance_2016.html')

In [None]:
# Female share per county and year, used to size one colour scale for all years.
# numeric_only=True keeps the sum away from the text and geometry columns,
# which recent pandas versions no longer drop silently.
data_agg_year = data.groupby(['county', 'year']).sum(numeric_only=True)
values = data_agg_year.tot_female / data_agg_year.tot_pop * 100
# String aggregation names replace the np.min / np.max callables, which newer
# pandas deprecates. vmin / vmax are not used for the map below; they are kept
# as notebook variables.
vmin, vmax = values.agg(['min', 'max']).values
# Largest deviation from 50 % across all years, so the scale is symmetric around parity.
rng = np.max(np.abs(values-50))
m_years = plot_us_data_map(data, vmin=50-rng, vmax=50+rng)

In [None]:
# Write the multi-year map (one layer per year) to an HTML file (relative path).
m_years.save('gender_balance.html')