In [42]:
## Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import datetime as dt
import plotly.express as px
from capstone_functions import *

## Configure how many columns/rows Pandas renders when displaying a DataFrame
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 100)

In [43]:
## Load the age demographics CSV into a DataFrame and show its first rows
age_df = pd.read_csv(
    filepath_or_buffer='Data/NC_UC_2019_demog/age_demog_2019.csv'
)
age_df.head(n=5)

Unnamed: 0,geoid,name,B01001001,"B01001001, Error",B01001002,"B01001002, Error",B01001003,"B01001003, Error",B01001004,"B01001004, Error",B01001005,"B01001005, Error",B01001006,"B01001006, Error",B01001007,"B01001007, Error",B01001008,"B01001008, Error",B01001009,"B01001009, Error",B01001010,"B01001010, Error",B01001011,"B01001011, Error",B01001012,...,"B01001037, Error",B01001038,"B01001038, Error",B01001039,"B01001039, Error",B01001040,"B01001040, Error",B01001041,"B01001041, Error",B01001042,"B01001042, Error",B01001043,"B01001043, Error",B01001044,"B01001044, Error",B01001045,"B01001045, Error",B01001046,"B01001046, Error",B01001047,"B01001047, Error",B01001048,"B01001048, Error",B01001049,"B01001049, Error"
0,01000US,United States,328239523,0,161588973,34808,9938937,19654,10033518,51101,10987313,52483,6361859,15524,4541794,21312,2318283,27895,2257008,29886,6439169,32364,11817829,21300,11281470,...,39187,10157911,37688,10318659,18352,10389086,14048,11043795,38535,4493986,30201,6438897,36345,3916612,27886,5319290,26937,7618630,33692,5392000,29716,3709078,23709,4074137,26417
1,04000US37,North Carolina,10488084,0,5094327,7768,308544,5127,315965,7406,346689,7366,200951,4946,158488,5142,73745,4629,77086,5038,214451,7473,352172,4854,333734,...,8038,333367,8707,355133,4280,343563,3920,367738,7319,146356,5689,199672,6611,130484,5055,175237,5063,264193,5900,181367,5242,116777,4792,117196,5060
2,05000US37179,"Union County, NC",239859,0,117578,758,6556,591,7489,967,11211,1046,6620,494,3655,738,1932,814,2816,1070,2892,529,5475,698,5731,...,1311,8883,1301,10090,362,8939,122,8046,883,2077,700,4320,959,2292,648,3007,768,4990,947,3139,598,2242,530,1846,530


In [44]:
## Collect every column whose label contains 'Error' and remove those
## columns from the frame, leaving the remaining columns untouched
error_cols = [label for label in age_df.columns if 'Error' in label]

# In-place drop keeps `age_df` bound to the same DataFrame object
age_df.drop(columns=error_cols, inplace=True)
age_df.head()

Unnamed: 0,geoid,name,B01001001,B01001002,B01001003,B01001004,B01001005,B01001006,B01001007,B01001008,B01001009,B01001010,B01001011,B01001012,B01001013,B01001014,B01001015,B01001016,B01001017,B01001018,B01001019,B01001020,B01001021,B01001022,B01001023,...,B01001025,B01001026,B01001027,B01001028,B01001029,B01001030,B01001031,B01001032,B01001033,B01001034,B01001035,B01001036,B01001037,B01001038,B01001039,B01001040,B01001041,B01001042,B01001043,B01001044,B01001045,B01001046,B01001047,B01001048,B01001049
0,01000US,United States,328239523,161588973,9938937,10033518,10987313,6361859,4541794,2318283,2257008,6439169,11817829,11281470,10892040,10028675,10079567,10075795,10440265,4168435,5882735,3538792,4652319,6529918,4367764,...,2284092,166650550,9465898,9656919,10436166,6087175,4362696,2162759,2117048,6174413,11415470,11063706,10836219,10157911,10318659,10389086,11043795,4493986,6438897,3916612,5319290,7618630,5392000,3709078,4074137
1,04000US37,North Carolina,10488084,5094327,308544,315965,346689,200951,158488,73745,77086,214451,352172,333734,315351,322114,337091,327413,332743,131308,180801,110628,145759,221189,138247,...,67098,5393757,288039,299578,339998,194208,159419,71652,63307,188887,362854,352903,341829,333367,355133,343563,367738,146356,199672,130484,175237,264193,181367,116777,117196
2,05000US37179,"Union County, NC",239859,117578,6556,7489,11211,6620,3655,1932,2816,2892,5475,5731,7105,9715,8980,9121,8451,3098,3594,1532,2719,3938,2529,...,923,122281,7135,8229,9994,6097,4356,1656,1116,4010,5749,5892,8176,8883,10090,8939,8046,2077,4320,2292,3007,4990,3139,2242,1846


In [45]:
## Load the metadata JSON as a Series (one entry per top-level key)
age_meta = pd.read_json(
    path_or_buf='Data/NC_UC_2019_demog/age_metadata.json',
    typ='series',
)
age_meta

release    {'id': 'acs2019_1yr', 'name': 'ACS 2019 1-year...
tables     {'B01001': {'columns': {'B01001001': {'indent'...
dtype: object

In [46]:
## Pull the 'name' of each B01001 column entry from the metadata,
## in the order the entries appear in the metadata
col_names = [
    column_info['name']
    for column_info in age_meta['tables']['B01001']['columns'].values()
]
col_names

['Total:',
 'Male:',
 'Under 5 years',
 '5 to 9 years',
 '10 to 14 years',
 '15 to 17 years',
 '18 and 19 years',
 '20 years',
 '21 years',
 '22 to 24 years',
 '25 to 29 years',
 '30 to 34 years',
 '35 to 39 years',
 '40 to 44 years',
 '45 to 49 years',
 '50 to 54 years',
 '55 to 59 years',
 '60 and 61 years',
 '62 to 64 years',
 '65 and 66 years',
 '67 to 69 years',
 '70 to 74 years',
 '75 to 79 years',
 '80 to 84 years',
 '85 years and over',
 'Female:',
 'Under 5 years',
 '5 to 9 years',
 '10 to 14 years',
 '15 to 17 years',
 '18 and 19 years',
 '20 years',
 '21 years',
 '22 to 24 years',
 '25 to 29 years',
 '30 to 34 years',
 '35 to 39 years',
 '40 to 44 years',
 '45 to 49 years',
 '50 to 54 years',
 '55 to 59 years',
 '60 and 61 years',
 '62 to 64 years',
 '65 and 66 years',
 '67 to 69 years',
 '70 to 74 years',
 '75 to 79 years',
 '80 to 84 years',
 '85 years and over']

In [47]:
## Relabel the columns positionally: the first two stay 'geoid' and 'name',
## the rest take the names collected from the metadata.
## NOTE(review): this relies on the metadata order matching the CSV column
## order, and the cell cannot be re-run once the two columns are dropped.
age_df.columns = ['geoid', 'name', *col_names]

# Remove the 'geoid' and 'Total:' columns in place
age_df.drop(['geoid', 'Total:'], axis='columns', inplace=True)
age_df.head()

Unnamed: 0,name,Male:,Under 5 years,5 to 9 years,10 to 14 years,15 to 17 years,18 and 19 years,20 years,21 years,22 to 24 years,25 to 29 years,30 to 34 years,35 to 39 years,40 to 44 years,45 to 49 years,50 to 54 years,55 to 59 years,60 and 61 years,62 to 64 years,65 and 66 years,67 to 69 years,70 to 74 years,75 to 79 years,80 to 84 years,85 years and over,Female:,Under 5 years.1,5 to 9 years.1,10 to 14 years.1,15 to 17 years.1,18 and 19 years.1,20 years.1,21 years.1,22 to 24 years.1,25 to 29 years.1,30 to 34 years.1,35 to 39 years.1,40 to 44 years.1,45 to 49 years.1,50 to 54 years.1,55 to 59 years.1,60 and 61 years.1,62 to 64 years.1,65 and 66 years.1,67 to 69 years.1,70 to 74 years.1,75 to 79 years.1,80 to 84 years.1,85 years and over.1
0,United States,161588973,9938937,10033518,10987313,6361859,4541794,2318283,2257008,6439169,11817829,11281470,10892040,10028675,10079567,10075795,10440265,4168435,5882735,3538792,4652319,6529918,4367764,2671396,2284092,166650550,9465898,9656919,10436166,6087175,4362696,2162759,2117048,6174413,11415470,11063706,10836219,10157911,10318659,10389086,11043795,4493986,6438897,3916612,5319290,7618630,5392000,3709078,4074137
1,North Carolina,5094327,308544,315965,346689,200951,158488,73745,77086,214451,352172,333734,315351,322114,337091,327413,332743,131308,180801,110628,145759,221189,138247,82760,67098,5393757,288039,299578,339998,194208,159419,71652,63307,188887,362854,352903,341829,333367,355133,343563,367738,146356,199672,130484,175237,264193,181367,116777,117196
2,"Union County, NC",117578,6556,7489,11211,6620,3655,1932,2816,2892,5475,5731,7105,9715,8980,9121,8451,3098,3594,1532,2719,3938,2529,1496,923,122281,7135,8229,9994,6097,4356,1656,1116,4010,5749,5892,8176,8883,10090,8939,8046,2077,4320,2292,3007,4990,3139,2242,1846


In [48]:
## Build normalised column labels: lowercase, colons removed,
## spaces replaced with underscores (this rebinds `col_names`)
col_names = [
    label.lower().replace(':', '').replace(' ', '_')
    for label in age_df.columns
]
col_names

['name',
 'male',
 'under_5_years',
 '5_to_9_years',
 '10_to_14_years',
 '15_to_17_years',
 '18_and_19_years',
 '20_years',
 '21_years',
 '22_to_24_years',
 '25_to_29_years',
 '30_to_34_years',
 '35_to_39_years',
 '40_to_44_years',
 '45_to_49_years',
 '50_to_54_years',
 '55_to_59_years',
 '60_and_61_years',
 '62_to_64_years',
 '65_and_66_years',
 '67_to_69_years',
 '70_to_74_years',
 '75_to_79_years',
 '80_to_84_years',
 '85_years_and_over',
 'female',
 'under_5_years',
 '5_to_9_years',
 '10_to_14_years',
 '15_to_17_years',
 '18_and_19_years',
 '20_years',
 '21_years',
 '22_to_24_years',
 '25_to_29_years',
 '30_to_34_years',
 '35_to_39_years',
 '40_to_44_years',
 '45_to_49_years',
 '50_to_54_years',
 '55_to_59_years',
 '60_and_61_years',
 '62_to_64_years',
 '65_and_66_years',
 '67_to_69_years',
 '70_to_74_years',
 '75_to_79_years',
 '80_to_84_years',
 '85_years_and_over']