In [1]:
import pandas as pd
import numpy as np

In [None]:
# Load the county-level table and the ZIP-code table from the local data directory.
fips_data = pd.read_csv('./data/counties.csv')
zip_data = pd.read_csv('./data/uszips.csv')
# NOTE(review): with default dtypes a purely numeric `zip` column is parsed as an
# integer, which would drop leading zeros -- confirm consumers expect that key type.

# Lookup tables derived from zip_data. Each one is built directly from the
# relevant columns instead of running a separate DataFrame.iterrows() loop per
# table. When a key occurs on several rows, the last row wins.

# ZIP code -> county FIPS code
zip2fips = dict(zip(zip_data['zip'], zip_data['county_fips']))

# county FIPS code -> "<county name>, <state name>"
fips2name = {
    county_fips: '%s, %s' % (county_name, state_name)
    for county_fips, county_name, state_name in zip(
        zip_data['county_fips'], zip_data['county_name'], zip_data['state_name']
    )
}

# county FIPS code -> state name (formatted through '%s', so always a str)
fips2state = {
    county_fips: '%s' % (state_name)
    for county_fips, state_name in zip(zip_data['county_fips'], zip_data['state_name'])
}

In [3]:
# Print the column index of fips_data to see which fields the county table provides.
print(fips_data.columns)

Index(['FIPS', 'State', 'Area_Name', 'Rural-urban_Continuum Code_2013',
       'Urban_Influence_Code_2013', 'Economic_typology_2015',
       'POP_ESTIMATE_2018', 'N_POP_CHG_2018', 'Births_2018', 'Deaths_2018',
       ...
       'Total number of UCR (Uniform Crime Report) Index crimes excluding arson.',
       'Total number of UCR (Uniform Crime Report) index crimes reported including arson',
       'MURDER', 'RAPE', 'ROBBERY', 'Number of AGGRAVATED ASSAULTS', 'BURGLRY',
       'LARCENY', 'MOTOR VEHICLE THEFTS', 'ARSON'],
      dtype='object', length=347)


In [14]:
# Collect per-county demographic counts from fips_data, keyed by FIPS code.
# race_cols lists the race/sex count columns; it is not referenced again in this cell.
race_cols = ['WA_MALE', 'WA_FEMALE', 'BA_MALE', 'BA_FEMALE', 'IA_MALE', 'IA_FEMALE', 'AA_MALE', 'AA_FEMALE', 'NA_MALE', 'NA_FEMALE']

fips2demo = {}
for idx, row in fips_data.iterrows():
    fips = row['FIPS']
    # Counties whose female total is missing get no entry at all.
    if not pd.isna(row['TOT_FEMALE']):
        fips2demo[fips] = {
            # Race counts: male + female columns of each group.
            'white': row['WA_MALE'] + row['WA_FEMALE'],
            'black': row['BA_MALE'] + row['BA_FEMALE'],
            'indian': row['IA_MALE'] + row['IA_FEMALE'],
            'asian': row['AA_MALE'] + row['AA_FEMALE'],
            'native': row['NA_MALE'] + row['NA_FEMALE'],
            # Total population is the sum of the two sex totals.
            'population': row['TOT_MALE'] + row['TOT_FEMALE'],
            'male': row['TOT_MALE'],
            'female': row['TOT_FEMALE'],
            # Ethnicity counts: non-Hispanic and Hispanic, male + female.
            'nonhis': row['NH_MALE'] + row['NH_FEMALE'],
            'his': row['H_MALE'] + row['H_FEMALE'],
        }
  


In [15]:
# Turn the per-county counts in fips2demo into shares of the county population
# and compute, for every gender / race / ethnicity category, the mean share
# across counties.
#
# This cell rewrites fips2demo in place and appends a 'national_average' entry,
# so it must only run once per freshly built fips2demo. The guard below makes a
# second run fail with a clear message instead of a KeyError on the
# 'national_average' entry (or, worse, dividing the shares a second time).
if 'national_average' in fips2demo:
    raise RuntimeError(
        "fips2demo already contains 'national_average'; rebuild fips2demo "
        "from fips_data before running this cell again"
    )

total_fips = len(fips2demo)

gender_ratio_sum = [0, 0]  # [male, female]
race_ratio_sum = [0, 0, 0, 0, 0]  # [white, black, indian, asian, native]
ethnicity_ratio_sum = [0, 0]  # [nonhis, his]

# (group name, keys of fips2demo[fips] in output order, running sum of shares)
ratio_groups = [
    ('gender', ['male', 'female'], gender_ratio_sum),
    ('race', ['white', 'black', 'indian', 'asian', 'native'], race_ratio_sum),
    ('ethnicity', ['nonhis', 'his'], ethnicity_ratio_sum),
]

# Pass 1: accumulate every county's share without modifying fips2demo, so an
# exception raised here leaves the counts untouched.
for fips in fips2demo:
    population = fips2demo[fips]['population']
    for group, keys, sums in ratio_groups:
        for pos, key in enumerate(keys):
            sums[pos] += fips2demo[fips][key] / population

# Each county carries equal weight: this is the mean of the county-level shares,
# not a population-weighted figure.
national_average = {
    group: [share_sum / total_fips for share_sum in sums]
    for group, keys, sums in ratio_groups
}

# Pass 2: replace each count with its fraction of the county population.
# 'population' itself is left as a count.
for fips in fips2demo:
    population = fips2demo[fips]['population']
    for group, keys, sums in ratio_groups:
        for key in keys:
            fips2demo[fips][key] /= population

print(national_average)
# Stored last so the loops above only ever see real county entries.
fips2demo['national_average'] = national_average

{'gender': [0.5011467794435968, 0.4988532205564033], 'race': [0.8446502328745275, 0.09343970569296889, 0.02353693830723962, 0.015703300379485082, 0.0014150119541177938], 'ethnicity': [0.9036306149630053, 0.09636938503699233]}


In [None]:
import pickle

# Persist each lookup table as a pickle file in the data directory.
# Every file is opened in a `with` block so the handle is closed (and the data
# flushed) as soon as its dump finishes, instead of being left open.
pickle_targets = [
    ('./data/fips2demo.pkl', fips2demo),
    ('./data/fips2name.pkl', fips2name),
    ('./data/fips2state.pkl', fips2state),
    ('./data/zip2fips.pkl', zip2fips),
]
for pickle_path, table in pickle_targets:
    with open(pickle_path, 'wb') as pickle_file:
        pickle.dump(table, pickle_file)