# Clean Census Data on Economic Characteristics

## Purpose:
* Clean census data, simplify to median and mean income (plus 2019 population) per FIPS code

## Dependencies

In [1]:
import pandas as pd

In [2]:
## Read in the raw DP03 (economic characteristics) extract
acs_dp03_path = "data/raw/ACSDP5Y2018.DP03_data_with_overlays_2020-12-01T155502.csv"
inc0 = pd.read_csv(acs_dp03_path)

inc0.head()


Unnamed: 0,GEO_ID,NAME,DP03_0001E,DP03_0001M,DP03_0001PE,DP03_0001PM,DP03_0002E,DP03_0002M,DP03_0002PE,DP03_0002PM,...,DP03_0135PE,DP03_0135PM,DP03_0136E,DP03_0136M,DP03_0136PE,DP03_0136PM,DP03_0137E,DP03_0137M,DP03_0137PE,DP03_0137PM
0,id,Geographic Area Name,Estimate!!EMPLOYMENT STATUS!!Population 16 yea...,Margin of Error!!EMPLOYMENT STATUS!!Population...,Percent Estimate!!EMPLOYMENT STATUS!!Populatio...,Percent Margin of Error!!EMPLOYMENT STATUS!!Po...,Estimate!!EMPLOYMENT STATUS!!Population 16 yea...,Margin of Error!!EMPLOYMENT STATUS!!Population...,Percent Estimate!!EMPLOYMENT STATUS!!Populatio...,Percent Margin of Error!!EMPLOYMENT STATUS!!Po...,...,Percent Estimate!!PERCENTAGE OF FAMILIES AND P...,Percent Margin of Error!!PERCENTAGE OF FAMILIE...,Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WH...,Margin of Error!!PERCENTAGE OF FAMILIES AND PE...,Percent Estimate!!PERCENTAGE OF FAMILIES AND P...,Percent Margin of Error!!PERCENTAGE OF FAMILIE...,Estimate!!PERCENTAGE OF FAMILIES AND PEOPLE WH...,Margin of Error!!PERCENTAGE OF FAMILIES AND PE...,Percent Estimate!!PERCENTAGE OF FAMILIES AND P...,Percent Margin of Error!!PERCENTAGE OF FAMILIE...
1,0500000US51001,"Accomack County, Virginia",26638,119,26638,(X),14503,707,54.4,2.6,...,10.5,2.6,(X),(X),16.1,2.8,(X),(X),34.0,6.0
2,0500000US51003,"Albemarle County, Virginia",87026,217,87026,(X),53848,870,61.9,1.0,...,4.8,1.1,(X),(X),6.9,1.5,(X),(X),20.1,2.1
3,0500000US51005,"Alleghany County, Virginia",12824,86,12824,(X),6421,249,50.1,1.9,...,11.1,2.9,(X),(X),17.1,3.3,(X),(X),30.7,4.7
4,0500000US51007,"Amelia County, Virginia",10402,150,10402,(X),6419,331,61.7,3.2,...,13.1,5.7,(X),(X),11.2,3.8,(X),(X),26.3,8.3


In [3]:
# DP03_0062E: Median Income
# DP03_0063E: Mean Income
income_cols = ['GEO_ID', 'NAME', 'DP03_0062E', 'DP03_0063E']

# Row 0 of the raw file holds the descriptive column labels, not data, so skip it
inc1 = inc0.iloc[1:][income_cols].copy()

inc1.head()

Unnamed: 0,GEO_ID,NAME,DP03_0062E,DP03_0063E
1,0500000US51001,"Accomack County, Virginia",43210,57996
2,0500000US51003,"Albemarle County, Virginia",75394,107948
3,0500000US51005,"Alleghany County, Virginia",47794,60513
4,0500000US51007,"Amelia County, Virginia",58526,74185
5,0500000US51009,"Amherst County, Virginia",49170,61676


In [4]:
## Convert GEO_ID to FIPS
# The FIPS code is the last five characters of GEO_ID (e.g. 0500000US51001 -> 51001)
geo_id = inc1['GEO_ID'].str.strip()
inc1['FIPS'] = geo_id.str.slice(start=-5)

inc1.head()

Unnamed: 0,GEO_ID,NAME,DP03_0062E,DP03_0063E,FIPS
1,0500000US51001,"Accomack County, Virginia",43210,57996,51001
2,0500000US51003,"Albemarle County, Virginia",75394,107948,51003
3,0500000US51005,"Alleghany County, Virginia",47794,60513,51005
4,0500000US51007,"Amelia County, Virginia",58526,74185,51007
5,0500000US51009,"Amherst County, Virginia",49170,61676,51009


In [5]:
## Standardize county names
# Strip the literal ", Virginia" text so names line up with the population
# table's Locality values. regex=False pins literal matching: the default of
# `regex` changed between pandas 1.x and 2.x, so relying on it is version-dependent.
inc1['Locality'] = inc1['NAME'].str.replace(", Virginia", "", regex=False)

inc1.head()

Unnamed: 0,GEO_ID,NAME,DP03_0062E,DP03_0063E,FIPS,Locality
1,0500000US51001,"Accomack County, Virginia",43210,57996,51001,Accomack County
2,0500000US51003,"Albemarle County, Virginia",75394,107948,51003,Albemarle County
3,0500000US51005,"Alleghany County, Virginia",47794,60513,51005,Alleghany County
4,0500000US51007,"Amelia County, Virginia",58526,74185,51007,Amelia County
5,0500000US51009,"Amherst County, Virginia",49170,61676,51009,Amherst County


In [6]:
## Descriptive statistics of median income
# The column is read as text (the raw file's first row is labels), so convert first
median_income = pd.to_numeric(inc1['DP03_0062E'])
median_income.describe()


count       133.000000
mean      58501.293233
std       21136.348958
min       28071.000000
25%       43532.000000
50%       53797.000000
75%       68438.000000
max      136268.000000
Name: DP03_0062E, dtype: float64

In [7]:
## Descriptive statistics of mean income
# The column is read as text (the raw file's first row is labels), so convert first
mean_income = pd.to_numeric(inc1['DP03_0063E'])
mean_income.describe()

count       133.000000
mean      75720.714286
std       25096.787058
min       43968.000000
25%       57996.000000
50%       67559.000000
75%       88006.000000
max      181724.000000
Name: DP03_0063E, dtype: float64

In [8]:
## Read in population estimates by locality (Census base and annual 2010-2019 columns)
pop_estimates_path = "data/raw/co-est2019-annres-51.csv"
pop0 = pd.read_csv(pop_estimates_path)

pop0.head()

Unnamed: 0,Locality,Census,Estimates Base,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,Virginia,8001024,8001049,8023699,8101155,8185080,8252427,8310993,8361808,8410106,8463587,8501286,8535519
1,".Accomack County, Virginia",33164,33162,33148,33225,33268,32969,32971,32914,32871,32685,32581,32316
2,".Albemarle County, Virginia",98970,98998,99204,100213,101464,102270,103732,105117,106431,107768,108377,109330
3,".Alleghany County, Virginia",16250,16264,16203,16163,16034,15923,15605,15425,15387,15128,14986,14860
4,".Amelia County, Virginia",12690,12695,12746,12752,12745,12656,12713,12784,12801,12980,13057,13145


In [9]:
## Drop the leading row, which is the statewide (Virginia) total rather than a locality
pop1 = pop0.drop(index=pop0.index[0]).copy()

pop1.head()

Unnamed: 0,Locality,Census,Estimates Base,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
1,".Accomack County, Virginia",33164,33162,33148,33225,33268,32969,32971,32914,32871,32685,32581,32316
2,".Albemarle County, Virginia",98970,98998,99204,100213,101464,102270,103732,105117,106431,107768,108377,109330
3,".Alleghany County, Virginia",16250,16264,16203,16163,16034,15923,15605,15425,15387,15128,14986,14860
4,".Amelia County, Virginia",12690,12695,12746,12752,12745,12656,12713,12784,12801,12980,13057,13145
5,".Amherst County, Virginia",32353,32354,32387,32154,32630,32317,32158,31836,31887,31863,31684,31605


In [10]:
## Standardize county names
# Drop the ", Virginia" text and the "." characters (the raw names carry a
# leading period) so Locality matches the income table. regex=False makes both
# patterns literal: as a regex, "." would match every character, and the
# default of `regex` differs between pandas 1.x and 2.x.
pop1["Locality"] = (
    pop1["Locality"]
    .str.replace(", Virginia", "", regex=False)
    .str.replace(".", "", regex=False)
)

pop1

Unnamed: 0,Locality,Census,Estimates Base,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
1,Accomack County,33164,33162,33148,33225,33268,32969,32971,32914,32871,32685,32581,32316
2,Albemarle County,98970,98998,99204,100213,101464,102270,103732,105117,106431,107768,108377,109330
3,Alleghany County,16250,16264,16203,16163,16034,15923,15605,15425,15387,15128,14986,14860
4,Amelia County,12690,12695,12746,12752,12745,12656,12713,12784,12801,12980,13057,13145
5,Amherst County,32353,32354,32387,32154,32630,32317,32158,31836,31887,31863,31684,31605
...,...,...,...,...,...,...,...,...,...,...,...,...,...
129,Suffolk city,84585,84565,84813,84750,85251,85751,86882,88079,89194,90108,90975,92108
130,Virginia Beach city,437994,437903,438859,442583,445044,447706,448864,450304,450983,449896,449849,449974
131,Waynesboro city,21006,20998,20993,21103,21066,21167,21305,21542,21757,22242,22531,22630
132,Williamsburg city,14068,13700,13727,14109,14526,14617,14580,14800,14884,14994,15002,14954


In [11]:
## Keep only the locality name and its 2019 population column
pop2 = pop1.loc[:, ['Locality', '2019']].copy()

In [12]:
## Merge census population with census income
# Locality is a free-text join key, so validate that it is unique on both
# sides; a duplicated name would otherwise silently multiply rows.
census = inc1.merge(pop2, how='inner', on="Locality", validate="one_to_one")

# Keep the FIPS code, the 2019 population, and median/mean income
census = census[['FIPS','2019','DP03_0062E','DP03_0063E']]
census.head()

Unnamed: 0,FIPS,2019,DP03_0062E,DP03_0063E
0,51001,32316,43210,57996
1,51003,109330,75394,107948
2,51005,14860,47794,60513
3,51007,13145,58526,74185
4,51009,31605,49170,61676


In [13]:
## Post merge check
# The merge is an inner join on locality name, so any name mismatch silently
# drops a row. Fail loudly if the merged table does not have one row per
# income-table locality, then show the shape for reference.
assert len(census) == len(inc1), "merge changed the number of localities; check Locality names"

census.shape

(133, 4)

In [14]:
## Export data
# Write the merged table to the build directory without the row index
build_path = 'data/build/build4_census.csv'
census.to_csv(build_path, index=False)