## This notebook will combine the census blocks file with census data

In [1]:
import geopandas as gpd
import os
import getpass
import matplotlib.pyplot as plt
import re
import pandas as pd

In [11]:
# Find working directory, saves the path while appending the notebooks file name. 
# wd is now the file path to the park-equity-atx directory.
wd = os.getcwd()
wd = wd[:wd.find('notebooks')]
censusblocks = gpd.read_file(wd+"data/censusblocks2010/2010 Census Blocks.shp")
censustables = pd.read_excel(wd+"data/censustables/ACS_17_5yr_totalpop_allcounties.xlsx")

blocks = censusblocks.copy()
tables = censustables.copy()
print(blocks['GEOID10'].head)
print(tables['Id2'].head)

<bound method NDFrame.head of 0        480219505011084
1        480219505011016
2        480219505011066
3        480219505011039
4        480219505011017
              ...       
34248    484910205093008
34249    484910215082015
34250    484910215062017
34251    484910207081027
34252    484910215081023
Name: GEOID10, Length: 34253, dtype: object>
<bound method NDFrame.head of 0      482090101001
1      482090101002
2      482090102001
3      482090102002
4      482090102003
           ...     
898    484910216013
899    484910216021
900    484910216022
901    484910216031
902    484910216032
Name: Id2, Length: 903, dtype: int64>


In [27]:
blockgroup = blocks['GEOID10'].str.slice(start=0, stop=12)
blockgroup

tables['Id2'] = tables['Id2'].astype(str)

## Merge Dataframes:
According to the [documentation](https://www.census.gov/programs-surveys/geography/guidance/geo-identifiers.html) we have that the geocodes are each representing block data. Census tables will have a 12 digit geoID and the census blocks data will have a 15 digit block ID. Thus, in order to merge the files a new column will be created that will do a partial match.

In [28]:
# Convert the ID2 column to string data type. This will simplify the merge 
# by allowing pandas series regex commands. We will have to convert the 
# resulting column back...maybe. The GEOID in census 

merger = blocks.merge(tables, left_on=blockgroup, right_on='Id2', suffixes=('_blocks', '_tables'))

In [29]:
merger

Unnamed: 0,STATEFP10,COUNTYFP10,TRACTCE10,BLOCKCE10,GEOID10,NAME10,MTFCC10,UR10,UACE10,FUNCSTAT10,ALAND10,AWATER10,INTPTLAT10,INTPTLON10,geometry,Id,Id2,Estimate; Total
0,48,209,010905,1004,482090109051004,Block 1004,G5040,,,S,145197.0,0.0,+29.9705007,-097.8976351,"POLYGON ((3067669.226575226 9961819.154006734,...",1500000US482090109051,482090109051,3012
1,48,209,010905,1042,482090109051042,Block 1042,G5040,,,S,7078.0,0.0,+29.9374893,-097.8949551,"POLYGON ((3069096.858890548 9949507.756713554,...",1500000US482090109051,482090109051,3012
2,48,209,010905,1005,482090109051005,Block 1005,G5040,,,S,77099.0,0.0,+30.0238141,-097.9101115,"POLYGON ((3063472.982343048 9979983.445762068,...",1500000US482090109051,482090109051,3012
3,48,209,010905,1056,482090109051056,Block 1056,G5040,,,S,29732.0,0.0,+29.9727629,-097.8976176,"POLYGON ((3067669.226575226 9961819.154006734,...",1500000US482090109051,482090109051,3012
4,48,209,010905,1030,482090109051030,Block 1030,G5040,,,S,20034.0,0.0,+29.9575778,-097.8824548,"POLYGON ((3072891.729253545 9956595.258942723,...",1500000US482090109051,482090109051,3012
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28163,48,491,020405,1000,484910204051000,Block 1000,G5040,,,S,10788.0,0.0,+30.4594715,-097.7937593,"POLYGON ((3096912.985344797 10139365.09681439,...",1500000US484910204051,484910204051,1047
28164,48,491,020405,1003,484910204051003,Block 1003,G5040,,,S,6678.0,0.0,+30.4565531,-097.7926854,"POLYGON ((3097067.800323889 10138506.46992272,...",1500000US484910204051,484910204051,1047
28165,48,491,020405,1001,484910204051001,Block 1001,G5040,,,S,172449.0,0.0,+30.4585753,-097.7956252,"POLYGON ((3096834.297838137 10139333.10573664,...",1500000US484910204051,484910204051,1047
28166,48,491,020405,1002,484910204051002,Block 1002,G5040,,,S,74260.0,0.0,+30.4554673,-097.7951554,"POLYGON ((3096967.232283548 10138921.79422331,...",1500000US484910204051,484910204051,1047
