# How to Build a Geo Lookup Table
The purpose of this notebook is to illustrate how to build a geo lookup table for use in geocoding addresses. In this example, Alabama county-level Census TIGER/Line shapefiles are used to generate a lookup table.

In [1]:
from os.path import join, expanduser
import datetime as dt
import pandas as pd
import numpy as np
import warnings
import glob
import json
import sys
import os
import re

In [2]:
# Resolve the current user's home directory; the source path below is built from it.
home = os.path.expanduser('~')
# Show each distinct warning only the first time it is raised.
warnings.filterwarnings('once')

In [3]:
# Location of the local `zrp` source checkout under the user's home directory
# (presumably where the `prepare` package imported further down lives).
src_path = '{}/zest-race-predictor/playground/kam/zrp'.format(home)
# Guard the append so that re-running this cell does not stack duplicate
# entries on sys.path.
if src_path not in sys.path:
    sys.path.append(src_path)

Predefine paths & required parameters

In [4]:
# Root directory of the shared support files. The run log below shows shapefiles
# being read from <root>/raw/geo/<year> and lookup tables targeting
# <root>/processed/geo/<year>.
support_files_path = "/d/shared/zrp/shared_data"
# Five-digit state + county FIPS code (state 01 is Alabama, county 001).
st_cty_code = "01001"
# Vintage of the Census shapefiles to use.
year = "2020"

Import Geo Lookup Functions

In [5]:
# Import only the builder this notebook uses rather than a wildcard, so the
# origin of the name is explicit and the notebook namespace stays clean.
# This import must stay below the sys.path cell, which makes `prepare` importable.
from prepare.geo_lookup import GeoLookUpBuilder

### Initialize `GeoLookUpBuilder`
This class constructs geographic lookup tables that enable geocoding.

In [6]:
# Configure the builder with the shared data root and shapefile vintage
# chosen in the parameters cell.
geo_build = GeoLookUpBuilder(
    support_files_path=support_files_path,
    year=year,
)

### Run `GeoLookUpBuilder`
Provide the five-digit state-county FIPS code (e.g. `01001`) to build a county-level lookup table.

In [7]:
%%time
# Build the lookup table for the selected county and keep the result for preview.
# NOTE(review): the second positional argument is False and the run log reports
# "No tables were saved", so it presumably disables writing the table to disk —
# confirm against GeoLookUpBuilder.transform.
output = geo_build.transform(st_cty_code, False)

Shapefile input: /d/shared/zrp/shared_data/raw/geo/2020
Lookup Table output: /d/shared/zrp/shared_data/processed/geo/2020

 ... Loading requirements


  dictList.append(next(src)["properties"])


 ... Creating lookup table
 ... Formatting lookup table
   [Start] Processing lookup data
     ...processing
   [Completed] Processing lookup data
No tables were saved
CPU times: user 6.86 s, sys: 171 ms, total: 7.03 s
Wall time: 7.22 s


### Preview 

In [8]:
# Preview the first five rows of the generated lookup table.
output.head(n=5)

Unnamed: 0,TLID,TFID,ARIDL,ARIDR,LINEARID,ZEST_FULLNAME,FROMHN,TOHN,ZEST_ZIP,EDGE_MTFCC,...,PUMACE,RAW_ZEST_ZIP,RAW_ZEST_STATEFP,RAW_ZEST_COUNTYFP,RAW_ZEST_FULLNAME,RAW_ZEST_TRACTCE,RAW_ZEST_BLKGRPCE,GEOID_ZIP,GEOID_CT,GEOID_BG
0,645547480,265281980,4004702275423,40010888101733,1102971723827,WINCHESTER WAY,209,211,36067,S1400,...,NONE,36067,1,1,WINCHESTER WAY,20600,3,36067,1001020600,10010206003
1,618373091,265281980,4002641174315,4002641174318,110585080643,US HWY 82,298,170,36067,S1200,...,NONE,36067,1,1,US HWY 82,20600,3,36067,1001020600,10010206003
2,618373124,265281980,40010888100743,4003990900908,1102641174929,HEARTHSTONE DR,808,800,36067,S1400,...,NONE,36067,1,1,HEARTHSTONE DR,20600,3,36067,1001020600,10010206003
3,618373241,265281980,4005554316924,4005554316988,110585093561,MANTLEWOOD CT,112,100,36067,S1400,...,NONE,36067,1,1,MANTLEWOOD CT,20600,3,36067,1001020600,10010206003
4,2827302,265281980,4005599635001,4005552602396,110585091272,THOMAS LN,101,115,36067,S1400,...,NONE,36067,1,1,THOMAS LN,20600,3,36067,1001020600,10010206003


In [9]:
# Preview the last five rows of the generated lookup table.
output.tail(n=5)

Unnamed: 0,TLID,TFID,ARIDL,ARIDR,LINEARID,ZEST_FULLNAME,FROMHN,TOHN,ZEST_ZIP,EDGE_MTFCC,...,PUMACE,RAW_ZEST_ZIP,RAW_ZEST_STATEFP,RAW_ZEST_COUNTYFP,RAW_ZEST_FULLNAME,RAW_ZEST_TRACTCE,RAW_ZEST_BLKGRPCE,GEOID_ZIP,GEOID_CT,GEOID_BG
2997,641780949,267800464,NONE,4005329899958,1105320854926,DECEMBER DR,1198,1100,36051,S1400,...,NONE,36051,1,1,DECEMBER DR,20902,1,36051,1001020902,10010209021
2998,641779051,215950931,4005329899485,4005329899410,1102603648065,TANNER DR,2500,2598,36067,S1400,...,NONE,36067,1,1,TANNER DR,21000,2,36067,1001021000,10010210002
3002,641842113,215954130,4005329906653,4005556332827,1105321152904,AMBER ROSE DR,1998,1900,36067,S1400,...,NONE,36067,1,1,AMBER ROSE DR,20803,3,36067,1001020803,10010208033
3003,641842126,263692778,NONE,4005554301498,1105321152990,LUCAS CT,1500,1598,36067,S1400,...,NONE,36067,1,1,LUCAS CT,20803,3,36067,1001020803,10010208033
3004,641842126,263692778,NONE,40020508687594,1105321152990,LUCAS CT,400,498,36067,S1400,...,NONE,36067,1,1,LUCAS CT,20803,3,36067,1001020803,10010208033
