# ZRP User Guide
The purpose of this notebook is to illustrate how to use ZRP, the main class of the zrp package, which processes user input data and returns race/ethnicity predictions.

In [1]:
# Load the autoreload extension and set it to mode 2 so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Disable Jedi for the IPython tab-completer.
%config Completer.use_jedi=False

In [2]:
from os.path import join, expanduser
import pandas as pd
import sys
import os
import re
import warnings

## Set source code path here

In [3]:
# Show each distinct warning only the first time it is raised.
warnings.filterwarnings('once')

# The zrp checkout is expected under ~/zest/zrp; edit src_path if yours
# lives elsewhere.
home = expanduser('~')
src_path = f'{home}/zest/zrp'

# Make the checkout importable from this notebook.
sys.path.append(src_path)

In [4]:
from zrp import ZRP
from zrp.prepare.utils import load_file

  from scipy.sparse.csr import csr_matrix
  version = LooseVersion(pd.__version__)
  from pandas import Int64Index as NumericIndex
  from pandas import MultiIndex, Int64Index


## Load sample data for prediction
Load list of New Jersey Mayors downloaded from https://www.nj.gov/dca/home/2022mayors.csv 

In [5]:
# Read the sample CSV from the examples folder under src_path, then show
# the (rows, columns) shape as a quick sanity check.
mayors_csv_path = f"{src_path}/examples/2022-nj-mayors.csv"
nj_mayors = load_file(mayors_csv_path)
nj_mayors.shape

(565, 18)

In [6]:
# Display the raw mayors table as loaded, before any wrangling.
nj_mayors

Unnamed: 0,MUNI CODE,MUNI NAME,COUNTY,ADDRESS 1,ADDRESS 2,CITY,STATE,ZIP,PHONE,FAX,MAYOR NAME,TERM START,TERM END,FORM,TERM LEGNTH,EMAIL,SOCIAL MEDIA HANDLE,Municipal Contact List
0,1330,Aberdeen Township,Monmouth,One Aberdeen Square,,Aberdeen,NJ,07747-2300,(732) 583-4200,,Fred Tagliarini,,12/31/2025,COUNCIL-MANAGER,4,fred.tagliarini@aberdeennj.org,,
1,0101,Absecon City,Atlantic,Absecon Municipal Complex,500 Mill Road,Absecon,NJ,08201,(609) 641-0663,(609) 645-5098,Kimberly Horton,,12/31/2024,MAYOR-COUNCIL,3,khorton@abseconnj.org,,
2,1001,Alexandria Township,Hunterdon,782 Frenchtown Road,,Milford,NJ,08848,(908) 996-7071,,Gabe Plumer,,12/31/2022,TOWNSHIP,3,clerk@alexandrianj.gov,,
3,2101,Allamuchy Township,Warren,Post Office Box A,,Allamuchy,NJ,07820,(908) 852-5132,,Rosemary Tuohy,,12/31/2024,FAULKNER ACT,3,mayor@allamuchynj.org,,
4,0201,Allendale Borough,Bergen,500 West Crescent Avenue,,Allendale,NJ,07401,(201) 818-4400,,Ari Bernstein,,12/31/2022,,,aribernstein@allendalenj.gov,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
560,0269,Wood-Ridge Borough,Bergen,85 Humboldt Street,,Wood-Ridge,NJ,07075-2344,(201) 939-0202,,Paul A Sarlo,,12/31/2023,,,psarlo@njwoodridge.org,,
561,1715,Woodstown Borough,Salem,Post Office Box 286,,Woodstown,NJ,08098,(856) 769-2200,,Donald Dietrich,,12/31/2023,,,Don.dietrich@comcast.net,,
562,0824,Woolwich Township,Gloucester,120 Village Green Drive,,Woolwich Township,NJ,08085-3180,(856) 467-2666,,Craig Frederick,,12/31/2024,,,cfrederick@woolwichtwp.org,,
563,0340,Wrightstown Borough,Burlington,21 Saylors Pond Road,,Wrightstown,NJ,08562,(609) 723-4450,(609) 723-7137,Donald Cottrell,,12/31/2022,,,mayor@wrightstownborough.com,,


### Wrangle NJ mayor data for predictions
This parsing of the NJ mayors file will leave some NA's, but it is sufficient for demonstration purposes


In [36]:
# Start from an empty frame with the name and address columns; the cells
# below populate them from nj_mayors.
ZRP_SAMPLE_COLUMNS = [
    'first_name', 'middle_name', 'last_name',
    'house_number', 'street_address',
    'city', 'state', 'zip_code',
]
zrp_sample = pd.DataFrame(columns=ZRP_SAMPLE_COLUMNS)

Prepare Names

In [43]:
# Strip a trailing "Jr" / "Jr." suffix so it is not picked up as the last name.
# Series.str.replace returns a new Series, so the result has to be assigned;
# calling it without assignment leaves the names unchanged.
# The pattern is a raw string (avoids the invalid "\." escape) and regex=True
# is passed explicitly because the default for `regex` differs between
# pandas versions.
mayor_names = nj_mayors['MAYOR NAME'].str.replace(r' Jr\.?$', '', regex=True)

# First whitespace-separated token -> first name, last token -> last name.
# Middle names/initials are intentionally left unset here.
split_mayor_names = mayor_names.str.split(' ')
zrp_sample['first_name'] = split_mayor_names.str[0]
zrp_sample['last_name'] = split_mayor_names.str[-1]

  nj_mayors['MAYOR NAME'].str.replace(' Jr\.$', '')


City, State, Zip

In [44]:
# Location fields are copied unchanged from the matching nj_mayors columns
# (sample column -> source column).
location_columns = {'city': 'CITY', 'state': 'STATE', 'zip_code': 'ZIP'}
for sample_col, mayors_col in location_columns.items():
    zrp_sample[sample_col] = nj_mayors[mayors_col]

Address

In [49]:
address_line = nj_mayors['ADDRESS 1']

# House number: the first run of digits found in the address line
# (NaN when the line contains no digits).
zrp_sample['house_number'] = address_line.str.extract(r'([0-9]+)', expand=False)

# Street address: the non-digit text that follows a run of digits
# (NaN when nothing follows the digits, e.g. a line ending in a box number).
zrp_sample['street_address'] = address_line.str.extract(r'.*[0-9]+([^0-9]+)', expand=False)


In [50]:
# Display the assembled sample that will be passed to ZRP.transform.
zrp_sample

Unnamed: 0,first_name,middle_name,last_name,house_number,street_address,city,state,zip_code
0,Fred,,Tagliarini,,,Aberdeen,NJ,07747-2300
1,Kimberly,,Horton,,,Absecon,NJ,08201
2,Gabe,,Plumer,782,Frenchtown Road,Milford,NJ,08848
3,Rosemary,,Tuohy,,,Allamuchy,NJ,07820
4,Ari,,Bernstein,500,West Crescent Avenue,Allendale,NJ,07401
...,...,...,...,...,...,...,...,...
560,Paul,,Sarlo,85,Humboldt Street,Wood-Ridge,NJ,07075-2344
561,Donald,,Dietrich,286,,Woodstown,NJ,08098
562,Craig,,Frederick,120,Village Green Drive,Woolwich Township,NJ,08085-3180
563,Donald,,Cottrell,21,Saylors Pond Road,Wrightstown,NJ,08562


In [41]:
%%time
# Instantiate ZRP, call fit(), then run transform() on the prepared sample.
# The result is kept in `output` for the cells that follow.
# NOTE(review): the saved output below ends in KeyboardInterrupt, so `output`
# was never assigned in the recorded run — re-run this cell to completion.
zest_race_predictor = ZRP()
zest_race_predictor.fit()
output = zest_race_predictor.transform(zrp_sample)

Directory already exists
Data is loaded
   Formatting P1
   Formatting P2
reduce whitespace

[Start] Preparing geo data
  The following states are included in the data: ['NJ']
   ... on state: NJ

   Data is loaded
   [Start] Processing geo data
/Users/j/zest/zrp/zrp/prepare/../data/processed
      ...address cleaning


  0%|                                                   | 0/565 [00:00<?, ?it/s][Parallel(n_jobs=49)]: Using backend ThreadingBackend with 49 concurrent workers.
[Parallel(n_jobs=49)]: Done 102 tasks      | elapsed:    0.0s
[Parallel(n_jobs=49)]: Done 352 tasks      | elapsed:    0.0s
100%|██████████████████████████████████████| 565/565 [00:00<00:00, 14135.29it/s]

      ...replicating address
         ...Base
         ...Map street suffixes...



[Parallel(n_jobs=49)]: Done 565 out of 565 | elapsed:    0.0s finished


         ...Mapped & split by street suffixes...
         ...Number processing...

     Address dataframe expansion is complete! (n=1010)
      ...formatting
   [Completed] Processing geo data
   [Start] Mapping geo data
      ...merge user input & lookup table
      ...mapping
Directory already exists
Output saved
   [Completed] Mapping geo data
[Completed] Preparing geo data

[Start] Preparing ACS data
   ...loading ACS lookup tables



KeyboardInterrupt



In [14]:
# Show the transform result with a fresh 0..n-1 index (the old index is dropped).
# Requires the ZRP cell above to have finished so that `output` exists.
output.reset_index(drop=True)

NameError: name 'output' is not defined