# How to Prepare data for ZRP Predictions
The purpose of this notebook is to illustrate how to use ZRP_Prepare, a class that prepares user input data for generating predictions, models, & analysis.

In [1]:
%load_ext autoreload
%autoreload 2
%config Completer.use_jedi=False

In [2]:
from os.path import join, expanduser
import pandas as pd
import sys
import os
import re
import warnings

In [3]:
warnings.filterwarnings(action='once')
home = expanduser('~')

In [4]:
src_path = '{}/zest-race-predictor/playground/kam/zrp'.format(home)
sys.path.append(src_path)

In [5]:
from prepare.utils import *

from prepare.zrp import ZRP_Prepare

## load data
simulating user input data

In [6]:
support_files_path = "/d/shared/zrp/shared_data"


In [7]:
df = load_file("/d/shared/zrp/shared_data/processed/data/state_level/voters/base_fl_2022q1.parquet")

df.shape

  if distutils.version.LooseVersion(version) < minimum_version:
  other = LooseVersion(other)
  _pyarrow_version_ge_015 = LooseVersion(pyarrow.__version__) >= LooseVersion("0.15")


(15032091, 14)

### sample data
sample for test case

In [8]:
samp = df.sample(30)

In [9]:
samp.head()

Unnamed: 0,ZEST_KEY,first_name,middle_name,last_name,house_number,street_address,city,state,zip_code,original_race,race,original_sex,sex,age
1098584,FL_109396943,Judith,Ellen,Tao,7611,SW 142Nd Ave,Miami,FL,33183,5,WHITE,F,FEMALE,44
656,FL_112946086,Patrick,M,Loia,3864,Spirited Cir,St. Cloud,FL,34772,5,WHITE,M,MALE,31
190628,FL_108070754,Richard,Clyde,Kuncicky,315,Caribe Vista Way,St Augustine,FL,32080,5,WHITE,M,MALE,52
47182,FL_107533068,Amanda,L,Peeterse,5418,Douglas ST,Milton,FL,32570,5,WHITE,F,FEMALE,18
151464,FL_114107817,THOMAS,RHEA,DAVIS,2601,NE 23RD AVE,OCALA,FL,34470,5,WHITE,M,MALE,17


In [10]:
samp.tail()

Unnamed: 0,ZEST_KEY,first_name,middle_name,last_name,house_number,street_address,city,state,zip_code,original_race,race,original_sex,sex,age
126218,FL_118029798,Johnathan,Franklin,Green,1403,Grandview DR,Crestview,FL,325398520,5,WHITE,M,MALE,28
167040,FL_116205628,WILFRANTZ,,ROMULUS,371,SW LACROIX AVE,PT ST LUCIE,FL,34953,3,BLACK,M,MALE,18
309950,FL_120164938,MILFORT,,THELISNOR,525,AVENIDA HERMOSA,WEST PALM BEACH,FL,33405,3,BLACK,M,MALE,32
431976,FL_112648879,Rollis,E,Bock,7905,Amethyst Lake Pt,Lake Worth,FL,33467,5,WHITE,M,MALE,64
351993,FL_127622543,Stephen,Jay,Coyle,411,Autumn Stream DR,Auburndale,FL,33823,5,WHITE,M,MALE,76


## ZRP Prepare  

In [11]:
prepare = ZRP_Prepare()

In [12]:
prepare.fit(samp)

In [13]:
output = prepare.transform(samp)

  0%|          | 0/30 [00:00<?, ?it/s][Parallel(n_jobs=49)]: Using backend ThreadingBackend with 49 concurrent workers.
100%|██████████| 30/30 [00:00<00:00, 80971.12it/s]

Data is loaded
Data is loaded
   Formatting P1
   Formatting P2
reduce whitespace

[Start] Preparing geo data
  The following states are included in the data: ['FL']
   ... on state: FL

   Data is loaded
   [Start] Processing geo data
/d/shared/zrp/shared_data
      ...address cleaning
      ...replicating address
         ...Base
         ...Map street suffixes...



[Parallel(n_jobs=49)]: Done  26 out of  30 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=49)]: Done  30 out of  30 | elapsed:    0.0s finished


         ...Mapped & split by street suffixes...
         ...Number processing...

     Address dataframe expansion is complete! (n=38)
      ...formatting
   [Completed] Processing geo data
   [Start] Mapping geo data


  if distutils.version.LooseVersion(version) < minimum_version:
  other = LooseVersion(other)


      ...merge user input & lookup table
      ...mapping
Output saved
   [Completed] Mapping geo data


  if distutils.version.LooseVersion(version) < minimum_version:
  other = LooseVersion(other)


[Completed] Preparing geo data

[Start] Preparing ACS data
User input data is loaded
   ...loading ACS lookup tables
   ... combining ACS & user input data
ZEST_KEY
[Complete] Preparing ACS data


In [14]:
output.shape

(101, 929)

In [15]:
samp.shape

(30, 14)

In [16]:
samp.head()

Unnamed: 0,ZEST_KEY,first_name,middle_name,last_name,house_number,street_address,city,state,zip_code,original_race,race,original_sex,sex,age
1098584,FL_109396943,Judith,Ellen,Tao,7611,SW 142Nd Ave,Miami,FL,33183,5,WHITE,F,FEMALE,44
656,FL_112946086,Patrick,M,Loia,3864,Spirited Cir,St. Cloud,FL,34772,5,WHITE,M,MALE,31
190628,FL_108070754,Richard,Clyde,Kuncicky,315,Caribe Vista Way,St Augustine,FL,32080,5,WHITE,M,MALE,52
47182,FL_107533068,Amanda,L,Peeterse,5418,Douglas ST,Milton,FL,32570,5,WHITE,F,FEMALE,18
151464,FL_114107817,THOMAS,RHEA,DAVIS,2601,NE 23RD AVE,OCALA,FL,34470,5,WHITE,M,MALE,17


In [17]:
output.head()

Unnamed: 0_level_0,B01003_001,B02001_001,B02001_002,B02001_003,B02001_004,B02001_005,B02001_006,B02001_007,B02001_008,B02001_009,...,house_number,last_name,middle_name,original_race,original_sex,race,sex,state,street_address,zip_code
ZEST_KEY,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
FL_109396943,620,620,599,3,0,18,0,0,0,0,...,7611,TAO,ELLEN,5,F,WHITE,FEMALE,FL,SW 142ND AVE,33183
FL_112946086,18238,18238,12718,1659,54,250,0,2715,842,53,...,3864,LOIA,M,5,M,WHITE,MALE,FL,SPIRITED CIR,34772
FL_108070754,1228,1228,1216,0,0,0,0,0,12,0,...,315,KUNCICKY,CLYDE,5,M,WHITE,MALE,FL,CARIBE VISTA WAY,32080
FL_114107817,874,874,637,118,0,0,0,42,77,0,...,2601,DAVIS,RHEA,5,M,WHITE,MALE,FL,NE 23RD AVE,34470
FL_115998147,581,581,546,35,0,0,0,0,0,0,...,5975,MEI,A,9,F,UNKNOWN,FEMALE,FL,N HIGHLAND PARK DR,34442
