In [14]:
import pandas as pd
import numpy as np
from sklearn.neighbors import BallTree

In [3]:
# orca supplies the `run` and `get_table` calls used by the cells below.
import orca
# Move the working directory up one level before the project import that follows.
# NOTE(review): presumably this is what makes the `scripts` package importable
# from the notebook's location -- confirm. The chdir is relative, so re-running
# this cell moves up one more level each time.
import os; os.chdir('../')
# NOTE(review): datasources/models/variables are not referenced by name in the
# visible cells; presumably they are imported for the side effect of registering
# orca tables and steps -- confirm.
from scripts import datasources, models, variables
# Silence every warning category for the remainder of the session.
import warnings;warnings.simplefilter('ignore')

In [4]:
# Run the two network-initialisation steps, in this order, through orca.
network_steps = ['initialize_network_small', 'initialize_network_walk']
orca.run(network_steps)

Running step 'initialize_network_small'
Time to execute step 'initialize_network_small': 0.00 s
Running step 'initialize_network_walk'
Time to execute step 'initialize_network_walk': 0.00 s
Total time to execute iteration 1 with iteration value None: 0.00 s


In [5]:
# Materialise the orca tables as DataFrames, in the same order as before:
# jobs, then buildings, then parcels.
jobs, buildings, parcels = (
    orca.get_table(table_name).to_frame()
    for table_name in ('jobs', 'buildings', 'parcels')
)

In [8]:
# Parcel coordinates converted from degrees to radians, columns ordered (y, x).
# NOTE(review): assumes y is latitude and x is longitude in degrees -- confirm.
parcel_yx_deg = parcels[['y', 'x']]
parcels_rad = np.deg2rad(parcel_yx_deg)

In [9]:
# Locations of the CHTS CSV export on disk.
data_dir = '/home/data/fall_2018/'
chts_dir = 'CHTS_csv_format/'
chts_root = data_dir + chts_dir

# Person records; low_memory=False makes pandas read the file in one pass.
chts_persons = pd.read_csv(chts_root + 'data/Deliv_PER.csv', low_memory=False)
# Lookup tables that are merged onto the person records in a later cell.
chts_persons_lookup = pd.read_csv(chts_root + 'data/LookUp_PER.csv')
chts_households_lookup = pd.read_csv(chts_root + 'data/LookUp_Home.csv')

In [12]:
# NOTE: this cell rebinds `chts_persons`; re-running it without first re-running
# the CSV-loading cell merges the lookup tables onto already-merged data.

# Attach the per-person lookup attributes. Both frames are keyed on
# (SAMPN, PERNO); columns present in both get `_persons` / `_lookup` suffixes
# (later cells read WYCORD_lookup / WXCORD_lookup).
chts_persons = pd.merge(
    chts_persons.set_index(['SAMPN','PERNO']),
    chts_persons_lookup.set_index(['SAMPN','PERNO']),
    left_index=True, right_index=True,
    suffixes=('_persons', '_lookup')).reset_index()

# Attach the household lookup attributes, keyed on SAMPN.
chts_persons = pd.merge(
    chts_persons.set_index(['SAMPN']),
    chts_households_lookup.set_index(['SAMPN']),
    left_index=True, right_index=True).reset_index()

# Keep only persons whose HCTFIP is one of the listed codes.
# NOTE(review): presumably the home-county FIPS codes of the study area -- confirm.
# drop=True discards the pre-filter row positions instead of inserting them as a
# stray `index` column, which would otherwise be carried into the exported CSV.
chts_persons = chts_persons[
    chts_persons['HCTFIP'].isin([1, 13, 41, 55, 75, 81, 85, 95, 97])
].reset_index(drop=True)

In [13]:
# Person work and home coordinates converted from degrees to radians.
# Columns are ordered (Y, X), the same order used for parcels_rad.
work_coord_cols = ['WYCORD_lookup', 'WXCORD_lookup']
home_coord_cols = ['HYCORD', 'HXCORD']
persons_work_rad = np.deg2rad(chts_persons[work_coord_cols])
persons_home_rad = np.deg2rad(chts_persons[home_coord_cols])

In [15]:
# Spatial index over the parcel coordinates (radians, (y, x) order) using the
# haversine metric, used to find the single nearest parcel for each person.
tree = BallTree(parcels_rad, metric='haversine')

# Home location: nearest parcel for every person. query() defaults to one
# neighbour, so idxs has a single column of positional parcel indices.
dists, idxs = tree.query(persons_home_rad, return_distance=True)
chts_persons['parcel_id_home'] = parcels.index[idxs[:, 0]]

# Work location: only persons with a complete work coordinate pair are matched;
# everyone else keeps None. Initialising with None makes the column object
# dtype, which the later merge on parcel_id_work relies on.
chts_persons['parcel_id_work'] = None
# Require BOTH coordinates: a row with only one of them present would pass NaN
# to BallTree.query, which rejects non-finite input.
has_work_coords = persons_work_rad.notnull().all(axis=1).values
# Skip the query when nobody has work coordinates; BallTree.query raises on an
# empty input array.
if has_work_coords.any():
    dists, idxs = tree.query(persons_work_rad[has_work_coords], return_distance=True)
    chts_persons.loc[has_work_coords, 'parcel_id_work'] = parcels.index[idxs[:, 0]]

In [16]:
# Copy of parcels with the index moved into a `primary_id` column, cast to
# object so it matches the object-dtype parcel_id_work column (which was
# initialised with None) when the two are merged.
tmp_parcels = parcels.reset_index().astype({'primary_id': object})

In [17]:
# Step 1: join parcel attributes for the home parcel (default inner join on the
# parcels index).
persons_with_home_parcel = chts_persons.merge(
    parcels, left_on='parcel_id_home', right_index=True)

# Step 2: left-join parcel attributes for the work parcel so persons without a
# work parcel are kept. Columns shared by both sides get `_home` / `_work`
# suffixes, which is what produces zone_id_home and zone_id_work.
persons_with_both_parcels = persons_with_home_parcel.merge(
    tmp_parcels,
    left_on='parcel_id_work',
    right_on='primary_id',
    suffixes=('_home', '_work'),
    how='left')

# Keep the original person columns plus the two zone ids only.
output_columns = list(chts_persons.columns) + ['zone_id_home', 'zone_id_work']
chts_persons_w_zone_ids = persons_with_both_parcels[output_columns]

In [19]:
# Write the persons table with home/work zone ids to disk. The default to_csv
# settings are used, so the row index is written as the first column.
zone_ids_csv_path = '/home/data/fall_2018/chts_persons_w_zone_ids.csv'
chts_persons_w_zone_ids.to_csv(zone_ids_csv_path)