In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.neighbors import BallTree
import geopandas as gpd
from shapely.geometry import Point, LineString
from pyproj import Proj, transform
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
from urbansim_templates import modelmanager as mm
from urbansim_templates.models import LargeMultinomialLogitStep
from urbansim.models.util import apply_filter_query
import orca
import os; os.chdir('../')
import warnings;warnings.simplefilter('ignore')
from scripts import datasources, models, variables
from choicemodels import MultinomialLogit
from choicemodels.tools import MergedChoiceTable

Registering model step 'auto_ownership'
Registering model step 'TOD_choice'
Registering model step 'primary_mode_choice'
Registering model step 'WLCM'


### Set required runtime args

In [3]:
# Format (file extension) of the input tables and the directory they live in.
input_file_format = 'csv'
input_data_dir = '/home/data/spring_2019/base/'

# File-name templates for the base-year data used for estimation.
# '{0}' is a placeholder for the file extension; entries without a
# placeholder (beam_skims) are fixed file names.
formattable_fname_dict = dict(
    parcels='parcel_attr.{0}',
    buildings='buildings_v2.{0}',
    jobs='jobs_v2.{0}',
    establishments='establishments_v2.{0}',
    households='households_v2.{0}',
    persons='persons_v3.{0}',
    rentals='MTC_craigslist_listings_7-10-18.{0}',
    units='units_v2.{0}',
    skims='skims_110118.{0}',
    beam_skims='30.skims-smart-23April2019-baseline.csv.gz',
    drive_nodes='bay_area_tertiary_strongly_nodes.{0}',
    drive_edges='bay_area_tertiary_strongly_edges.{0}',
    drive_access_vars='drive_net_vars.{0}',
    walk_nodes='bayarea_walk_nodes.{0}',
    walk_edges='bayarea_walk_edges.{0}',
    walk_access_vars='walk_net_vars.{0}',
    zones='zones.{0}',
    zone_access_vars='zones_w_access_vars.{0}',
)

def format_fname_dict(formattable_fname_dict, format='csv'):
    """Fill the file-extension placeholder in every file-name template.

    Parameters
    ----------
    formattable_fname_dict : dict
        Maps a table name to a file-name template. A template may contain
        a ``{0}`` placeholder; templates without one are returned unchanged.
    format : str, optional
        Extension substituted for ``{0}``. Defaults to ``'csv'``.

    Returns
    -------
    dict
        New dict with the same keys and the formatted file names.
    """
    # Use the requested format; the value must not be hard-coded here or
    # the ``format`` argument is silently ignored.
    return {
        table_name: template.format(format)
        for table_name, template in formattable_fname_dict.items()}

# Resolve every template to a concrete file name for the configured input format.
input_fnames = format_fname_dict(formattable_fname_dict, format=input_file_format)

### Set required Orca injectables

In [4]:
# Expose the runtime settings to Orca under the names listed below.
orca.add_injectable(name='input_file_format', value=input_file_format)
orca.add_injectable(name='input_data_dir', value=input_data_dir)
orca.add_injectable(name='input_fnames', value=input_fnames)
# No data store is used in this notebook.
orca.add_injectable(name='store', value=None)

### Initialize Orca tables

In [5]:
# Run the initialization steps in order; the skim imputation is the slow one
# (about 90 s in the recorded output).
orca.run([
    'initialize_network_walk',
    'initialize_network_small',
    'impute_missing_skims',
])

Running step 'initialize_network_walk'
Time to execute step 'initialize_network_walk': 0.00 s
Running step 'initialize_network_small'
Time to execute step 'initialize_network_small': 0.00 s
Running step 'impute_missing_skims'
Time to execute step 'impute_missing_skims': 90.92 s
Total time to execute iteration 1 with iteration value None: 90.92 s


In [6]:
# Materialize the Orca tables needed below as plain DataFrames.
buildings, parcels, jobs = (
    orca.get_table(table_name).to_frame()
    for table_name in ('buildings', 'parcels', 'jobs'))

# Interaction terms come from the BEAM skims; the index levels are renamed
# so they line up with the home/work zone columns of choosers and alternatives.
beam_skims = orca.get_table('beam_skims').to_frame()
interaction_terms = beam_skims.rename_axis(['zone_id_home', 'zone_id_work'])

# # old way
# interaction_terms_tt = pd.read_csv(
#     './data/WLCM_interaction_terms_tt.csv', index_col=[
#         'zone_id_home', 'zone_id_work'])
# interaction_terms_dist = pd.read_csv(
#     './data/WLCM_interaction_terms_dist.csv', index_col=[
#         'zone_id_home', 'zone_id_work'])
# interaction_terms_cost = pd.read_csv(
#     './data/WLCM_interaction_terms_cost.csv', index_col=[
#         'zone_id_home', 'zone_id_work'])
# interaction_terms = [interaction_terms_tt, interaction_terms_dist, interaction_terms_cost]

### Get accessibility vars

If already computed:

In [61]:
# Load the previously computed accessibility variables.
# Paths are built with os.path.join so they resolve whether or not
# input_data_dir ends with a path separator.
walk_net_vars = pd.read_csv(
    os.path.join(input_data_dir, input_fnames['walk_access_vars']),
    index_col='osmid')
drive_net_vars = pd.read_csv(
    os.path.join(input_data_dir, input_fnames['drive_access_vars']),
    index_col='osmid')
zones = pd.read_csv(
    os.path.join(input_data_dir, input_fnames['zone_access_vars']),
    index_col='zone_id')

In [64]:
# Inspect the zone-level columns available for the alternatives table.
zones.columns

Index(['gid', 'area', 'acres', 'total_jobs', 'sum_residential_units',
       'sum_persons', 'sum_income', 'avg_income', 'total_jobs_gen_tt_CAR_15',
       'total_jobs_gen_tt_CAR_45', 'sum_persons_gen_tt_CAR_15',
       'sum_persons_gen_tt_CAR_45', 'sum_income_gen_tt_CAR_15',
       'sum_income_gen_tt_CAR_45', 'sum_residential_units_gen_tt_CAR_15',
       'sum_residential_units_gen_tt_CAR_45', 'avg_income_gen_tt_CAR_30',
       'total_jobs_gen_tt_WALK_TRANSIT_15',
       'total_jobs_gen_tt_WALK_TRANSIT_45',
       'sum_persons_gen_tt_WALK_TRANSIT_15',
       'sum_persons_gen_tt_WALK_TRANSIT_45',
       'sum_income_gen_tt_WALK_TRANSIT_15',
       'sum_income_gen_tt_WALK_TRANSIT_45',
       'sum_residential_units_gen_tt_WALK_TRANSIT_15',
       'sum_residential_units_gen_tt_WALK_TRANSIT_45',
       'total_jobs_gen_tt_RIDE_HAIL_15', 'total_jobs_gen_tt_RIDE_HAIL_45',
       'sum_persons_gen_tt_RIDE_HAIL_15', 'sum_persons_gen_tt_RIDE_HAIL_45',
       'sum_income_gen_tt_RIDE_HAIL_15', 'sum_in

If computing for the first time:

In [None]:
# orca.run(['network_aggregations_small', 'network_aggregations_walk', 'skims_aggregations'])

### Load CHTS Data

Raw CHTS Households Data

In [9]:
# Raw CHTS household records; SAMPN and INCOM are merged onto the workers further down.
chts_households = pd.read_csv(
    '/home/data/fall_2018/CHTS_csv_format/data/Deliv_HH.csv'
)

Processed CHTS Persons Data (code available [here](https://github.com/ual/ual_model_workspace/blob/master/fall-2018-models/notebooks-max/WLCM_pre-processing.ipynb))

In [10]:
# Pre-processed CHTS person records (already carry job_id and building_id columns).
chts_persons = pd.read_csv(
    '/home/data/fall_2018/chts_persons_w_jobs_and_res_bldgs.csv'
)

In [11]:
# Per-person 0/1 indicator columns. The comparison result itself is stored;
# assigning the filtered DataFrame (as before) does not yield a per-person flag.
# NOTE(review): EMPLY == 1 and WLOC == 2 are presumably the CHTS codes for
# "employed" and "works at home" -- confirm against the CHTS codebook.
chts_persons['worker'] = (chts_persons['EMPLY'] == 1).astype(int)
chts_persons['work_at_home'] = (chts_persons['WLOC'] == 2).astype(int)

# Estimation sample: persons with a non-null job_id that exists in the jobs table.
chts_workers = chts_persons[
    chts_persons['job_id'].notnull()
    & chts_persons['job_id'].isin(jobs.index.values)]

### Define required model parameters

In [12]:
# Query strings applied to the choosers before estimation.
# NOTE(review): 98+ presumably marks "don't know / refused" survey codes -- confirm.
chooser_filters = [
    'age < 115',
    'income < 98',
    'edu < 98',
]

# Passed as `sample_size` when the merged choice table is built.
alt_sample_size = 10

### Generate the merged choice table

For now this step must be done manually, by instantiating `choicemodels.tools.MergedChoiceTable` directly rather than using a template, because `urbansim_templates` does not yet support interaction terms such as home-to-work distances.

In [13]:
# Build the chooser table: workers + household income + home building/parcel
# attributes, with survey column names mapped to the names used in the model.
obs = (
    chts_workers
    .merge(chts_households[['SAMPN', 'INCOM']], on='SAMPN')
    .merge(buildings, left_on='building_id', right_index=True)
    .merge(parcels, left_on='parcel_id', right_index=True)
    .rename(columns={
        'zone_id': 'zone_id_home',
        'AGE': 'age',
        'EDUCA': 'edu',
        'INCOM': 'income',
    })
)
obs.index.name = 'obs_id'

In [14]:
# Chooser indicator variables (1 when the condition holds, 0 otherwise).
# NOTE(review): the edu/income thresholds are survey category codes, not
# years or dollars -- confirm the category boundaries against the codebook.
obs['no_higher_ed'] = obs['edu'].lt(5).astype(int)
obs['age_under_45'] = obs['age'].lt(45).astype(int)
obs['hh_inc_under_25k'] = obs['income'].lt(3).astype(int)
obs['hh_inc_25_to_75k'] = (
    obs['income'].gt(2) & obs['income'].lt(6)).astype(int)
obs['hh_inc_75_to_200k'] = (
    obs['income'].gt(5) & obs['income'].lt(9)).astype(int)

In [15]:
# Drop choosers that fail any of the chooser filters.
obs = apply_filter_query(df=obs, filters=chooser_filters)

In [16]:
# Keep only the chosen alternative, the home zone and the chooser covariates.
obs = obs[[
    'job_id',
    'zone_id_home',
    'age_under_45',
    'no_higher_ed',
    'age',
    'hh_inc_under_25k',
    'hh_inc_25_to_75k',
    'hh_inc_75_to_200k',
    'income',
]]

In [80]:
# Build the alternatives table: each job with its building, parcel and
# zone-level accessibility attributes.
alts = (
    jobs
    .merge(buildings, left_on='building_id', right_index=True)
    .merge(parcels, left_on='parcel_id', right_index=True)
    .merge(zones, left_on='zone_id', right_index=True)
)

In [81]:
# Industry of alternatives: one 0/1 column per sector, set to 1 when the
# job's sector_id is among the listed codes.
# NOTE(review): the codes look like 2-digit NAICS sectors; if so, transport
# would normally span 48-49 while only 48 is flagged here -- confirm intent.
alts = alts.assign(**{
    flag_name: alts['sector_id'].isin(sector_codes).astype(int)
    for flag_name, sector_codes in [
        ('sector_retail', [44, 45]),
        ('sector_healthcare', [62]),
        ('sector_tech', [51, 54]),
        ('sector_food_and_hosp', [72]),
        ('sector_mfg', [31, 32, 33]),
        ('sector_edu_serv', [61]),
        ('sector_oth_serv', [81]),
        ('sector_constr', [23]),
        ('sector_gov', [92]),
        ('sector_fire', [52, 53]),
        ('sector_whlsale', [42]),
        ('sector_admin', [56]),
        ('sector_transport', [48]),
        ('sector_arts', [71]),
        ('sector_util', [22]),
    ]
})

# # occupation of alternatives
# alts['occup_mgmt'] = alts['occupation_id'].isin([11]).astype(int)
# alts['occup_sales'] = alts['occupation_id'].isin([41]).astype(int)
# alts['occup_biz'] = alts['occupation_id'].isin([13]).astype(int)
# alts['occup_admin'] = alts['occupation_id'].isin([43]).astype(int)
# alts['occup_edu'] = alts['occupation_id'].isin([25]).astype(int)
# alts['occup_food'] = alts['occupation_id'].isin([35]).astype(int)
# alts['occup_health'] = alts['occupation_id'].isin([29, 31]).astype(int)
# alts['occup_tech'] = alts['occupation_id'].isin([15]).astype(int)
# alts['occup_eng'] = alts['occupation_id'].isin([17]).astype(int)
# alts['occup_transp'] = alts['occupation_id'].isin([53]).astype(int)
# alts['occup_constr'] = alts['occupation_id'].isin([47]).astype(int)

In [82]:
# Keep only the alternative-side columns: zone accessibility measures,
# the work zone id, and the sector indicators.
alts = alts[
    [
        'total_jobs_gen_tt_WALK_TRANSIT_15',
        'total_jobs_gen_tt_WALK_TRANSIT_45',
        'total_jobs_gen_tt_CAR_45',
        'total_jobs_gen_tt_CAR_15',
    ]
    + ['zone_id_work']
    + [
        'sector_retail', 'sector_healthcare', 'sector_tech',
        'sector_food_and_hosp', 'sector_mfg', 'sector_edu_serv',
        'sector_oth_serv', 'sector_constr', 'sector_gov', 'sector_fire',
        'sector_whlsale', 'sector_admin', 'sector_transport',
        'sector_arts', 'sector_util',
    ]
    # Occupation indicators are currently disabled:
    #     'occup_mgmt', 'occup_sales', 'occup_biz', 'occup_admin', 'occup_edu', 'occup_food', 'occup_health',
    #     'occup_tech', 'occup_eng', 'occup_transp', 'occup_constr'
]

In [83]:
%%time
# Combine choosers, sampled alternatives and the zone-to-zone interaction terms.
# NOTE(review): no seed is set here, so the sampled alternatives (and hence the
# estimates) may differ between runs -- confirm whether that is acceptable.
mct = MergedChoiceTable(
    obs,
    alts,
    chosen_alternatives='job_id',
    sample_size=alt_sample_size,
    interaction_terms=interaction_terms,
)

CPU times: user 825 ms, sys: 63.8 ms, total: 889 ms
Wall time: 888 ms


### Estimate the WLCM

In [84]:
# Initialize the model manager; the output below lists the steps it registers.
mm.initialize()

Registering model step 'auto_ownership'
Registering model step 'TOD_choice'
Registering model step 'primary_mode_choice'
Registering model step 'WLCM'


In [85]:
# Model step for the workplace location choice, configured with the same
# chooser filters and alternative sample size used for the choice table.
m = LargeMultinomialLogitStep(
    chooser_filters=chooser_filters,
    alt_sample_size=alt_sample_size,
    constrained_choices=True,
)

In [92]:
# m.model_expression = (
#     'dist_da/tt_da + '
#     'np.log1p(jobs_1500_walk_retail) + '
#     'np.log1p(cost_da_toll):(hh_inc_under_25k + hh_inc_25_to_75k + hh_inc_75_to_200k) + '
#     'no_higher_ed:(sector_retail + sector_fire + sector_healthcare + sector_tech + sector_mfg + '
#     'sector_food_and_hosp + sector_edu_serv + sector_gov + sector_whlsale) + '
#     'sector_retail + sector_tech + sector_mfg + sector_food_and_hosp + sector_edu_serv + sector_oth_serv + '
#     'sector_constr + sector_gov + sector_whlsale + sector_admin + sector_util - 1'
# )

m.model_expression = (
    # generalized travel time (car, walk-transit) and distance
    'np.log1p(gen_tt_CAR):np.log1p(gen_tt_WALK_TRANSIT) + '
    'dist * (np.log1p(gen_tt_CAR)) + '
    'np.log1p(gen_tt_WALK_TRANSIT) + '
    # job accessibility of the work zone
    'np.log1p(total_jobs_gen_tt_WALK_TRANSIT_15) + '
    # car cost interacted with household income group
    'np.log1p(gen_cost_CAR):'
    '(hh_inc_under_25k + hh_inc_25_to_75k + hh_inc_75_to_200k) + '
    # education interacted with job sector
    'no_higher_ed:('
    'sector_retail + sector_fire + sector_healthcare + sector_tech + sector_mfg + '
    'sector_food_and_hosp + sector_edu_serv + sector_gov) + '
    # sector main effects
    'sector_retail + sector_tech + sector_mfg + sector_food_and_hosp + '
    'sector_edu_serv + sector_oth_serv + sector_constr + sector_gov + '
    'sector_whlsale + sector_admin + sector_util'
    # no intercept
    ' - 1'
)

In [93]:
# Estimate the model on the merged choice table built above; prints the results summary.
m.fit(mct)

                  CHOICEMODELS ESTIMATION RESULTS                  
Dep. Var.:                chosen   No. Observations:          7,928
Model:         Multinomial Logit   Df Residuals:              7,900
Method:       Maximum Likelihood   Df Model:                     28
Date:                 2019-06-17   Pseudo R-squ.:             0.508
Time:                      12:28   Pseudo R-bar-squ.:         0.507
AIC:                  18,010.223   Log-Likelihood:       -8,977.112
BIC:                  18,205.612   LL-Null:             -18,254.895
                                                        coef   std err         z     P>|z|   Conf. Int.
-------------------------------------------------------------------------------------------------------
np.log1p(gen_tt_CAR):np.log1p(gen_tt_WALK_TRANSIT)    0.0100     0.014     0.718     0.472             
dist                                                 -0.0001     0.000   -13.943     0.000             
np.log1p(gen_tt_CAR)                    

### Add attributes to the model object that are needed for the simulation step

In [96]:
# Name under which the step is saved (as '<name>.yaml') and registered.
m.name = 'WLCM_gen_tt'
m.tags = ['max']
# Table names for alternatives and choosers, for use by the simulation step.
m.alternatives = ['jobs', 'buildings', 'parcels', 'zones']
m.choosers = ['persons', 'households']
# Reuse the estimation chooser filters as the output (simulation) filters.
m.out_chooser_filters = m.chooser_filters
# NOTE(review): presumably the chooser column that receives the simulated choice -- confirm.
m.out_column = 'job_id'

In [97]:
# Save the configured step to the configs directory and register it (see output below).
mm.register(m)

Saving 'WLCM_gen_tt.yaml': /home/max/projects/activitysynth/activitysynth/configs
Registering model step 'WLCM_gen_tt'
