In [4]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.neighbors import BallTree
import seaborn as sns
import geopandas as gpd
from shapely.geometry import Point, LineString
from pyproj import Proj, transform
from matplotlib import pyplot as plt
%matplotlib inline
%load_ext memory_profiler

In [6]:
import os
import warnings

from urbansim_templates import modelmanager as mm
from urbansim_templates.models import LargeMultinomialLogitStep
import orca

# Move the working directory up one level exactly once per kernel session.
# An unguarded os.chdir('../') climbs one more directory every time this cell
# is re-run, which breaks the relative './data/...' paths used further down.
if '_WORKDIR_MOVED_UP' not in globals():
    os.chdir('../')
    _WORKDIR_MOVED_UP = True

# Suppress all warnings for the rest of the session.
warnings.simplefilter('ignore')

# Imported after the chdir, as in the original cell order; presumably `scripts`
# is resolved relative to the new working directory -- confirm.
from scripts import datasources, models, variables
from choicemodels import MultinomialLogit
from choicemodels.tools import MergedChoiceTable

### Load Data

In [7]:
# --- CHTS survey tables -------------------------------------------------------
chts_households = pd.read_csv('/home/data/fall_2018/CHTS_csv_format/data/Deliv_HH.csv')
chts_persons = pd.read_csv('./data/chts_persons_w_jobs_and_res_bldgs.csv')

# 0/1 indicator columns. The previous code assigned a row-filtered DataFrame to
# a single column, which does not produce a per-person flag.
# NOTE(review): presumably EMPLY == 1 codes "employed" and WLOC == 2 codes
# "works at home" -- confirm against the CHTS data dictionary.
chts_persons['worker'] = (chts_persons['EMPLY'] == 1).astype(int)
chts_persons['work_at_home'] = (chts_persons['WLOC'] == 2).astype(int)

# Only persons with a non-null job_id can serve as observed choosers.
chts_workers = chts_persons[~pd.isnull(chts_persons['job_id'])]

# --- orca tables --------------------------------------------------------------
orca.run(['initialize_network_walk', 'initialize_network_small'])
# to_frame must be *called*; without the parentheses `buildings` is a bound
# method rather than a DataFrame and the later merges cannot use it.
buildings = orca.get_table('buildings').to_frame()
parcels = orca.get_table('parcels').to_frame()
jobs = orca.get_table('jobs').to_frame()

# --- zone-to-zone interaction terms, indexed by (home zone, work zone) ---------
interaction_terms_tt = pd.read_csv('./data/WLCM_interaction_terms_tt.csv', index_col=['zone_id_home', 'zone_id_work'])
interaction_terms_dist = pd.read_csv('./data/WLCM_interaction_terms_dist.csv', index_col=['zone_id_home', 'zone_id_work'])
interaction_terms_cost = pd.read_csv('./data/WLCM_interaction_terms_cost.csv', index_col=['zone_id_home', 'zone_id_work'])

# --- network variables, indexed by node id ('osmid' column) --------------------
walk_net_vars = pd.read_csv('./data/walk_net_vars.csv', index_col='osmid')
drive_net_vars = pd.read_csv('./data/drive_net_vars.csv', index_col='osmid')

Running step 'initialize_network_walk'
Time to execute step 'initialize_network_walk': 0.00 s
Running step 'initialize_network_small'
Time to execute step 'initialize_network_small': 0.00 s
Total time to execute iteration 1 with iteration value None: 0.00 s


### Generate distance-based sampling weights

In [4]:
# w = (interaction_terms_dist**-0.3).clip(upper=1.0).dist_da.rename('w').to_frame()

### Generate the merged choice table

For now this step must be done manually by constructing a `choicemodels.MergedChoiceTable` directly instead of using a template, because `urbansim_templates` does not yet support interaction terms such as home-to-work distances.

In [8]:
# Build the chooser (observation) table: CHTS workers joined to their household
# income, then to the building and parcel referenced by building_id / parcel_id.
chooser_renames = {
    'zone_id': 'zone_id_home',
    'AGE': 'age',
    'EDUCA': 'edu',
    'INCOM': 'income',
}
obs = (
    chts_workers
    .merge(chts_households[['SAMPN', 'INCOM']], on='SAMPN')
    .merge(buildings, left_on='building_id', right_index=True)
    .merge(parcels, left_on='parcel_id', right_index=True)
    .rename(columns=chooser_renames)
)
obs.index.name = 'obs_id'

In [9]:
# Derive 0/1 chooser indicators from the coded survey fields.
# NOTE(review): the thresholds compare against category codes, not raw values;
# the column names suggest the income bands they stand for -- confirm against
# the CHTS codebook.
edu_code = obs['edu']
income_code = obs['income']

obs['no_higher_ed'] = edu_code.lt(5).astype(int)
obs['age_under_45'] = obs['age'].lt(45).astype(int)
obs['hh_inc_under_25k'] = income_code.lt(3).astype(int)
obs['hh_inc_25_to_75k'] = (income_code.gt(2) & income_code.lt(6)).astype(int)
obs['hh_inc_75_to_200k'] = (income_code.gt(5) & income_code.lt(9)).astype(int)

# Keep only the chosen job, the home zone and the chooser attributes.
chooser_columns = [
    'job_id', 'zone_id_home',
    'age_under_45', 'no_higher_ed', 'age',
    'hh_inc_under_25k', 'hh_inc_25_to_75k', 'hh_inc_75_to_200k', 'income',
]
obs = obs[chooser_columns]

In [10]:
# Build the alternatives table: every job joined to its building, parcel, and
# the walk / drive network variables of the parcel's network nodes.
alts = (
    jobs
    .merge(buildings, left_on='building_id', right_index=True)
    .merge(parcels, left_on='parcel_id', right_index=True)
    .merge(walk_net_vars, left_on='node_id_walk', right_index=True)
    .merge(drive_net_vars, left_on='node_id_small', right_index=True)
    .rename(columns={'zone_id': 'zone_id_work'})
)

In [11]:
# industry of alternatives
# Each entry maps an indicator column to the sector_id values that switch it on.
# NOTE(review): presumably these are 2-digit NAICS codes; if so, transportation
# normally spans 48-49 while only 48 is listed here -- confirm.
sector_codes = {
    'sector_retail': [44, 45],
    'sector_healthcare': [62],
    'sector_tech': [51, 54],
    'sector_food_and_hosp': [72],
    'sector_mfg': [31, 32, 33],
    'sector_edu_serv': [61],
    'sector_oth_serv': [81],
    'sector_constr': [23],
    'sector_gov': [92],
    'sector_fire': [52, 53],
    'sector_whlsale': [42],
    'sector_admin': [56],
    'sector_transport': [48],
    'sector_arts': [71],
    'sector_util': [22],
}
for indicator_name, codes in sector_codes.items():
    alts[indicator_name] = alts['sector_id'].isin(codes).astype(int)

# # occupation of alternatives
# alts['occup_mgmt'] = alts['occupation_id'].isin([11]).astype(int)
# alts['occup_sales'] = alts['occupation_id'].isin([41]).astype(int)
# alts['occup_biz'] = alts['occupation_id'].isin([13]).astype(int)
# alts['occup_admin'] = alts['occupation_id'].isin([43]).astype(int)
# alts['occup_edu'] = alts['occupation_id'].isin([25]).astype(int)
# alts['occup_food'] = alts['occupation_id'].isin([35]).astype(int)
# alts['occup_health'] = alts['occupation_id'].isin([29, 31]).astype(int)
# alts['occup_tech'] = alts['occupation_id'].isin([15]).astype(int)
# alts['occup_eng'] = alts['occupation_id'].isin([17]).astype(int)
# alts['occup_transp'] = alts['occupation_id'].isin([53]).astype(int)
# alts['occup_constr'] = alts['occupation_id'].isin([47]).astype(int)

In [12]:
# Reduce the alternatives table to the columns kept for the choice table:
# walk-accessibility job counts, the work zone, and the sector indicators.
walk_access_cols = [
    'jobs_1500_walk', 'jobs_1500_walk_tech', 'jobs_2500_walk_tech', 'jobs_2500_walk_retail',
    'jobs_1500_walk_retail', 'jobs_1500_walk_fire', 'jobs_2500_walk_fire',
]
sector_indicator_cols = [
    'sector_retail', 'sector_healthcare', 'sector_tech', 'sector_food_and_hosp',
    'sector_mfg', 'sector_edu_serv', 'sector_oth_serv', 'sector_constr',
    'sector_gov', 'sector_fire', 'sector_whlsale', 'sector_admin',
    'sector_transport', 'sector_arts', 'sector_util',
]
# Occupation indicators are currently disabled:
#     'occup_mgmt', 'occup_sales', 'occup_biz', 'occup_admin', 'occup_edu', 'occup_food', 'occup_health',
#     'occup_tech', 'occup_eng', 'occup_transp', 'occup_constr'
alts = alts[walk_access_cols + ['zone_id_work'] + sector_indicator_cols]

In [13]:
%%time
%memit
mct = MergedChoiceTable(obs, alts, chosen_alternatives='job_id',
                        sample_size=10, interaction_terms=[
                            interaction_terms_tt, interaction_terms_dist, interaction_terms_cost])

peak memory: 10562.37 MiB, increment: 0.01 MiB
CPU times: user 1.25 s, sys: 2.1 s, total: 3.35 s
Wall time: 3.84 s


In [14]:
# Preview the first rows of the merged choice table. In the recorded output the
# rows are indexed by (obs_id, job_id) and carry a `chosen` flag.
# NOTE(review): the recorded output shows -999.0 in tt_wTrnW, dist_walk and
# cost_wTrnW -- presumably a "not available" sentinel; confirm how it should be
# treated before these columns enter the model.
mct.to_frame().head()

Unnamed: 0_level_0,Unnamed: 1_level_0,zone_id_home,age_under_45,no_higher_ed,age,hh_inc_under_25k,hh_inc_25_to_75k,hh_inc_75_to_200k,income,jobs_1500_walk,jobs_1500_walk_tech,...,sector_transport,sector_arts,sector_util,chosen,tt_da,tt_wTrnW,dist_da,dist_walk,cost_da_toll,cost_wTrnW
obs_id,job_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
8917,13503.0,654,0,0,49,0,0,1,7,4060.0,101.0,...,0,0,0,1,41.47,158.59,24.5,24.22,519.4,161.0
8917,427836.0,654,0,0,49,0,0,1,7,2296.0,731.0,...,0,0,0,0,21.93,96.28,9.3,9.58,197.16,161.0
8917,2492604.0,654,0,0,49,0,0,1,7,65075.0,3989.0,...,0,0,0,0,129.7,-999.0,107.24,-999.0,2675.49,-999.0
8917,1324438.0,654,0,0,49,0,0,1,7,1260.0,116.0,...,0,1,0,0,71.24,-999.0,50.66,51.43,1082.9,-999.0
8917,94646.0,654,0,0,49,0,0,1,7,25641.0,2658.0,...,0,0,0,0,33.87,141.01,17.8,18.5,377.36,161.0


### Estimate the WLCM

In [15]:
# Initialize the model manager. The recorded output shows this registers the
# previously saved model steps (eight 'WLCM*' steps at the time of the run).
mm.initialize()

Registering model step 'WLCM-baseline'
Registering model step 'WLCM-age-sector'
Registering model step 'WLCM-higher_ed_x_sector-tt_x_dist-cost_x_income'
Registering model step 'WLCM-higher_ed_x_sector-tt_x_dist'
Registering model step 'WLCM_constrained-higher_ed_x_sector-tt_x_dist-cost_x_income'
Registering model step 'WLCM-edu-sector'
Registering model step 'WLCM-higher_ed_x_sector'
Registering model step 'WLCM'


In [16]:
# Configure the workplace location choice model step.
# NOTE(review): 115 and 98 look like cut-offs for missing/refused survey codes --
# confirm. `edu` is not among the columns kept in `obs`, and `mct` is passed
# straight to `m.fit`, so check whether these filters apply at estimation time.
m = LargeMultinomialLogitStep(
    chooser_filters=[
        'age < 115',
        'income < 98',
        'edu < 98',
    ],
    constrained_choices=True,
    alt_sample_size=10,  # same value as sample_size used when building `mct`
)

In [17]:
# Utility specification, assembled term by term and joined with ' + '.
utility_terms = [
    # Travel impedance. 'dist_da/tt_da' appears in the estimation results as
    # the two coefficients `dist_da` and `dist_da:tt_da`.
    'dist_da/tt_da',
    'tt_wTrnW',
    # Retail jobs within walking range of the workplace (log-transformed).
    'np.log1p(jobs_1500_walk_retail)',
    # Driving cost interacted with the household income bands.
    'np.log1p(cost_da_toll):(hh_inc_under_25k + hh_inc_25_to_75k + hh_inc_75_to_200k)',
    # Education level interacted with the job's sector.
    ('no_higher_ed:(sector_retail + sector_fire + sector_healthcare + sector_tech + sector_mfg + '
     'sector_food_and_hosp + sector_edu_serv + sector_gov + sector_whlsale)'),
    # Sector main effects.
    'sector_retail', 'sector_tech', 'sector_mfg', 'sector_food_and_hosp',
    'sector_edu_serv', 'sector_oth_serv', 'sector_constr', 'sector_gov',
    'sector_whlsale', 'sector_admin', 'sector_util',
]
# The trailing ' - 1' drops the intercept (patsy formula syntax).
m.model_expression = ' + '.join(utility_terms) + ' - 1'

In [21]:
# Estimate the model on the pre-built merged choice table while profiling the
# memory use of `m.fit` with memory_profiler's %mprun magic.
%mprun -f m.fit m.fit(mct)

                  CHOICEMODELS ESTIMATION RESULTS                  
Dep. Var.:                chosen   No. Observations:          8,918
Model:         Multinomial Logit   Df Residuals:              8,891
Method:       Maximum Likelihood   Df Model:                     27
Date:                 2018-11-09   Pseudo R-squ.:             0.500
Time:                      22:57   Pseudo R-bar-squ.:         0.498
AIC:                  20,596.659   Log-Likelihood:      -10,271.330
BIC:                  20,788.247   LL-Null:             -20,534.454
                                              coef   std err         z     P>|z|   Conf. Int.
---------------------------------------------------------------------------------------------
dist_da                                    -0.1100     0.003   -42.814     0.000             
dist_da:tt_da                               0.0002     0.000     7.097     0.000             
tt_wTrnW                                   -0.0001     0.000    -2.324     0.020

In [51]:
# Label the fitted step. The same name appears among the steps registered by
# mm.initialize() in the recorded output.
m.name = 'WLCM_constrained-higher_ed_x_sector-tt_x_dist-cost_x_income'
m.tags = ['max']

In [52]:
# Register the fitted step with the model manager. The recorded output shows
# this saves '<step name>.yaml' into the project's configs directory.
# NOTE(review): a step with this name was already registered by mm.initialize();
# presumably this replaces the saved configuration -- confirm.
mm.register(m)

Saving 'WLCM_constrained-higher_ed_x_sector-tt_x_dist-cost_x_income.yaml': /home/max/projects/ual_model_workspace/fall-2018-models/configs
Registering model step 'WLCM_constrained-higher_ed_x_sector-tt_x_dist-cost_x_income'
