In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.neighbors import BallTree
# import seaborn as sns
import geopandas as gpd
from shapely.geometry import Point, LineString
from pyproj import Proj, transform
from matplotlib import pyplot as plt
%matplotlib inline
# %load_ext memory_profiler

In [2]:
from urbansim_templates import modelmanager as mm
from urbansim_templates.models import LargeMultinomialLogitStep
import orca
import os; os.chdir('../')
import warnings;warnings.simplefilter('ignore')
from scripts import datasources, models, variables
from choicemodels import MultinomialLogit
from choicemodels.tools import MergedChoiceTable

Registering model step 'WLCM_constrained-higher_ed_x_sector-tt_x_dist-cost_x_income'


### Load Data

In [3]:
# --- CHTS survey inputs -------------------------------------------------------
# NOTE(review): the household file is read from an absolute, machine-specific
# path; consider moving it under a configurable data directory.
chts_households = pd.read_csv('/home/data/fall_2018/CHTS_csv_format/data/Deliv_HH.csv')
chts_persons = pd.read_csv('./data/chts_persons_w_jobs_and_res_bldgs.csv')

# 0/1 indicator columns. The previous version assigned a row-filtered DataFrame
# to a single column, which does not produce an indicator (depending on the
# pandas version it yields an all-NaN column or raises).
# Presumably EMPLY == 1 means "employed" and WLOC == 2 means "works at home" --
# TODO confirm against the CHTS codebook.
chts_persons['worker'] = (chts_persons['EMPLY'] == 1).astype(int)
chts_persons['work_at_home'] = (chts_persons['WLOC'] == 2).astype(int)

# Keep only persons that have a non-null job_id.
chts_workers = chts_persons[~pd.isnull(chts_persons['job_id'])]

# --- Orca tables --------------------------------------------------------------
# The two network-initialisation steps are run before the tables are pulled.
orca.run(['initialize_network_walk', 'initialize_network_small'])
buildings = orca.get_table('buildings').to_frame()
parcels = orca.get_table('parcels').to_frame()
jobs = orca.get_table('jobs').to_frame()

# --- Zone-to-zone interaction terms, indexed by (home zone, work zone) --------
interaction_terms_tt = pd.read_csv('./data/WLCM_interaction_terms_tt.csv', index_col=['zone_id_home', 'zone_id_work'])
interaction_terms_dist = pd.read_csv('./data/WLCM_interaction_terms_dist.csv', index_col=['zone_id_home', 'zone_id_work'])
interaction_terms_cost = pd.read_csv('./data/WLCM_interaction_terms_cost.csv', index_col=['zone_id_home', 'zone_id_work'])

# --- Network variables, indexed by 'osmid' ------------------------------------
walk_net_vars = pd.read_csv('./data/walk_net_vars.csv', index_col='osmid')
drive_net_vars = pd.read_csv('./data/drive_net_vars.csv', index_col='osmid')

Running step 'initialize_network_walk'
Time to execute step 'initialize_network_walk': 0.00 s
Running step 'initialize_network_small'
Time to execute step 'initialize_network_small': 0.00 s
Total time to execute iteration 1 with iteration value None: 0.00 s


### Generate distance-based sampling weights

In [4]:
# w = (interaction_terms_dist**-0.3).clip(upper=1.0).dist_da.rename('w').to_frame()

### Generate the merged choice table

This step must currently be done manually by instantiating `choicemodels.MergedChoiceTable` directly rather than through a template, because `urbansim_templates` does not yet support interaction terms such as home-to-work distances.

In [5]:
# Chooser table: CHTS workers joined to their household income and to the
# building and parcel referenced by 'building_id' / 'parcel_id', with survey
# column names normalised to the lower-case names used by the model.
obs = (
    chts_workers
    .merge(chts_households[['SAMPN', 'INCOM']], on='SAMPN')
    .merge(buildings, left_on='building_id', right_index=True)
    .merge(parcels, left_on='parcel_id', right_index=True)
    .rename(columns={
        'zone_id': 'zone_id_home',
        'AGE': 'age',
        'EDUCA': 'edu',
        'INCOM': 'income',
    })
)
obs.index.name = 'obs_id'

In [6]:
# Derive 0/1 chooser dummies, then keep only the columns the model needs.
# NOTE(review): the thresholds are applied to coded 'edu' / 'income' values;
# the dollar ranges in the column names presumably follow the CHTS category
# codes -- confirm against the codebook.
obs = obs.assign(
    no_higher_ed=obs['edu'].lt(5).astype(int),
    age_under_45=obs['age'].lt(45).astype(int),
    hh_inc_under_25k=obs['income'].lt(3).astype(int),
    hh_inc_25_to_75k=(obs['income'].gt(2) & obs['income'].lt(6)).astype(int),
    hh_inc_75_to_200k=(obs['income'].gt(5) & obs['income'].lt(9)).astype(int),
)[[
    'job_id', 'zone_id_home', 'age_under_45', 'no_higher_ed', 'age',
    'hh_inc_under_25k', 'hh_inc_25_to_75k', 'hh_inc_75_to_200k', 'income',
]]

In [7]:
# Alternatives table: every job joined to its building, parcel, and the
# walk / drive network variables of the nodes referenced by 'node_id_walk'
# and 'node_id_small'. The zone column is renamed to mark it as the work zone.
alts = (
    jobs
    .merge(buildings, left_on='building_id', right_index=True)
    .merge(parcels, left_on='parcel_id', right_index=True)
    .merge(walk_net_vars, left_on='node_id_walk', right_index=True)
    .merge(drive_net_vars, left_on='node_id_small', right_index=True)
    .rename(columns={'zone_id': 'zone_id_work'})
)

In [8]:
# industry of alternatives
# Each entry maps a 0/1 dummy column to the 'sector_id' values that switch it on.
# NOTE(review): the ids look like 2-digit NAICS sectors; if so, transport is
# usually 48-49 while only 48 is matched here -- confirm against the jobs table.
_SECTOR_CODES = (
    ('sector_retail', [44, 45]),
    ('sector_healthcare', [62]),
    ('sector_tech', [51, 54]),
    ('sector_food_and_hosp', [72]),
    ('sector_mfg', [31, 32, 33]),
    ('sector_edu_serv', [61]),
    ('sector_oth_serv', [81]),
    ('sector_constr', [23]),
    ('sector_gov', [92]),
    ('sector_fire', [52, 53]),
    ('sector_whlsale', [42]),
    ('sector_admin', [56]),
    ('sector_transport', [48]),
    ('sector_arts', [71]),
    ('sector_util', [22]),
)
for _column, _codes in _SECTOR_CODES:
    alts[_column] = alts['sector_id'].isin(_codes).astype(int)

# # occupation of alternatives
# alts['occup_mgmt'] = alts['occupation_id'].isin([11]).astype(int)
# alts['occup_sales'] = alts['occupation_id'].isin([41]).astype(int)
# alts['occup_biz'] = alts['occupation_id'].isin([13]).astype(int)
# alts['occup_admin'] = alts['occupation_id'].isin([43]).astype(int)
# alts['occup_edu'] = alts['occupation_id'].isin([25]).astype(int)
# alts['occup_food'] = alts['occupation_id'].isin([35]).astype(int)
# alts['occup_health'] = alts['occupation_id'].isin([29, 31]).astype(int)
# alts['occup_tech'] = alts['occupation_id'].isin([15]).astype(int)
# alts['occup_eng'] = alts['occupation_id'].isin([17]).astype(int)
# alts['occup_transp'] = alts['occupation_id'].isin([53]).astype(int)
# alts['occup_constr'] = alts['occupation_id'].isin([47]).astype(int)

In [9]:
# Restrict the alternatives to the columns carried into the choice table.
# Job-count columns (presumably from the walk-network variables -- verify).
_job_count_cols = [
    'jobs_1500_walk', 'jobs_1500_walk_tech', 'jobs_2500_walk_tech',
    'jobs_2500_walk_retail', 'jobs_1500_walk_retail', 'jobs_1500_walk_fire',
    'jobs_2500_walk_fire',
]
# Sector dummy columns.
_sector_cols = [
    'sector_retail', 'sector_healthcare', 'sector_tech', 'sector_food_and_hosp',
    'sector_mfg', 'sector_edu_serv', 'sector_oth_serv', 'sector_constr',
    'sector_gov', 'sector_fire', 'sector_whlsale', 'sector_admin',
    'sector_transport', 'sector_arts', 'sector_util',
]
# Occupation dummies are currently disabled:
#     'occup_mgmt', 'occup_sales', 'occup_biz', 'occup_admin', 'occup_edu', 'occup_food', 'occup_health',
#     'occup_tech', 'occup_eng', 'occup_transp', 'occup_constr'
alts = alts[_job_count_cols + ['zone_id_work'] + _sector_cols]

In [18]:
%%time
# %memit
mct = MergedChoiceTable(obs, alts, chosen_alternatives='job_id',
                        sample_size=100, interaction_terms=[
                            interaction_terms_tt, interaction_terms_dist, interaction_terms_cost])

CPU times: user 3.19 s, sys: 888 ms, total: 4.08 s
Wall time: 4.09 s


In [11]:
# Preview the first five rows of the merged choice table.
mct.to_frame().head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,zone_id_home,age_under_45,no_higher_ed,age,hh_inc_under_25k,hh_inc_25_to_75k,hh_inc_75_to_200k,income,jobs_1500_walk,jobs_1500_walk_tech,...,sector_transport,sector_arts,sector_util,chosen,tt_da,tt_wTrnW,dist_da,dist_walk,cost_da_toll,cost_wTrnW
obs_id,job_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
8917,13503.0,654,0,0,49,0,0,1,7,4060.0,101.0,...,0,0,0,1,41.47,158.59,24.5,24.22,519.4,161.0
8917,635806.0,654,0,0,49,0,0,1,7,7483.0,1148.0,...,0,0,0,0,30.49,82.63,15.76,15.3,334.11,161.0
8917,1169030.0,654,0,0,49,0,0,1,7,401.0,10.0,...,0,0,0,0,64.85,-999.0,44.34,44.16,948.92,-999.0
8917,1360647.0,654,0,0,49,0,0,1,7,8116.0,1171.0,...,0,0,0,0,55.74,155.5,33.35,34.98,707.02,570.0
8917,1194067.0,654,0,0,49,0,0,1,7,18737.0,683.0,...,0,0,0,0,60.89,150.0,41.43,41.38,887.23,591.0


### Estimate the WLCM

In [19]:
# Initialise the model manager; the cell output shows it registering the
# previously saved 'WLCM_constrained-...' step.
mm.initialize()

Registering model step 'WLCM_constrained-higher_ed_x_sector-tt_x_dist-cost_x_income'


In [20]:
# Configure the workplace location choice step.
# NOTE(review): the filters reference 'edu', which is not among the columns
# kept in `obs` above -- verify whether the filters are applied to the
# pre-built choice table passed to fit().
m = LargeMultinomialLogitStep(
    chooser_filters=['age < 115', 'income < 98', 'edu < 98'],
    alt_sample_size=100,
    constrained_choices=True,
)

In [21]:
# Utility specification, assembled term by term and joined with ' + '.
# The trailing ' - 1' is kept as in the original expression.
# NOTE(review): the choice-table preview shows -999 values in tt_wTrnW and
# cost_wTrnW; presumably a missing-data sentinel that enters the utility
# as-is -- verify.
_model_terms = [
    # travel impedance and accessibility
    'dist_da/tt_da',
    'tt_wTrnW',
    'np.log1p(jobs_1500_walk_retail)',
    # driving cost interacted with household income bands
    'np.log1p(cost_da_toll):(hh_inc_under_25k + hh_inc_25_to_75k + hh_inc_75_to_200k)',
    # education interacted with job sector
    'no_higher_ed:(sector_retail + sector_fire + sector_healthcare + sector_tech + sector_mfg + '
    'sector_food_and_hosp + sector_edu_serv + sector_gov + sector_whlsale)',
    # sector main effects
    'sector_retail',
    'sector_tech',
    'sector_mfg',
    'sector_food_and_hosp',
    'sector_edu_serv',
    'sector_oth_serv',
    'sector_constr',
    'sector_gov',
    'sector_whlsale',
    'sector_admin',
    'sector_util',
]
m.model_expression = ' + '.join(_model_terms) + ' - 1'

In [22]:
# Estimate the model on the pre-built merged choice table.
# NOTE(review): the recorded output reports 'ABNORMAL_TERMINATION_IN_LNSRCH'
# with warnflag 2, i.e. the optimizer did not terminate normally -- treat the
# coefficients below with caution.
# %mprun -f m.fit 
m.fit(mct)

       -6.26353105e-01, -4.92359861e-01, -2.64377742e-01,  5.23251404e-01,
       -1.09476407e+00, -7.19217541e-01, -1.56288006e+00, -6.13501437e-01,
        1.09435176e+00, -1.46427523e+00, -7.01324674e-01,  5.51569518e-01,
       -1.15546463e+00,  6.18100102e-01, -5.55207124e-01, -1.91906642e+00,
        1.22632717e+00, -5.57587897e-01, -1.25653225e+00,  4.47698930e-01,
       -2.27388137e+00, -1.12884394e+00,  6.05754425e-01]), array([[27560.70022855]]), {'grad': array([-0.03166736, -0.08453475, -0.11535843,  0.04644688,  0.02001229,
       -0.00805658,  0.02415927, -0.00785179,  0.00445038, -0.02243847,
        0.00155468,  0.0001496 , -0.0102652 ,  0.00577386,  0.01130301,
        0.0358884 , -0.00069574, -0.0793092 , -0.00531949, -0.01909658,
        0.04425809, -0.00665129,  0.00572571,  0.02786018,  0.00971504,
        0.00560601, -0.01078316]), 'task': b'ABNORMAL_TERMINATION_IN_LNSRCH', 'funcalls': 7298, 'nit': 6331, 'warnflag': 2})


                  CHOICEMODELS ESTIMATION RESULTS                  
Dep. Var.:                chosen   No. Observations:          8,918
Model:         Multinomial Logit   Df Residuals:              8,891
Method:       Maximum Likelihood   Df Model:                     27
Date:                 2019-06-17   Pseudo R-squ.:             0.329
Time:                      09:57   Pseudo R-bar-squ.:         0.328
AIC:                  55,175.400   Log-Likelihood:      -27,560.700
BIC:                  55,366.988   LL-Null:             -41,068.908
                                              coef   std err         z     P>|z|   Conf. Int.
---------------------------------------------------------------------------------------------
dist_da                                    -0.1155     0.002   -52.828     0.000             
dist_da:tt_da                               0.0002     0.000     8.676     0.000             
tt_wTrnW                                   -0.0001     0.000    -6.063     0.000

In [51]:
# Label the fitted step; the same name appears in the YAML filename reported
# when the step is registered.
m.name = 'WLCM_constrained-higher_ed_x_sector-tt_x_dist-cost_x_income'
m.tags = ['max']

In [52]:
# Register the step with the model manager; per the cell output this saves
# '<name>.yaml' into the project's configs directory.
mm.register(m)

Saving 'WLCM_constrained-higher_ed_x_sector-tt_x_dist-cost_x_income.yaml': /home/max/projects/ual_model_workspace/fall-2018-models/configs
Registering model step 'WLCM_constrained-higher_ed_x_sector-tt_x_dist-cost_x_income'
