In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.neighbors import BallTree
import seaborn as sns
import geopandas as gpd
from shapely.geometry import Point, LineString
from pyproj import Proj, transform
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
from urbansim_templates import modelmanager as mm
from urbansim_templates.models import LargeMultinomialLogitStep
import orca
# Move the working directory up one level before `scripts` is imported and
# before the relative './data/...' reads further down.
# NOTE(review): not idempotent -- re-running this cell moves up another level.
# Presumably the notebook lives one directory below the project root; confirm.
import os; os.chdir('../')
# Suppress all warnings from this point on.
import warnings;warnings.simplefilter('ignore')
# NOTE(review): these modules are not referenced by name later in the notebook;
# presumably they are imported for the side effect of registering the orca
# tables and steps used below -- confirm.
from scripts import datasources, models, variables
from choicemodels import MultinomialLogit
from choicemodels.tools import MergedChoiceTable

### Load Data

In [3]:
# Person records read from CSV; workers are the rows with a non-null job_id.
chts_persons = pd.read_csv('./data/chts_persons_w_jobs_and_res_bldgs.csv')
has_job = chts_persons['job_id'].notnull()
chts_workers = chts_persons[has_job]

# Run the two network-initialisation steps before pulling the orca tables.
orca.run(['initialize_network_walk', 'initialize_network_small'])
buildings, parcels, jobs = (
    orca.get_table(table_name).to_frame()
    for table_name in ('buildings', 'parcels', 'jobs')
)

# Interaction terms are indexed by (home zone, work zone) pairs.
interaction_terms = pd.read_csv(
    './data/WLCM_interaction_terms.csv',
    index_col=['zone_id_home', 'zone_id_work'],
)

# Network variables indexed by 'osmid' for the walk and drive networks.
walk_net_vars = pd.read_csv('./data/walk_net_vars.csv', index_col='osmid')
drive_net_vars = pd.read_csv('./data/drive_net_vars.csv', index_col='osmid')

Running step 'initialize_network_walk'
Time to execute step 'initialize_network_walk': 0.00 s
Running step 'initialize_network_small'
Time to execute step 'initialize_network_small': 0.00 s
Total time to execute iteration 1 with iteration value None: 0.00 s


### Generate the merged choice table

This step must be done manually for now by constructing a `choicemodels.tools.MergedChoiceTable` directly rather than through a template, because `urbansim_templates` does not yet support interaction terms such as home-to-work distances.

In [175]:
# Choosers: each worker joined to the building and parcel referenced by its
# building_id / parcel_id, with the columns used later given model-friendly names.
obs = (
    chts_workers
    .merge(buildings, left_on='building_id', right_index=True)
    .merge(parcels, left_on='parcel_id', right_index=True)
    .rename(columns={'zone_id': 'zone_id_home', 'AGE': 'age', 'EDUCA': 'edu'})
)
obs.index.name = 'obs_id'

In [176]:
# Add 0/1 chooser indicators, then keep only the columns needed downstream.
# NOTE(review): assumes 'edu' codes below 5 mean no higher education -- confirm
# against the survey codebook.
# 'edu' is not in the kept columns, so this cell cannot be re-run on its own
# without first rebuilding `obs`.
obs = obs.assign(
    no_higher_ed=(obs['edu'] < 5).astype(int),
    age_under_45=(obs['age'] < 45).astype(int),
)[['job_id', 'zone_id_home', 'age_under_45', 'no_higher_ed', 'age']]

In [101]:
# Alternatives: each job joined to its building, parcel, and the walk/drive
# network variables looked up through the parcel's network node ids.
alts = (
    jobs
    .merge(buildings, left_on='building_id', right_index=True)
    .merge(parcels, left_on='parcel_id', right_index=True)
    .merge(walk_net_vars, left_on='node_id_walk', right_index=True)
    .merge(drive_net_vars, left_on='node_id_small', right_index=True)
    .rename(columns={'zone_id': 'zone_id_work'})
)

In [106]:
# One 0/1 indicator column per employment sector, driven by a single table of
# (column name, sector_id codes) pairs. Order matches the column creation order.
# NOTE(review): sector_id values look like 2-digit NAICS codes -- confirm.
# NOTE(review): 'transport' matches only 48, whereas 'mfg' and 'retail' list
# every code of their range; check whether 49 should be included as well.
sector_code_table = [
    ('retail', [44, 45]),
    ('healthcare', [62]),
    ('tech', [51, 54]),
    ('food_and_hosp', [72]),
    ('mfg', [31, 32, 33]),
    ('edu_serv', [61]),
    ('oth_serv', [81]),
    ('constr', [23]),
    ('gov', [92]),
    ('fire', [52, 53]),
    ('whlsale', [42]),
    ('admin', [56]),
    ('transport', [48]),
    ('arts', [71]),
    ('util', [22]),
]
for sector_col, sector_ids in sector_code_table:
    alts[sector_col] = alts['sector_id'].isin(sector_ids).astype(int)

In [107]:
# Keep only the alternative-side columns needed for the choice table:
# the two job-accessibility variables, the work zone, and the sector dummies.
accessibility_cols = ['jobs_1500_walk', 'jobs_25000']
sector_cols = [
    'retail', 'healthcare', 'tech', 'food_and_hosp', 'mfg', 'edu_serv',
    'oth_serv', 'constr', 'gov', 'fire', 'whlsale', 'admin', 'transport',
    'arts', 'util',
]
alts = alts[accessibility_cols + ['zone_id_work'] + sector_cols]

In [177]:
%%time
mct = MergedChoiceTable(obs, alts, chosen_alternatives='job_id',
                        sample_size=10, interaction_terms=interaction_terms)

CPU times: user 713 ms, sys: 85.3 ms, total: 798 ms
Wall time: 796 ms


In [157]:
# Preview the first rows of the merged choice table as a DataFrame.
mct_df = mct.to_frame()
mct_df.head(12)

Unnamed: 0_level_0,Unnamed: 1_level_0,zone_id_home,age,no_higher_ed,jobs_1500_walk,jobs_25000,zone_id_work,retail,healthcare,tech,food_and_hosp,...,gov,fire,whlsale,admin,transport,arts,util,chosen,tt_da,tt_wTrnW
obs_id,job_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
24022,13503.0,654,49,0,4060.0,658449.0,383,0,1,0,0,...,0,0,0,0,0,0,0,1,41.47,158.59
24022,1339689.0,654,49,0,2223.0,764667.0,930,0,1,0,0,...,0,0,0,0,0,0,0,0,68.26,-999.0
24022,1172868.0,654,49,0,12852.0,557719.0,349,0,0,0,0,...,0,0,0,0,0,0,0,0,46.35,144.02
24022,1136282.0,654,49,0,3935.0,434461.0,810,0,0,0,0,...,0,0,0,0,0,0,0,0,56.75,-999.0
24022,765223.0,654,49,0,20750.0,277113.0,1175,0,0,1,0,...,0,0,0,0,0,0,0,0,58.04,-999.0
24022,24911.0,654,49,0,2851.0,628942.0,489,0,0,0,0,...,0,0,0,0,0,0,0,0,39.38,-999.0
24022,1795464.0,654,49,0,89899.0,762234.0,17,0,0,0,0,...,0,0,0,0,0,0,0,0,86.57,-999.0
24022,1269345.0,654,49,0,7825.0,492788.0,355,0,1,0,0,...,0,0,0,0,0,0,0,0,53.61,167.08
24022,2434213.0,654,49,0,146582.0,113286.0,1342,0,0,0,0,...,0,0,0,0,0,0,0,0,134.55,-999.0
24022,1255907.0,654,49,0,8857.0,403740.0,861,0,0,0,1,...,0,0,0,0,0,0,0,0,59.4,-999.0


### Estimate the WLCM

In [110]:
# Initialise the model manager. Per the saved output, this registers the
# previously saved steps 'WLCM-baseline' and 'WLCM'.
mm.initialize()

Registering model step 'WLCM-baseline'
Registering model step 'WLCM'


In [126]:
# Show the stored estimation results of the baseline model for comparison.
baseline_step = mm.get_step('WLCM-baseline')
print(baseline_step.summary_table)

                  CHOICEMODELS ESTIMATION RESULTS                  
Dep. Var.:                chosen   No. Observations:          8,918
Model:         Multinomial Logit   Df Residuals:              8,914
Method:       Maximum Likelihood   Df Model:                      4
Date:                 2018-10-10   Pseudo R-squ.:             0.418
Time:                      19:31   Pseudo R-bar-squ.:         0.417
AIC:                  23,923.534   Log-Likelihood:      -11,957.767
BIC:                  23,951.917   LL-Null:             -20,534.454
                              coef   std err          z     P>|z|   Conf. Int.
------------------------------------------------------------------------------
np.log1p(jobs_1500_walk)    0.0656     0.008      8.057     0.000             
np.log(jobs_25000)          0.1022     0.007     14.575     0.000             
np.log1p(tt_da)            -2.3851     0.022   -109.502     0.000             
tt_wTrnW                    0.0002     0.000      6.856     0

In [178]:
# New model step restricted to choosers older than 16 and younger than 100.
# NOTE(review): the step is later fit on a pre-built choice table; confirm
# whether chooser_filters are applied in that code path.
chooser_age_filters = ['age < 100', 'age > 16']
m = LargeMultinomialLogitStep(chooser_filters=chooser_age_filters)

In [2]:
# Earlier, fuller specification, kept for reference: every sector dummy enters
# both on its own and interacted with no_higher_ed.
# m.model_expression = ('np.log1p(jobs_1500_walk) + np.log(jobs_25000) + np.log1p(tt_da) + '
#                       'no_higher_ed:(retail + fire + healthcare + tech + mfg + food_and_hosp + edu_serv + oth_serv + constr + gov + whlsale + admin + transport + arts + util) + '
#                       'retail + fire + healthcare + tech + mfg + food_and_hosp + edu_serv + oth_serv + constr + gov + whlsale + admin + transport + arts + util +'
#                       'tt_wTrnW - 1')

# Current specification: the same accessibility and travel-time terms with a
# reduced set of sector main effects and no_higher_ed interactions.
# NOTE(review): the trailing '- 1' presumably drops the intercept (formula
# syntax) -- confirm.
# NOTE(review): tt_wTrnW enters linearly, and the choice-table preview shows
# -999.0 in that column, presumably a missing-value sentinel; confirm that
# treating it as a real travel time is intended.
# NOTE(review): requires `m` from the LargeMultinomialLogitStep cell; the saved
# NameError output comes from running this cell in a fresh kernel.
m.model_expression = ('np.log1p(jobs_1500_walk) + np.log(jobs_25000) + np.log1p(tt_da) + '
                      'no_higher_ed:(retail + fire + healthcare + tech + mfg + food_and_hosp + edu_serv + constr + gov + whlsale) + '
                      'retail + tech + mfg + food_and_hosp + edu_serv + oth_serv + constr + gov + whlsale + admin + util +'
                      'tt_wTrnW - 1')

NameError: name 'm' is not defined

In [184]:
# Estimate the model on the manually built merged choice table; the
# estimation summary is printed as the cell output.
m.fit(mct)

                  CHOICEMODELS ESTIMATION RESULTS                  
Dep. Var.:                chosen   No. Observations:          8,918
Model:         Multinomial Logit   Df Residuals:              8,893
Method:       Maximum Likelihood   Df Model:                     25
Date:                 2018-10-10   Pseudo R-squ.:             0.480
Time:                      21:13   Pseudo R-bar-squ.:         0.479
AIC:                  21,388.613   Log-Likelihood:      -10,669.307
BIC:                  21,566.009   LL-Null:             -20,534.454
                                coef   std err          z     P>|z|   Conf. Int.
--------------------------------------------------------------------------------
np.log1p(jobs_1500_walk)      0.0641     0.009      7.511     0.000             
np.log(jobs_25000)            0.0587     0.008      7.597     0.000             
np.log1p(tt_da)              -2.3806     0.023   -103.496     0.000             
no_higher_ed:retail           0.4284     0.102     

In [185]:
# Name and tag the fitted step before registering it. Per the saved output of
# the register cell, the name is also used for the saved YAML file name.
m.name = 'WLCM-higher_ed_x_sector'
m.tags = ['max']

In [186]:
# Register the step with the model manager. Per the saved output, this writes
# 'WLCM-higher_ed_x_sector.yaml' into the project's configs directory.
mm.register(m)

Saving 'WLCM-higher_ed_x_sector.yaml': /home/max/projects/ual_model_workspace/fall-2018-models/configs
Registering model step 'WLCM-higher_ed_x_sector'


In [1]:
# List the steps currently registered with the model manager.
# NOTE(review): the saved output is a NameError because this cell was run in a
# fresh kernel before the import cells; re-run it after them.
mm.list_steps()

NameError: name 'mm' is not defined