In [1]:
import orca
import pandas as pd
import warnings
import time
from tqdm import tqdm
from matplotlib import pyplot as plt
import numpy as np
import os; os.chdir('../')
from urbansim_templates import modelmanager as mm
from urbansim_templates import utils
from urbansim_templates.models import LargeMultinomialLogitStep
from urbansim.models.util import columns_in_formula
import warnings;warnings.simplefilter('ignore')
from choicemodels import MultinomialLogit
from choicemodels.tools import (MergedChoiceTable, monte_carlo_choices, 
        iterative_lottery_choices, parallel_lottery_choices, monte_carlo_choices)

from scripts import datasources, models, variables

%matplotlib inline

Registering model step 'auto_ownership'
Registering model step 'TOD_choice'
Registering model step 'primary_mode_choice'
Registering model step 'WLCM'


### Load data

In [2]:
# Run the `initialize_network_small` and `initialize_network_walk` orca steps.
# NOTE(review): going by their names these set up the drive ("small") and walk
# networks; where the steps are registered is not visible in this notebook — confirm.
orca.run(['initialize_network_small', 'initialize_network_walk'])

Running step 'initialize_network_small'
Time to execute step 'initialize_network_small': 0.00 s
Running step 'initialize_network_walk'
Time to execute step 'initialize_network_walk': 0.00 s
Total time to execute iteration 1 with iteration value None: 0.00 s


In [3]:
# Walk-network variables: read from CSV (indexed by `osmid`) and register them
# as the orca table 'nodeswalk'.
walk_net_vars = pd.read_csv('./data/walk_net_vars.csv', index_col='osmid')
orca.add_table('nodeswalk', walk_net_vars)

# Drive-network variables: same treatment, registered as 'nodessmall'.
# This call stays last so its return value remains the cell's displayed output.
drive_net_vars = pd.read_csv('./data/drive_net_vars.csv', index_col='osmid')
orca.add_table('nodessmall', drive_net_vars)

<orca.orca.DataFrameWrapper at 0x7fc01e8516a0>

In [4]:
# Load the three WLCM interaction-term tables. Each CSV is indexed by the
# (zone_id_home, zone_id_work) pair.
interaction_terms_tt = pd.read_csv(
    './data/WLCM_interaction_terms_tt.csv',
    index_col=['zone_id_home', 'zone_id_work'],
)
interaction_terms_dist = pd.read_csv(
    './data/WLCM_interaction_terms_dist.csv',
    index_col=['zone_id_home', 'zone_id_work'],
)
interaction_terms_cost = pd.read_csv(
    './data/WLCM_interaction_terms_cost.csv',
    index_col=['zone_id_home', 'zone_id_work'],
)

In [5]:
# Bundle the interaction-term tables (tt, dist, cost) into the single list that
# is later handed to MergedChoiceTable.
interaction_terms = [
    interaction_terms_tt,
    interaction_terms_dist,
    interaction_terms_cost,
]

### Prepare data

In [6]:
# Fetch the registered 'WLCM' model step from the model manager. Its attributes
# (tables, filters, model expression, fitted model, capacity/size columns) are
# read by the cells below.
m = mm.get_step('WLCM')

In [7]:
# Every index level name used by any interaction-term table; the set collapses
# names shared between tables.
uniq_intx_idx_names = {
    level_name
    for intx_table in interaction_terms
    for level_name in intx_table.index.names
}

# Extra columns to request for choosers and alternatives: the step's
# chooser-size / alternative-capacity column plus the interaction index names.
obs_extra_cols = [m.chooser_size] + list(uniq_intx_idx_names)
alts_extra_cols = [m.alt_capacity] + list(uniq_intx_idx_names)

In [8]:
# Chooser data for simulation: taken from the step's `out_choosers` tables with
# `choosers` as the fallback, filtered by `out_chooser_filters`.
observations = utils.get_data(
    tables=m.out_choosers,
    fallback_tables=m.choosers,
    filters=m.out_chooser_filters,
    model_expression=m.model_expression,
    extra_columns=obs_extra_cols,
)

In [9]:
# Alternative data for simulation: taken from the step's `out_alternatives`
# tables with `alternatives` as the fallback, filtered by `out_alt_filters`.
alternatives = utils.get_data(
    tables=m.out_alternatives,
    fallback_tables=m.alternatives,
    filters=m.out_alt_filters,
    model_expression=m.model_expression,
    extra_columns=alts_extra_cols,
)

In [10]:
# Column names referenced by the model expression.
expr_cols = columns_in_formula(m.model_expression)

# Trim the chooser table to the columns that are both present in it and wanted
# by the expression or the extra-column list.
obs_wanted = set(expr_cols + utils.to_list(obs_extra_cols))
obs_cols = set(observations.columns) & obs_wanted
observations = observations[list(obs_cols)]

# Same trimming for the alternatives table.
alt_wanted = set(expr_cols + utils.to_list(alts_extra_cols))
alt_cols = set(alternatives.columns) & alt_wanted
alternatives = alternatives[list(alt_cols)]

### Parallelized `choicemodels` code

In [11]:
def mct_callable(obs, alts):
    """Build a MergedChoiceTable for the given choosers and alternatives.

    Reads two notebook globals: `m` (the WLCM step, for `alt_sample_size`)
    and `interaction_terms` (the list of interaction-term tables).

    Parameters
    ----------
    obs : chooser table passed in by the lottery-choice routine
    alts : alternatives table passed in by the lottery-choice routine

    Returns
    -------
    MergedChoiceTable
    """
    mct_options = dict(
        sample_size=m.alt_sample_size,
        interaction_terms=interaction_terms,
    )
    return MergedChoiceTable(obs, alts, **mct_options)

def probs_callable(mct):
    """Return the probabilities that the WLCM step's model (`m.model`, a
    notebook global) computes for the merged choice table `mct`."""
    wlcm_model = m.model
    return wlcm_model.probabilities(mct)

Choosers: ALL // Alts: 10 // Batch size: 200k

In [12]:
# Full run: all choosers, processed in batches of 200,000.
choices = parallel_lottery_choices(
    observations,
    alternatives,
    mct_callable,
    probs_callable,
    m.alt_capacity,
    m.chooser_size,
    chooser_batch_size=200000,
)

100%|██████████| 16/16 [01:50<00:00,  6.90s/it]


In [13]:
# Check how many distinct alternatives were chosen in the full run.
# NOTE(review): the reason for the `- 1` is not evident from this notebook — confirm.
n_distinct_choices = len(np.unique(choices.values))
assert n_distinct_choices == len(alternatives) - 1

Choosers: 16 // Alts: 10 // Batch size: 1

In [14]:
# Draw 16 choosers for this small run. The seed is fixed so the same choosers
# are drawn on every execution and a failure can be reproduced.
obs = observations.sample(16, random_state=42)

In [15]:
# Small run: the sampled choosers, one chooser per batch.
choices = parallel_lottery_choices(
    obs,
    alternatives,
    mct_callable,
    probs_callable,
    m.alt_capacity,
    m.chooser_size,
    chooser_batch_size=1,
)

100%|██████████| 16/16 [00:18<00:00,  1.17s/it]


In [16]:
# The number of distinct chosen alternatives must equal the smaller of
# (number of alternatives - 1) and the number of sampled choosers.
n_distinct_choices = len(np.unique(list(choices.values)))
expected_distinct = min(len(alternatives) - 1, len(obs))
assert n_distinct_choices == expected_distinct

Choosers: 16 // Alts: ALL // Batch size: 1

In [17]:
def mct_callable(obs, alts):
    """Build a MergedChoiceTable whose `sample_size` is the full number of
    alternatives (`len(alts)`), replacing the earlier definition that used
    `m.alt_sample_size`.

    Reads the notebook global `interaction_terms`.
    """
    n_alts = len(alts)
    return MergedChoiceTable(
        obs,
        alts,
        sample_size=n_alts,
        interaction_terms=interaction_terms,
    )

In [18]:
# Draw 16 choosers for this run. The seed is fixed so the same choosers are
# drawn on every execution and a failure can be reproduced.
obs = observations.sample(16, random_state=42)

In [19]:
# Sampled choosers against the full alternative set, one chooser per batch.
choices = parallel_lottery_choices(
    obs,
    alternatives,
    mct_callable,
    probs_callable,
    m.alt_capacity,
    m.chooser_size,
    chooser_batch_size=1,
)

100%|██████████| 16/16 [00:35<00:00,  2.24s/it]


In [20]:
# The number of distinct chosen alternatives must equal the smaller of
# (number of alternatives - 1) and the number of sampled choosers.
n_distinct_choices = len(np.unique(list(choices.values)))
expected_distinct = min(len(alternatives) - 1, len(obs))
assert n_distinct_choices == expected_distinct

Choosers: 16 // Alts: ALL // Batch size: 16

In [12]:
def mct_callable(obs, alts):
    """Build a MergedChoiceTable whose `sample_size` is the full number of
    alternatives (`len(alts)`).

    Reads the notebook global `interaction_terms`.

    NOTE(review): same body as the earlier `sample_size=len(alts)` definition
    in this notebook; redundant if that cell has already been run.
    """
    n_alts = len(alts)
    return MergedChoiceTable(
        obs,
        alts,
        sample_size=n_alts,
        interaction_terms=interaction_terms,
    )

In [13]:
# Draw 16 choosers for this run. The seed is fixed so the same choosers are
# drawn on every execution and a failure can be reproduced.
obs = observations.sample(16, random_state=42)

In [14]:
# Sampled choosers against the full alternative set, all 16 in a single batch.
choices = parallel_lottery_choices(
    obs,
    alternatives,
    mct_callable,
    probs_callable,
    m.alt_capacity,
    m.chooser_size,
    chooser_batch_size=16,
)

100%|██████████| 1/1 [05:23<00:00, 323.21s/it]


Choosers: 32 // Alts: ALL // Batch size: 2

In [21]:
# Draw 32 choosers for this run. The seed is fixed so the same choosers are
# drawn on every execution; this matters here because the run below raised
# inside a worker and the triggering sample should be reproducible.
obs = observations.sample(32, random_state=42)

In [22]:
def mct_callable(obs, alts):
    """Build a MergedChoiceTable whose `sample_size` is the full number of
    alternatives (`len(alts)`).

    Reads the notebook global `interaction_terms`.

    NOTE(review): same body as the earlier `sample_size=len(alts)` definitions
    in this notebook; redundant if one of those cells has already been run.
    """
    n_alts = len(alts)
    return MergedChoiceTable(
        obs,
        alts,
        sample_size=n_alts,
        interaction_terms=interaction_terms,
    )

In [23]:
# Sampled choosers against the full alternative set, two choosers per batch.
choices = parallel_lottery_choices(
    obs,
    alternatives,
    mct_callable,
    probs_callable,
    m.alt_capacity,
    m.chooser_size,
    chooser_batch_size=2,
)

  0%|          | 0/16 [00:00<?, ?it/s]Process Process-62:
Traceback (most recent call last):
  File "/home/max/anaconda3/envs/wlcm/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/max/anaconda3/envs/wlcm/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/max/projects/choicemodels/choicemodels/tools/simulation.py", line 308, in _parallel_lottery_choices_worker
    probs = probs_callable(mct)
  File "<ipython-input-11-6545052b8ef7>", line 8, in probs_callable
    return m.model.probabilities(mct)
  File "/home/max/projects/choicemodels/choicemodels/mnl.py", line 313, in probabilities
    dm = dmatrix(self.model_expression, data=df)
  File "/home/max/anaconda3/envs/wlcm/lib/python3.6/site-packages/patsy/highlevel.py", line 291, in dmatrix
    NA_action, return_type)
  File "/home/max/anaconda3/envs/wlcm/lib/python3.6/site-packages/patsy/highlevel.py", line 169, in _do_highlev

In [25]:
# Number of entries in the most recent `choices` result.
# NOTE(review): the recorded output (30) does not match the 32 choosers sampled
# for the preceding run, whose worker process raised a traceback — the stored
# value looks stale; re-run to confirm.
len(choices)

30