In [1]:
import pandas as pd
import patsy
from patsy import dmatrix
import numpy as np

In [2]:
from urbansim_templates import modelmanager as mm
from urbansim_templates.models import LargeMultinomialLogitStep
import orca
import os; os.chdir('../')
import warnings;warnings.simplefilter('ignore')
from scripts import datasources, models, variables
from choicemodels import MultinomialLogit, MultinomialLogitResults
from choicemodels import mnl
from choicemodels.tools import MergedChoiceTable, simulation
from urbansim.models import util
from urbansim.utils import misc

Registering model step 'WLCM-baseline'
Registering model step 'WLCM-age-sector'
Registering model step 'WLCM-higher_ed_x_sector-tt_x_dist-cost_x_income'
Registering model step 'WLCM-higher_ed_x_sector-tt_x_dist'
Registering model step 'WLCM_constrained-higher_ed_x_sector-tt_x_dist-cost_x_income'
Registering model step 'ELCM-defaults'
Registering model step 'WLCM-edu-sector'
Registering model step 'WLCM-higher_ed_x_sector'
Registering model step 'WLCM'


### Generate accessibility variables (or load them if they already exist)

In [3]:
orca.run(['initialize_network_small', 'initialize_network_walk'])

Running step 'initialize_network_small'
Time to execute step 'initialize_network_small': 0.00 s
Running step 'initialize_network_walk'
Time to execute step 'initialize_network_walk': 0.00 s
Total time to execute iteration 1 with iteration value None: 0.00 s


If they exist already:

In [4]:
walk_net_vars = pd.read_csv('./data/walk_net_vars.csv', index_col='osmid')
drive_net_vars = pd.read_csv('./data/drive_net_vars.csv', index_col='osmid')
orca.add_table('nodeswalk', walk_net_vars)
orca.add_table('nodessmall', drive_net_vars)

<orca.orca.DataFrameWrapper at 0x7f1f673fbf60>

If generating on the fly:

In [5]:
# orca.run(['network_aggregations_small', 'network_aggregations_walk'])

### Load interaction terms

Created in WLCM_pre-processing.ipynb

In [6]:
interaction_terms_tt = pd.read_csv(
    './data/WLCM_interaction_terms_tt.csv', index_col=[
        'zone_id_home', 'zone_id_work'])
interaction_terms_dist = pd.read_csv(
    './data/WLCM_interaction_terms_dist.csv', index_col=[
        'zone_id_home', 'zone_id_work'])
interaction_terms_cost = pd.read_csv(
    './data/WLCM_interaction_terms_cost.csv', index_col=[
        'zone_id_home', 'zone_id_work'])

### Load estimated model specification

In [7]:
m = mm.get_step('WLCM_constrained-higher_ed_x_sector-tt_x_dist-cost_x_income')

In [8]:
m.choosers = ['persons', 'households']
m.alternatives = ['jobs', 'buildings', 'parcels', 'nodeswalk', 'nodessmall']
m.out_chooser_filters = ['worker == 1', 'work_at_home == 0']
m.out_table = 'persons'
m.out_column = 'job_id'

In [9]:
m.run(chooser_batch_size=200000, interaction_terms=[
    interaction_terms_tt, interaction_terms_dist, interaction_terms_cost])

Iteration 1: 191312 of 6571488 valid choices
Iteration 2: 381914 of 6571488 valid choices
Iteration 3: 571792 of 6571488 valid choices
Iteration 4: 760699 of 6571488 valid choices
Iteration 5: 948647 of 6571488 valid choices
Iteration 6: 1135329 of 6571488 valid choices
Iteration 7: 1320071 of 6571488 valid choices
Iteration 8: 1502860 of 6571488 valid choices
Iteration 9: 1682721 of 6571488 valid choices
Iteration 10: 1859018 of 6571488 valid choices
Iteration 11: 2029804 of 6571488 valid choices
Iteration 12: 2192363 of 6571488 valid choices
Iteration 13: 2341223 of 6571488 valid choices
Iteration 14: 2465943 of 6571488 valid choices
Iteration 15: 2545698 of 6571488 valid choices
Iteration 16: 2575209 of 6571488 valid choices
Iteration 17: 2578046 of 6571488 valid choices
No valid alternatives for the remaining choosers


### Format the simulation input data

In [10]:
chooser_filters = ['worker == 1', 'work_at_home == 0']
query = ' and '.join(chooser_filters)

In [11]:
obs = orca.merge_tables('persons', [
    'persons', 'households', 'units', 'buildings', 'parcels'])
obs.index.name = 'obs_id'
obs = obs.query(query)
obs = obs[[
    'zone_id_home', 'age', 'edu', 'income']]

KeyboardInterrupt: 

In [None]:
obs['no_higher_ed'] = (obs['edu'] < 21).astype(int)
obs['age_under_45'] = (obs['age'] < 45).astype(int)
obs['hh_inc_under_25k'] = ((obs['income'] < 25000) & (obs['income'] > 10)).astype(int)
obs['hh_inc_25_to_75k'] = (
    (obs['income'] >= 25000) & (obs['income'] < 75000)).astype(int)
obs['hh_inc_75_to_200k'] = (
    (obs['income'] >= 75000) & (obs['income'] < 200000)).astype(int)

In [None]:
alts = orca.merge_tables(
    'jobs', [
        'jobs', 'buildings', 'parcels', 'nodeswalk', 'nodessmall']).rename(
    columns={'zone_id': 'zone_id_work'})
alts = alts[[
    'jobs_1500_walk_retail', 'sector_id', 'zone_id_work'
]]

In [None]:
alts['sector_retail'] = alts['sector_id'].isin([44, 45]).astype(int)
alts['sector_healthcare'] = alts['sector_id'].isin([62]).astype(int)
alts['sector_tech'] = alts['sector_id'].isin([51, 54]).astype(int)
alts['sector_food_and_hosp'] = alts['sector_id'].isin([72]).astype(int)
alts['sector_mfg'] = alts['sector_id'].isin([31, 32, 33]).astype(int)
alts['sector_edu_serv'] = alts['sector_id'].isin([61]).astype(int)
alts['sector_oth_serv'] = alts['sector_id'].isin([81]).astype(int)
alts['sector_constr'] = alts['sector_id'].isin([23]).astype(int)
alts['sector_gov'] = alts['sector_id'].isin([92]).astype(int)
alts['sector_fire'] = alts['sector_id'].isin([52, 53]).astype(int)
alts['sector_whlsale'] = alts['sector_id'].isin([42]).astype(int)
alts['sector_admin'] = alts['sector_id'].isin([56]).astype(int)
alts['sector_transport'] = alts['sector_id'].isin([48]).astype(int)
alts['sector_arts'] = alts['sector_id'].isin([71]).astype(int)
alts['sector_util'] = alts['sector_id'].isin([22]).astype(int)

### Simulation with constrained choices

In [12]:
model = MultinomialLogitResults(model_expression = m.model_expression, 
                fitted_parameters = m.fitted_parameters)

In [13]:
def mct_callable(obs, alts):
    return MergedChoiceTable(obs, alts, sample_size=m.alt_sample_size, interaction_terms=[
                            interaction_terms_tt, interaction_terms_dist, interaction_terms_cost])
        
def probs_callable(mct):
    return model.probabilities(mct)

#### Iterative Lottery Choices with Sampling

In [12]:
ch = m._get_df(m.choosers).head(1000)

In [13]:
alt = m._get_df(m.alternatives)

In [14]:
n_obs = ch.shape[0]
        
oid_name = ch.index.name
aid_name = alt.index.name

samp_size = m.alt_sample_size

In [15]:
obs_ids = np.repeat(ch.index.values, samp_size)

In [16]:
alt_ids = np.random.choice(alt.index.values, 
                                       replace = True,
                                       size = n_obs * samp_size)

In [17]:
df = pd.DataFrame({oid_name: obs_ids, aid_name: alt_ids})

In [19]:
df.head()

Unnamed: 0,person_id,job_id
0,0,2124996
1,0,462320
2,0,1678718
3,0,701715
4,0,1620126


In [22]:
oid_name

'person_id'

In [18]:
df = df.join(ch, how='left', on=oid_name)

ValueError: columns overlap but no suffix specified: Index(['job_id'], dtype='object')

In [None]:
df = df.join(alt, how='left', on=aid_name)

In [26]:
df.join(pd.DataFrame(interaction_terms_tt), how='left', 
                         on=interaction_terms_tt.index.names)

KeyError: 'zone_id_home'

In [25]:
# for intx_table in [interaction_terms_tt, interaction_terms_dist, interaction_terms_cost]:
    merged = merged.join(pd.DataFrame(intx_table), how='left', 
                         on=intx_table.index.names)

KeyError: 'zone_id_home'

In [9]:
MergedChoiceTable(ch, alt, sample_size=m.alt_sample_size, interaction_terms=[
    interaction_terms_tt, interaction_terms_dist, interaction_terms_cost])

KeyError: 'zone_id_home'

In [25]:
choices = simulation.iterative_lottery_choices(
    obs, alts, mct_callable=mct_callable, probs_callable=probs_callable, alt_capacity=m.alt_capacity,
    chooser_size=m.chooser_size, max_iter=m.max_iter, chooser_batch_size=200000)

ValueError: The column label 'zone_id_home' is not unique.

In [16]:
print("WLCM placed {0} of {1} workers {2}% during simulation.".format(
    len(choices), len(obs), np.round(len(choices)/len(obs) * 100, 1)))

WLCM placed 2578046 of 3060996 workers 84.2% during simulation.


### Save results

In [17]:
persons = orca.get_table('persons')

In [18]:
persons.update_col('job_id', pd.Series())

In [19]:
persons.update_col_from_series('job_id', choices, cast=True)

In [21]:
persons.to_frame().to_csv('./data/persons_w_jobs_2018_10_26.csv')

### Old way

In [13]:
dm = patsy.dmatrix(m.model_expression, data=mct_df, return_type='matrix')

In [12]:
%%time
dm = patsy.dmatrix(m.model_expression, data=mct_df, return_type='dataframe')

CPU times: user 37.5 s, sys: 8.63 s, total: 46.1 s
Wall time: 46.2 s


In [16]:
probs = mnl.mnl_simulate(data = dm, coeff = m.fitted_parameters, 
                                 numalts = 10, returnprobs=True)

In [21]:
choice_positions = mnl.mnl_simulate(data = dm, coeff = m.fitted_parameters, 
                                            numalts = 10, returnprobs=False)

In [24]:
ids = mct_df.reset_index()['job_id'].tolist()

In [49]:
N = len(choice_positions)
J = len(ids) // N
ids_by_obs = np.reshape(ids, (N,J))
choices = [ids_by_obs[i][choice_positions[i]] for i in range(N)]

In [50]:
mct_df['probability'] = np.reshape(probs, (probs.size, 1))

In [55]:
obs['choice'] = choices

In [64]:
persons = orca.get_table('persons').to_frame()

In [65]:
merged = pd.merge(persons, obs[['choice']], left_index=True, right_index=True, how='left').rename(columns={'choice': 'job_id'})

In [70]:
merged.to_csv('/home/max/projects/ual_model_workspace/fall-2018-models/data/persons_w_jobs_2018_10_16.csv')