In [3]:
import pandas as pd
import patsy
import numpy as np

In [4]:
from urbansim_templates import modelmanager as mm
from urbansim_templates.models import LargeMultinomialLogitStep
import orca
import os
# The `scripts` package imported below is presumably resolved from the working
# directory, so step up one level when it is not visible from here. The guard keeps
# a re-run of this cell from climbing one more directory every time it executes.
# NOTE(review): assumes no `scripts` directory sits next to this notebook - confirm.
if not os.path.isdir('scripts'):
    os.chdir('../')
import warnings
# Blanket filter: silences every warning for the rest of the session.
warnings.simplefilter('ignore')
from scripts import datasources, models, variables
from choicemodels import MultinomialLogit
from choicemodels import mnl
from choicemodels.tools import MergedChoiceTable

In [5]:
# Execute the two registered orca steps named below, in list order.
network_steps = ['initialize_network_small', 'initialize_network_walk']
orca.run(network_steps)

Running step 'initialize_network_small'
Time to execute step 'initialize_network_small': 0.00 s
Running step 'initialize_network_walk'
Time to execute step 'initialize_network_walk': 0.00 s
Total time to execute iteration 1 with iteration value None: 0.00 s


In [4]:
# Materialise the orca tables used below as plain DataFrames.
persons, households, buildings, parcels, jobs = (
    orca.get_table(table_name).to_frame()
    for table_name in ('persons', 'households', 'buildings', 'parcels', 'jobs')
)

# Zone-pair interaction terms, indexed by (home zone, work zone).
interaction_terms = pd.read_csv(
    './data/WLCM_interaction_terms.csv',
    index_col=['zone_id_home', 'zone_id_work'],
)

# Per-node network variables for the walk and drive networks, indexed by `osmid`.
walk_net_vars = pd.read_csv('./data/walk_net_vars.csv', index_col='osmid')
drive_net_vars = pd.read_csv('./data/drive_net_vars.csv', index_col='osmid')

In [5]:
# List the columns available on the persons table.
persons.columns

Index(['member_id', 'age', 'primary_commute_mode', 'relate', 'edu', 'sex',
       'hours', 'hispanic', 'earning', 'race_id', 'student', 'work_at_home',
       'worker', 'household_id', 'node_id_small', 'node_id_walk'],
      dtype='object')

In [6]:
# List the columns available on the households table.
households.columns

Index(['serialno', 'persons', 'building_type', 'cars', 'income',
       'race_of_head', 'hispanic_head', 'age_of_head', 'workers', 'state',
       'county', 'tract', 'block_group', 'children', 'tenure', 'recent_mover',
       'block_group_id', 'single_family', 'unit_id', 'building_id',
       'node_id_small', 'node_id_walk'],
      dtype='object')

In [7]:
# Commuters are persons flagged as workers who do not work at home.
is_worker = persons['worker'] == 1
works_away_from_home = persons['work_at_home'] == 0
commuters = persons[is_worker & works_away_from_home]

In [8]:
# Row count of the commuter subset.
len(commuters)

3295341

In [9]:
# Look up each commuter's home zone by walking household -> building -> parcel.
# Every merge uses pandas' default inner join, so a commuter without a match at any
# step drops out of `obs`. The result keeps only the zone column, renamed to
# `zone_id_home`, and its index is labelled `obs_id`.
obs = (
    commuters
    .merge(households, left_on='household_id', right_index=True)
    .merge(buildings, left_on='building_id', right_index=True)
    .merge(parcels, left_on='parcel_id', right_index=True)
    .loc[:, ['zone_id']]
    .rename(columns={'zone_id': 'zone_id_home'})
    .rename_axis('obs_id')
)

In [10]:
# Row count of the observation table; compare with len(commuters) to see how many
# commuters were lost in the joins that built `obs`.
len(obs)

3060996

In [11]:
# Preview the first rows of the observation table.
obs.head()

Unnamed: 0_level_0,zone_id_home
obs_id,Unnamed: 1_level_1
0,557
3754,557
3755,557
22609,557
1,539


In [12]:
# Alternatives are jobs, enriched with their building, parcel and the walk/drive
# network variables of the nodes they attach to. The parcel's `zone_id` becomes the
# work zone. All merges use the default inner join.
alts = (
    jobs
    .merge(buildings, left_on='building_id', right_index=True)
    .merge(parcels, left_on='parcel_id', right_index=True)
    .merge(walk_net_vars, left_on='node_id_walk', right_index=True)
    .merge(drive_net_vars, left_on='node_id_small', right_index=True)
    .rename(columns={'zone_id': 'zone_id_work'})
)

In [13]:
# Keep only the alternative attributes needed downstream.
alt_columns = ['jobs_1500_walk', 'jobs_25000', 'zone_id_work']
alts = alts.loc[:, alt_columns]

In [14]:
# Row count of the alternatives table.
len(alts)

2578046

In [15]:
# Preview the first rows of the alternatives table.
alts.head()

Unnamed: 0_level_0,jobs_1500_walk,jobs_25000,zone_id_work
job_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,46.0,4987.0,297
1,46.0,4987.0,297
387,46.0,4987.0,297
388,46.0,4987.0,297
389,46.0,4987.0,297


In [16]:
%%time
# Combine observations with alternatives and the zone-pair interaction terms.
# sample_size=10 has to stay in step with the `numalts = 10` passed to
# mnl.mnl_simulate further down.
mct = MergedChoiceTable(
    obs,
    alts,
    sample_size=10,
    interaction_terms=interaction_terms,
)

CPU times: user 22.6 s, sys: 7.1 s, total: 29.7 s
Wall time: 29.8 s


In [17]:
# Materialise the merged choice table as a DataFrame; it is the data source for the
# patsy design matrix built below.
mct_df = mct.to_frame()

In [6]:
# Load the saved model steps from the project's config directory. The path is
# relative to the working directory instead of a user-specific absolute path, so
# the notebook runs from any checkout.
# NOTE(review): assumes the working directory is the project root that also holds
# `./data/` (the notebook changes into it during setup) - confirm.
mm.initialize(path='./configs/')

Registering model step 'WLCM'


In [8]:
# Fetch the registered model step named 'WLCM' from the model manager.
m = mm.get_step('WLCM')

In [9]:
# Show the model expression stored on the step; it is passed to patsy below.
m.model_expression

'np.log1p(jobs_1500_walk) + np.log(jobs_25000) + np.log1p(tt_da) + tt_wTrnW - 1'

In [20]:
# Build the design matrix for the step's model expression over every row of the
# merged choice table.
# NOTE(review): the displayed mct_df rows include tt_wTrnW == -999.00, which enters
# the expression unchanged - confirm that matches how the model was estimated.
dm = patsy.dmatrix(m.model_expression, data=mct_df, return_type='dataframe')

# patsy's default NA_action='drop' silently removes rows containing missing values.
# The simulation below reshapes by a fixed number of alternatives per observation,
# so a dropped row would misalign every choice set after it. Fail loudly instead.
assert len(dm) == len(mct_df), (
    'patsy dropped %d rows with missing values' % (len(mct_df) - len(dm)))

In [21]:
# Probabilities from the fitted coefficients (returnprobs=True), with 10
# alternatives per observation.
probs = mnl.mnl_simulate(
    data=dm,
    coeff=m.fitted_parameters,
    numalts=10,
    returnprobs=True,
)

In [22]:
# Same call with returnprobs=False: yields, per observation, the position of the
# chosen alternative within its set of 10 (used as an index into the ids below).
# NOTE(review): presumably a random draw; no seed is set in the visible cells.
choice_positions = mnl.mnl_simulate(
    data=dm,
    coeff=m.fitted_parameters,
    numalts=10,
    returnprobs=False,
)

In [23]:
# Alternative ids in the row order of the merged choice table.
alt_id_col = mct.alternative_id_col
ids = mct_df.reset_index()[alt_id_col].tolist()

In [24]:
# N observations, each with J alternatives laid out contiguously in `ids`.
N = len(choice_positions)
J, leftover = divmod(len(ids), N)
assert leftover == 0, 'alternative ids do not split evenly across observations'

# Row i holds the alternative ids offered to observation i.
ids_by_obs = np.reshape(ids, (N, J))

# Select the id at each observation's chosen position in one vectorised lookup
# (integer array indexing) instead of a Python-level loop over every observation.
# `choices` is therefore a NumPy array rather than a list; element values are the same.
choices = ids_by_obs[np.arange(N), np.asarray(choice_positions)]

In [25]:
# Flatten the probabilities into a single column (row-major order) and attach them
# to the merged choice table.
prob_column = np.reshape(probs, (-1, 1))
mct_df['probability'] = prob_column

In [26]:
# Preview only the first ten rows (one observation's sampled alternatives) rather
# than rendering the whole multi-million-row frame.
mct_df.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,zone_id_home,jobs_1500_walk,jobs_25000,zone_id_work,tt_da,tt_wTrnW,probability
obs_id,job_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,413751,557,2888.0,529017.0,583,11.49,40.35,0.697010
0,1320102,557,4352.0,388068.0,875,46.54,99.90,0.030184
0,1447013,557,8278.0,353118.0,309,39.00,83.34,0.046763
0,2120253,557,16295.0,728987.0,40,73.02,116.13,0.012714
0,2421446,557,78468.0,108870.0,1361,121.05,-999.00,0.002970
0,835143,557,1987.0,185191.0,1108,67.66,-999.00,0.009401
0,487342,557,2583.0,99222.0,718,50.90,-999.00,0.017111
0,1252816,557,9047.0,524827.0,340,34.57,68.30,0.064976
0,678438,557,12474.0,281800.0,733,40.59,-999.00,0.036676
0,916320,557,1420.0,411762.0,768,29.06,88.98,0.082195


In [27]:
# Store the simulated job id for each observation.
# NOTE(review): this is a positional assignment - it assumes `choices` is ordered
# exactly like the rows of `obs`; confirm the merged choice table keeps that order.
obs['choice'] = choices

In [28]:
# Preview the observations together with their assigned choice.
obs.head()

Unnamed: 0_level_0,zone_id_home,choice
obs_id,Unnamed: 1_level_1,Unnamed: 2_level_1
0,557,413751
3754,557,230424
3755,557,895329
22609,557,102199
1,539,31870


In [29]:
# Sanity check: select rows of `obs` whose index is absent from the commuters'
# index. An empty result means every observation maps back to a commuter.
obs[~obs.index.isin(commuters.index)]

Unnamed: 0_level_0,zone_id_home,choice
obs_id,Unnamed: 1_level_1,Unnamed: 2_level_1


In [30]:
# Attach the chosen job to every commuter by index. The left join keeps commuters
# that have no row in `obs`; they end up with a missing job_id.
job_choice = obs[['choice']].rename(columns={'choice': 'job_id'})
merged = commuters.merge(job_choice, how='left', left_index=True, right_index=True)

In [36]:
# Row count after the left join; it should equal len(commuters).
len(merged)

3295341

In [39]:
# Carry job_id over to the full persons table by index. Persons without a row in
# the previous result get a missing job_id through the left join.
job_assignments = merged[['job_id']]
merged = persons.merge(job_assignments, how='left', left_index=True, right_index=True)

In [42]:
# Check that the merge left the row count equal to that of the persons table.
len(merged) == len(persons)

True

In [47]:
# Write persons with their assigned job ids next to the other project data files.
# The path is relative, like the CSV reads earlier in the notebook, instead of a
# user-specific absolute path.
# NOTE(review): assumes `./data/` under the working directory is the project's data
# directory that the absolute path pointed at - confirm.
merged.to_csv('./data/persons_w_jobs.csv')