In [1]:
import pandas as pd
import patsy
import numpy as np

In [18]:
from urbansim_templates import modelmanager as mm
from urbansim_templates.models import LargeMultinomialLogitStep
import orca
import os
import warnings

# Step up from the notebook folder to the project root so that the `scripts`
# package imported just below and the './data/...' paths can be resolved.
# The guard keeps this cell idempotent: an unconditional chdir climbs one
# more directory every time the cell is re-run.
# NOTE(review): assumes `scripts` is a directory located in the project root
# and that the notebook folder has no `scripts` sub-directory -- confirm.
if not os.path.isdir('scripts'):
    os.chdir('../')

# Install an 'ignore' filter for all warnings from here on.
warnings.simplefilter('ignore')
from scripts import datasources, models, variables
from choicemodels import MultinomialLogit
from choicemodels import mnl
from choicemodels.tools import MergedChoiceTable
from urbansim.models import util

In [3]:
# Run the two network-initialisation steps through orca, in this order.
network_steps = ['initialize_network_small', 'initialize_network_walk']
orca.run(network_steps)

Running step 'initialize_network_small'
Time to execute step 'initialize_network_small': 0.00 s
Running step 'initialize_network_walk'
Time to execute step 'initialize_network_walk': 0.00 s
Total time to execute iteration 1 with iteration value None: 0.00 s


In [4]:
# The three interaction-term files share a (zone_id_home, zone_id_work) index.
od_index_cols = ['zone_id_home', 'zone_id_work']
interaction_terms_tt = pd.read_csv(
    './data/WLCM_interaction_terms_tt.csv', index_col=od_index_cols)
interaction_terms_dist = pd.read_csv(
    './data/WLCM_interaction_terms_dist.csv', index_col=od_index_cols)
interaction_terms_cost = pd.read_csv(
    './data/WLCM_interaction_terms_cost.csv', index_col=od_index_cols)

# Network variables, indexed by their `osmid` column, are registered with
# orca under the table names that the alternatives merge refers to.
walk_net_vars = pd.read_csv('./data/walk_net_vars.csv', index_col='osmid')
orca.add_table('nodeswalk', walk_net_vars)

drive_net_vars = pd.read_csv('./data/drive_net_vars.csv', index_col='osmid')
orca.add_table('nodessmall', drive_net_vars)

<orca.orca.DataFrameWrapper at 0x7f4cba392f60>

In [15]:
# Initialise the urbansim_templates model manager; the output below lists
# the model steps it registers.
mm.initialize()

Registering model step 'WLCM-baseline'
Registering model step 'WLCM-age-sector'
Registering model step 'WLCM-higher_ed_x_sector-tt_x_dist-cost_x_income'
Registering model step 'WLCM-higher_ed_x_sector-tt_x_dist'
Registering model step 'WLCM-edu-sector'
Registering model step 'WLCM-higher_ed_x_sector'
Registering model step 'WLCM'


In [29]:
# Fetch the model step from the model manager. Without this line `m` is never
# defined anywhere in the notebook, so a fresh kernel fails with a NameError.
# NOTE(review): 'WLCM' is one of the steps registered by mm.initialize();
# confirm it is the specification that was intended here.
m = mm.get_step('WLCM')

# Chooser filters stored on the model step; they are also applied to the
# observations table below.
m.chooser_filters = ['age < 115', 'worker == 1', 'work_at_home == 0']

# Combine the filters into one boolean expression for DataFrame.query().
query = ' and '.join(m.chooser_filters)

In [31]:
# Choosers: persons merged with their household, unit, building and parcel.
chooser_tables = ['persons', 'households', 'units', 'buildings', 'parcels']
chooser_cols = ['zone_id_home', 'age', 'edu', 'income']

obs = orca.merge_tables('persons', chooser_tables)
# The merged zone_id becomes zone_id_home, one of the two index columns of
# the interaction-term tables.
obs = obs.rename(columns={'zone_id': 'zone_id_home'})
obs.index.name = 'obs_id'

# Apply the chooser filters first (they use columns that are dropped next),
# then keep only the columns listed above.
obs = obs.query(query)[chooser_cols]

In [6]:
# Alternatives: jobs merged with building, parcel and network-node tables.
alt_tables = ['jobs', 'buildings', 'parcels', 'nodeswalk', 'nodessmall']
alt_cols = ['jobs_1500_walk_retail', 'sector_id', 'zone_id_work']

alts = orca.merge_tables('jobs', alt_tables)
# The merged zone_id becomes zone_id_work, the other index column of the
# interaction-term tables; only the listed columns are kept.
alts = alts.rename(columns={'zone_id': 'zone_id_work'})[alt_cols]

In [33]:
# Choice table built from choosers and alternatives, with the tt, dist and
# cost tables supplied as interaction terms. sample_size=10 is the same value
# passed as `numalts` to mnl_simulate further down.
interaction_tables = [
    interaction_terms_tt,
    interaction_terms_dist,
    interaction_terms_cost,
]
mct = MergedChoiceTable(
    obs, alts, sample_size=10, interaction_terms=interaction_tables)

In [34]:
# Materialise the merged choice table as a DataFrame so that derived columns
# can be added to it in the next cell.
mct_df = mct.to_frame()

In [39]:
# (dummy column, sector_id codes) pairs, listed in the order the columns are
# added; each dummy is 1 when the row's sector_id is one of the codes.
sector_dummies = [
    ('sector_retail', [44, 45]),
    ('sector_healthcare', [62]),
    ('sector_tech', [51, 54]),
    ('sector_food_and_hosp', [72]),
    ('sector_mfg', [31, 32, 33]),
    ('sector_edu_serv', [61]),
    ('sector_oth_serv', [81]),
    ('sector_constr', [23]),
    ('sector_gov', [92]),
    ('sector_fire', [52, 53]),
    ('sector_whlsale', [42]),
    ('sector_admin', [56]),
    ('sector_transport', [48]),
    ('sector_arts', [71]),
    ('sector_util', [22]),
]
for dummy_col, sector_codes in sector_dummies:
    mct_df[dummy_col] = mct_df['sector_id'].isin(sector_codes).astype(int)

# Chooser attribute dummies.
mct_df['no_higher_ed'] = (mct_df['edu'] < 21).astype(int)
mct_df['age_under_45'] = (mct_df['age'] < 45).astype(int)

# Income bracket dummies. The lowest bracket excludes incomes of 10 or less;
# the other brackets include their lower bound and exclude their upper bound.
hh_income = mct_df['income']
mct_df['hh_inc_under_25k'] = (
    hh_income.lt(25000) & hh_income.gt(10)).astype(int)
mct_df['hh_inc_25_to_75k'] = (
    hh_income.ge(25000) & hh_income.lt(75000)).astype(int)
mct_df['hh_inc_75_to_200k'] = (
    hh_income.ge(75000) & hh_income.lt(200000)).astype(int)

In [42]:
# Build the design matrix from the model's patsy expression, evaluated on the
# choice table with its derived dummy columns; returned as a DataFrame.
dm = patsy.dmatrix(m.model_expression, data=mct_df, return_type='dataframe')

In [44]:
# Simulate with the fitted coefficients and request probabilities
# (returnprobs=True); numalts matches the sample_size of the choice table.
probs = mnl.mnl_simulate(
    data=dm,
    coeff=m.fitted_parameters,
    numalts=10,
    returnprobs=True,
)

In [45]:
# Same simulation call, but with returnprobs=False. The result is used below
# as a per-chooser position into that chooser's row of alternative ids.
choice_positions = mnl.mnl_simulate(
    data=dm,
    coeff=m.fitted_parameters,
    numalts=10,
    returnprobs=False,
)

In [48]:
# Flat list of alternative ids, in the row order of the choice table.
alt_id_col = mct.alternative_id_col
ids = mct_df.reset_index()[alt_id_col].tolist()

In [49]:
# N choosers, J alternative ids per chooser.
N = len(choice_positions)
J = len(ids) // N

# Lay the flat id list out as one row per chooser, then pick from each row
# the id sitting at that chooser's simulated position.
ids_by_obs = np.asarray(ids).reshape(N, J)
choices = [
    alt_row[position]
    for alt_row, position in zip(ids_by_obs, choice_positions)
]

In [50]:
# Flatten the probabilities into a single column (one value per row) and
# store it on the choice table.
probability_column = np.reshape(probs, (-1, 1))
mct_df['probability'] = probability_column

In [55]:
# Store the chosen alternative id of each chooser on the observations table.
# NOTE(review): relies on `choices` being in the same row order as `obs` --
# confirm that MergedChoiceTable preserves the chooser order.
obs['choice'] = choices

In [64]:
# Full persons table from orca, without the chooser filters applied to `obs`.
persons = orca.get_table('persons').to_frame()

In [65]:
# Left-join the simulated choices onto the full persons table by index, so
# persons with no row in `obs` keep a missing value, then expose the choice
# under the name job_id.
simulated_choice = obs[['choice']]
merged = persons.merge(
    simulated_choice, how='left', left_index=True, right_index=True)
merged = merged.rename(columns={'choice': 'job_id'})

In [70]:
# Write the persons table with the simulated job_id column next to the input
# CSVs. A project-relative path replaces the absolute /home/max/... path,
# which exists on a single machine only.
# NOTE(review): assumes the working directory is still the project root used
# for the './data/...' reads above, and that this folder is the
# fall-2018-models/data folder the absolute path pointed to -- confirm.
merged.to_csv('./data/persons_w_jobs_2018_10_16.csv')