In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.neighbors import BallTree
import geopandas as gpd
from shapely.geometry import Point, LineString
from pyproj import Proj, transform
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
from urbansim_templates import modelmanager as mm
from urbansim_templates.models import LargeMultinomialLogitStep, SegmentedLargeMultinomialLogitStep
from urbansim.utils import misc
from urbansim_defaults.utils import yaml_to_class
from urbansim.models.util import apply_filter_query
import orca
import os; os.chdir('../')

In [3]:
import warnings;warnings.simplefilter('ignore')
from baus import datasources, models, variables, ual
from choicemodels import MultinomialLogit
from choicemodels.tools import MergedChoiceTable

In [4]:
# Register the value 2015 as an orca injectable named "year".
orca.add_injectable("year", 2015)

In [5]:
# Orca steps to execute, passed to orca.run as a single ordered list.
setup_steps = [
    "generate_skims_vars",
    "skims_aggregations_drive",
    "neighborhood_vars",
    "price_vars",
    "rsh_simulate",
    "wlcm_simulate",
]
orca.run(setup_steps)

Running step 'generate_skims_vars'
Calculating number of households for zones
Calculating mean_persons of households for zones
Calculating median_persons of households for zones
Calculating std_persons of households for zones
Calculating sum_persons of households for zones
Calculating mean_income of households for zones
Calculating median_income of households for zones
Calculating std_income of households for zones
Calculating sum_income of households for zones
Calculating number of jobs for zones
Calculating mean_sector_id of jobs for zones
Calculating median_sector_id of jobs for zones
Calculating std_sector_id of jobs for zones
Calculating number of buildings for zones
Calculating mean_residential_units of buildings for zones
Calculating median_residential_units of buildings for zones
Calculating std_residential_units of buildings for zones
Calculating sum_residential_units of buildings for zones
Time to execute step 'generate_skims_vars': 18.88 s
Running step 'skims_aggregations_dr

In [115]:
# Re-run only the 'rsh_simulate' step (it is also in the step list run earlier).
# NOTE(review): the execution count for this cell (In [115]) is out of sequence;
# confirm the re-run is still needed under Restart & Run All.
orca.run(["rsh_simulate"])

Running step 'rsh_simulate'
count    2.201072e+06
mean     4.210859e+02
std      2.039033e+02
min     -4.318688e+03
25%      2.760981e+02
50%      3.990236e+02
75%      5.359256e+02
max      1.171712e+03
dtype: float64
Clipping produces
 count    2.778898e+06
mean     3.825435e+02
std      1.925757e+02
min      2.000000e+02
25%      2.000000e+02
50%      3.410230e+02
75%      4.925668e+02
max      1.171712e+03
Name: unit_residential_price, dtype: float64
Time to execute step 'rsh_simulate': 5.12 s
Total time to execute iteration 1 with iteration value None: 5.12 s


In [7]:
# Initialize the urbansim_templates model manager; the output below shows it
# registering the saved 'wlcm' model step.
mm.initialize()

Registering model step 'wlcm'


In [8]:
# Look up the "hlcm_owner.yaml" config through urbansim's misc.config helper.
# NOTE(review): presumably this returns a path to the YAML file — confirm.
config = misc.config("hlcm_owner.yaml")

In [9]:
# Build the model object described by "hlcm_owner.yaml".
# The YAML reference is resolved here under its own name so that this cell no
# longer consumes its own output: with the previous `config = f(config)` form,
# running the cell a second time passed the already-built model object to
# yaml_to_class instead of the config reference.
hlcm_owner_yaml = misc.config("hlcm_owner.yaml")
config = yaml_to_class(hlcm_owner_yaml).from_yaml(str_or_buffer=hlcm_owner_yaml)

In [10]:
# Display the chooser filter expression stored on the loaded config.
config.choosers_fit_filters

"(tenure == 'own' & income > 0)"

In [11]:
# Display the alternatives fit filter stored on the loaded config
# (no output is shown for this cell).
config.alts_fit_filters

In [12]:
# Display the choice column stored on the loaded config
# (no output is shown for this cell).
config.choice_column

In [13]:
# Display the sample size stored on the loaded config.
config.sample_size

50

In [14]:
# Display the default model expression stored on the loaded config.
config.default_model_expr

'np.log1p(total_jobs_gen_tt_CAR_45) + np.log1p(sum_income_gen_tt_CAR_45) + juris_ave_income + jobs_1500 + residential_units_1500 + np.log1p(unit_residential_price) + np.log1p(sqft_per_unit) + sfdu'

In [45]:
# Sample size reused by the merged choice table and the model step below.
alt_sample_size = 50

# Query strings applied to the households table to select choosers.
chooser_filters = [
    'tenure == "own"',
    'income > 0',
    'unit_id > 0',
]

### Create Merged Choice Table

- Person-level attributes (e.g. commutes) must be merged AFTER the merged choice table is built on households, because the choice table is created from household-level observations.

In [116]:
# Draw a random sample of households to act as choosers (observations).
# A fixed seed is used so that Restart & Run All draws the same households;
# the previous unseeded sample made the estimation results non-reproducible.
OBS_SAMPLE_SIZE = 100000
OBS_SAMPLE_SEED = 42

obs = orca.get_table('households').to_frame().sample(
    OBS_SAMPLE_SIZE, random_state=OBS_SAMPLE_SEED)

# Keep only households passing the chooser filters, and only the columns
# needed downstream.
obs = apply_filter_query(obs, filters=chooser_filters)
obs = obs[['unit_id', 'tenure', 'income']]

# Name the index 'obs_id'; later cells use 'obs_id' as a merge/groupby key.
obs.index.name = 'obs_id'

Calculating number of households for parcels
Calculating mean_persons of households for parcels
Calculating median_persons of households for parcels
Calculating std_persons of households for parcels
Calculating sum_persons of households for parcels
Calculating mean_income of households for parcels
Calculating median_income of households for parcels
Calculating std_income of households for parcels
Calculating sum_income of households for parcels
Calculating number of jobs for parcels
Calculating mean_sector_id of jobs for parcels
Calculating median_sector_id of jobs for parcels
Calculating std_sector_id of jobs for parcels
Calculating number of buildings for parcels
Calculating mean_residential_units of buildings for parcels
Calculating median_residential_units of buildings for parcels
Calculating std_residential_units of buildings for parcels
Calculating sum_residential_units of buildings for parcels


In [117]:
# Tables merged onto residential_units to build the alternatives frame.
alt_tables = ['residential_units'] + orca.get_injectable('aggregations') + ['buildings', 'zones']

# Columns kept for estimation; 'zone_id_home' is the renamed 'zone_id'.
alt_columns = [
    'total_jobs_gen_tt_CAR_45',
    'sum_income_gen_tt_CAR_45',
    'juris_ave_income',
    'jobs_1500',
    'residential_units_1500',
    'unit_residential_price',
    'sqft_per_unit',
    'sfdu',
    'zone_id_home',
]

merged_alts = orca.merge_tables('residential_units', alt_tables)
alts = merged_alts.rename(columns={'zone_id': 'zone_id_home'})[alt_columns]

Calculating number of households for zones
Calculating mean_persons of households for zones
Calculating median_persons of households for zones
Calculating std_persons of households for zones
Calculating sum_persons of households for zones
Calculating mean_income of households for zones
Calculating median_income of households for zones
Calculating std_income of households for zones
Calculating sum_income of households for zones
Calculating number of jobs for zones
Calculating mean_sector_id of jobs for zones
Calculating median_sector_id of jobs for zones
Calculating std_sector_id of jobs for zones
Calculating number of buildings for zones
Calculating mean_residential_units of buildings for zones
Calculating median_residential_units of buildings for zones
Calculating std_residential_units of buildings for zones
Calculating sum_residential_units of buildings for zones


In [184]:
# Skim measures kept as interaction terms.
skim_columns = ['dist', 'gen_tt_CAR', 'gen_tt_WALK_TRANSIT']

# Label the two skim index levels as (zone_id_home, zone_id_work), keep the
# selected measures, then re-index as (zone_id_work, zone_id_home).
interaction_terms = (
    orca.get_table('beam_skims_imputed')
    .to_frame()
    .rename_axis(['zone_id_home', 'zone_id_work'])[skim_columns]
    .reset_index()
    .set_index(['zone_id_work', 'zone_id_home'])
)

In [201]:
# Build a merged choice table from the sampled households (obs) and the
# alternatives (alts), with 'unit_id' as the chosen alternative.
# NOTE(review): the next cell constructs the same table again and overwrites
# `mct`, so this cell looks redundant — confirm and consider removing it.
mct = MergedChoiceTable(
    obs, alts,
    chosen_alternatives='unit_id',
    sample_size=alt_sample_size)

In [202]:
# Merged choice table of sampled households against sampled alternatives.
mct = MergedChoiceTable(obs, alts, chosen_alternatives='unit_id', sample_size=alt_sample_size)

# Flatten to a plain DataFrame with the index levels moved into columns so
# person-level data can be merged on in the following cells.
mct_df = mct.to_frame().reset_index()

In [203]:
# Attach each person's work zone by joining persons to jobs on job_id.
persons = orca.get_table('persons').to_frame(columns=['household_id', 'job_id'])
jobs = orca.get_table('jobs').to_frame(columns=['zone_id_work'])
persons_jobs = pd.merge(persons, jobs, left_on='job_id', right_index=True, how='left')

# Expand the choice table to one row per (observation, alternative, person),
# then look up the skim measures for each (zone_id_work, zone_id_home) pair.
mct_persons = pd.merge(mct_df, persons_jobs, left_on='obs_id', right_on='household_id', how='left')
mct_persons = mct_persons.join(
    interaction_terms, how='left', on=list(interaction_terms.index.names))

# Collapse back to one row per (obs_id, unit_id), keeping the maximum of each
# commute measure across the persons in the household.
# The output columns are named max_commute_*: the segment assignment and the
# commuter model expression further down read 'max_commute_auto' and
# 'max_commute_dist', so the previous 'max_hh_commute_*' names raised a
# KeyError on a fresh kernel.
person_aggs = mct_persons.groupby(['obs_id', 'unit_id']).agg(
    max_commute_auto=pd.NamedAgg('gen_tt_CAR', 'max'),
    max_commute_transit=pd.NamedAgg('gen_tt_WALK_TRANSIT', 'max'),
    max_commute_dist=pd.NamedAgg('dist', 'max')
).reset_index()

In [204]:
# Bring the household-level commute aggregates onto the choice table.
mct_df = mct_df.merge(person_aggs, how='left', on=['obs_id', 'unit_id'])

# Rows with a non-null max_commute_auto are labelled '1+ commuter';
# every other row keeps the 'no commuters' label.
has_commuter = mct_df['max_commute_auto'].notnull()
mct_df['segment'] = 'no commuters'
mct_df.loc[has_commuter, 'segment'] = '1+ commuter'

# Restore the (obs_id, unit_id) index.
mct_df = mct_df.set_index(['obs_id', 'unit_id'])

In [205]:
# Rebuild choice tables from the augmented frame: one for all rows and one
# per segment.
mct = MergedChoiceTable.from_df(mct_df)
mct_commuters = MergedChoiceTable.from_df(mct_df[mct_df['segment'] == '1+ commuter'])
# The label must be 'no commuters' (plural), matching the value written into
# the 'segment' column; the previous 'no commuter' matched no rows and
# produced an empty table.
mct_no_commuters = MergedChoiceTable.from_df(mct_df[mct_df['segment'] == 'no commuters'])

In [206]:
# Template step whose settings are handed to the segmented model as defaults.
# Empty DataFrames are passed for choosers and alternatives here.
default = LargeMultinomialLogitStep(
    choosers=pd.DataFrame(),
    alternatives=pd.DataFrame(),
    choice_column='unit_id',
    chooser_filters=chooser_filters,
    alt_sample_size=alt_sample_size,
    constrained_choices=True,
)

In [207]:
# Default utility expression for the template step.
# A stray doubled '+' across the line break ("... + \n + juris_ave_income")
# has been removed; the set of terms is unchanged.
default.model_expression = """np.log1p(total_jobs_gen_tt_CAR_45) + np.log1p(sum_income_gen_tt_CAR_45) +
    juris_ave_income + jobs_1500 + residential_units_1500 + np.log1p(unit_residential_price)"""

In [208]:
# Segmented model step: takes its default settings from `default` and is
# segmented on the 'segment' column.
m = SegmentedLargeMultinomialLogitStep(
    defaults=default, 
    segmentation_column='segment'
)

In [209]:
# Build the submodels from the full merged choice table; the output below
# lists the two categories found ('1+ commuter' and 'no commuters').
m.build_submodels(mct=mct)

Building submodels for 2 categories: ['1+ commuter' 'no commuters']


In [210]:
# Pull out the submodel for the '1+ commuter' segment so it can be customized.
commuter_hlcm = m.submodels['1+ commuter']

In [218]:
# Commuter-segment expression: the default terms plus the household commute
# aggregates (max_commute_auto and log1p of max_commute_dist).
# NOTE(review): `a * b` in a formula presumably expands to both main effects
# plus their interaction — confirm that is intended here.
commuter_hlcm.model_expression = """np.log1p(total_jobs_gen_tt_CAR_45) + np.log1p(sum_income_gen_tt_CAR_45) + 
    juris_ave_income + jobs_1500 + residential_units_1500 + np.log1p(unit_residential_price) + 
    max_commute_auto * np.log1p(max_commute_dist)"""

In [219]:
# Fit the commuter submodel on the commuter-only choice table; the estimation
# summary is printed below.
commuter_hlcm.fit(mct_commuters)

                  CHOICEMODELS ESTIMATION RESULTS                   
Dep. Var.:                chosen   No. Observations:          35,510
Model:         Multinomial Logit   Df Residuals:              35,500
Method:       Maximum Likelihood   Df Model:                      10
Date:                 2020-04-11   Pseudo R-squ.:              0.013
Time:                      01:04   Pseudo R-bar-squ.:          0.013
AIC:                 274,296.185   Log-Likelihood:      -137,138.093
BIC:                 274,380.961   LL-Null:             -138,915.937
                                                 coef   std err         z     P>|z|   Conf. Int.
------------------------------------------------------------------------------------------------
Intercept                                      0.0000     0.879     0.000     1.000             
np.log1p(total_jobs_gen_tt_CAR_45)             0.1348     0.064     2.117     0.034             
np.log1p(sum_income_gen_tt_CAR_45)            -0.1161     0.