In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.neighbors import BallTree
import geopandas as gpd
from shapely.geometry import Point, LineString
from pyproj import Proj, transform
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
from urbansim_templates import modelmanager as mm
from urbansim_templates.models import LargeMultinomialLogitStep, SegmentedLargeMultinomialLogitStep
from urbansim.utils import misc
from urbansim_defaults.utils import yaml_to_class
from urbansim.models.util import apply_filter_query
import orca
import os; os.chdir('../')

In [3]:
import warnings;warnings.simplefilter('ignore')
from baus import datasources, models, variables, ual
from choicemodels import MultinomialLogit
from choicemodels.tools import MergedChoiceTable

In [4]:
# Register the orca injectable "year" with the value 2015 before any steps run.
orca.add_injectable("year", 2015)

In [5]:
# Run the skim, neighborhood and price variable steps through orca,
# in the order listed here.
prerequisite_steps = [
    "generate_skims_vars",
    "skims_aggregations_drive",
    "neighborhood_vars",
    "price_vars",
]
orca.run(prerequisite_steps)

Running step 'generate_skims_vars'
Calculating number of households for zones
Calculating mean_persons of households for zones
Calculating median_persons of households for zones
Calculating std_persons of households for zones
Calculating sum_persons of households for zones
Calculating mean_income of households for zones
Calculating median_income of households for zones
Calculating std_income of households for zones
Calculating sum_income of households for zones
Calculating number of jobs for zones
Calculating mean_sector_id of jobs for zones
Calculating median_sector_id of jobs for zones
Calculating std_sector_id of jobs for zones
Calculating number of buildings for zones
Calculating mean_residential_units of buildings for zones
Calculating median_residential_units of buildings for zones
Calculating std_residential_units of buildings for zones
Calculating sum_residential_units of buildings for zones
Time to execute step 'generate_skims_vars': 50.02 s
Running step 'skims_aggregations_dr

In [6]:
# Run the 'wlcm_simulate' orca step on its own.
orca.run(["wlcm_simulate"])

Running step 'wlcm_simulate'
Registering model step 'wlcm'
Describe of development projects
          parcel_id  residential_units  residential_sqft  sqft_per_unit  \
count  2.431000e+03        2431.000000            2431.0     741.000000   
mean   1.051065e+06         131.194981               0.0     904.892038   
std    5.938067e+05         368.788354               0.0     434.518082   
min    1.900000e+01           0.000000               0.0       0.000000   
25%    5.758060e+05           0.000000               0.0     853.000000   
50%    1.113052e+06          24.000000               0.0     925.000000   
75%    1.414510e+06         127.000000               0.0    1200.000000   
max    2.052766e+06        9400.000000               0.0    2500.000000   

       non_residential_sqft  building_sqft      stories   year_built  \
count          2.431000e+03   2.431000e+03  2431.000000  2431.000000   
mean           6.248265e+04   2.277823e+05     2.477170  2019.361580   
std            2

In [7]:
# Initialize the urbansim_templates model manager (imported above as `mm`).
mm.initialize()

Registering model step 'wlcm'


In [8]:
# Look up "hlcm_owner.yaml" through urbansim's misc.config helper
# (presumably returns the path to the file — confirm).
config = misc.config("hlcm_owner.yaml")

In [9]:
# Load the model described by the YAML via yaml_to_class(...).from_yaml(...).
# NOTE(review): this rebinds `config` from the misc.config result to the loaded
# object, so re-running this cell alone would pass the loaded object back into
# yaml_to_class — re-run the previous cell first.
config = yaml_to_class(config).from_yaml(str_or_buffer=config)

In [10]:
# Show the chooser-side fit filters stored on the loaded config.
config.choosers_fit_filters

"(tenure == 'own' & income > 0)"

In [11]:
# Show the alternative-side fit filters on the loaded config
# (no output was captured for this cell).
config.alts_fit_filters

In [34]:
# Show the choice column configured on the loaded config.
# NOTE(review): this cell's execution count (34) sits between cells 11 and 12,
# so it was run out of order — re-run the notebook top to bottom before sharing.
config.choice_column

In [12]:
# Show the alternative sample size stored on the loaded config.
config.sample_size

50

In [13]:
# Show the default model expression; it is reused further down as the
# model_expression of the default step.
config.default_model_expr

'np.log1p(total_jobs_gen_tt_CAR_45) + np.log1p(sum_income_gen_tt_CAR_45) + juris_ave_income + jobs_1500 + residential_units_1500 + np.log1p(unit_residential_price) + np.log1p(sqft_per_unit) + sfdu'

In [14]:
# Chooser filters and alternative sample size used below for both the merged
# choice table and the model step.
# NOTE(review): these repeat config.choosers_fit_filters and config.sample_size
# shown above instead of reading them from `config` — confirm that is intended.
chooser_filters = ['tenure == "own"', 'income > 0']
alt_sample_size = 50

### Create Merged Choice Table

- Person-level attributes (i.e. commute times) have to be merged in AFTER the merged choice table is created, because the table is built from household-level choosers.

In [15]:
# Draw the household sample that serves as the choosers.
# A fixed random_state makes the sample identical on every Restart & Run All,
# and capping the size at the table length keeps .sample() from raising when
# the households table has fewer than 100000 rows.
obs = orca.get_table('households').to_frame()
obs = obs.sample(n=min(100000, len(obs)), random_state=0)

Calculating number of households for parcels
Calculating mean_persons of households for parcels
Calculating median_persons of households for parcels
Calculating std_persons of households for parcels
Calculating sum_persons of households for parcels
Calculating mean_income of households for parcels
Calculating median_income of households for parcels
Calculating std_income of households for parcels
Calculating sum_income of households for parcels
Calculating number of jobs for parcels
Calculating mean_sector_id of jobs for parcels
Calculating median_sector_id of jobs for parcels
Calculating std_sector_id of jobs for parcels
Calculating number of buildings for parcels
Calculating mean_residential_units of buildings for parcels
Calculating median_residential_units of buildings for parcels
Calculating std_residential_units of buildings for parcels
Calculating sum_residential_units of buildings for parcels


In [16]:
# Keep only choosers that pass the filters, and only the columns needed below.
chooser_columns = ['unit_id', 'tenure', 'income']
obs = apply_filter_query(obs, filters=chooser_filters)[chooser_columns]

In [17]:
# Merge residential units with the registered aggregation tables, buildings
# and zones into one alternatives frame.
tables_to_merge = (
    ['residential_units']
    + orca.get_injectable('aggregations')
    + ['buildings', 'zones']
)
alts = orca.merge_tables('residential_units', tables_to_merge)

Calculating number of households for zones
Calculating mean_persons of households for zones
Calculating median_persons of households for zones
Calculating std_persons of households for zones
Calculating sum_persons of households for zones
Calculating mean_income of households for zones
Calculating median_income of households for zones
Calculating std_income of households for zones
Calculating sum_income of households for zones
Calculating number of jobs for zones
Calculating mean_sector_id of jobs for zones
Calculating median_sector_id of jobs for zones
Calculating std_sector_id of jobs for zones
Calculating number of buildings for zones
Calculating mean_residential_units of buildings for zones
Calculating median_residential_units of buildings for zones
Calculating std_residential_units of buildings for zones
Calculating sum_residential_units of buildings for zones


In [18]:
# Rename the alternatives' zone column so it lines up with the
# 'zone_id_home' level of the skims index used later.
alts = alts.rename(columns={'zone_id': 'zone_id_home'})

In [19]:
# Restrict the alternatives to the model-expression variables plus the home zone.
alt_columns = [
    'total_jobs_gen_tt_CAR_45',
    'sum_income_gen_tt_CAR_45',
    'juris_ave_income',
    'jobs_1500',
    'residential_units_1500',
    'unit_residential_price',
    'sqft_per_unit',
    'sfdu',
    'zone_id_home',
]
alts = alts[alt_columns]

In [20]:
# Skims table: name the two index levels, keep the gen_tt_CAR and
# gen_tt_WALK_TRANSIT columns, and re-key by (zone_id_work, zone_id_home).
skim_columns = ['gen_tt_CAR', 'gen_tt_WALK_TRANSIT']
interaction_terms = (
    orca.get_table('beam_skims_imputed')
    .to_frame()
    .rename_axis(['zone_id_home', 'zone_id_work'])[skim_columns]
    .reset_index()
    .set_index(['zone_id_work', 'zone_id_home'])
)

In [21]:
# Build the merged choice table: household choosers against sampled
# alternatives, with 'unit_id' marking the chosen alternative.
mct = MergedChoiceTable(
    obs,
    alts,
    chosen_alternatives='unit_id',
    sample_size=alt_sample_size,
)

In [22]:
# Flatten the choice table to a DataFrame with its index moved into columns,
# then preview the first rows.
mct = mct.to_frame().reset_index()
mct.head()

Unnamed: 0,household_id,unit_id,tenure,income,total_jobs_gen_tt_CAR_45,sum_income_gen_tt_CAR_45,juris_ave_income,jobs_1500,residential_units_1500,unit_residential_price,sqft_per_unit,sfdu,zone_id_home,chosen
0,2679662,255690,own,285000.0,5408.0,1167622000.0,11.115443,11.895347,12.127992,0.0,1655.200203,11.723243,1403.0,1
1,2679662,1180216,own,285000.0,1176928.0,129053800000.0,11.373675,7.998684,8.420469,0.0,1200.0,6.61604,65.0,0
2,2679662,341292,own,285000.0,1365153.0,151072300000.0,10.933125,8.259623,8.398545,0.0,590.4,6.787541,975.0,0
3,2679662,843820,own,285000.0,1140820.0,124107100000.0,11.243751,6.172249,5.592457,0.0,1297.0,5.480614,1083.0,0
4,2679662,2712088,own,285000.0,1285963.0,139486700000.0,11.373675,10.138785,10.139724,0.0,514.285714,4.723692,32.0,0


In [23]:
# Attach each person's work zone by looking their job_id up in the jobs table;
# persons whose job_id has no match keep a missing zone_id_work (left join).
persons = orca.get_table('persons').to_frame(columns=['household_id', 'job_id'])
jobs = orca.get_table('jobs').to_frame(columns=['zone_id_work'])
persons_jobs = persons.merge(jobs, left_on='job_id', right_index=True, how='left')

In [24]:
# Expand every choice-table row to one row per household member, then pull in
# the skim values for that member's (work zone, candidate home zone) pair.
mct_persons = (
    mct.merge(persons_jobs, on='household_id', how='left')
    .join(interaction_terms, on=interaction_terms.index.names, how='left')
)

In [25]:
# Collapse back to one row per (household, unit), keeping the largest
# gen_tt_CAR and gen_tt_WALK_TRANSIT value across household members.
household_unit_groups = mct_persons.groupby(['household_id', 'unit_id'])
person_aggs = household_unit_groups.agg(
    max_commute_auto=('gen_tt_CAR', 'max'),
    max_commute_transit=('gen_tt_WALK_TRANSIT', 'max'),
).reset_index()

In [26]:
# Bring the per-(household, unit) commute maxima back onto the choice table.
mct = mct.merge(person_aggs, on=['household_id', 'unit_id'], how='left')

In [27]:
# Index the choice table by household again (it was moved to a column above).
mct = mct.set_index('household_id')

In [28]:
# Name the index 'obs_id'.
mct = mct.rename_axis('obs_id')

In [29]:
# Segment flag: True where an auto commute value was found for the row,
# False where max_commute_auto is missing.
mct['segment'] = pd.notnull(mct['max_commute_auto'])

In [35]:
# Default (unsegmented) step specification that the segmented step is built from.
default = LargeMultinomialLogitStep(
    # chooser side
    choosers=obs,
    chooser_filters=chooser_filters,
    choice_column='unit_id',
    # alternative side
    alternatives=alts,
    alt_sample_size=alt_sample_size,
    constrained_choices=True,
)

In [36]:
# Use the default model expression from the loaded YAML config for the default step.
default.model_expression = config.default_model_expr

In [37]:
# Segmented step built from the default specification, split on the 'segment' column.
m = SegmentedLargeMultinomialLogitStep(defaults=default, segmentation_column='segment')

In [39]:
# Inspect the segmentation flag on the merged choice table.
# NOTE(review): this cell's execution count (39) appears before cell 38 in the
# saved notebook, i.e. it was run after the fit cell below — re-run top to bottom.
mct['segment']

obs_id
2679662    False
2679662    False
2679662    False
2679662    False
2679662    False
           ...  
565        False
565        False
565        False
565        False
565        False
Name: segment, Length: 2857100, dtype: bool

In [38]:
# Fit the segmented step, handing it the pre-built merged choice table.
# NOTE(review): the captured run of this cell ends in KeyError: 'segment'.
# `mct` does carry a 'segment' column, but the `obs` frame passed to the step as
# `choosers` was cut down to unit_id/tenure/income earlier — presumably the step
# looks the segmentation column up on the choosers; confirm against the
# SegmentedLargeMultinomialLogitStep implementation before relying on this cell.
m.fit_all(mct=mct)

KeyError: 'segment'