In [1]:
import os

# Relative paths used later in this notebook (e.g. 'configs/...') are resolved
# from the directory one level above the notebook's own folder.
# A bare os.chdir('../') climbs one more level every time this cell is re-run
# in a live kernel, so only move while the 'configs' folder is not yet visible.
# NOTE(review): assumes the notebook's own folder has no 'configs' directory.
if not os.path.isdir('configs'):
    os.chdir('../')

In [2]:
import orca
from collections import OrderedDict
from urbansim_templates import modelmanager as mm
from urbansim_templates.models import LargeMultinomialLogitStep
from urbansim_templates.utils import get_data, get_df

from choicemodels.tools import MergedChoiceTable

from urbansim.models.util import columns_in_formula, apply_filter_query

In [3]:
region_code = '06197001'

# Run-time switches registered as orca injectables, one per entry and in the
# listed order. Presumably consumed by the project modules imported afterwards
# (datasources / variables / models) — confirm against those modules.
run_settings = [
    ('running_calibration_routine', False),
    ('local_simulation', True),
    ('initial_run', False),
    ('region_code', region_code),
    ('base_year', 2010),
    ('forecast_year', 2011),
    ('calibrated', True),
    ('calibrated_folder', 'custom'),
    ('multi_level_lcms', True),
    ('segmented_lcms', True),
    ('capacity_boost', 1),
    ('all_local', True),
    ('skim_source', 'beam'),
]
for injectable_name, injectable_value in run_settings:
    orca.add_injectable(injectable_name, injectable_value)

In [4]:
import datasources
import variables
import models

importing datasources
importing datasources for region 06197001
custom_mpo_06197001_model_data.h5
Checking if custom_settings.yaml file exists
Checking if custom output_parameters.yaml file exists


  hct = hct.append(forecast_hct.reset_index())
  ect = ect.append(forecast_ect.reset_index())


Output path exists!
importing variables for region 06197001
importing models for region 06197001
Checking if value configs exist
Checking if rent configs exist


In [5]:
# Build the configs path from the 'calibrated_folder' and 'region_code'
# injectables instead of repeating both values as a second hard-coded literal,
# so changing the region in one place cannot leave this path stale.
configs_folder = 'configs/calibrated_configs/{}/{}'.format(
    orca.get_injectable('calibrated_folder'),
    orca.get_injectable('region_code'),
)

# Point the model manager at that folder; the saved output shows it
# registering the model steps stored there.
mm.initialize(configs_folder)

# Run whatever list of step names the 'pre_processing_steps' injectable holds.
orca.run(orca.get_injectable('pre_processing_steps'))

Registering model step 'hlcm_county_own_1p_54less_pf'
Registering model step 'rdplcm_06081_blocks_pf'
Registering model step 'hlcm_06013_blocks_rent_1p_54less_pf'
Registering model step 'elcm_06097_blocks_1_pf'
Registering model step 'hlcm_06001_blocks_pf'
Registering model step 'mortality'
Registering model step 'hlcm_06075_blocks_own_2p_54less_pf'
Registering model step 'elcm_06001_blocks_2_pf'
Registering model step 'elcm_06041_blocks_3_pf'
Registering model step 'hlcm_06081_blocks_own_1p_54less_pf'
Registering model step 'elcm_06075_blocks_0_pf'
Registering model step 'hlcm_06013_blocks_pf'
Registering model step 'rdplcm_06013_blocks_sf_pf'
Registering model step 'hlcm_06055_blocks_own_1p_54less_pf'
Registering model step 'hlcm_06013_blocks_own_2p_54less_pf'
Registering model step 'hlcm_06085_blocks_own_2p_54less_pf'
Registering model step 'elcm_06013_blocks_4_pf'
Registering model step 'hlcm_06085_blocks_rent_2p_54less_pf'
Registering model step 'hlcm_06097_blocks_own_1p_55plus_pf

. 10% . 20% . 30% . 40% . 50% . 60% . 70% . 80% . 90% . 100%
Precomputing network for distance 1000.
Network precompute starting.
Network precompute done.


  coords = blocks.local.groupby('block_group_id').mean().reset_index()
  coords = blocks.local.groupby('block_group_id').mean().reset_index()


Time to execute step 'build_networks': 4.02 s
Running step 'generate_outputs'
Generating outputs for (year 2010, forecast year 2011)...
Time to execute step 'generate_outputs': 0.00 s
Running step 'update_travel_data'
Time to execute step 'update_travel_data': 1.93 s
Total time to execute iteration 1 with iteration value None: 5.96 s


## Estimation

### Persons Table

In [None]:
@orca.column('persons')
def very_high_income(persons):
    """Return 1 for persons whose ``earning`` is at least 500000, else 0."""
    earning = persons.earning
    above_threshold = earning >= 500000
    return above_threshold.astype(int)

In [None]:
# Materialise the registered 'persons' orca table as a DataFrame.
persons_table = orca.get_table('persons')
persons = persons_table.to_frame()

In [None]:
sample_size = 10000
sample_seed = 0  # fixed so the estimation sample is identical on every run

# Keep only persons with a positive work zone id.
workers = persons[persons.work_zone_id > 0]

# Never ask for more rows than exist: DataFrame.sample raises when n exceeds
# the population size and replace=False. The seed makes the draw reproducible.
workers = workers.sample(min(sample_size, len(workers)), random_state=sample_seed)

# .copy() gives an independent frame, so the column added below is not written
# onto a slice of the sampled frame.
workers = workers[['age', 'home_taz', 'work_zone_id',
                   'taz_pct_no_higher_ed',
                   'taz_pct_hh_inc_under_25k', 'earning', 'no_higher_ed',
                   'very_high_income']].copy()

# The chosen alternative id, stored as a string version of work_zone_id.
workers['zone_id'] = workers.work_zone_id.astype(str)

### Travel Data

In [None]:
from itertools import product 

In [None]:
from itertools import product 

def add_missing_combinations(df):
    """Return ``df`` reindexed to the cartesian product of its index levels.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index levels are crossed. Combinations of level values
        that are absent from ``df`` appear in the result as all-NaN rows.

    Returns
    -------
    pandas.DataFrame
        A new frame. Rows are ordered like ``itertools.product`` over the
        unique values of each level (in order of first appearance), and the
        index level names of ``df`` are kept.
    """
    import pandas as pd  # local import: this notebook has no top-level pandas import

    # A flat index has nothing to cross. Building 1-tuples from it would yield
    # labels that no longer match the scalar index values.
    if df.index.nlevels == 1:
        return df.copy()

    # Unique values of each index level, in order of first appearance.
    index_values = [df.index.get_level_values(level).unique()
                    for level in range(df.index.nlevels)]

    # Build the full index directly rather than materialising a Python list of
    # every tuple; the level names are passed explicitly.
    full_index = pd.MultiIndex.from_product(index_values, names=df.index.names)

    return df.reindex(index=full_index)

@orca.step('update_travel_data')
def update_travel_data(travel_data):
    """Re-register 'travel_data' with every index-level combination present.

    Takes the table's local frame, fills in the missing index combinations via
    ``add_missing_combinations`` and registers the result under the same
    table name.
    """
    completed = add_missing_combinations(travel_data.local)
    orca.add_table('travel_data', completed)
    

In [None]:
# Execute the 'update_travel_data' step so the registered 'travel_data' table
# is replaced by its fully crossed version.
orca.run(['update_travel_data'])

In [None]:
# Columns pulled from the registered 'travel_data' table.
travel_cols = ['tour_sov_in_vehicle_time', 'logsum', 'tour_dist']
travel_data = orca.get_table('travel_data').to_frame(columns=travel_cols)

In [None]:
# Name the two index levels after the worker column 'home_taz' and the
# alternative key 'zone_id'.
# NOTE(review): assumes level 0 is the home zone and level 1 the candidate work zone — confirm.
travel_data.index = travel_data.index.set_names(['home_taz', 'zone_id'])

In [None]:
# Each entry: (new column, amount subtracted from tour_dist, upper clip bound).
# The shifted distance is clipped below at 0; an upper bound of None leaves the
# segment open-ended.
distance_segments = [
    ('dist_0_5', 0, 5),
    ('dist_1_2', 1, 1),
    ('dist_2_5', 2, 3),
    ('dist_5_15', 5, 10),
    ('dist_15plus', 15, None),
]
for segment_col, offset, upper in distance_segments:
    travel_data[segment_col] = (travel_data['tour_dist'] - offset).clip(0, upper)

### Zones Table

In [None]:
# Zone-level columns requested from the 'zones' table, kept in their original
# order (sector 5 is listed before sector 4).
accesibility_vars = (
    ['jobs_{}_sum_20_min_sov'.format(sector) for sector in (1, 2, 3, 5, 4, 0)]
    + ['pct_hh_inc_under_25k', 'pct_hh_inc_25_to_75k', 'pct_hh_inc_75_to_200k']
    + ['pct_no_higher_ed']
    + ['pct_sector_tech', 'pct_sector_retail', 'pct_sector_healthcare']
    + ['density_jobs', 'density_jobs_ave_5_min_sov']
)

In [None]:
# Alternatives table: only the columns listed in accesibility_vars are pulled
# from the registered 'zones' table.
zones = orca.get_table('zones').to_frame(columns = accesibility_vars)

In [None]:
# Display the zones frame as the cell output for a visual check.
zones

### Merged Choice Table (MCT)

In [None]:
# Create an empty large-MNL template step and give it a working name; the
# name is changed again before the step is registered.
m = LargeMultinomialLogitStep()
m.name = 'WLCM'

In [None]:
# Estimation data: workers are the observations, zones the alternatives, and
# 'zone_id' identifies each worker's chosen alternative. 100 alternatives are
# sampled per observation, with travel_data supplied as interaction terms.
mct_kwargs = dict(
    observations=workers,
    alternatives=zones,
    chosen_alternatives='zone_id',
    sample_size=100,
    interaction_terms=travel_data,
)
table = MergedChoiceTable(**mct_kwargs)

# Attach the table to the model step.
m.mergedchoicetable = table
# Optional size check: table.to_frame().shape

In [None]:
# Utility specification. The adjacent string literals are concatenated into a
# single formula string containing:
#   - logsum, alone and interacted with the worker's no_higher_ed column
#   - log1p of the jobs_1 .. jobs_5 sums within 20 minutes by SOV
#   - the tech and retail sector shares
#   - log1p of density_jobs
#   - three of the clipped distance columns (dist_0_5, dist_5_15, dist_15plus)
# NOTE(review): the trailing ' - 1' presumably removes the intercept
# (patsy-style formula syntax) — confirm against the formula parser in use.
m.model_expression = ('logsum + '
                      'logsum:no_higher_ed +'
                      'np.log1p(jobs_1_sum_20_min_sov)+ np.log1p(jobs_2_sum_20_min_sov) +'
                      'np.log1p(jobs_3_sum_20_min_sov)+ np.log1p(jobs_4_sum_20_min_sov) +'
                      'np.log1p(jobs_5_sum_20_min_sov) + pct_sector_tech + pct_sector_retail +'
                      'np.log1p(density_jobs) +'
                      'dist_0_5 + dist_5_15 + dist_15plus '
                      ' - 1'
                     )

# Estimate the model on the merged choice table.
m.fit(table)

In [None]:
# Rename the fitted step and register it with the model manager under
# 'WLCM_v1', the name later used to fetch it back.
m.name = 'WLCM_v1'
mm.register(m)

### Running Block Level Simulation

In [6]:
# Fetch the step registered as 'WLCM_v1' from the model manager; this rebinds
# `m`, so the simulation cells do not depend on the estimation cells having
# run in this kernel.
m = mm.get_step('WLCM_v1')

In [7]:
# Display the expression stored on the loaded step.
# NOTE(review): the saved output shows 'zones_'-prefixed job columns, unlike the
# expression written in the estimation section — confirm which version is intended.
m.model_expression

'logsum + logsum:no_higher_ed +np.log1p(zones_jobs_1_sum_20_min_sov)+ np.log1p(zones_jobs_2_sum_20_min_sov) +np.log1p(zones_jobs_3_sum_20_min_sov)+ np.log1p(zones_jobs_4_sum_20_min_sov) +np.log1p(zones_jobs_5_sum_20_min_sov) + pct_sector_tech + pct_sector_retail +np.log1p(density_jobs) +dist_0_5 + dist_5_15 + dist_15plus  - 1'

In [8]:
# Single merge instruction: left-join the listed travel_data columns onto the
# choice table, matching ('home_taz', 'zone_id') against travel_data's index.
travel_data_merge = {
    'right_table': 'travel_data',
    'right_cols': ['logsum', 'tour_sov_in_vehicle_time', 'tour_dist',
                   'dist_0_5', 'dist_5_15', 'dist_15plus'],
    'left_on': ['home_taz', 'zone_id'],
    'right_index': True,
    'how': 'left',
}

# Interaction-term operations later assigned to the model step.
# The optional 'aggregations', 'rename_cols' and 'sequential_eval_ops' entries
# are deliberately left out.
mct_intx_ops = OrderedDict([
    ('extra_alts_cols', ['zone_id']),
    ('extra_obs_cols', ['home_taz']),
    ('successive_merges', [travel_data_merge]),
])

In [9]:
# Simulation-time configuration of the step, applied attribute by attribute in
# the listed order.
simulation_settings = [
    ('out_choosers', 'persons'),
    ('out_column', 'work_location'),
    ('out_chooser_filters', ['worker == 1', 'work_at_home == 0']),
    ('tags', ['juan']),
    ('name', 'wlcm'),
    ('alt_sample_size', 100),
    ('alternatives', 'blocks'),
    ('alt_capacity', 'employment_capacity'),
    ('constrained_choices', True),
    ('mct_intx_ops', mct_intx_ops),
]
for attr_name, attr_value in simulation_settings:
    setattr(m, attr_name, attr_value)

In [None]:
# orca.get_table('blocks').columns

In [None]:
# Run the configured step, passing a chooser batch size of 50000; the saved
# output reports progress in increments of 50000 valid choices per iteration.
m.run(chooser_batch_size = 50000)

Calculating sum of jobs_1 within min 20 based on sov from skim
Calculating sum of jobs_2 within min 20 based on sov from skim
Calculating sum of jobs_5 within min 20 based on sov from skim
Calculating sum of jobs_3 within min 20 based on sov from skim
Calculating sum of jobs_4 within min 20 based on sov from skim


  valid_choices = pd.Series()


Replacing MCT None's and NaN's with 0
Iteration 1: 50000 of 3013484 valid choices
Replacing MCT None's and NaN's with 0
Iteration 2: 100000 of 3013484 valid choices
Replacing MCT None's and NaN's with 0
Iteration 3: 150000 of 3013484 valid choices
Replacing MCT None's and NaN's with 0
Iteration 4: 200000 of 3013484 valid choices
Replacing MCT None's and NaN's with 0
Iteration 5: 250000 of 3013484 valid choices


In [10]:
# Register the configured step; the saved output shows this writing
# 'wlcm.yaml' into the configs folder and registering the step as 'wlcm'.
mm.register(m)

Saving 'wlcm.yaml': /Users/juandavidcaicedocastro/Dropbox/01_berkeley/22_UrbanSim/01_projects/MLCM/02_github/DEMOS_URBANSIM/demos_urbansim/configs/calibrated_configs/custom/06197001
Registering model step 'wlcm'


In [None]:
# List the step names currently registered with orca, to check which steps
# are available to run.
orca.list_steps()

In [None]:
# Run the orca step named 'work_location'.
# NOTE(review): the step registered by this notebook is named 'wlcm', so
# 'work_location' must be defined elsewhere (presumably in models) — confirm.
orca.run(['work_location'])

In [None]:
# Pull the 'work_block_id' column of the persons table and display it.
# NOTE(review): the model step is configured to write 'work_location';
# presumably the 'work_location' orca step fills 'work_block_id' — confirm.
work_block_id = orca.get_table('persons').to_frame(columns = ['work_block_id'])
work_block_id