In [1]:
import pandas as pd
import numpy as np
import pandana as pdna

In [2]:
from urbansim_templates import modelmanager as mm
from urbansim_templates.models import LargeMultinomialLogitStep

import orca
import os; os.chdir('../')
import warnings;warnings.simplefilter('ignore')

In [3]:
%matplotlib inline

### Required runtime Orca injectables

In [4]:
# Input data is read from CSV files stored under `local_data_dir`.
data_mode = 'csv'
local_data_dir = '/home/data/spring_2019/base/'

# Maps each key (later used to look up a file) to its CSV file name.
csv_fnames = dict(
    # core tables
    parcels='parcel_attr.csv',
    buildings='buildings_v2.csv',
    jobs='jobs_v2.csv',
    establishments='establishments_v2.csv',
    households='households_v2.csv',
    persons='persons_v3.csv',
    rentals='MTC_craigslist_listings_7-10-18.csv',
    units='units_v2.csv',
    skims='skims_110118.csv',
    # drive network and its precomputed access variables
    drive_nodes='bay_area_tertiary_strongly_nodes.csv',
    drive_edges='bay_area_tertiary_strongly_edges.csv',
    drive_access_vars='drive_net_vars.csv',
    # walk network and its precomputed access variables
    walk_nodes='bayarea_walk_nodes.csv',
    walk_edges='bayarea_walk_edges.csv',
    walk_access_vars='walk_net_vars.csv',
)

In [5]:
# Register the runtime settings as Orca injectables, in a fixed order.
# 'store' and 's3_input_data_url' are registered as None because the data
# comes from local CSV files here.
for injectable_name, injectable_value in (
    ('data_mode', data_mode),
    ('csv_fnames', csv_fnames),
    ('store', None),
    ('s3_input_data_url', None),
    ('local_data_dir', local_data_dir),
):
    orca.add_injectable(injectable_name, injectable_value)

### Load Data

In [6]:
from scripts import datasources, models, variables

Registering model step 'auto_ownership'
Registering model step 'TOD_choice'
Registering model step 'primary_mode_choice'
Registering model step 'ELCM_finance'
Registering model step 'WLCM'


### Accessibility Variables

In [7]:
# Run the two network-initialisation steps, small network first, then walk.
orca.run([
    'initialize_network_small',
    'initialize_network_walk',
])

Running step 'initialize_network_small'
Time to execute step 'initialize_network_small': 0.00 s
Running step 'initialize_network_walk'
Time to execute step 'initialize_network_walk': 0.00 s
Total time to execute iteration 1 with iteration value None: 0.00 s


In [8]:
# If the network variables have been previously computed and saved, load the
# cached node-level tables from CSV (indexed by 'osmid') instead of
# recomputing them.
# os.path.join keeps the paths valid whether or not `local_data_dir` ends
# with a path separator.
walk_net_vars = pd.read_csv(
    os.path.join(local_data_dir, csv_fnames['walk_access_vars']),
    index_col='osmid')
drive_net_vars = pd.read_csv(
    os.path.join(local_data_dir, csv_fnames['drive_access_vars']),
    index_col='osmid')

# Expose the frames to Orca under the table names 'nodeswalk' and 'nodessmall'.
orca.add_table('nodeswalk', walk_net_vars)
orca.add_table('nodessmall', drive_net_vars)

<orca.orca.DataFrameWrapper at 0x7f0855b84240>

In [10]:
# Otherwise (no cached network-variable CSVs yet): uncomment the lines below
# to run the network aggregation steps and save the resulting 'nodeswalk' /
# 'nodessmall' tables under the file names the loading cell reads.
# orca.run(['network_aggregations_small', 'network_aggregations_walk'])
# orca.get_table('nodeswalk').to_frame().to_csv(
#     local_data_dir + csv_fnames['walk_access_vars'])
# orca.get_table('nodessmall').to_frame().to_csv(
#     local_data_dir + csv_fnames['drive_access_vars'])

### List data tables and columns

In [153]:
# For every table registered with Orca, print its upper-cased name, then the
# list of its column names, then a blank separator line.
for table_name in orca.list_tables():
    print(table_name.upper())
    table_columns = orca.get_table(table_name).to_frame().columns.tolist()
    print(table_columns, end='\n\n')

PARCELS
['development_type_id', 'land_value', 'acres', 'county_id', 'zone_id', 'proportion_undevelopable', 'tax_exempt_status', 'apn', 'parcel_id_local', 'geom_id', 'imputation_flag', 'x', 'y', 'shape_area', 'block_id', 'node_id', 'node_id_small', 'node_id_walk', 'average_income_20', 'above_jobs_20', 'above_jobs_40', 'above_jobs_60', 'below_jobs_20', 'below_jobs_40', 'below_jobs_60', 'employment_20', 'employment_40', 'employment_60', 'population_20', 'population_40', 'population_60']

BUILDINGS
['parcel_id', 'development_type_id', 'improvement_value', 'residential_units', 'residential_sqft', 'sqft_per_unit', 'non_residential_sqft', 'building_sqft', 'nonres_rent_per_sqft', 'res_price_per_sqft', 'stories', 'year_built', 'redfin_sale_price', 'redfin_sale_year', 'redfin_home_type', 'costar_property_type', 'costar_rent', 'building_type_id', 'res_sqft_per_unit', 'node_id_small', 'node_id_walk']

JOBS
['building_id', 'sector_id', 'occupation_id', 'node_id_small', 'node_id_walk', 'sector_retai

In [12]:
# Fetch the 'ELCM_finance' step already known to the model manager (it was
# registered when the project scripts were imported), for reference.
f = mm.get_step('ELCM_finance')

In [14]:
# Keep a handle on the registered step's model expression string.
latest_exp = f.model_expression

In [15]:
# Display the registered expression.
latest_exp

'np.log1p(avg_income_500_walk) + nonres_rent_per_sqft*np.log1p(jobs_10000) + np.log1p(jobs_500_walk)*np.log1p(jobs_10000_retail) + np.log1p(jobs_2500_walk_finance) + np.log1p(units_mf_1500_walk) + np.log1p(units_sf_10000) + np.log1p(land_value)*nonres_rent_per_sqft + nonres_rent_per_sqft/np.log1p(sales_vol) + np.log1p(nonres_sqft_2500)/np.log1p(sqft_2500) + np.log1p(pop_10000)'

### Build the ELCM for the finance sector (sector_id 52)

In [61]:
# Start from an empty multinomial-logit step. It is given the same name as the
# step already registered, 'ELCM_finance'.
m = LargeMultinomialLogitStep()
m.name = 'ELCM_finance'

In [79]:
# Choosers: rows of the 'establishments' table that satisfy sector_id == 52.
m.chooser_filters = ['sector_id == 52']

m.choosers = ['establishments']
# Tables supplying the alternatives' attributes
# (presumably merged into one alternatives table by the template -- confirm).
m.alternatives = ['buildings', 'parcels', 'nodessmall', 'nodeswalk']
# Column that records each chooser's observed choice.
m.choice_column = 'building_id'

# Size of the alternative sample used in estimation.
# NOTE(review): sampling is presumably random and no seed is set in this
# notebook, so estimates may differ slightly between runs -- confirm.
m.alt_sample_size = 100
# Keep only alternatives with jobs_25000 strictly between 0 and 1,000,000.
m.alt_filters = ['0 < jobs_25000 < 1000000']

In [56]:
# First candidate specification: nine additive terms, one per line, joined
# with ' + ' into the expression string.
m.model_expression = ' + '.join([
    'np.log1p(avg_income_500_walk)',
    'np.log1p(pop_2500_walk)',
    'nonres_rent_per_sqft',
    'np.log1p(jobs_10000)',
    'np.log1p(jobs_500_walk_retail)',
    'np.log1p(jobs_1500_walk_fire)',
    'np.log1p(jobs_10000_tech)',
    'np.log1p(jobs_1500_walk_serv)',
    'np.log1p(jobs_2500_walk_finance)',
])

In [57]:
# Estimate the expression assigned in the preceding cell by maximum
# likelihood; the estimation summary is printed as the cell output.
# NOTE(review): the summary reports the Intercept at -0.0000 with p = 1.000;
# consider whether the expression should exclude it.
m.fit()

                  CHOICEMODELS ESTIMATION RESULTS                  
Dep. Var.:                chosen   No. Observations:         15,588
Model:         Multinomial Logit   Df Residuals:             15,578
Method:       Maximum Likelihood   Df Model:                     10
Date:                 2019-05-22   Pseudo R-squ.:             0.459
Time:                      14:58   Pseudo R-bar-squ.:         0.458
AIC:                  77,759.780   Log-Likelihood:      -38,869.890
BIC:                  77,836.323   LL-Null:             -71,785.393
                                      coef   std err         z     P>|z|   Conf. Int.
-------------------------------------------------------------------------------------
Intercept                          -0.0000     0.173    -0.000     1.000             
np.log1p(avg_income_500_walk)      -0.0397     0.004   -11.115     0.000             
np.log1p(pop_2500_walk)            -0.3153     0.011   -28.833     0.000             
nonres_rent_per_sqft      

In [89]:
# Second candidate: keeps the income, population, rent and finance-jobs terms,
# replaces the other job terms with np.log1p(jobs_500_walk), and adds the two
# housing-unit terms.
m.model_expression = ' + '.join([
    'np.log1p(avg_income_500_walk)',
    'np.log1p(pop_2500_walk)',
    'nonres_rent_per_sqft',
    'np.log1p(jobs_500_walk)',
    'np.log1p(jobs_2500_walk_finance)',
    'np.log1p(units_mf_1500_walk)',
    'np.log1p(units_sf_10000)',
])

In [90]:
# Re-estimate with the expression assigned in the preceding cell; the summary
# table is printed as the cell output.
m.fit()

                  CHOICEMODELS ESTIMATION RESULTS                  
Dep. Var.:                chosen   No. Observations:         15,588
Model:         Multinomial Logit   Df Residuals:             15,580
Method:       Maximum Likelihood   Df Model:                      8
Date:                 2019-05-22   Pseudo R-squ.:             0.460
Time:                      15:27   Pseudo R-bar-squ.:         0.460
AIC:                  77,484.754   Log-Likelihood:      -38,734.377
BIC:                  77,545.988   LL-Null:             -71,785.393
                                      coef   std err         z     P>|z|   Conf. Int.
-------------------------------------------------------------------------------------
Intercept                          -0.0000     0.171    -0.000     1.000             
np.log1p(avg_income_500_walk)      -0.0194     0.004    -5.255     0.000             
np.log1p(pop_2500_walk)            -0.3353     0.013   -26.330     0.000             
nonres_rent_per_sqft      

In [105]:
# Third candidate: the jobs_500_walk term is now combined with
# np.log1p(jobs_10000_retail) through `/`.
# NOTE(review): `/` is evidently treated as a formula operator rather than
# arithmetic division -- the fit that follows reports Df Model 9 for these
# seven '+'-separated terms. If a plain ratio was intended it presumably needs
# wrapping (e.g. I(...)) -- confirm.
m.model_expression = ' + '.join([
    'np.log1p(avg_income_500_walk)',
    'np.log1p(pop_2500_walk)',
    'nonres_rent_per_sqft',
    'np.log1p(jobs_500_walk)/np.log1p(jobs_10000_retail)',
    'np.log1p(jobs_2500_walk_finance)',
    'np.log1p(units_mf_1500_walk)',
    'np.log1p(units_sf_10000)',
])

In [106]:
# Re-estimate with the expression assigned in the preceding cell; the summary
# table is printed as the cell output.
m.fit()

                  CHOICEMODELS ESTIMATION RESULTS                  
Dep. Var.:                chosen   No. Observations:         15,588
Model:         Multinomial Logit   Df Residuals:             15,579
Method:       Maximum Likelihood   Df Model:                      9
Date:                 2019-05-22   Pseudo R-squ.:             0.461
Time:                      15:37   Pseudo R-bar-squ.:         0.461
AIC:                  77,353.610   Log-Likelihood:      -38,667.805
BIC:                  77,422.498   LL-Null:             -71,785.393
                                                         coef   std err         z     P>|z|   Conf. Int.
--------------------------------------------------------------------------------------------------------
Intercept                                             -0.0000     0.253    -0.000     1.000             
np.log1p(avg_income_500_walk)                         -0.0137     0.004    -3.694     0.000             
np.log1p(pop_2500_walk)             

In [123]:
# Fourth candidate: drops np.log1p(pop_2500_walk) and replaces the plain rent
# term with nonres_rent_per_sqft/np.log1p(jobs_10000).
m.model_expression = ' + '.join([
    'np.log1p(avg_income_500_walk)',
    'nonres_rent_per_sqft/np.log1p(jobs_10000)',
    'np.log1p(jobs_500_walk)/np.log1p(jobs_10000_retail)',
    'np.log1p(jobs_2500_walk_finance)',
    'np.log1p(units_mf_1500_walk)',
    'np.log1p(units_sf_10000)',
])

In [124]:
# Re-estimate with the expression assigned in the preceding cell; the summary
# table is printed as the cell output.
m.fit()

                  CHOICEMODELS ESTIMATION RESULTS                  
Dep. Var.:                chosen   No. Observations:         15,588
Model:         Multinomial Logit   Df Residuals:             15,579
Method:       Maximum Likelihood   Df Model:                      9
Date:                 2019-05-22   Pseudo R-squ.:             0.472
Time:                      15:48   Pseudo R-bar-squ.:         0.472
AIC:                  75,807.301   Log-Likelihood:      -37,894.651
BIC:                  75,876.189   LL-Null:             -71,785.393
                                                         coef   std err         z     P>|z|   Conf. Int.
--------------------------------------------------------------------------------------------------------
Intercept                                             -0.0000     0.247    -0.000     1.000             
np.log1p(avg_income_500_walk)                         -0.0413     0.004   -11.467     0.000             
nonres_rent_per_sqft                

In [129]:
# Fifth candidate: same terms as the previous specification plus
# np.log1p(jobs_10000_serv).
m.model_expression = ' + '.join([
    'np.log1p(avg_income_500_walk)',
    'nonres_rent_per_sqft/np.log1p(jobs_10000)',
    'np.log1p(jobs_500_walk)/np.log1p(jobs_10000_retail)',
    'np.log1p(jobs_2500_walk_finance)',
    'np.log1p(units_mf_1500_walk)',
    'np.log1p(units_sf_10000)',
    'np.log1p(jobs_10000_serv)',
])

In [130]:
# Re-estimate with the expression assigned in the preceding cell; the summary
# table is printed as the cell output.
m.fit()

                  CHOICEMODELS ESTIMATION RESULTS                  
Dep. Var.:                chosen   No. Observations:         15,588
Model:         Multinomial Logit   Df Residuals:             15,578
Method:       Maximum Likelihood   Df Model:                     10
Date:                 2019-05-23   Pseudo R-squ.:             0.474
Time:                      08:01   Pseudo R-bar-squ.:         0.474
AIC:                  75,530.960   Log-Likelihood:      -37,755.480
BIC:                  75,607.502   LL-Null:             -71,785.393
                                                         coef   std err         z     P>|z|   Conf. Int.
--------------------------------------------------------------------------------------------------------
Intercept                                             -0.0000     0.260    -0.000     1.000             
np.log1p(avg_income_500_walk)                         -0.0453     0.004   -12.565     0.000             
nonres_rent_per_sqft                

In [131]:
# Sixth candidate: same terms as the previous specification plus
# np.log1p(land_value).
m.model_expression = ' + '.join([
    'np.log1p(avg_income_500_walk)',
    'nonres_rent_per_sqft/np.log1p(jobs_10000)',
    'np.log1p(jobs_500_walk)/np.log1p(jobs_10000_retail)',
    'np.log1p(jobs_2500_walk_finance)',
    'np.log1p(units_mf_1500_walk)',
    'np.log1p(units_sf_10000)',
    'np.log1p(jobs_10000_serv)',
    'np.log1p(land_value)',
])

In [133]:
# Re-estimate with the expression assigned in the preceding cell; the summary
# table is printed as the cell output.
m.fit()

                  CHOICEMODELS ESTIMATION RESULTS                  
Dep. Var.:                chosen   No. Observations:         15,588
Model:         Multinomial Logit   Df Residuals:             15,577
Method:       Maximum Likelihood   Df Model:                     11
Date:                 2019-05-23   Pseudo R-squ.:             0.484
Time:                      08:03   Pseudo R-bar-squ.:         0.484
AIC:                  74,054.631   Log-Likelihood:      -37,016.315
BIC:                  74,138.828   LL-Null:             -71,785.393
                                                         coef   std err         z     P>|z|   Conf. Int.
--------------------------------------------------------------------------------------------------------
Intercept                                             -0.0000     0.265    -0.000     1.000             
np.log1p(avg_income_500_walk)                         -0.0337     0.004    -9.247     0.000             
nonres_rent_per_sqft                

In [147]:
# Seventh candidate: same terms as the previous specification plus
# nonres_rent_per_sqft/np.log1p(sales_vol).
m.model_expression = ' + '.join([
    'np.log1p(avg_income_500_walk)',
    'nonres_rent_per_sqft/np.log1p(jobs_10000)',
    'np.log1p(jobs_500_walk)/np.log1p(jobs_10000_retail)',
    'np.log1p(jobs_2500_walk_finance)',
    'np.log1p(units_mf_1500_walk)',
    'np.log1p(units_sf_10000)',
    'np.log1p(jobs_10000_serv)',
    'np.log1p(land_value)',
    'nonres_rent_per_sqft/np.log1p(sales_vol)',
])

In [148]:
# Re-estimate with the expression assigned in the preceding cell; the summary
# table is printed as the cell output.
m.fit()

                  CHOICEMODELS ESTIMATION RESULTS                  
Dep. Var.:                chosen   No. Observations:         15,588
Model:         Multinomial Logit   Df Residuals:             15,576
Method:       Maximum Likelihood   Df Model:                     12
Date:                 2019-05-23   Pseudo R-squ.:             0.485
Time:                      08:37   Pseudo R-bar-squ.:         0.484
AIC:                  74,027.801   Log-Likelihood:      -37,001.900
BIC:                  74,119.652   LL-Null:             -71,785.393
                                                         coef   std err         z     P>|z|   Conf. Int.
--------------------------------------------------------------------------------------------------------
Intercept                                             -0.0000     0.259    -0.000     1.000             
np.log1p(avg_income_500_walk)                         -0.0347     0.004    -9.436     0.000             
nonres_rent_per_sqft                

In [173]:
# Final specification: the same string as the expression of the registered
# 'ELCM_finance' step displayed elsewhere in this notebook.
# NOTE(review): `*` and `/` are evidently formula operators, not arithmetic --
# the fit that follows reports Df Model 17 for these ten '+'-separated terms,
# so each compound term expands into several coefficients. Confirm that this
# expansion is intended.
m.model_expression = ' + '.join([
    'np.log1p(avg_income_500_walk)',
    'nonres_rent_per_sqft*np.log1p(jobs_10000)',
    'np.log1p(jobs_500_walk)*np.log1p(jobs_10000_retail)',
    'np.log1p(jobs_2500_walk_finance)',
    'np.log1p(units_mf_1500_walk)',
    'np.log1p(units_sf_10000)',
    'np.log1p(land_value)*nonres_rent_per_sqft',
    'nonres_rent_per_sqft/np.log1p(sales_vol)',
    'np.log1p(nonres_sqft_2500)/np.log1p(sqft_2500)',
    'np.log1p(pop_10000)',
])

In [13]:
# Display the model expression of the step fetched from the model manager.
# NOTE(review): this cell's execution count (13) is lower than that of the
# cells around it, i.e. it was run out of order; re-run top to bottom before
# relying on the output.
f.model_expression

'np.log1p(avg_income_500_walk) + nonres_rent_per_sqft*np.log1p(jobs_10000) + np.log1p(jobs_500_walk)*np.log1p(jobs_10000_retail) + np.log1p(jobs_2500_walk_finance) + np.log1p(units_mf_1500_walk) + np.log1p(units_sf_10000) + np.log1p(land_value)*nonres_rent_per_sqft + nonres_rent_per_sqft/np.log1p(sales_vol) + np.log1p(nonres_sqft_2500)/np.log1p(sqft_2500) + np.log1p(pop_10000)'

In [174]:
# Estimate the final specification; the summary table is printed as the cell
# output.
# NOTE(review): the summary reports the Intercept at 0.0000 with p = 1.000;
# consider whether the expression should exclude it.
m.fit()

                  CHOICEMODELS ESTIMATION RESULTS                  
Dep. Var.:                chosen   No. Observations:         15,588
Model:         Multinomial Logit   Df Residuals:             15,571
Method:       Maximum Likelihood   Df Model:                     17
Date:                 2019-05-23   Pseudo R-squ.:             0.497
Time:                      13:56   Pseudo R-bar-squ.:         0.496
AIC:                  72,305.008   Log-Likelihood:      -36,135.504
BIC:                  72,435.131   LL-Null:             -71,785.393
                                                         coef   std err         z     P>|z|   Conf. Int.
--------------------------------------------------------------------------------------------------------
Intercept                                              0.0000     0.385     0.000     1.000             
np.log1p(avg_income_500_walk)                         -0.0214     0.004    -5.503     0.000             
nonres_rent_per_sqft                

In [175]:
# Save the fitted step through the model manager: per the output it writes
# 'ELCM_finance.yaml' to the configs directory and registers the step.
# NOTE(review): presumably this replaces the 'ELCM_finance' step that was
# registered when the project scripts were imported -- confirm that
# overwriting is intended.
mm.register(m)

Saving 'ELCM_finance.yaml': /home/max/projects/ual_model_workspace/summer-2019-models/configs
Registering model step 'ELCM_finance'
