## HLCM  

Arezoo Besharati, UrbanSim, June 2018 

This notebook performs an initial estimation of the household location choice model (HLCM) for the Bay Area.


In [1]:
import os; os.chdir('../')
import numpy as np, pandas as pd 

In [2]:
from urbansim_templates import modelmanager as mm
from urbansim_templates.models import LargeMultinomialLogitStep
import orca

  from pandas.core import datetools


### Load data

In [3]:
# Load any script-based Orca registrations
from scripts import datasources
from scripts import models

#### Tables loaded by datasources.py

In [4]:
# Show every registered Orca table: upper-cased name, its column list, then a blank line
for table_name in orca.list_tables():
    table_columns = orca.get_table(table_name).to_frame().columns.tolist()
    print(table_name.upper(), table_columns, sep='\n', end='\n\n')

PARCELS
['development_type_id', 'land_value', 'acres', 'county_id', 'zone_id', 'proportion_undevelopable', 'tax_exempt_status', 'apn', 'parcel_id_local', 'geom_id', 'imputation_flag', 'x', 'y', 'shape_area', 'block_id', 'node_id']

BUILDINGS
['parcel_id', 'development_type_id', 'improvement_value', 'residential_units', 'residential_sqft', 'sqft_per_unit', 'non_residential_sqft', 'building_sqft', 'nonres_rent_per_sqft', 'res_price_per_sqft', 'stories', 'year_built', 'redfin_sale_price', 'redfin_sale_year', 'redfin_home_type', 'costar_property_type', 'costar_rent', 'building_type_id']

UNITS
['Unnamed: 0', 'building_id', 'num_units', 'tenure', 'unit_num', 'unit_residential_price', 'unit_residential_rent']

HOUSEHOLDS
['household_id', 'serialno', 'persons', 'building_type', 'cars', 'income', 'race_of_head', 'hispanic_head', 'age_of_head', 'workers', 'state', 'county', 'tract', 'block group', 'children', 'tenure', 'recent_mover', 'block_group_id', 'single_family', 'unit_id']

PERSONS
['Unn

In [5]:
## To work with any of the tables as a DataFrame, uncomment the relevant line below
#households = orca.get_table('households').to_frame()
#units = orca.get_table('units').to_frame()    

### Generate accessibility measures

In [6]:
#orca.list_steps()

In [7]:
# Run the registered Orca step 'initialize_network'.
# NOTE(review): the step is defined outside this notebook (presumably in scripts/models) — confirm what it builds.
orca.run(['initialize_network'])

Running step 'initialize_network'
Time to execute step 'initialize_network': 9.97 s
Total time to execute iteration 1 with iteration value None: 9.97 s


In [8]:
# Run the 'network_aggregations' step; per its log it computes accessibility variables
# such as sum_income_3000 and residential_units_500, dropping rows with missing values.
orca.run(['network_aggregations'])

Running step 'network_aggregations'
Computing accessibility variables
Computing sum_income_3000
Removed 189769 rows because they contain missing values
Computing residential_units_500
Removed 4 rows because they contain missing values
Computing residential_units_1500
Removed 4 rows because they contain missing values
Computing population
Removed 189769 rows because they contain missing values
Computing poor
Removed 53114 rows because they contain missing values
Computing renters
Removed 102597 rows because they contain missing values
Computing ave_income_500
Removed 189769 rows because they contain missing values
       sum_income_3000  residential_units_500  residential_units_1500  \
count     3.082600e+04           30826.000000            30826.000000   
mean      1.478443e+09               4.360008                6.175417   
std       1.927634e+09               2.391189                2.258848   
min       0.000000e+00               0.000000                0.000000   
25%       4.96

In [9]:
# List the tables again after the network steps so any newly added columns are visible
for table_name in orca.list_tables():
    table_columns = orca.get_table(table_name).to_frame().columns.tolist()
    print(table_name.upper(), table_columns, sep='\n', end='\n\n')

PARCELS
['development_type_id', 'land_value', 'acres', 'county_id', 'zone_id', 'proportion_undevelopable', 'tax_exempt_status', 'apn', 'parcel_id_local', 'geom_id', 'imputation_flag', 'x', 'y', 'shape_area', 'block_id', 'node_id']

BUILDINGS
['parcel_id', 'development_type_id', 'improvement_value', 'residential_units', 'residential_sqft', 'sqft_per_unit', 'non_residential_sqft', 'building_sqft', 'nonres_rent_per_sqft', 'res_price_per_sqft', 'stories', 'year_built', 'redfin_sale_price', 'redfin_sale_year', 'redfin_home_type', 'costar_property_type', 'costar_rent', 'building_type_id', 'node_id']

UNITS
['Unnamed: 0', 'building_id', 'num_units', 'tenure', 'unit_num', 'unit_residential_price', 'unit_residential_rent', 'node_id']

HOUSEHOLDS
['household_id', 'serialno', 'persons', 'building_type', 'cars', 'income', 'race_of_head', 'hispanic_head', 'age_of_head', 'workers', 'state', 'county', 'tract', 'block group', 'children', 'tenure', 'recent_mover', 'block_group_id', 'single_family', 'un

## Model Estimation

### First model: includes only building-related variables


In [10]:
# hlcm1: household location choice specified with building-level variables only
m1 = LargeMultinomialLogitStep()
m1.name = 'hlcm1'
m1.tags = ['arezoo', 'test', 'buildingsVar']

# Choosers, the tables supplying alternatives, and the column holding the observed choice
m1.choosers = ['households']
m1.alternatives = ['units', 'buildings']
m1.choice_column = 'unit_id'

# Size of the sampled alternative set (see urbansim_templates docs for exact semantics)
m1.alt_sample_size = 10

# Utility specification; the trailing "- 1" removes the intercept term
m1.model_expression = 'res_price_per_sqft + non_residential_sqft - 1'

In [11]:
%%time
# Estimate hlcm1 (maximum likelihood, per the summary printed below); %%time reports the cost
m1.fit()

                   CHOICEMODELS ESTIMATION RESULTS                    
Dep. Var.:                chosen   No. Observations:                  
Model:         Multinomial Logit   Df Residuals:                      
Method:       Maximum Likelihood   Df Model:                          
Date:                              Pseudo R-squ.:                     
Time:                              Pseudo R-bar-squ.:                 
AIC:                               Log-Likelihood:      -5,733,227.662
BIC:                               LL-Null:             -5,733,241.162
                          coef   std err         z     P>|z|   Conf. Int.
-------------------------------------------------------------------------
res_price_per_sqft      0.0000     0.000     6.064                       
non_residential_sqft   -0.0000     0.000    -0.278                       
CPU times: user 2min 36s, sys: 1min 27s, total: 4min 4s
Wall time: 4min 3s


### Second model: includes only neighborhood variables 

Warning: columns used in the model expression must not contain missing values.

In [12]:
# Handling missing data

In [13]:
# Pull the registered 'nodes' table into a DataFrame so its missing values can be handled
nds = orca.get_table('nodes').to_frame()  

In [14]:
# Impute missing values with each column's mean. DataFrame.fillna() returns a
# new frame rather than modifying `nds`, so the result has to be captured --
# previously it was discarded and the unfilled column was written back to Orca.
# (Alternative: drop incomplete rows with nds.dropna(axis=0, how='any').)
nds_filled = nds.fillna(nds.mean())

# A column that is entirely NaN has a NaN mean and would remain unfilled
assert nds_filled.residential_units_500.notnull().all(), \
    'residential_units_500 still contains missing values'

# Write the imputed column back to the registered 'nodes' table
orca.get_table('nodes').update_col_from_series('residential_units_500', nds_filled.residential_units_500)

In [15]:
# hlcm2: household location choice specified with neighborhood (node-level) variables only
m2 = LargeMultinomialLogitStep()
m2.name = 'hlcm2'
m2.tags = ['arezoo', 'test', 'neighborhoodVar']

# Choosers, the tables supplying alternatives, and the column holding the observed choice
m2.choosers = ['households']
m2.alternatives = ['units', 'nodes']
m2.choice_column = 'unit_id'

# Size of the sampled alternative set (see urbansim_templates docs for exact semantics)
m2.alt_sample_size = 10

# Utility specification; the trailing "- 1" removes the intercept term
m2.model_expression = 'residential_units_500 - 1'

In [13]:
# Display the registered Orca broadcasts as pairs of table names
# (used here to check that 'nodes' is linked to 'units' before fitting hlcm2)
orca.list_broadcasts()

[('parcels', 'buildings'),
 ('buildings', 'units'),
 ('units', 'households'),
 ('households', 'persons'),
 ('nodes', 'parcels'),
 ('nodes', 'units')]

In [17]:
# Merge units, buildings and parcels onto 'units'.
# NOTE(review): `df` is not used by any later cell visible here — looks like a sanity check of the merge; confirm or remove.
df = orca.merge_tables(target='units', tables=['units', 'buildings', 'parcels'])

In [18]:
%%time
# Estimate hlcm2 (maximum likelihood, per the summary printed below); %%time reports the cost
m2.fit()

                   CHOICEMODELS ESTIMATION RESULTS                    
Dep. Var.:                chosen   No. Observations:                  
Model:         Multinomial Logit   Df Residuals:                      
Method:       Maximum Likelihood   Df Model:                          
Date:                              Pseudo R-squ.:                     
Time:                              Pseudo R-bar-squ.:                 
AIC:                               Log-Likelihood:      -5,733,152.417
BIC:                               LL-Null:             -5,733,241.162
                           coef   std err          z     P>|z|   Conf. Int.
---------------------------------------------------------------------------
residential_units_500   -0.0104     0.000   -101.427                       
CPU times: user 1min 16s, sys: 49.5 s, total: 2min 5s
Wall time: 2min 9s


### Third model: includes variables from different tables

In [11]:
# hlcm3: household location choice mixing building-level and node-level variables
m3 = LargeMultinomialLogitStep()
m3.name = 'hlcm3'
m3.tags = ['arezoo', 'test', 'mixedVar']

# Choosers, the tables supplying alternatives, and the column holding the observed choice
m3.choosers = ['households']
m3.alternatives = ['units', 'buildings', 'nodes']
m3.choice_column = 'unit_id'

# Size of the sampled alternative set (see urbansim_templates docs for exact semantics)
m3.alt_sample_size = 10

# Utility specification; the trailing "- 1" removes the intercept term
m3.model_expression = 'res_price_per_sqft + residential_units_500 - 1'

In [12]:
# Estimate hlcm3 (maximum likelihood, per the summary printed below)
m3.fit()

                   CHOICEMODELS ESTIMATION RESULTS                    
Dep. Var.:                chosen   No. Observations:                  
Model:         Multinomial Logit   Df Residuals:                      
Method:       Maximum Likelihood   Df Model:                          
Date:                              Pseudo R-squ.:                     
Time:                              Pseudo R-bar-squ.:                 
AIC:                               Log-Likelihood:      -5,733,126.645
BIC:                               LL-Null:             -5,733,241.162
                           coef   std err         z     P>|z|   Conf. Int.
--------------------------------------------------------------------------
res_price_per_sqft       0.0000     0.000     7.319                       
residential_units_500   -0.0112     0.000   -89.320                       


In [10]:
# Merge units, buildings and nodes onto 'units' — the same set of tables hlcm3 lists as alternatives.
# NOTE(review): `df` is not used afterwards in the visible cells — looks like a sanity check of the merge; confirm or remove.
df = orca.merge_tables(target='units', tables=['units', 'buildings', 'nodes'])