## HLCM  

Arezoo Besharati, UrbanSim, June 2018 

This notebook is a preliminary estimation of the household location choice model (HLCM) for the Bay Area.


In [None]:
import os; os.chdir('../')
import numpy as np, pandas as pd 

In [None]:
from urbansim_templates import modelmanager as mm
from urbansim_templates.models import LargeMultinomialLogitStep
import orca

### Load data

In [None]:
# Load any script-based Orca registrations
from scripts import datasources
from scripts import models

#### Tables loaded by datasources.py

In [None]:
# Print each registered Orca table (name in upper case) followed by the list
# of its column names and a blank separator line.
for table_name in orca.list_tables():
    print(table_name.upper())
    table_columns = orca.get_table(table_name).to_frame().columns
    print(table_columns.tolist(), end='\n\n')

In [None]:
## To load any of the tables into a DataFrame, uncomment the relevant line below
#households = orca.get_table('households').to_frame()
#units = orca.get_table('units').to_frame()    

### Generate accessibility measures

In [None]:
#orca.list_steps()

In [None]:
# Run the registered Orca step 'initialize_network'
# (presumably defined in scripts/models.py; verify there).
orca.run(steps=['initialize_network'])

In [None]:
# Run the registered Orca step 'network_aggregations'
# (presumably the step that produces the accessibility columns; verify).
orca.run(steps=['network_aggregations'])

In [None]:
# List the registered Orca tables and their columns again, now that the
# network steps above have run.
for table_name in orca.list_tables():
    print(table_name.upper())
    table_columns = orca.get_table(table_name).to_frame().columns
    print(table_columns.tolist(), end='\n\n')

## Model Estimation

### First model: includes only building related variables


In [None]:
# Settings for the first model: households choosing among buildings, using
# building-level variables only.
# NOTE(review): choice_column is 'unit_id' while the alternatives table is
# 'buildings' -- confirm that the chosen ids match the alternatives' index.
# The trailing '- 1' presumably drops the intercept (formula-style
# expression); confirm against the template's documentation.
hlcm1_settings = {
    'choosers': ['households'],
    'alternatives': ['buildings'],
    'choice_column': 'unit_id',
    'alt_sample_size': 10,
    'model_expression': 'res_price_per_sqft + non_residential_sqft - 1',
    'name': 'hlcm1',
    'tags': ['arezoo', 'test', 'buildingsVar'],
}

# Apply the settings one attribute at a time, in the order listed above
m1 = LargeMultinomialLogitStep()
for setting, value in hlcm1_settings.items():
    setattr(m1, setting, value)

In [None]:
%%time
# Fit the first model (hlcm1); the %%time cell magic reports how long it takes.
m1.fit()

### Second model: includes only neighborhood variables 

Warning: the columns used in the model expression must not contain any missing values.

In [None]:
# Handling missing data

In [None]:
# Fetch the registered 'nodes' table from Orca and convert it to a frame
nodes_wrapper = orca.get_table('nodes')
nds = nodes_wrapper.to_frame()

In [None]:
# Impute missing values in the node column used by the model with that
# column's mean (the alternative would be dropping the incomplete rows).
# `fillna` returns a NEW object rather than modifying `nds` in place, so the
# result must be captured; otherwise the unfilled values are written back.
residential_units_500_filled = nds['residential_units_500'].fillna(
    nds['residential_units_500'].mean()
)

# If every value is missing the mean is NaN as well and nothing is filled,
# so check the invariant the model estimation relies on.
assert not residential_units_500_filled.isnull().any(), \
    'residential_units_500 still has missing values after mean imputation'

# Write the imputed column back to the registered 'nodes' table
orca.get_table('nodes').update_col_from_series(
    'residential_units_500', residential_units_500_filled
)

In [None]:
# Settings for the second model: households choosing among network nodes,
# using a neighborhood-level variable only.
# NOTE(review): choice_column is 'unit_id' while the alternatives table is
# 'nodes' -- confirm that the chosen ids match the alternatives' index.
hlcm2_settings = {
    'choosers': ['households'],
    'alternatives': ['nodes'],
    'choice_column': 'unit_id',
    'alt_sample_size': 10,
    'model_expression': 'residential_units_500 - 1',
    'name': 'hlcm2',
    'tags': ['arezoo', 'test', 'neighborhoodVar'],
}

# Apply the settings one attribute at a time, in the order listed above
m2 = LargeMultinomialLogitStep()
for setting, value in hlcm2_settings.items():
    setattr(m2, setting, value)

In [None]:
%%time
# Fit the second model (hlcm2); the %%time cell magic reports how long it takes.
m2.fit()

### Third model: includes variables from different tables

In [None]:
# Settings for the third model: alternatives drawn from two tables
# ('buildings' and 'nodes') so the expression can mix their variables.
hlcm3_settings = {
    'choosers': ['households'],
    'alternatives': ['buildings', 'nodes'],
    'choice_column': 'unit_id',
    'alt_sample_size': 10,
    'model_expression': 'res_price_per_sqft + residential_units_500 - 1',
    'name': 'hlcm3',
    'tags': ['arezoo', 'test', 'mixedVar'],
}

# Apply the settings one attribute at a time, in the order listed above
m3 = LargeMultinomialLogitStep()
for setting, value in hlcm3_settings.items():
    setattr(m3, setting, value)

# NOTE: this fit is reported to fail, because the alternatives come from
# more than one table.
m3.fit()

LargeMNL can't fit the model when alternatives are drawn from more than one table.