## HLCM  

Arezoo Besharati, UrbanSim, June 2018 

This notebook contains a preliminary estimation of the household location choice model (HLCM) for the Bay Area.


In [None]:
import os

# Move the working directory up one level before anything else runs.
# NOTE(review): presumably so the later `from scripts import ...` resolves
# from the project root -- confirm against the repository layout.
os.chdir('../')

import numpy as np
import pandas as pd

In [None]:
from urbansim_templates import modelmanager as mm
from urbansim_templates.models import LargeMultinomialLogitStep
import orca

### Load data

In [None]:
# Load any script-based Orca registrations
from scripts import datasources
from scripts import models

#### Tables loaded by datasources.py

In [None]:
# Dump every registered Orca table: its name in upper case, then its column
# names, then a blank separator line.
for name in orca.list_tables():
    print(name.upper())
    frame = orca.get_table(name).to_frame()
    print(frame.columns.tolist())
    print()

### Generate accessibility variables

In [None]:
#orca.list_steps()

In [None]:
# Run the Orca step registered under the name 'initialize_network'.
# NOTE(review): the step is presumably registered by scripts/models.py,
# imported earlier in this notebook -- verify.
orca.run(['initialize_network'])

In [None]:
# Run the Orca step registered under the name 'network_aggregations'.
# NOTE(review): presumably this is what produces the node-level accessibility
# columns used by the models below -- verify.
orca.run(['network_aggregations'])

In [None]:
# Print each registered Orca table's name and columns once more.
# NOTE(review): presumably repeated here to inspect columns added by the
# network steps that ran just before this cell.
for name in orca.list_tables():
    print(name.upper())
    frame = orca.get_table(name).to_frame()
    print(frame.columns.tolist())
    print()

## Data Preprocessing

 - Handle missing values
 - Check the feature distributions (check for skewness)
 - Create dummy variables
 - Create derived features such as variable interactions
 - Check the units of the data

#### Handling missing values

In [None]:
# Materialise the four tables inspected in this notebook as DataFrames.
nds, households, units, bld = (
    orca.get_table(name).to_frame()
    for name in ('nodes', 'households', 'units', 'buildings')
)

# Print True/False per table: does it contain at least one NaN?
# For households only the 'household_id' column is checked; the other three
# tables are checked across all of their columns.
print(nds.isna().values.any())
print(households['household_id'].isna().values.any())
print(units.isna().values.any())
print(bld.isna().values.any())


In [None]:
# Names of the buildings columns that contain at least one missing value:
# build the per-column "has NaN" flags, keep the True entries, list their labels.
bld.isna().any().loc[lambda flags: flags].index.tolist()

In [None]:
## Option 1: drop every row that contains a missing value
#bld.dropna(axis=0, how='any', inplace = True)

## Then update the Orca table ('buildings') with the new DataFrame (bld) that has no missing values.

#df = orca.get_table('buildings').to_frame()
## make changes to the DataFrame
#orca.add_table('buildings', df)

In [None]:
# Option 2: update a single column of the Orca table instead of replacing
# the whole table.

# Replace missing sale prices with the column mean. The result is assigned
# back to the column on purpose: calling fillna(..., inplace=True) on
# `bld.redfin_sale_price` is a chained in-place operation, which pandas warns
# about and which leaves `bld` unmodified when Copy-on-Write is enabled.
bld['redfin_sale_price'] = bld['redfin_sale_price'].fillna(
    bld['redfin_sale_price'].mean()
)

# Push the imputed column into the registered 'buildings' table.
orca.get_table('buildings').update_col_from_series(
    'redfin_sale_price', bld['redfin_sale_price']
)

## Model Estimation

### First model: includes only building related variables


In [None]:
# Show the distinct tenure codes in the households table; these are the values
# a chooser filter such as 'tenure == 2' can select on.
# NOTE(review): presumably the codes distinguish owners from renters --
# confirm which code is which.
households['tenure'].unique()

In [None]:
# Rescale the sale price by dividing it by 1000.
# NOTE: every execution of this cell divides the current values again, so
# run it exactly once per session.
bld["redfin_sale_price"] = bld["redfin_sale_price"].div(1000)

# Write the rescaled column back to the registered 'buildings' table.
orca.get_table('buildings').update_col_from_series(
    'redfin_sale_price', bld["redfin_sale_price"]
)

In [None]:
# First specification (hlcm1): households filtered by 'tenure == 2' choose
# among housing units described by building-level attributes.
# NOTE(review): confirm which tenure category code 2 denotes.
m1 = LargeMultinomialLogitStep()
m1.choosers = ['households']
# 'units' has to be the primary alternatives table: the observed choice is a
# 'unit_id', so it must match the id of the alternatives. 'buildings' is
# merged onto the units to supply the building-level regressors below, the
# same layout the other models in this notebook use.
m1.alternatives = ['units', 'buildings']
m1.choice_column = 'unit_id'
# Sample 10 alternatives per chooser for estimation.
m1.alt_sample_size = 10
m1.chooser_filters = ['tenure == 2']

# '- 1' drops the intercept term from the formula.
m1.model_expression = ' non_residential_sqft+ redfin_sale_price - 1'

m1.name = 'hlcm1'
m1.tags = ['arezoo', 'test', 'buildingsVar']

In [None]:
# %%time
# Estimate the hlcm1 specification configured in the previous cell.
# (The `%%time` cell magic above is disabled; if re-enabled it must remain
# the first line of the cell.)
m1.fit()

### Second model: includes only neighborhood variables 

In [None]:
# Merge the 'buildings' and 'parcels' tables onto 'units' into one DataFrame.
# NOTE(review): `df` is not referenced by the model cells that follow;
# presumably kept for inspecting the merged columns -- confirm or remove.
df = orca.merge_tables(target='units', tables=['units', 'buildings', 'parcels'])


In [None]:
# Second specification (hlcm2): a single neighborhood regressor.
m2 = LargeMultinomialLogitStep()
m2.name = 'hlcm2'
m2.tags = ['arezoo', 'test', 'neighborhoodVar']

# Households choose among units; 'nodes' is listed as a secondary alternatives
# table (presumably the source of the node-level regressor -- verify).
# NOTE(review): no chooser filter is set here, unlike hlcm1 and hlcm3, so all
# households are used -- confirm that this is intended.
m2.choosers = ['households']
m2.alternatives = ['units', 'nodes']
m2.choice_column = 'unit_id'
m2.alt_sample_size = 10

# '- 1' drops the intercept term from the formula.
m2.model_expression = 'residential_units_500 - 1'

In [None]:
%%time
# Estimate the hlcm2 specification; the `%%time` cell magic on the first line
# reports how long the cell takes to run.
m2.fit()

### Third model: includes variables from different tables

In [None]:
# Merge the 'buildings' and 'nodes' tables onto 'units' into one DataFrame.
# This rebinds `df`, replacing any earlier value of that name.
# NOTE(review): `df` is not referenced by the model cells that follow;
# presumably kept for inspecting the merged columns -- confirm or remove.
df = orca.merge_tables(target='units', tables=['units', 'buildings', 'nodes'])


In [None]:
# Third specification (hlcm3): regressors drawn from several tables.
m3 = LargeMultinomialLogitStep()
m3.name = 'hlcm3'
m3.tags = ['arezoo', 'test', 'mixedVar']

# Households passing the 'tenure == 2' filter choose among units, with
# 'buildings' and 'nodes' listed as secondary alternatives tables.
m3.choosers = ['households']
m3.chooser_filters = ['tenure == 2']
m3.alternatives = ['units','buildings','nodes']
m3.choice_column = 'unit_id'
m3.alt_sample_size = 10

# '- 1' drops the intercept term from the formula.
# NOTE(review): confirm that 'job_500' matches the column name exactly as
# printed by the table listing earlier in the notebook.
m3.model_expression = 'res_price_per_sqft + residential_units_500 + job_500 - 1'

In [None]:
# Estimate the hlcm3 specification configured in the previous cell.
m3.fit()