# MNL demo

Sam Maurer | Python 3.6

In [1]:
import os

# Work from the parent directory so the relative paths used below resolve.
# NOTE: the path is relative, so re-running this cell moves up one more level.
os.chdir('../')

In [2]:
import numpy as np
import pandas as pd
from collections import OrderedDict

In [3]:
from urbansim_templates import modelmanager as mm
from urbansim_templates.models import BinaryLogitStep, OLSRegressionStep, SmallMultinomialLogitStep
import orca

  from pandas.core import datetools


In [4]:
# Load any script-based Orca registrations
from scripts import datasources
from scripts import models

In [5]:
# For every table registered with Orca, print its upper-cased name and its
# column names, followed by a blank separator line.
for table_name in orca.list_tables():
    print(table_name.upper())
    print(orca.get_table(table_name).to_frame().columns.tolist(), end='\n\n')

HOUSEHOLDS
['household_id', 'taz', 'serialno', 'puma5', 'income', 'persons', 'hht', 'unittype', 'noc', 'bldgsz', 'tenure', 'vehicl', 'hinccat1', 'hinccat2', 'hhagecat', 'hsizecat', 'hfamily', 'hunittype', 'hnoccat', 'hwrkrcat', 'h0004', 'h0511', 'h1215', 'h1617', 'h1824', 'h2534', 'h3549', 'h5064', 'h6579', 'h80up', 'hworkers', 'hwork_f', 'hwork_p', 'huniv', 'hnwork', 'hretire', 'hpresch', 'hschpred', 'hschdriv', 'htypdwel', 'hownrent', 'hadnwst', 'hadwpst', 'hadkids', 'bucketbin', 'originalpuma', 'hmultiunit', 'building_id']

BUILDINGS
['building_id', 'parcel_id', 'development_type_id', 'improvement_value', 'residential_units', 'residential_sqft', 'sqft_per_unit', 'non_residential_sqft', 'building_sqft', 'nonres_rent_per_sqft', 'res_price_per_sqft', 'stories', 'year_built', 'redfin_sale_price', 'redfin_sale_year', 'redfin_home_type', 'costar_property_type', 'costar_rent', 'building_type_id']



### Small MNL

In [6]:
# Tenure codes (unconfirmed guess): 1 = own with mortgage, 2 = own free and
# clear, 3 = rent, 4 = no payment.

(orca.get_table('households')
     .to_frame(['tenure'])['tenure']
     .unique())

array([3, 2, 4, 1])

In [7]:
# Configure a small multinomial logit step that models household tenure.
m = SmallMultinomialLogitStep()
m.tables = ['households']
m.choice_column = 'tenure'

# First filter presumably keeps roughly a 1-in-1000 sample of households
# (ids divisible by 1000) -- confirm; second filter drops tenure code 4.
m.filters = ['household_id % 1000 < 1', 'tenure < 4']

# Each term is paired with the list [1, 3] -- presumably the alternatives
# that receive their own coefficient for that term (TODO confirm).
m.model_expression = OrderedDict(
    (term, [1, 3]) for term in ('intercept', 'income', 'persons'))

# 3 terms x 2 listed alternatives = 6 starting values, all zero.
m.initial_coefs = np.zeros(6)

In [8]:
# Number of rows returned by m._get_data() for the step configured above.
len(m._get_data())

2575

In [9]:
%%time
# Fit the model; the %%time cell magic (which must stay on the first line of
# the cell) reports how long the call takes.
m.fit()

Log-likelihood at zero: -2,828.9266
Initial Log-likelihood: -2,828.9266
Estimation Time for Point Estimation: 0.04 seconds.
Final log-likelihood: -2,419.4331




                     Multinomial Logit Model Regression Results                    
Dep. Variable:                     _chosen   No. Observations:                2,575
Model:             Multinomial Logit Model   Df Residuals:                    2,569
Method:                                MLE   Df Model:                            6
Date:                     Mon, 19 Mar 2018   Pseudo R-squ.:                   0.145
Time:                             15:00:28   Pseudo R-bar-squ.:               0.143
AIC:                             4,850.866   Log-Likelihood:             -2,419.433
BIC:                             4,885.988   LL-Null:                    -2,828.927
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
intercept_1    -0.0230      0.132     -0.174      0.862      -0.282       0.236
intercept_3     1.0235      0.129      7.914      0.000       0.770       1.277
income_1

In [10]:
# Run the step and keep whatever run() returns so its size can be inspected.
# NOTE(review): assumes run() returns a table-like object -- confirm.
df = m.run()

In [13]:
# Length of the object currently bound to `df`.
len(df)

10432076

In [None]:
# Rebind `df` to the data returned by m._get_data() and list its columns.
df = m._get_data()
print(df.columns.tolist())

In [None]:
# Sorted unique values of the `tenure` column, as a plain Python list.
alts = df['tenure'].sort_values().unique().tolist()

In [None]:
# Display the list of alternatives.
alts

In [None]:
# Sorted, de-duplicated index values of `df`.
obs = df.index.sort_values().unique()

# One-column frame holding those values, indexed by the same values.
obs_df = pd.DataFrame(data={'_obs_id': obs}, index=obs)

print(obs_df.shape[0])

In [None]:
# Rebind `obs` to the same sorted unique index values, this time converted
# to a plain Python list.
obs = df.index.sort_values().unique().tolist()

In [None]:
# Build one row per (observation, alternative) pair. This uses the public
# MultiIndex.from_product API instead of the private
# pandas.core.reshape.util.cartesian_product helper, which is not part of
# pandas' supported interface. Row order is the same: each observation id is
# repeated once per alternative, with the alternatives cycling fastest.
long_df = pd.MultiIndex.from_product(
    [obs, alts], names=['_obs_id', '_alts_id']).to_frame(index=False)

# Keep the flat arrays bound under their previous names in case other cells
# reference them.
obs_prod, alts_prod = long_df['_obs_id'].values, long_df['_alts_id'].values

print(len(long_df))

In [None]:
# Peek at the first rows of the long-format table.
print(long_df.head())

In [None]:
# Attach the columns of `df` to every long-format row by matching `_obs_id`
# against the index of `df`.
# NOTE: this rebinds `long_df`, so re-running the cell merges a second time
# and produces suffixed duplicate columns; rebuild `long_df` first.
long_df = long_df.merge(df, left_on='_obs_id', right_index=True)

# Inspect the merged table's columns (`df` itself is unchanged by the merge).
print(long_df.columns.tolist())

In [None]:
%%time
# Mark the chosen alternative: start every row at 0 ...
long_df['_chosen'] = 0
# ... then set 1 on rows whose alternative id equals the row's `tenure` value.
long_df.loc[long_df._alts_id == long_df['tenure'], '_chosen'] = 1

In [None]:
# Show the first three rows, now including the `_chosen` flag column.
print(long_df.head(3))

In [None]:
# Scratch check of OrderedDict behaviour: entries keep their insertion order.
d = OrderedDict([('a', 'b'), ('c', 'd')])
print(d)

In [None]:
# Keys of `d` other than 'c'. Uses != rather than `is not`: identity
# comparison against a string literal only works when the interpreter happens
# to intern the strings, and newer Python versions emit a SyntaxWarning for it.
[key for key in d if key != 'c']