# Live demo from 3/15/2018

Sam Maurer, March 2018 | Python 3.6

Updated June 2018 to confirm everything still runs

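This notebook demonstrates working with Orca, UrbanSim Templates, and ModelManager: inspecting registered tables, recoding a column, estimating and running a binary logit model, and saving, reloading, and removing the model step.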

In [1]:
import os

# Move the working directory up one level from where the kernel started
# (presumably so the `scripts` package imported further down resolves --
# confirm against the project layout).
#
# The starting directory is remembered in NOTEBOOK_DIR so that re-running this
# cell in the same kernel always lands in the same place; a bare
# os.chdir('../') would climb one more level on every re-run.
NOTEBOOK_DIR = globals().get('NOTEBOOK_DIR', os.getcwd())
os.chdir(os.path.join(NOTEBOOK_DIR, os.pardir))

In [2]:
from urbansim_templates import modelmanager as mm
from urbansim_templates.models import BinaryLogitStep, OLSRegressionStep
import orca

  from pandas.core import datetools


In [3]:
# Load any script-based Orca registrations
from scripts import datasources
from scripts import models

In [4]:
# Show the names of the tables currently registered with Orca
orca.list_tables()

['parcels', 'buildings', 'units', 'households', 'persons']

In [5]:
%%time
# Materialize the 'households' table as a DataFrame and display its type;
# the %%time magic on this cell reports how long that takes.
type(orca.get_table('households').to_frame())

CPU times: user 9.36 s, sys: 1.05 s, total: 10.4 s
Wall time: 10.9 s


pandas.core.frame.DataFrame

In [6]:
# For every registered table, print its name in upper case, then the list of
# its column names, then a blank separator line.
for name in orca.list_tables():
    print(name.upper())
    column_names = list(orca.get_table(name).to_frame().columns)
    print(column_names)
    print()

PARCELS
['development_type_id', 'land_value', 'acres', 'county_id', 'zone_id', 'proportion_undevelopable', 'tax_exempt_status', 'apn', 'parcel_id_local', 'geom_id', 'imputation_flag', 'x', 'y', 'shape_area', 'block_id', 'node_id']

BUILDINGS
['parcel_id', 'development_type_id', 'improvement_value', 'residential_units', 'residential_sqft', 'sqft_per_unit', 'non_residential_sqft', 'building_sqft', 'nonres_rent_per_sqft', 'res_price_per_sqft', 'stories', 'year_built', 'redfin_sale_price', 'redfin_sale_year', 'redfin_home_type', 'costar_property_type', 'costar_rent', 'building_type_id']

UNITS
['Unnamed: 0', 'building_id', 'num_units', 'tenure', 'unit_num', 'unit_residential_price', 'unit_residential_rent']

HOUSEHOLDS
['household_id', 'serialno', 'persons', 'building_type', 'cars', 'income', 'race_of_head', 'hispanic_head', 'age_of_head', 'workers', 'state', 'county', 'tract', 'block group', 'children', 'tenure', 'recent_mover', 'block_group_id', 'single_family', 'unit_id']

PERSONS
['Unn

### Turn 'tenure' into a 0/1 binary variable

In [7]:
# Pull the 'households' table into a DataFrame; `h` is inspected and recoded
# in the following cells.
h = orca.get_table('households').to_frame()

In [8]:
# Summary statistics for the tenure column before recoding
h['tenure'].describe()

count    2.679684e+06
mean     1.427137e+00
std      4.946626e-01
min      1.000000e+00
25%      1.000000e+00
50%      1.000000e+00
75%      2.000000e+00
max      2.000000e+00
Name: tenure, dtype: float64

In [9]:
# Recode tenure value 2 to 0 so the column becomes the 0/1 variable named in
# the section heading; rows with other values are left untouched.
is_code_two = h['tenure'] == 2
h.loc[is_code_two, 'tenure'] = 0

# Summary statistics after recoding
h['tenure'].describe()

count    2.679684e+06
mean     5.728631e-01
std      4.946626e-01
min      0.000000e+00
25%      0.000000e+00
50%      1.000000e+00
75%      1.000000e+00
max      1.000000e+00
Name: tenure, dtype: float64

In [10]:
# Write the recoded tenure values back into the Orca-registered table
orca.get_table('households').update_col_from_series('tenure', h['tenure'])

# Re-read the table from Orca to confirm the update is visible there
orca.get_table('households').to_frame()['tenure'].describe()

count    2.679684e+06
mean     5.728631e-01
std      4.946626e-01
min      0.000000e+00
25%      0.000000e+00
50%      1.000000e+00
75%      1.000000e+00
max      1.000000e+00
Name: tenure, dtype: float64

### Estimate a model

In [11]:
# Configure a binary logit step for tenure.
# The expression mixes household columns (income, persons) with a buildings
# column (stories), which is why several tables are listed.
m = BinaryLogitStep()
m.tables = ['households', 'units', 'buildings']
m.model_expression = 'tenure ~ income + persons + stories'
m.filters = ['income < 500000']

In [12]:
# Estimate the model; the optimizer status and regression summary are printed below
m.fit()

Optimization terminated successfully.
         Current function value: 0.589161
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:                 tenure   No. Observations:              2460070
Model:                          Logit   Df Residuals:                  2460066
Method:                           MLE   Df Model:                            3
Date:                Mon, 18 Jun 2018   Pseudo R-squ.:                  0.1349
Time:                        12:15:36   Log-Likelihood:            -1.4494e+06
converged:                       True   LL-Null:                   -1.6754e+06
                                        LLR p-value:                     0.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.1377      0.004     33.241      0.000       0.130       0.146
income      1.046e-05   2.22e

### Run the model without saving it

In [13]:
# Add a placeholder column (filled with -1) to hold predicted tenure, then
# re-register the resulting frame with Orca under the same table name.
df = orca.get_table('households').to_frame().assign(predicted_tenure=-1)
orca.add_table('households', df)

<orca.orca.DataFrameWrapper at 0x10fe39c88>

In [14]:
# Point the step's output at the 'predicted_tenure' column of the households table
m.out_column = 'predicted_tenure'

In [15]:
# Run the fitted step; the next cell inspects the values it wrote to 'predicted_tenure'
m.run()

In [16]:
# Summarize the predictions stored in Orca.
# NOTE(review): the minimum of -1 in the output means some households kept the
# placeholder value -- presumably rows excluded by the model's filters or the
# table merge; confirm.
orca.get_table('households').to_frame()['predicted_tenure'].describe()

count    2.679684e+06
mean     4.706361e-01
std      6.251186e-01
min     -1.000000e+00
25%      0.000000e+00
50%      1.000000e+00
75%      1.000000e+00
max      1.000000e+00
Name: predicted_tenure, dtype: float64

### Save the model for future use

In [17]:
# Give the step a name and tags; the name is what the later cells use to look
# it up and remove it.
m.name = 'binary-tenure-model-test'
m.tags = ['demo', 'sam']

In [18]:
# Register the step; the following cells show it listed by both Orca and ModelManager
# (presumably this also saves it to ModelManager's config file -- confirm)
m.register()

In [19]:
# All steps registered with Orca; the newly registered step is the last entry
orca.list_steps()

['model_one',
 'model_two',
 'small-mnl-test',
 'ols-test',
 'large-mnl-test',
 'test_manual_registration',
 'initialize_network',
 'network_aggregations',
 'binary-tenure-model-test']

In [20]:
# ModelManager's listing of steps, with name, tags, and type for each.
# Note the output is shorter than orca.list_steps(): some Orca steps do not appear here.
mm.list_steps()

[{'name': 'model_one', 'tags': ['sam', 'testing'], 'type': 'BinaryLogitStep'},
 {'name': 'model_two', 'tags': ['sam', 'testing'], 'type': 'BinaryLogitStep'},
 {'name': 'small-mnl-test',
  'tags': ['sam', 'testing'],
  'type': 'SmallMultinomialLogitStep'},
 {'name': 'ols-test', 'tags': ['sam', 'testing'], 'type': 'OLSRegressionStep'},
 {'name': 'large-mnl-test',
  'tags': ['sam', 'testing'],
  'type': 'LargeMultinomialLogitStep'},
 {'name': 'binary-tenure-model-test',
  'tags': ['demo', 'sam'],
  'type': 'BinaryLogitStep'}]

In [21]:
# Retrieve the registered step by name and confirm its class
saved_model = mm.get_step('binary-tenure-model-test')
type(saved_model)

urbansim_templates.models.binary_logit.BinaryLogitStep

In [22]:
# Re-loaded model can be run, introspected, edited, re-saved, etc.

# Print the stored fit summary (presumably a multi-line string, so print()
# is used to render its line breaks); it matches the summary shown by m.fit().
print(saved_model.summary_table)

                           Logit Regression Results                           
Dep. Variable:                 tenure   No. Observations:              2460070
Model:                          Logit   Df Residuals:                  2460066
Method:                           MLE   Df Model:                            3
Date:                Mon, 18 Jun 2018   Pseudo R-squ.:                  0.1349
Time:                        12:15:36   Log-Likelihood:            -1.4494e+06
converged:                       True   LL-Null:                   -1.6754e+06
                                        LLR p-value:                     0.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.1377      0.004     33.241      0.000       0.130       0.146
income      1.046e-05   2.22e-08    470.707      0.000    1.04e-05    1.05e-05
persons        0.0044      0.001      4.836      0.0

In [23]:
# Remove the saved model, to keep the config file tidy

mm.remove_step('binary-tenure-model-test')
# List the remaining steps to confirm the demo step is gone
mm.list_steps()

[{'name': 'model_one', 'tags': ['sam', 'testing'], 'type': 'BinaryLogitStep'},
 {'name': 'model_two', 'tags': ['sam', 'testing'], 'type': 'BinaryLogitStep'},
 {'name': 'small-mnl-test',
  'tags': ['sam', 'testing'],
  'type': 'SmallMultinomialLogitStep'},
 {'name': 'ols-test', 'tags': ['sam', 'testing'], 'type': 'OLSRegressionStep'},
 {'name': 'large-mnl-test',
  'tags': ['sam', 'testing'],
  'type': 'LargeMultinomialLogitStep'}]