# Live demo from 3/15/2018

Sam Maurer | Python 3.6

Working with Orca, UrbanSim Templates, and ModelManager

In [1]:
import os

# Move up one level so that the `scripts` package imported in a later cell is
# found relative to the working directory.
# Guarded so that re-running this cell does not climb one directory higher
# each time it is executed.
# NOTE(review): assumes the project root is the directory that contains
# `scripts/` -- confirm against the repository layout.
if not os.path.isdir('scripts'):
    os.chdir('../')

In [2]:
from urbansim_templates import modelmanager as mm
from urbansim_templates.models import BinaryLogitStep
import orca

  from pandas.core import datetools


In [3]:
# Load any script-based Orca registrations
from scripts import datasources
from scripts import models

In [4]:
# List the names of the tables currently registered with Orca
orca.list_tables()

['households', 'buildings']

In [5]:
# Confirm that to_frame() hands back a regular pandas DataFrame
type(orca.get_table('households').to_frame())

pandas.core.frame.DataFrame

In [6]:
# For every registered table, print its upper-cased name, its column names,
# and a blank separator line
for table_name in orca.list_tables():
    column_names = orca.get_table(table_name).to_frame().columns.tolist()
    print(table_name.upper(), column_names, '', sep='\n')

HOUSEHOLDS
['household_id', 'taz', 'serialno', 'puma5', 'income', 'persons', 'hht', 'unittype', 'noc', 'bldgsz', 'tenure', 'vehicl', 'hinccat1', 'hinccat2', 'hhagecat', 'hsizecat', 'hfamily', 'hunittype', 'hnoccat', 'hwrkrcat', 'h0004', 'h0511', 'h1215', 'h1617', 'h1824', 'h2534', 'h3549', 'h5064', 'h6579', 'h80up', 'hworkers', 'hwork_f', 'hwork_p', 'huniv', 'hnwork', 'hretire', 'hpresch', 'hschpred', 'hschdriv', 'htypdwel', 'hownrent', 'hadnwst', 'hadwpst', 'hadkids', 'bucketbin', 'originalpuma', 'hmultiunit', 'building_id']

BUILDINGS
['building_id', 'parcel_id', 'development_type_id', 'improvement_value', 'residential_units', 'residential_sqft', 'sqft_per_unit', 'non_residential_sqft', 'building_sqft', 'nonres_rent_per_sqft', 'res_price_per_sqft', 'stories', 'year_built', 'redfin_sale_price', 'redfin_sale_year', 'redfin_home_type', 'costar_property_type', 'costar_rent', 'building_type_id']



### Turn 'hownrent' into a 0/1 binary variable

In [7]:
# Pull the households table into a DataFrame so 'hownrent' can be recoded below
h = orca.get_table('households').to_frame()

In [8]:
# Recode 'hownrent' so that rows holding the value 2 become 0, then summarize
# the column to confirm it now spans 0..1
is_two = h['hownrent'] == 2
h.loc[is_two, 'hownrent'] = 0
h['hownrent'].describe()

count    2.608019e+06
mean     5.869478e-01
std      4.923821e-01
min      0.000000e+00
25%      0.000000e+00
50%      1.000000e+00
75%      1.000000e+00
max      1.000000e+00
Name: hownrent, dtype: float64

In [9]:
# Write the recoded series back into the households table registered with Orca
orca.get_table('households').update_col_from_series('hownrent', h.hownrent)

# Check that it worked: fetch the table from Orca again and summarize the column
orca.get_table('households').to_frame().hownrent.describe()

count    2.608019e+06
mean     5.869478e-01
std      4.923821e-01
min      0.000000e+00
25%      0.000000e+00
50%      1.000000e+00
75%      1.000000e+00
max      1.000000e+00
Name: hownrent, dtype: float64

### Estimate a model

In [10]:
# Create a new, unconfigured binary logit model step
m = BinaryLogitStep()

In [11]:
# Tables the model draws its variables from
m.tables = ['households', 'buildings']

In [12]:
# Model formula: 'hownrent' is the outcome; 'income' and 'persons' are
# households columns and 'stories' is a buildings column
m.model_expression = 'hownrent ~ income + persons + stories'

In [13]:
# Restrict the data used by the model to rows with income below 500,000
# NOTE(review): the filter also appears to limit which rows m.run() overwrites
# (see the 'taz' summary after the run) -- confirm against the template docs.
m.filters = ['income < 500000']

In [14]:
# Estimate the model; this prints a Logit regression results summary
m.fit()

Optimization terminated successfully.
         Current function value: 0.618479
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:               hownrent   No. Observations:              2594607
Model:                          Logit   Df Residuals:                  2594603
Method:                           MLE   Df Model:                            3
Date:                Fri, 16 Mar 2018   Pseudo R-squ.:                 0.08808
Time:                        15:11:31   Log-Likelihood:            -1.6047e+06
converged:                       True   LL-Null:                   -1.7597e+06
                                        LLR p-value:                     0.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0793      0.004     19.676      0.000       0.071       0.087
income      1.133e-05    2.8e

In [15]:
# Send the model's output to the existing 'taz' column of households.
# NOTE(review): this replaces real 'taz' values in the in-memory table --
# presumably chosen only to make the demo's effect visible; confirm before reuse.
m.out_column = 'taz'

In [16]:
# Summarize 'taz' before running the model, as a baseline for comparison
orca.get_table('households').to_frame().taz.describe()

count    2.608019e+06
mean     7.526463e+02
std      4.301403e+02
min      1.000000e+00
25%      3.760000e+02
50%      7.630000e+02
75%      1.146000e+03
max      1.454000e+03
Name: taz, dtype: float64

In [17]:
# Run the fitted model; its output goes to the column named in m.out_column
m.run()

In [18]:
# Summarize 'taz' again to see the effect of m.run().
# NOTE(review): the maximum stays far above 1, so some rows seem to keep their
# original 'taz' value -- presumably those excluded by m.filters; confirm.
orca.get_table('households').to_frame().taz.describe()

count    2.608019e+06
mean     1.126827e+00
std      1.808492e+01
min      0.000000e+00
25%      0.000000e+00
50%      1.000000e+00
75%      1.000000e+00
max      1.252000e+03
Name: taz, dtype: float64

In [19]:
# Register the step so that it shows up in the step listings below
m.register()

In [20]:
# Names of all steps currently registered with Orca
orca.list_steps()

['model_two',
 'model_one',
 'BinaryLogitStep-20180316-145603',
 'BinaryLogitStep-20180316-144717',
 'test_automated_registration',
 'BinaryLogitStep-20180315-103903',
 'BinaryLogitStep-20180316-145008',
 'test_manual_registration',
 'BinaryLogitStep-20180316-151129']

In [21]:
# Steps known to ModelManager, each reported with its name, tags, and type
mm.list_steps()

[{'name': 'model_two', 'tags': None, 'type': 'BinaryLogitStep'},
 {'name': 'model_one', 'tags': None, 'type': 'BinaryLogitStep'},
 {'name': 'BinaryLogitStep-20180316-145603',
  'tags': None,
  'type': 'BinaryLogitStep'},
 {'name': 'BinaryLogitStep-20180316-144717',
  'tags': None,
  'type': 'BinaryLogitStep'},
 {'name': 'test_automated_registration', 'tags': [], 'type': 'RegressionStep'},
 {'name': 'BinaryLogitStep-20180315-103903',
  'tags': None,
  'type': 'BinaryLogitStep'},
 {'name': 'BinaryLogitStep-20180316-145008',
  'tags': None,
  'type': 'BinaryLogitStep'},
 {'name': 'BinaryLogitStep-20180316-151129',
  'tags': None,
  'type': 'BinaryLogitStep'}]

In [22]:
# Fetch one of the listed steps by name from ModelManager
# (the result is not used again in the cells shown here)
new = mm.get_step('BinaryLogitStep-20180316-145603')

### Check that the orca callable registration is working properly

In [23]:
# Summarize 'taz' as a baseline before running the named steps through Orca
orca.get_table('households').to_frame().taz.describe()

count    2.608019e+06
mean     1.126827e+00
std      1.808492e+01
min      0.000000e+00
25%      0.000000e+00
50%      1.000000e+00
75%      1.000000e+00
max      1.252000e+03
Name: taz, dtype: float64

In [27]:
# Build, fit, and register a step named 'model_one'
m = BinaryLogitStep()
m.tables = ['households', 'buildings']
m.model_expression = 'hownrent ~ income + persons + stories'
m.out_column = 'taz'
# Distinctive output value so this step's effect on 'taz' is easy to recognize
m.out_value_true = 1_000_000
m.name = 'model_one'
m.fit()
m.register()

Optimization terminated successfully.
         Current function value: 0.618887
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:               hownrent   No. Observations:              2603610
Model:                          Logit   Df Residuals:                  2603606
Method:                           MLE   Df Model:                            3
Date:                Fri, 16 Mar 2018   Pseudo R-squ.:                 0.08698
Time:                        15:13:34   Log-Likelihood:            -1.6113e+06
converged:                       True   LL-Null:                   -1.7649e+06
                                        LLR p-value:                     0.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0969      0.004     24.134      0.000       0.089       0.105
income       1.09e-05   2.74e

In [28]:
# Register the same fitted step a second time under the name 'model_two',
# with a different output value so the two steps can be told apart
m.name = 'model_two'
m.out_value_true = 10 ** 9
m.register()

In [29]:
# Run 'model_one' by name through Orca, then summarize 'taz' to see which
# output value was written
orca.run(['model_one'])
orca.get_table('households').to_frame().taz.describe()

Running step 'model_one'
Time to execute step 'model_one': 1.88 s
Total time to execute iteration 1 with iteration value None: 1.88 s


count    2.608019e+06
mean     5.864537e+05
std      4.924686e+05
min      0.000000e+00
25%      0.000000e+00
50%      1.000000e+06
75%      1.000000e+06
max      1.000000e+06
Name: taz, dtype: float64

In [30]:
# Run 'model_two' by name through Orca, then summarize 'taz' to confirm it
# wrote its own output value rather than model_one's
orca.run(['model_two'])
orca.get_table('households').to_frame().taz.describe()

Running step 'model_two'
Time to execute step 'model_two': 2.03 s
Total time to execute iteration 1 with iteration value None: 2.03 s


count    2.608019e+06
mean     5.863159e+08
std      4.924933e+08
min      0.000000e+00
25%      0.000000e+00
50%      1.000000e+09
75%      1.000000e+09
max      1.000000e+09
Name: taz, dtype: float64