# Testing the data loading

Sam Maurer, Mar 2018 | Python 3.6

In [1]:
import os

# UrbanSim is run from the project root, which is the parent of the directory
# this kernel starts in. Remember the starting directory the first time this
# cell runs so that re-running it in the same kernel does not climb one more
# level each time (a bare relative chdir is not idempotent).
if 'NOTEBOOK_DIR' not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_DIR, '../'))

It's standard to run UrbanSim from the root level of the project directory, so we first have to `chdir` up one level to get there.

**TODO:** Can we remove this requirement? It's always bugged me. Maybe we could do it by importing the modelmanager config explicitly, like we do with `datasources`. We would also need to provide the data location.

In [2]:
import modelmanager as mm
from modelmanager.models import RegressionStep
import orca

In [3]:
# Load any script-based Orca registrations
from scripts import datasources
from scripts import models

## Load data via Orca, just for testing

In [4]:
# Show the tables currently registered with Orca
# (presumably registered by the scripts imported above -- confirm)
orca.list_tables()

['households', 'buildings']

In [5]:
# Look up the registered 'households' table, then materialize it as a frame
households_table = orca.get_table('households')
hh = households_table.to_frame()

In [6]:
# Inspect the household columns available for use in a model expression
hh.columns

Index(['household_id', 'taz', 'serialno', 'puma5', 'income', 'persons', 'hht',
       'unittype', 'noc', 'bldgsz', 'tenure', 'vehicl', 'hinccat1', 'hinccat2',
       'hhagecat', 'hsizecat', 'hfamily', 'hunittype', 'hnoccat', 'hwrkrcat',
       'h0004', 'h0511', 'h1215', 'h1617', 'h1824', 'h2534', 'h3549', 'h5064',
       'h6579', 'h80up', 'hworkers', 'hwork_f', 'hwork_p', 'huniv', 'hnwork',
       'hretire', 'hpresch', 'hschpred', 'hschdriv', 'htypdwel', 'hownrent',
       'hadnwst', 'hadwpst', 'hadkids', 'bucketbin', 'originalpuma',
       'hmultiunit', 'building_id'],
      dtype='object')

## Estimate a model using a template class

In [7]:
# Build a regression step from the template class: the formula gives the
# dependent and explanatory variables, `tables` names the Orca tables to draw
# on, and `name` is the identifier the step is registered under later.
model = RegressionStep(
    model_expression='income ~ tenure + persons + hadkids',
    tables=['households', 'buildings'],
    name='test_automated_registration',
)

In [8]:
# Estimate the model; the fit summary is shown in the cell output
model.fit()

                            OLS Regression Results                            
Dep. Variable:                 income   R-squared:                       0.113
Model:                            OLS   Adj. R-squared:                  0.113
Method:                 Least Squares   F-statistic:                 1.110e+05
Date:                Tue, 06 Mar 2018   Prob (F-statistic):               0.00
Time:                        21:12:32   Log-Likelihood:            -3.2980e+07
No. Observations:             2603610   AIC:                         6.596e+07
Df Residuals:                 2603606   BIC:                         6.596e+07
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept   1.112e+05    146.032    761.329      0.0

## Register the model step

In [9]:
# Register the fitted model as a step, then list every step Orca now knows
# about so we can confirm the new one appears by name
model.register()
orca.list_steps()

['test_automated_registration', 'test_manual_registration']

In [10]:
# List the steps tracked by modelmanager itself (name, tags, type); compare
# with the Orca step list, which may also include script-registered steps
mm.list_steps()

[{'name': 'test_automated_registration', 'tags': [], 'type': 'RegressionStep'}]

In [11]:
# Fetch the step back from modelmanager by name and print its fit report,
# to check that the fitted model is reachable through the registered step
registered_step = mm.get_step('test_automated_registration')
registered_step.model.report_fit()

R-Squared: 0.113
Adj. R-Squared: 0.113

+-----------+-------------+------------+----------+
| Component | Coefficient | Std. Error | T-Score  |
+-----------+-------------+------------+----------+
| Intercept |  111178.373 |  146.032   | 761.329  |
| hadkids   |  -6694.812  |  113.533   | -58.968  |
| persons   |   7707.931  |   32.788   | 235.086  |
| tenure    |  -24871.928 |   50.855   | -489.074 |
+-----------+-------------+------------+----------+


## Run model steps

In [12]:
# Run the step that is not tracked by modelmanager
# (presumably registered directly in scripts.models -- confirm)
orca.run(['test_manual_registration'])

Running step 'test_manual_registration'
Model step is running
Time to execute step 'test_manual_registration': 0.00 s
Total time to execute iteration 1 with iteration value None: 0.00 s


In [None]:
# FIXME: this step gets through prediction but then fails to save the data;
# the cause has not been diagnosed yet.
orca.run(['test_automated_registration'])