# Real estate price model workflow

Sam Maurer, Feb 2018; Paul Waddell, June 2018

Python 3.6, intended to be backward compatible with 2.7

In [1]:
from __future__ import print_function

import numpy as np
import pandas as pd

In [2]:
# UrbanSim is conventionally run from the root level of the project directory,
# so move the working directory up one level. The path is relative, which means
# re-running this cell moves up one more level each time.

import os

os.chdir('../')

In [3]:
from urbansim_templates import modelmanager as mm
from urbansim_templates.models import OLSRegressionStep
import orca

  from pandas.core import datetools


In [4]:
# Load any script-based Orca registrations
from scripts import datasources
from scripts import models

## Explore the Orca registrations

In [5]:
orca.list_tables()

['parcels', 'buildings', 'sales', 'units', 'households', 'persons', 'jobs']

In [None]:
# For every table registered with Orca, print its name in upper case followed
# by the list of its column names and a blank separator line.
for tbl in orca.list_tables():
    print(tbl.upper())
    frame = orca.get_table(tbl).to_frame()
    print(frame.columns.tolist())
    print()

In [None]:
orca.list_broadcasts()

In [None]:
orca.list_steps()

## Explore the data

Orca doesn't execute code to load the registered objects until it needs to.

In [None]:
orca.get_table('parcels').to_frame().describe()

In [None]:
orca.get_table('buildings').to_frame().describe()

In [None]:
orca.get_table('units').to_frame().describe()

In [6]:
orca.get_table('sales').to_frame().describe()

  table = table()


Unnamed: 0,sr_unique_id,sr_property_id,sr_scm_id,mm_state_code,mm_muni_name,mm_fips_state_code,mm_fips_muni_code,mm_fips_county_name,sr_parcel_nbr_raw,sr_site_addr_raw,...,use_code_std,sa_sqft,sa_x_coord,sa_y_coord,sa_geo_qlty_code,ucb_geo_id,ucb_price_sqft,ucb_price_sqft_adj,ucb_condo_subdiv_flag,ucb_condo_subdiv_sqft
count,2147515,2147515,2147515,2147515,2147515,2147515,2147515,2147515,691804,913221,...,2147515,2147515,2147515.0,2147515.0,2147515,2147515,2147515,2147515,2147515,2147515
unique,2147508,1177568,19,2,10,3,19,10,596470,737291,...,12,15239,683697.0,716646.0,8,3154,3086,3165,5,16199
top,sr_unique_id,23529393,68,CA,SANTA CLARA,6,85,SANTA CLARA,230 29 106,1000 PINE ST,...,RSFR,1000,122.2952,37.83943,0,6013355108,125,215,0,1000
freq,8,122,483328,2147507,485745,2081979,483328,485745,118,31,...,1578663,9580,1014.0,1018.0,1942970,7327,9892,7249,2076195,9557


## Generate accessibility measures for the price model

The network accessibility metrics are not stored on disk; for now we'll generate them using legacy code.

In [None]:
orca.run(['initialize_network'])

In [None]:
orca.run(['network_aggregations'])

In [None]:
%%capture
orca.run(['neighborhood_vars'])

In [None]:
orca.list_tables()

In [None]:
print(orca.get_table('nodes').to_frame().columns.tolist())

# Estimate a price model

The basic idea of the parcel template is that we create model steps by _passing arguments to classes_ rather than by writing Python functions and giving them Orca decorators, as we would for a fully custom model.

Much of the functionality for this is already built into UrbanSim and Orca; we'll just need to extend things here and there.

This demo uses the new OLSRegressionStep class (imported above from urbansim_templates.models), which provides a full model development workflow: estimating a model, registering it with Orca, and saving it for future use.

### Specify parameters and pass them to a model object

In [None]:
# Specify the model expression and names of tables to draw data from (the first table
# is the primary one; additional tables must be able to merge onto it unambiguously)

tables = ['buildings', 'parcels', 'nodes']

model_expression = (
    "np.log1p(res_price_per_sqft) ~ "
        "parcel_acres + "
        "year_built + "
        "ave_income + "
        "distance_to_freeway + "
        "population_400m + "
        "jobs_3000m")

# Give the prospective model step some tags, and a name if desired
# (name is left as None here, so no explicit name is requested)

name = None
tags = ['residential-price-hedonic', 'sam', '201802']

# For prediction, specify destination column (if different from the dependent variable
# used for estimation), and how to reverse the left-hand-side transformation.
#
# The left-hand side is np.log1p(y) = log(1 + y), whose exact inverse is
# np.expm1(x) = exp(x) - 1. Using plain np.exp would overstate every fitted
# price by exactly 1.

out_fname = 'fitted_price'
ytransform = np.expm1

In [None]:
# Add a 'fitted_price' column to the buildings table, filled with 0.0 for every
# row, to hold the fitted prices

bldg_table = orca.get_table('buildings')
bldg_table.update_col('fitted_price', np.zeros(len(bldg_table)))

In [None]:
# Create the model object.
#
# The step class imported at the top of this notebook is OLSRegressionStep; the
# bare name RegressionStep is never imported or defined, so using it raises a
# NameError. The expression and table list are passed by keyword so the call
# does not depend on the constructor's positional order.
# NOTE(review): the keyword names `out_fname` and `ytransform` are carried over
# unchanged from the original call -- confirm they match the constructor of the
# installed urbansim_templates version.

model = OLSRegressionStep(model_expression=model_expression, tables=tables,
                          name=name, tags=tags,
                          out_fname=out_fname, ytransform=ytransform)

### Fit the model

In [None]:
model.fit()

### If we like it, register it as an Orca step

In [None]:
model.register()

In [None]:
orca.list_steps()

### Run the Orca step

In [None]:
# Run the step under the name carried by the model object rather than a
# hard-coded, timestamped name from an earlier session. The model was created
# with name=None, so its name is presumably generated at creation/registration
# time and will differ on every run -- check the list of registered steps above
# if this lookup fails.
orca.run([model.name])

### Check the fitted values

In [None]:
# Pull the observed and fitted price columns from buildings, then summarise
# only the rows whose fitted price is strictly positive
df = orca.get_table('buildings').to_frame(['res_price_per_sqft', 'fitted_price'])
df[df['fitted_price'] > 0].describe()

### BONUS

Running "model.register()" also registered the step with the new ModelManager extension, which saves it to disk so that it can be automatically re-loaded in the future.

The "test" model steps here were estimated earlier and loaded from disk. They're fully functional: we can run them in Orca, inspect the estimation results, etc.

In [None]:
mm.list_steps()

In [None]:
rs = mm.get_step('test-1')
type(rs)

In [None]:
rs.model_expression

In [None]:
rs.model.report_fit()