# Residential hedonic comparisons

Sam Maurer, May 2016

This notebook compares the specifications and estimation results of the MTC and UAL residential hedonic models, then estimates a new rental model whose specification is closer to the MTC version.

In [1]:
import orca
from baus import models
from urbansim.utils import misc
from urbansim.models.regression import RegressionModel

In [2]:
# To load the config files, urbansim.utils.misc expects to be running from the
# root of an urbansim instance.
# Set the BAUS_ROOT environment variable to point at your own checkout; when it
# is not set, the original author's local path is used as the fallback.
import os
os.chdir(os.environ.get("BAUS_ROOT", "/Users/smmaurer/Dropbox/Git-rMBP/ual/bayarea_urbansim"))

### MTC model from Mar 2016

In [12]:
# Locate the MTC sales hedonic config (rsh.yaml) through the urbansim config
# helper and rebuild the regression model from that YAML file.
fname = misc.config("rsh.yaml")
rsh = RegressionModel.from_yaml(str_or_buffer=fname)

In [32]:
# Show the model formula and the estimation filters stored in the MTC config.
# Single-argument print(...) calls behave identically on Python 2 and 3;
# print('') emits the blank separator line (a bare print() would show "()"
# on Python 2).
d = rsh.to_dict()
print(d['model_expression'])
print('')
print(d['fit_filters'])

price_per_sqft ~ sqft_per_unit + residential_units_1500 + jobs_45 + sfdu + ave_income_1500 + is_sanfran + embarcadero + pacheights + stanford + I(transit_type == "bart1") + I(transit_type == "bart2") + I(transit_type == "bart3a") + I(transit_type == "lrt1") + modern_condo + historic + new_construction

["general_type == 'Residential'", 'sqft_per_unit > 100', 'sqft_per_unit < 4000', 'price_per_sqft > 100', 'price_per_sqft < 1500', 'redfin_sale_year > 2010', 'redfin_sale_year <= 2014']


In [15]:
# Print the fit summary stored with the MTC model: R-squared values and the
# per-term coefficient / standard error / t-score table.
rsh.report_fit()

R-Squared: 0.476
Adj. R-Squared: 0.476

+-------------------------------------+-------------+------------+----------+
| Component                           | Coefficient | Std. Error | T-Score  |
+-------------------------------------+-------------+------------+----------+
| I(transit_type == "bart1")[T.True]  |    22.936   |   3.729    |  6.151   |
| I(transit_type == "bart2")[T.True]  |    28.592   |   3.441    |  8.309   |
| I(transit_type == "bart3a")[T.True] |    -0.000   |   0.000    | -124.050 |
| I(transit_type == "lrt1")[T.True]   |    65.319   |   4.923    |  13.268  |
| Intercept                           |  -3382.398  |   26.704   | -126.664 |
| ave_income_1500                     |   387.569   |   1.774    | 218.455  |
| embarcadero                         |    -2.114   |   0.049    | -43.322  |
| historic                            |    84.413   |   1.297    |  65.094  |
| is_sanfran                          |   129.789   |   2.189    |  59.291  |
| jobs_45               

### UAL model from summer 2015 (sales)

In [16]:
# Locate the UAL sales hedonic config (ual_rsh.yaml) through the urbansim
# config helper and rebuild the regression model from that YAML file.
fname = misc.config("ual_rsh.yaml")
ual_rsh = RegressionModel.from_yaml(str_or_buffer=fname)

In [31]:
# Show the model formula and the estimation filters stored in the UAL sales
# config. Single-argument print(...) calls behave identically on Python 2
# and 3; print('') emits the blank separator line (a bare print() would show
# "()" on Python 2).
d = ual_rsh.to_dict()
print(d['model_expression'])
print('')
print(d['fit_filters'])

np.log(price_per_sqft) ~ I(year_built < 1940) + I(year_built > 2000) + np.log1p(sqft_per_unit) + ave_lot_size_per_unit + ave_income + pct_black + pct_hisp + pct_asian + pct_renters + poverty_rate + population + autoPeakTotal + transitPeakTotal + autoOffPeakRetail + jobs

['sqft_per_unit > 100', 'sqft_per_unit < 4000', 'price_per_sqft > 100', 'price_per_sqft < 1500', 'redfin_sale_year > 2003', 'redfin_sale_year <= 2005']


In [19]:
# Print the fit summary stored with the UAL sales model: R-squared values and
# the per-term coefficient / standard error / t-score table.
ual_rsh.report_fit()

R-Squared: 0.433
Adj. R-Squared: 0.433

+------------------------------+-------------+------------+----------+
| Component                    | Coefficient | Std. Error | T-Score  |
+------------------------------+-------------+------------+----------+
| I(year_built < 1940)[T.True] |    0.107    |   0.004    |  30.576  |
| I(year_built > 2000)[T.True] |    0.047    |   0.003    |  15.720  |
| Intercept                    |    2.596    |   0.087    |  29.869  |
| autoOffPeakRetail            |    -0.254   |   0.007    | -38.446  |
| autoPeakTotal                |    0.358    |   0.006    |  59.942  |
| ave_income                   |    0.417    |   0.007    |  63.276  |
| ave_lot_size_per_unit        |    0.042    |   0.002    |  17.876  |
| jobs                         |    0.006    |   0.001    |  8.135   |
| np.log1p(sqft_per_unit)      |    -0.464   |   0.003    | -165.036 |
| pct_asian                    |    -0.002   |   0.000    | -14.277  |
| pct_black                    |    -

### UAL model from summer 2015 (rental listings)

In [5]:
# Locate the earlier UAL rental-listings hedonic config (ual_rrh_old.yaml)
# through the urbansim config helper and rebuild the regression model from it.
fname = misc.config("ual_rrh_old.yaml")
ual_rrh_old = RegressionModel.from_yaml(str_or_buffer=fname)

In [6]:
# Show the model formula and the estimation filters stored in the earlier UAL
# rental config. Single-argument print(...) calls behave identically on
# Python 2 and 3; print('') emits the blank separator line (a bare print()
# would show "()" on Python 2).
d = ual_rrh_old.to_dict()
print(d['model_expression'])
print('')
print(d['fit_filters'])

np.log(price_sqft) ~ np.log1p(sqft_per_unit) + ave_lot_size_per_unit + ave_income  + pct_black + pct_hisp + pct_asian + pct_renters + population + autoPeakTotal + transitPeakTotal + autoOffPeakRetail + jobs

['price_sqft > 0.5', 'price_sqft < 7']


In [7]:
# Print the fit summary stored with the earlier UAL rental model: R-squared
# values and the per-term coefficient / standard error / t-score table.
ual_rrh_old.report_fit()

R-Squared: 0.507
Adj. R-Squared: 0.507

+-------------------------+-------------+------------+----------+
| Component               | Coefficient | Std. Error | T-Score  |
+-------------------------+-------------+------------+----------+
| Intercept               |    7.620    |   0.083    |  92.195  |
| autoOffPeakRetail       |    0.081    |   0.016    |  5.166   |
| autoPeakTotal           |    -0.481   |   0.015    | -32.796  |
| ave_income              |    0.059    |   0.002    |  38.921  |
| ave_lot_size_per_unit   |    -0.062   |   0.001    | -50.676  |
| jobs                    |    0.067    |   0.001    | 100.906  |
| np.log1p(sqft_per_unit) |    -0.337   |   0.002    | -145.517 |
| pct_asian               |    0.006    |   0.000    |  61.288  |
| pct_black               |    -0.007   |   0.000    | -68.005  |
| pct_hisp                |    -0.004   |   0.000    | -36.551  |
| pct_renters             |    -0.003   |   0.000    | -20.793  |
| population              |    0.035

### Which network aggregations is MTC calculating?

In [34]:
# List the columns stored directly on the 'nodes' table, i.e. the network
# aggregation variables available to the hedonic models.
# Written as a single-argument print(...) call so it runs on Python 2 and 3.
print(orca.get_table('nodes').local_columns)

['retail_sqft_3000', 'sum_income_3000', 'residential_units_500', 'residential_units_1500', 'office_1500', 'retail_1500', 'industrial_1500', 'ave_sqft_per_unit', 'ave_lot_size_per_unit', 'population', 'poor', 'renters', 'sfdu', 'ave_hhsize', 'jobs_500', 'jobs_1500', 'ave_income_1500', 'ave_income_500']


In [35]:
# List the columns stored directly on the 'logsums' table.
# Written as a single-argument print(...) call so it runs on Python 2 and 3.
print(orca.get_table('logsums').local_columns)

['autoPeakRetail', 'autoPeakTotal', 'autoOffPeakRetail', 'autoOffPeakTotal', 'transitPeakRetail', 'transitPeakTotal', 'transitOffPeakRetail', 'transitOffPeakTotal', 'nonMotorizedRetail', 'nonMotorizedTotal']


### Estimating a new rental price model

Presuming that MTC wants to keep its basic model specifications, here we estimate a new model for rental prices that is as similar as possible to the MTC hedonic model.

'ual_rrh.yaml' has been set up separately from this notebook.

In [3]:
%%capture
# Run three registered orca steps by name. Judging by the step names, they
# load the rental listings and compute the neighborhood- and regional-level
# variables the hedonic needs -- NOTE(review): confirm against the step
# definitions, which are not shown in this notebook.
orca.run([
    "ual_load_rental_listings",
    "neighborhood_vars",
    "regional_vars",
])

In [9]:
# Locate the new UAL rental hedonic config (ual_rrh.yaml) through the urbansim
# config helper and rebuild the regression model from that YAML file.
fname = misc.config("ual_rrh.yaml")
ual_rrh = RegressionModel.from_yaml(str_or_buffer=fname)

In [10]:
# Show the model formula and the estimation filters stored in the new UAL
# rental config. Single-argument print(...) calls behave identically on
# Python 2 and 3; print('') emits the blank separator line (a bare print()
# would show "()" on Python 2).
d = ual_rrh.to_dict()
print(d['model_expression'])
print('')
print(d['fit_filters'])

price_per_sqft ~ sqft_per_unit + residential_units_1500 + jobs_45 + sfdu + ave_income_1500

['sqft_per_unit > 100', 'sqft_per_unit < 4000', 'price_per_sqft > 0.5', 'price_per_sqft < 7']


In [4]:
# Run the 'ual_rrh_estimate' orca step; its output is the OLS regression
# summary for price_per_sqft shown below.
# NOTE(review): the saved execution counts show this cell ran before the two
# cells above it -- re-run the notebook top to bottom to confirm the results.
orca.run([
    "ual_rrh_estimate"
])

Running step 'ual_rrh_estimate'
                            OLS Regression Results                            
Dep. Variable:         price_per_sqft   R-squared:                       0.427
Model:                            OLS   Adj. R-squared:                  0.427
Method:                 Least Squares   F-statistic:                 1.086e+04
Date:                Mon, 30 May 2016   Prob (F-statistic):               0.00
Time:                        20:33:34   Log-Likelihood:                -78802.
No. Observations:               72820   AIC:                         1.576e+05
Df Residuals:                   72814   BIC:                         1.577e+05
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [95.0% Conf. Int.]
---------------------------------------------------------------------------------------