## Rental hedonics estimation

Sam Maurer, August 2015

In [1]:
%load_ext autoreload
%autoreload 2
# Move the working directory two levels up, once per kernel session (presumably
# so that `import models` and relative data paths resolve from the project root).
# The guard used to test for a name (`sim`) that nothing in this notebook defines,
# so it was always true and every re-run of this cell climbed two more directories.
# A dedicated sentinel, set immediately after the move, makes the cell re-runnable.
if '_cwd_moved_up' not in globals():
    import os
    os.chdir('..')
    os.chdir('..')
    _cwd_moved_up = True
import models
import orca
import pandas as pd
pd.set_option('display.max_columns', 500)

### 1. Look at the Craigslist data

In [2]:
cl = orca.get_table('craigslist').to_frame()
cl[1:5]

Filling column _node_id with value 91839.0 (4 values)
Filling column shape_area with value 604.336158108 (0 values)
Filling column zone_id with value 1178 (0 values)


Unnamed: 0,neighborhood,price,bedrooms,date,sqft_per_unit,longitude,latitude,price_sqft,node_id,zone_id
1,sunnyvale,2650,2,2014-05-11,1040,-122.008131,37.353699,2.548077,143129,960
2,glen park,3100,2,2014-05-11,1000,-122.439743,37.731584,3.1,125905,1031
3,redwood city,1850,1,2014-05-11,792,-122.234294,37.491715,2.335859,143879,963
4,walnut creek,1325,1,2014-05-11,642,-122.087751,37.923448,2.063863,70923,888


In [3]:
cl.describe()

Unnamed: 0,price,bedrooms,sqft_per_unit,longitude,latitude,price_sqft,node_id,zone_id
count,73644.0,73644.0,73644.0,73644.0,73644.0,73644.0,73644.0,73603.0
mean,3806.642944,1.858305,1196.62111,-122.108903,37.639416,4.697802,156857.443906,890.963031
std,146802.05571,0.932134,31192.63512,1.35975,0.863417,183.768413,100783.18632,82.4365
min,1.0,1.0,1.0,-159.609375,-85.561269,0.000154,62.0,747.0
25%,1831.0,1.0,720.0,-122.286807,37.383128,2.055256,80038.0,831.0
50%,2330.5,2.0,904.0,-122.050473,37.601374,2.587519,149462.0,879.0
75%,2986.0,2.0,1160.0,-121.95227,37.844037,3.135714,190924.0,956.0
max,26722744.0,8.0,8388607.0,146.425781,79.858833,31145.389277,354659.0,1157.0


### 2. Set up the network vars

In [6]:
%%capture
orca.run(["neighborhood_vars"])

### 3. Estimate a rental listings hedonic

In [6]:
# The model expression is in rrh.yaml; price_sqft is the asking monthly rent per square
# foot from the Craigslist listings. Price, sqft, and bedrooms are specific to the unit,
# while all the other variables are aggregations at the node or zone level. Note that we
# can't use bedrooms in the simulation stage because it's not in the unit data.

In [7]:
orca.run(["rrh_estimate"])

Running step 'rrh_estimate'
                            OLS Regression Results                            
Dep. Variable:     np.log(price_sqft)   R-squared:                       0.508
Model:                            OLS   Adj. R-squared:                  0.508
Method:                 Least Squares   F-statistic:                     6303.
Date:                Sun, 28 Feb 2016   Prob (F-statistic):               0.00
Time:                        15:19:49   Log-Likelihood:                -4426.4
No. Observations:               73168   AIC:                             8879.
Df Residuals:                   73155   BIC:                             8998.
Df Model:                          12                                         
Covariance Type:            nonrobust                                         
                              coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------

### 4. Create a new model

In [8]:
# To try a variation of the model, save its specification as a new YAML file
# (rrh_new.yaml here) and run this cell to register and run a step that estimates it.
from urbansim.utils import misc
from urbansim import accounts
from urbansim.developer import sqftproforma
from urbansim_defaults import models
from urbansim_defaults import utils


@orca.step('rrh_estimate_new_new')
def rh_cl_estimate_NEW(craigslist, aggregations):
    """Estimate the hedonic variant specified in ``rrh_new.yaml``.

    Registered as the orca step ``rrh_estimate_new_new``. Both arguments are
    handed unchanged to ``utils.hedonic_estimate``.

    Parameters
    ----------
    craigslist : presumably the orca ``craigslist`` table (supplied by orca
        when the step runs -- confirm against the orca injection rules).
    aggregations : presumably the orca ``aggregations`` injectable.

    Returns
    -------
    Whatever ``utils.hedonic_estimate`` returns for this configuration.
    """
    config_file = "rrh_new.yaml"
    fitted = utils.hedonic_estimate(config_file, craigslist, aggregations)
    return fitted

orca.run(["rrh_estimate_new_new"])

Running step 'rrh_estimate_new_new'
                            OLS Regression Results                            
Dep. Variable:     np.log(price_sqft)   R-squared:                       0.445
Model:                            OLS   Adj. R-squared:                  0.445
Method:                 Least Squares   F-statistic:                     5863.
Date:                Sun, 28 Feb 2016   Prob (F-statistic):               0.00
Time:                        15:20:03   Log-Likelihood:                -8866.1
No. Observations:               73168   AIC:                         1.775e+04
Df Residuals:                   73157   BIC:                         1.786e+04
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                              coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------

### 5. Compare to sales hedonic

In [9]:
orca.run(["rsh_estimate"])

Running step 'rsh_estimate'
                              OLS Regression Results                              
Dep. Variable:     np.log(price_per_sqft)   R-squared:                       0.426
Model:                                OLS   Adj. R-squared:                  0.426
Method:                     Least Squares   F-statistic:                     5237.
Date:                    Sun, 28 Feb 2016   Prob (F-statistic):               0.00
Time:                            15:20:13   Log-Likelihood:                -30157.
No. Observations:                  105997   AIC:                         6.035e+04
Df Residuals:                      105981   BIC:                         6.050e+04
Df Model:                              15                                         
Covariance Type:                nonrobust                                         
                                   coef    std err          t      P>|t|      [95.0% Conf. Int.]
---------------------------------------------

### 6. Call utils.hedonic_estimate directly (rather than through a registered step name) to see the exact model form

In [10]:
utils.hedonic_estimate??

In [25]:
# Call the estimation helper directly instead of running a registered orca step:
# the table and the injectable are fetched from orca by hand and passed in, and
# the helper's return value is displayed as the cell output.
utils.hedonic_estimate("rrh_new.yaml", orca.get_table('craigslist'), orca.get_injectable('aggregations'))

                            OLS Regression Results                            
Dep. Variable:     np.log(price_sqft)   R-squared:                       0.445
Model:                            OLS   Adj. R-squared:                  0.445
Method:                 Least Squares   F-statistic:                     5863.
Date:                Tue, 23 Feb 2016   Prob (F-statistic):               0.00
Time:                        17:21:33   Log-Likelihood:                -8866.1
No. Observations:               73168   AIC:                         1.775e+04
Df Residuals:                   73157   BIC:                         1.786e+04
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                              coef    std err          t      P>|t|      [95.0% Conf. Int.]
-------------------------------------------------------------------------------------------
Intercept                 

<urbansim.models.regression.RegressionModel at 0x10e631650>

In [18]:
# `craigslist` is an orca-registered table, not a variable in the kernel namespace
# (a bare reference raised NameError), so fetch it through orca and preview it.
orca.get_table('craigslist').to_frame().head()

In [27]:
orca.get_injectable('aggregations')[1].to_frame().head()

Unnamed: 0_level_0,autoPeakRetail,autoPeakTotal,autoOffPeakRetail,autoOffPeakTotal,transitPeakRetail,transitPeakTotal,transitOffPeakRetail,transitOffPeakTotal,nonMotorizedRetail,nonMotorizedTotal
taz,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,10.5416,13.0616,10.5312,13.0511,8.3954,11.1182,8.3003,11.0119,8.6157,11.361
2,10.5336,13.0557,10.5185,13.041,8.0848,10.8372,7.9369,10.6877,8.6104,11.3717
3,10.5082,13.0308,10.4998,13.0225,8.0456,10.7516,7.8563,10.5597,8.4067,11.1668
4,10.5609,13.0801,10.5458,13.0651,8.4306,11.1391,8.3426,11.0507,8.711,11.4524
5,10.5458,13.0613,10.5325,13.0484,8.3395,11.0483,8.2755,10.979,8.4145,11.1588


In [28]:
orca.get_injectable('aggregations')[0].to_frame().head()

Unnamed: 0,sum_residential_units,sum_nonresidential_units,ave_sqft_per_unit,ave_lot_size_per_unit,population,poor,blacks,whites,nonwhites,hispanics,asians,renters,singles,two_persons,three_plus,sfdu,ave_hhsize,jobs,jobs3k,ave_income,pct_nonwhite,pct_renters,pct_hisp,pct_three_plus,poverty_rate,pct_singles,pct_white,pct_black,pct_asian,pct_two_persons
8,4.098927,7.179239,7.467289,9.76091,5.143949,4.105754,2.044237,4.749975,4.03971,4.113796,2.745158,2.440746,2.462539,2.956371,3.343739,1.534239,1.388186,7.051473,7.616929,10.962228,78.533241,47.448875,79.973503,65.003349,0.798172,47.872547,92.341011,39.74062,53.366737,57.472782
9,3.380845,7.305961,7.457674,9.667663,4.421817,3.331052,1.298611,4.052716,3.284432,3.470514,1.912862,1.726067,1.830193,2.268978,2.651044,1.270033,1.384454,7.12923,7.718721,10.947813,74.277878,39.035255,78.486153,59.95372,0.753322,41.390057,91.65274,29.368269,43.259628,51.313251
10,2.168365,7.382687,7.572812,9.939272,3.153618,1.948358,0.0,2.864048,1.929821,2.53097,0.0,0.492377,0.935621,1.158451,1.538386,1.065371,1.399366,7.164987,7.807612,10.90142,61.193882,15.613094,80.256088,48.781616,0.617817,29.668177,90.817871,0.0,0.0,36.734043
11,2.094202,7.35476,7.724983,9.697163,3.073977,2.337076,0.441124,2.722166,2.003438,2.555888,0.441124,0.296291,0.814238,1.008297,1.530781,1.013869,1.481605,7.112618,7.866594,10.905808,65.174141,9.638684,83.145981,49.798054,0.760278,26.488108,88.555183,14.350271,14.350271,32.801071
12,2.089788,7.352323,7.704502,9.671385,3.069928,2.365849,0.46936,2.711132,2.014112,2.561186,0.46936,0.279602,0.804329,0.996041,1.532674,1.009823,1.486836,7.108247,7.872163,10.884773,65.607765,9.107755,83.428207,49.925415,0.770653,26.200258,88.312546,15.28896,15.28896,32.445099


In [30]:
utils.simple_relocation??

In [31]:
orca.run(["travel_model_output"])

  }).sort(columns=["building_id", "unit_num"]).reset_index(drop=True)
  households = households.sort(columns=["building_id"], ascending=True)


Running step 'travel_model_output'
Initial unit tenure assignment: 56% owner occupied, 4% unfilled


KeyError: 'unit_id'

In [34]:
orca.get_table('buildings').local_columns

['parcel_id',
 'residential_units',
 'residential_sqft',
 'non_residential_sqft',
 'building_sqft',
 'stories',
 'year_built',
 'redfin_sale_year',
 'building_type_id',
 'residential_price',
 'non_residential_price']

In [36]:
orca.get_table('buildings').to_frame().head()

Unnamed: 0_level_0,parcel_id,residential_units,residential_sqft,non_residential_sqft,building_sqft,stories,year_built,redfin_sale_year,building_type_id,residential_price,non_residential_price,zone_id,general_type,residential_rent,sqft_per_job,lot_size_per_unit,vacant_job_spaces,vacant_residential_units,node_id,base_price_per_sqft,job_spaces,sqft_per_unit
building_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,69516,1,794,0,794,1,1939,2012,1,0,0,893,Residential,0,400,5200.06232,0,0,11584,113.297994,0,794
2,69517,1,1600,0,1600,1,1939,2012,1,0,0,893,Residential,0,400,5200.06376,0,0,11584,113.297994,0,1600
3,69518,1,1334,0,1334,1,1939,2012,1,0,0,893,Residential,0,400,5200.029703,0,0,11585,113.297994,0,1334
4,69519,1,1220,0,1220,1,1939,2012,1,0,0,893,Residential,0,400,5200.144623,0,1,11585,113.297994,0,1220
5,69520,1,892,0,892,1,1943,2012,1,0,0,893,Residential,0,400,5200.027559,0,0,11585,113.297994,0,892


In [37]:
orca.get_table('buildings').to_frame().columns

Index([u'parcel_id', u'residential_units', u'residential_sqft',
       u'non_residential_sqft', u'building_sqft', u'stories', u'year_built',
       u'redfin_sale_year', u'building_type_id', u'residential_price',
       u'non_residential_price', u'zone_id', u'general_type',
       u'residential_rent', u'sqft_per_job', u'lot_size_per_unit',
       u'vacant_job_spaces', u'vacant_residential_units', u'node_id',
       u'base_price_per_sqft', u'job_spaces', u'sqft_per_unit'],
      dtype='object')

In [38]:
orca.list_tables()

['zones',
 'buildings',
 'residential_units',
 'jobs',
 'zoning_np',
 'household_controls',
 'craigslist',
 'parcels_geography',
 'development_projects',
 'households',
 'costar',
 'household_extras',
 'homesales',
 'zoning_lookup',
 'employment_controls',
 'logsums',
 'nodes',
 'zoning_baseline',
 'parcels']

In [7]:
orca.get_table('nodes').to_frame().columns

Index([u'sum_residential_units', u'sum_nonresidential_units',
       u'ave_sqft_per_unit', u'ave_lot_size_per_unit', u'population', u'poor',
       u'blacks', u'whites', u'nonwhites', u'hispanics', u'asians', u'renters',
       u'singles', u'two_persons', u'three_plus', u'sfdu', u'ave_hhsize',
       u'jobs', u'jobs3k', u'ave_income', u'pct_nonwhite', u'pct_renters',
       u'pct_hisp', u'pct_three_plus', u'poverty_rate', u'pct_singles',
       u'pct_white', u'pct_black', u'pct_asian', u'pct_two_persons',
       u'residential_price', u'residential_rent', u'retail', u'office',
       u'industrial'],
      dtype='object')

In [41]:
from urbansim.maps import dframe_explorer
d = {tbl: orca.get_table(tbl).to_frame() for tbl in ['buildings', 'jobs', 'households']}

In [42]:
# Start the dframe_explorer map over the frames collected in `d`. `geom_name` is
# the key in the shape JSON and `join_name` the column in the data frames.
# NOTE(review): the exception recorded below this cell complains about a 'zones'
# frame, but `d` as currently built holds only 'buildings', 'jobs' and
# 'households' -- the output looks stale from an earlier version of `d`; re-run
# to confirm.
dframe_explorer.start(d, 
        center=[37.7792, -122.2191],
        zoom=11,
        shape_json='data/zones.json',
        geom_name='ZONE_ID', # from JSON file
        join_name='zone_id', # from data frames
        precision=2)


Exception: Join name must be present on all dataframes - 'zone_id' not present on 'zones'

In [8]:
utils.hedonic_estimate??