# Replication notebook for displacement book chapter

Sam Maurer, August 2017, Python 3.6

In [20]:
import pandas as pd
import statsmodels.api as sm
import zipfile

  from pandas.core import datetools


In [2]:
# Load the PUMS households table directly from the zipped CSV.
# Both the archive and the member stream are opened via context managers so
# the file handles are released as soon as the CSV has been parsed, rather
# than staying open for the lifetime of the kernel.
with zipfile.ZipFile('../data/csv_hca_2013_1yr.zip') as z, z.open('ss13hca.csv') as member:
    # low_memory=False makes pandas read the file in one pass
    pums = pd.read_csv(member, low_memory=False)

In [15]:
# Row count of the households table
print(pums.shape[0])

31068


In [11]:
# Restrict the households table to Bay Area counties.
# The PUMA code is integer-divided by 100 and matched against a fixed list of
# codes (presumably county FIPS codes embedded in the PUMA id -- confirm
# against the PUMS data dictionary).
puma_prefix = pums['PUMA'] // 100
in_bay_area = puma_prefix.isin([1, 13, 41, 55, 75, 81, 85, 95, 97])

pums = pums.loc[in_bay_area]
print(len(pums))

31068


In [27]:
# Load the PUMS persons table directly from the zipped CSV.
# Both the archive and the member stream are opened via context managers so
# the file handles are released as soon as the CSV has been parsed, rather
# than staying open for the lifetime of the kernel.
with zipfile.ZipFile('../data/csv_pca_2013_1yr.zip') as z, z.open('ss13pca.csv') as member:
    # low_memory=False makes pandas read the file in one pass
    persons = pd.read_csv(member, low_memory=False)

In [28]:
# Row count of the persons table
print(persons.shape[0])

371403


In [30]:
# Keep householders (RELP code 0) and only the relevant columns:
# the household key (SERIALNO) and the person's age (AGEP).
# NOTE: this overwrites `persons`, so the cell cannot be re-run without
# reloading the persons table first (RELP is dropped here).
is_householder = persons['RELP'].isin([0])
persons = persons.loc[is_householder, ['SERIALNO', 'AGEP']]
print(len(persons))

130145


In [31]:
# Attach the householder columns to the households table.
# Left join on SERIALNO keeps every household row, including those with no
# matching householder record.
pums = pums.merge(persons, how='left', on='SERIALNO')
print(len(pums))

31068


In [13]:
# Distribution of TEN (tenure) codes:
#   1 - owned with mortgage loan
#   2 - owned free and clear
#   3 - rented
#   4 - occupied without payment of rent

tenure_counts = pums['TEN'].value_counts()
print(tenure_counts)

1.0    11974
3.0    10168
2.0     4847
4.0      351
Name: TEN, dtype: int64


In [14]:
# Distribution of MV codes (when the household moved into this house or
# apartment); code 1 means 12 months or less.

move_in_counts = pums['MV'].value_counts()
print(move_in_counts)

3.0    6602
4.0    5891
5.0    5627
7.0    3529
1.0    3392
6.0    3199
2.0    1851
Name: MV, dtype: int64


In [17]:
# Count recent movers (MV == 1) split by tenure
recent_mover = pums['MV'].isin([1])
rents = pums['TEN'].isin([3])       # rented
owns = pums['TEN'].isin([1, 2])     # owned, with or without a mortgage

print(len(pums.loc[recent_mover & rents]))  # recent movers who rent
print(len(pums.loc[recent_mover & owns]))  # recent movers who own

2489
861


In [32]:
# Derive the columns used by the regression.

# Binary indicators: moved in within the last 12 months (MV == 1), renter (TEN == 3)
pums['moved'] = pums['MV'].isin([1]).astype(int)
pums['renter'] = pums['TEN'].isin([3]).astype(int)

# Householder age, household income scaled down by 1000, and household size
pums['age'] = pums['AGEP']
pums['income'] = pums['HINCP'] / 1000
pums['persons'] = pums['NP']

regression_cols = ['moved', 'renter', 'age', 'income', 'persons']
print(pums.loc[:, regression_cols].head())

   moved  renter   age  income  persons
0      0       0  45.0    16.0        2
1      0       1  64.0     9.4        2
2      0       0  73.0   193.5        2
3      0       0  44.0   224.0        4
4      0       1  32.0     8.4        4


In [34]:
# Logit model of `moved` on tenure, income, householder age and household size.
# NOTE(review): the trailing "- 1" in the formula removes the intercept term;
# confirm this matches the specification being replicated.
formula = 'moved ~ renter + income + age + persons - 1'
logit_model = sm.Logit.from_formula(formula, pums)
results = logit_model.fit()
print(results.summary())

Optimization terminated successfully.
         Current function value: 0.310687
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:                  moved   No. Observations:                27340
Model:                          Logit   Df Residuals:                    27336
Method:                           MLE   Df Model:                            3
Date:                Thu, 17 Aug 2017   Pseudo R-squ.:                  0.1714
Time:                        21:34:23   Log-Likelihood:                -8494.2
converged:                       True   LL-Null:                       -10251.
                                        LLR p-value:                     0.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
renter         1.2799      0.037     34.782      0.000       1.208       1.352
income         0.0008      0.