Primarily adapted from Sam Maurer's UAL code

In [1]:
# Switch the working directory to the parent of the notebook's directory;
# presumably so the './data/...' paths and the `scripts` import below resolve.
# NOTE(review): the target is relative, so re-running this cell climbs one more
# directory each time -- run it exactly once per kernel session.
import os; os.chdir('../')

In [2]:
import pandas as pd
import numpy as np
import orca
from scripts import datasources, models
from urbansim.utils import misc

### Create empty units

In [3]:
def _ual_create_empty_units(buildings):
    df = pd.DataFrame({
        'unit_residential_price': 0,
        'unit_residential_rent': 0,
        'num_units': 1,
        'building_id': np.repeat(buildings.index.values,
                                 buildings.residential_units.values.astype(int)),
        # counter of the units in a building
        'unit_num': np.concatenate([np.arange(i) for i in \
                                    buildings.residential_units.values.astype(int)]),
        'tenure': -1
    }).sort_values(by=['building_id', 'unit_num']).reset_index(drop=True)
    df.index.name = 'unit_id'
    return df

### Initialize residential units

In [4]:
@orca.table('residential_units', cache=True)
def residential_units(buildings):
    """Register the cached orca table of empty residential units built from ``buildings``."""
    empty_units = _ual_create_empty_units(buildings)
    return empty_units

### Subtract occupied units (\*\*\**cannot be called until households have been assigned to units*\*\*\*)

In [6]:
# @orca.column('residential_units', 'vacant_units')
# def vacant_units(residential_units, households):
#     return residential_units.num_units.sub(
#             households.unit_id[households.unit_id != -1].value_counts(), fill_value=0)

### Big merge to get data we need for tenure imputation

In [8]:
# Merge the registered 'buildings' table onto 'residential_units' (the target).
units = orca.merge_tables(
    target='residential_units',
    tables=['buildings', 'residential_units'],
)

In [9]:
# Row count of the merged units table.
units.shape[0]

2785868

In [20]:
# block_id is forced to str so it can be sliced as text further down.
geog_mappings = pd.read_csv(
    './data/parcels_blocks_nodes.csv',
    dtype={'block_id': str},
)

In [38]:
# Units whose parcel_id has no row in the geography table.
orphan_units = units[~units.parcel_id.isin(geog_mappings.parcel_id)]
orphan_share = round(len(orphan_units) / len(units) * 100, 2)

print('{0} units ({1}%) have parcels that do not exist in the geography table'.format(
    len(orphan_units), orphan_share))
print('The units with missing parcels are distributed among the following parcels:\n',
      dict(orphan_units['parcel_id'].value_counts()))

1843 units (0.07%) have parcels that do not exist in the geography table
The units with missing parcels are distributed among the following parcels:
 {2054505: 1843}


In [30]:
# Default (inner) join on parcel_id, then take the first 12 characters of
# block_id as the block-group id.
# NOTE(review): an inner join keeps only units whose parcel_id appears in
# geog_mappings, and joining on a column replaces the original index with a
# fresh positional one -- confirm both are intended.
units_w_block_groups = (
    units
    .merge(geog_mappings, on='parcel_id')
    .assign(bg_id=lambda merged: merged['block_id'].str.slice(stop=12))
)

In [27]:
# bg_id is read as str so it matches the string bg_id derived from block_id.
block_group_characteristics = pd.read_csv(
    './data/tenure_by_bldg_type_and_block_grp.csv',
    dtype={'bg_id': str},
)

In [39]:
# Units whose block group has no row in the block group characteristics table.
units_without_bg_char = units_w_block_groups[
    ~units_w_block_groups.bg_id.isin(block_group_characteristics['bg_id'])]
share_without_bg_char = round(
    len(units_without_bg_char) / len(units_w_block_groups) * 100, 3)

print('{0} units with block group IDs ({1}%) have block groups that are not in the block group characteristics table'.format(
    len(units_without_bg_char), share_without_bg_char))
print('The units with missing block group characteristics are distributed among the following block groups: \n',
      dict(units_without_bg_char['bg_id'].value_counts()))

17 units with block group IDs (0.001%) have block groups that are not in the block group characteristics table
The units with missing block group characteristics are distributed among the following block groups: 
 {'060871205002': 8, '060871210002': 3, '060871205001': 2, '060871233001': 2, '060330013001': 2}


In [40]:
# Default (inner) join: units whose bg_id is absent from the characteristics
# table do not appear in the result.
units_w_block_group_char = units_w_block_groups.merge(
    block_group_characteristics, on='bg_id')

### Assign tenure to units

Tenure types: 1 = own, 2 = rent (-1 = not yet assigned)

In [45]:
# if building type == 1, sample tenure from rate of single-family ownership
# A unit is an owner unit (1) when a uniform draw in [0, 1) is <= its block
# group's 'sf_o' value, otherwise a renter unit (2). A missing 'sf_o' compares
# False and therefore yields 2.
# The draws come from a locally seeded generator so the imputed tenure (and the
# CSV written at the end of the notebook) is reproducible across runs.
sf_mask = units_w_block_group_char['building_type_id'] == 1
sf_rng = np.random.RandomState(20001)
sf_draws = sf_rng.random_sample(int(sf_mask.sum()))
units_w_block_group_char.loc[sf_mask, 'tenure'] = np.where(
    sf_draws <= units_w_block_group_char.loc[sf_mask, 'sf_o'].values, 1, 2)

In [46]:
# if building type > 1, sample_tenure from rate of multi-family ownership
# A unit is an owner unit (1) when a uniform draw in [0, 1) is <= its block
# group's 'mf_o' value, otherwise a renter unit (2). A missing 'mf_o' compares
# False and therefore yields 2.
# The draws come from a locally seeded generator so the imputed tenure (and the
# CSV written at the end of the notebook) is reproducible across runs.
mf_mask = units_w_block_group_char['building_type_id'] > 1
mf_rng = np.random.RandomState(20002)
mf_draws = mf_rng.random_sample(int(mf_mask.sum()))
units_w_block_group_char.loc[mf_mask, 'tenure'] = np.where(
    mf_draws <= units_w_block_group_char.loc[mf_mask, 'mf_o'].values, 1, 2)

In [47]:
# Condo/Coop units in buildings of type > 1 are always owner units (tenure = 1),
# overriding whatever was sampled for them.
is_condo = units_w_block_group_char['redfin_home_type'] == 'Condo/Coop'
is_type_above_one = units_w_block_group_char['building_type_id'] > 1
units_w_block_group_char.loc[is_condo & is_type_above_one, 'tenure'] = 1

In [48]:
# Tally of tenure codes; any -1 left here was matched by neither sampling rule.
units_w_block_group_char['tenure'].value_counts()

 1    1576764
 2    1207161
-1         83
Name: tenure, dtype: int64

In [None]:
# Label the index so it can be written out as the 'unit_id' column.
# NOTE(review): this frame comes from column-based merges, which produce a new
# positional index -- confirm these values are meant to serve as unit ids.
units_w_block_group_char.index.names = ['unit_id']

In [53]:
# Keep only the columns of the registered 'residential_units' table and write
# them to disk. index_label is passed explicitly so the index column is headed
# 'unit_id' in the CSV even when the index has not been named beforehand.
unit_columns = orca.get_table('residential_units').columns
units_w_block_group_char[unit_columns].to_csv(
    './data/units_w_tenure.csv', index_label='unit_id')