# Travel Demand Model Data Inputs
> 

## Setup 

In [2]:
import pandas as pd
import pathlib
import os
from utils import *

In [3]:
# current working directory (absolute path of wherever the kernel was started)
local_path = pathlib.Path().absolute()
# data path: '2022/data' under the PARENT of the working directory
# (parents[0] is the folder that contains local_path, not a subfolder of it)
data_dir = local_path.parents[0] / '2022/data'
# set workspace to Workspace.gdb inside the working directory
# NOTE(review): arcpy is not imported explicitly in this notebook; presumably it
# comes in through `from utils import *` - confirm
arcpy.env.workspace = os.path.join(local_path, 'Workspace.gdb')
# allow geoprocessing tools to overwrite existing outputs
arcpy.env.overwriteOutput = True
# Set spatial reference to NAD 1983 UTM Zone 10N
sr = arcpy.SpatialReference(26910)

# global variables

#### Notes
* old inputs: F:\Research and Analysis\misc\Reid_Haefer\Model\model_update_2018\data_inputs\lodging_occupancy
* Final inputs to produce: F:\Transportation\model\scenario_base\zonal

### Base Data

#### Get Data

In [4]:
# Get TAZ data
taz_url = 'https://maps.trpa.org/server/rest/services/Transportation_Planning/MapServer/6'
sdf_taz = get_fs_data_spatial(taz_url)
# NOTE(review): this only READS the spatial reference (the value is discarded because
# it is not the last expression in the cell); nothing is set here
sdf_taz.spatial.sr

# Get Unit Data (parcel development records for 2022)
units_url = 'https://maps.trpa.org/server/rest/services/Existing_Development/MapServer/2'
sdf_units = get_fs_data_spatial_query(units_url, "Year = 2022")
# bare read of the spatial reference, no effect
sdf_units.spatial.sr

# Get Block Group Data
block_groups_url = 'https://maps.trpa.org/server/rest/services/Demographics/MapServer/27'
sdf_block_groups = get_fs_data_spatial(block_groups_url)
# keep only the 2020 block-group geographies
sdf_block_groups = sdf_block_groups[(sdf_block_groups['YEAR'] == 2020) & (sdf_block_groups['GEOGRAPHY'] == 'Block Group')]
# bare read of the spatial reference, no effect
sdf_block_groups.spatial.sr

# Get VHR Data
vhr_url = 'https://maps.trpa.org/server/rest/services/VHR/MapServer/0'
sdf_vhr = get_fs_data_spatial(vhr_url)
# bare read of the spatial reference, no effect
sdf_vhr.spatial.sr
# keep only VHR records whose Status is 'Active'
sdf_vhr = sdf_vhr[(sdf_vhr['Status'] == 'Active')]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._data[col] = array


#### TAZ to Block Group Crosswalk

In [None]:
# Build the TAZ <-> block group crosswalk from the unit, TAZ and block group layers.
# make_taz_crosswalk is not defined in this notebook (presumably from utils - confirm);
# later cells read 'TAZ', 'TRPAID', 'Residential_Units' and
# 'Residential_Units_Proportion' from its result.
taz_block_group_crosswalk = make_taz_crosswalk(sdf_units, sdf_taz, sdf_block_groups)
# save the crosswalk to the working directory for inspection
taz_block_group_crosswalk.to_csv('taz_block_group_crosswalk.csv', index=False)

In [None]:
# QAQC: left-join the VHR records to the parcel unit records on APN, then write out
# the joined rows whose parcel reports exactly zero residential units
df_vhr_comparison = sdf_vhr.merge(sdf_units, on='APN', how='left')
missing_units = df_vhr_comparison.loc[df_vhr_comparison['Residential_Units'].eq(0)]
missing_units.to_csv('missing_units.csv', index=False)


In [None]:
# Assign VHRs to TAZs and then group by TAZ
# NOTE(review): vhr_fc / taz_fc are the DataFrames themselves, not feature class
# paths - confirm arcpy accepts them as tool inputs, or export them first with
# .spatial.to_featureclass
vhr_fc = sdf_vhr
taz_fc = sdf_taz
# the join result is written to the in_memory workspace
vhr_taz_feature_class = r"in_memory\vhr_geo"

# join each VHR to the TAZ(s) matched by the HAVE_THEIR_CENTER_IN option;
# KEEP_ALL keeps VHRs that match no TAZ
arcpy.analysis.SpatialJoin(
    target_features=vhr_fc,
    join_features=taz_fc,
    out_feature_class=vhr_taz_feature_class,
    join_operation="JOIN_ONE_TO_MANY",
    join_type="KEEP_ALL",
    match_option="HAVE_THEIR_CENTER_IN"
)
# read the joined feature class back into a spatially enabled DataFrame
sdf_vhr_geo = pd.DataFrame.spatial.from_featureclass(vhr_taz_feature_class)

In [None]:
# number of VHR records (non-null APNs) per TAZ, reported as VHR_Count
taz_vhr_grouped = (
    sdf_vhr_geo
    .groupby('TAZ')
    .agg({'APN': 'count'})
    .reset_index()
    .rename(columns={'APN': 'VHR_Count'})
)

## Socio Econ
> TAZ by total_residential_units,census_occ_rate,total_occ_units,occ_units_low_inc,occ_units_med_inc,occ_units_high_inc,persons_per_occ_unit,total_persons,emp_retail,emp_srvc,emp_rec,emp_game,emp_other

In [None]:
# census table served by the TRPA demographics service
census_url = 'https://maps.trpa.org/server/rest/services/Demographics/MapServer/28'
df_census = get_fs_data(census_url)
# restrict to the 2022 sample reported at block-group level
df_census_2022 = df_census.loc[
    df_census['year_sample'].eq(2022) & df_census['sample_level'].eq('block group')
]

In [None]:
# Occupancy variables: B25002_002E = occupied, B25002_003E = vacant,
# B25004_006E = vacant seasonal
occupancy_codes = ['B25002_003E','B25002_002E', 'B25004_006E']
# one row per TRPAID, one column per variable code
df_census_occupancy = (
    df_census_2022.loc[df_census_2022['variable_code'].isin(occupancy_codes),
                       ['TRPAID', 'variable_code', 'value']]
    .pivot(index='TRPAID', columns='variable_code', values='value')
    .reset_index()
)
# total units = vacant + occupied; both rates are shares of total units
df_census_occupancy = df_census_occupancy.assign(
    total_units=lambda d: d['B25002_003E'] + d['B25002_002E'],
    occupancy_rate=lambda d: d['B25002_002E'] / d['total_units'],
    seasonal_rate=lambda d: d['B25004_006E'] / d['total_units'],
)

In [None]:
# Household size by block group, taken from variable B25010_001E.
# NOTE(review): the value is stored below as 'household_size'; the earlier label
# "Total Households" did not match that use - confirm against the ACS definition
df_census_household_size = (
    df_census_2022.loc[df_census_2022['variable_code'].eq('B25010_001E'),
                       ['TRPAID', 'variable_code', 'value']]
    .pivot(index='TRPAID', columns='variable_code', values='value')
    .reset_index()
)
# expose the value under a readable column name
df_census_household_size['household_size'] = df_census_household_size['B25010_001E']

### Categorize the income variables from the census

In [None]:
# Lookup of census variable codes to the broader income category they fall into
code_lookup = pd.read_csv('Lookup_Lists/occupancy_census_codes.csv')
# Keep only the census rows whose variable code appears in the lookup.
# .copy() makes this an independent frame so that adding a column below does not
# write into a slice of df_census_2022 (avoids SettingWithCopyWarning)
df_census_income = df_census_2022[df_census_2022['variable_code'].isin(code_lookup['variable_code'])].copy()
# Tag every row with its income category from the lookup
df_census_income['income_category'] = df_census_income['variable_code'].map(code_lookup.set_index('variable_code')['category'])
# Sum the values per block group and income category
df_census_income = df_census_income.groupby(['TRPAID','income_category'])['value'].sum().reset_index()
# Wide format: one row per TRPAID, one column per income category
df_census_income = df_census_income.pivot(index='TRPAID', columns='income_category', values='value').reset_index()


In [None]:
# TRPAID is a 16 digit ID. Convert it to a zero-padded string on ALL three frames
# so that the merge keys below share one dtype and one format (previously the
# occupancy frame was left unconverted while the other two were padded).
# NOTE(review): if TRPAID really arrives as float, astype(str) produces text such
# as '123.0' or scientific notation - confirm the source dtype
for census_frame in (df_census_occupancy, df_census_household_size, df_census_income):
    census_frame['TRPAID'] = census_frame['TRPAID'].astype(str).str.zfill(16)
# merge all the census data together
df_census_occupancy_all = pd.merge(df_census_occupancy, df_census_household_size, on='TRPAID', how='left')
df_census_all = pd.merge(df_census_occupancy_all, df_census_income, on='TRPAID', how='left')
# rename columns of df_census_all
column_rename = {
    'B25002_003E': 'vacant_units',
    'B25002_002E': 'occupied_units',
    'B25004_006E': 'seasonal_units',
    'High Income': 'high_income',
    'Low Income': 'low_income',
    'Medium Income': 'middle_income',
}
df_census_all = df_census_all.rename(columns=column_rename)
# FIXME: remove this line once the data is fixed
df_census_all = df_census_all.drop(columns=['B25010_001E'])
# proportion of occupied units in each income category
for income_level in ('high_income', 'middle_income', 'low_income'):
    df_census_all[f'{income_level}_proportion'] = df_census_all[income_level] / df_census_all['occupied_units']

In [None]:
# Attach block-group census values and VHR counts to every crosswalk row;
# anything missing after the left merges becomes 0
taz_values_acs = (
    taz_block_group_crosswalk
    .merge(df_census_all, on='TRPAID', how='left')
    .merge(taz_vhr_grouped, on='TAZ', how='left')
    .fillna(0)
)
# (disabled) removing VHRs from the seasonal units before computing the rate:
#taz_values_acs['adjusted_seasonal_units'] = taz_values_acs['seasonal_units']-taz_values_acs['VHR_Count']
#taz_values_acs['seasonal_rate'] = taz_values_acs['adjusted_seasonal_units'] / taz_values_acs['total_units']

# Weight each block-group rate by the crosswalk's Residential_Units_Proportion
rate_to_adjusted = {
    'occupancy_rate': 'adjusted_occupancy',
    'high_income_proportion': 'adjusted_high_income',
    'middle_income_proportion': 'adjusted_middle_income',
    'low_income_proportion': 'adjusted_low_income',
    'seasonal_rate': 'adjusted_seasonal_rate',
}
for rate_column, adjusted_column in rate_to_adjusted.items():
    taz_values_acs[adjusted_column] = taz_values_acs[rate_column] * taz_values_acs['Residential_Units_Proportion']

# Sum the weighted values within each TAZ
adjusted_columns = ['adjusted_occupancy', 'adjusted_seasonal_rate', 'adjusted_high_income',
                    'adjusted_middle_income', 'adjusted_low_income']
taz_values_grouped_occupancy = taz_values_acs.groupby('TAZ')[adjusted_columns].sum().reset_index()

# TAZ-level input values
taz_values_grouped_occupancy.to_csv('taz_calibration_values.csv', index=False)
# row-level detail kept for troubleshooting
taz_values_acs.to_csv('taz_values_acs.csv', index=False)

In [None]:
# Multiply the TAZ-level proportions by the total number of residential units in the TAZ
taz_residential_units = taz_block_group_crosswalk.groupby('TAZ')['Residential_Units'].sum().reset_index()
taz_residential_units = taz_residential_units.merge(taz_values_grouped_occupancy, on='TAZ', how='left')
taz_residential_units = taz_residential_units.merge(taz_vhr_grouped, on='TAZ', how='left')
# TAZs without any VHR come out of the left merge with NaN; count them as zero so
# the subtraction below does not turn their seasonal units into NaN
taz_residential_units['VHR_Count'] = taz_residential_units['VHR_Count'].fillna(0)
taz_residential_units['occupancy'] = taz_residential_units['adjusted_occupancy'] * taz_residential_units['Residential_Units']
taz_residential_units['total_seasonal_units'] = taz_residential_units['adjusted_seasonal_rate'] * taz_residential_units['Residential_Units']
# seasonal units net of VHRs
taz_residential_units['adjusted_season_units'] = taz_residential_units['total_seasonal_units'] - taz_residential_units['VHR_Count']
# Units per income category = income proportion * residential units * occupancy rate
for income_level in ('high_income', 'middle_income', 'low_income'):
    taz_residential_units[income_level] = (taz_residential_units[f'adjusted_{income_level}'] *
                                           taz_residential_units['Residential_Units'] *
                                           taz_residential_units['adjusted_occupancy'])
taz_residential_units.to_csv('taz_calibration_values_final.csv', index=False)


In [None]:
# total persons in each TAZ
# total regional population is 55,836 from the decennial census

# total persons in each TAZ = total persons in region * proportion of residents in TAZ

# proportion of residents in TAZ = residential units in TAZ * household size in TAZ * residential occupancy rate in TAZ

# normalized proportion = proportion of residents in TAZ / sum(proportion of residents in all TAZs)


### Employment

> Purpose: Assign employees to TAZs
* Data Axle data and CBP data sources

## Overnight Visitation - 
>TAZ by hotelmotel,resort,casino,campground,percentHouseSeasonal
* hotelmotel = TAUs Available Per Day where TAU_TYPE = Hotel/Motel
* resort     = TAUs Available Per Day where TAU_TYPE = Resort
* casino     = TAUs Available Per Day where TAU_TYPE = Casino

* campground = Sites available per day
    
* percentHouseSeasonal = ((TRPA Residential Units * Census Unoccupied Rate) - VHRs) / (TRPA Residential Units * Census Unoccupied Rate)

### Tourist Accommodation Units

In [None]:
# (TAUs by TAZ * Occupancy Rate of TAU) * number of people per room

# percent house seasonal = seasonal units * seasonal rate 


In [3]:
# Get Tourist Accommodation Units data (parcel development records for 2022)
tau_url = 'https://maps.trpa.org/server/rest/services/Existing_Development/MapServer/2'
sdf_tau = get_fs_data_spatial_query(tau_url, "YEAR = 2022")

# Get TAZ data (re-fetched here, replacing any sdf_taz loaded earlier)
taz_url = 'https://maps.trpa.org/server/rest/services/Transportation_Planning/MapServer/6'
sdf_taz = get_fs_data_spatial(taz_url)

> Set TAU Type - Casino, Hotel/Motel, Resort, or VHR

In [None]:
# add an empty TAU_Type text field so it can be selected with the other columns
sdf_tau['TAU_Type'] = None
# columns to keep
tau_columns = ['APN',
                'TouristAccommodation_Units', 'YEAR',
                'JURISDICTION', 'COUNTY', 'OWNERSHIP_TYPE',
                'EXISTING_LANDUSE', 'TAZ', 'TAU_Type',
                'WITHIN_TRPA_BNDY', 'PARCEL_ACRES', 'PARCEL_SQFT', 'SHAPE']
# filter to columns
sdf_tau = sdf_tau[tau_columns]

# keep only parcels with tourist accommodation units; .copy() makes the result an
# independent frame so the TAU_Type assignments below do not write into a slice
sdf_tau = sdf_tau[sdf_tau['TouristAccommodation_Units'] > 0].copy()

# casino parcels
# (the comma after the last south shore APN is required: without it Python
# concatenates that string with the first north shore APN into one bogus value)
casino_apns = [# south shore
                '1318-27-002-002','1318-27-002-005','1318-27-001-009','1318-27-001-021',
               # north shore
               '123-031-01', '123-042-01', '123-052-04', '123-042-15']

resort_apns = [# LT resort, hyatt, hilton, >?
                '127-280-02','029-480-004','027-090-025']

# default every parcel to Hotel/Motel, then override the casino and resort parcels
sdf_tau['TAU_Type'] = "Hotel/Motel"
# if the APN is in the list of casino APNs, set the TAU Type to Casino
sdf_tau.loc[sdf_tau['APN'].isin(casino_apns), 'TAU_Type'] = "Casino"
# if the APN is in the list of resort APNs, set the TAU Type to Resort
sdf_tau.loc[sdf_tau['APN'].isin(resort_apns), 'TAU_Type'] = "Resort"

In [None]:
# tally of parcels per TAU type (not rendered, since it is not the cell's last expression)
sdf_tau['TAU_Type'].value_counts()
# parcels classified as Casino, shown as the cell output
sdf_casino = sdf_tau.loc[sdf_tau['TAU_Type'].eq('Casino')]
sdf_casino

In [None]:
# Spatially join each TAU parcel to a TAZ (HAVE_THEIR_CENTER_IN match), keeping
# unmatched parcels; the result is written to 'taz_tau' in the workspace
arcpy.SpatialJoin_analysis(
    sdf_tau, sdf_taz, 'taz_tau',
    'JOIN_ONE_TO_ONE', 'KEEP_ALL',
    match_option='HAVE_THEIR_CENTER_IN',
)

# load the join result and take the TAZ id from the joined field TAZ_1
sdf_tau_taz = pd.DataFrame.spatial.from_featureclass('taz_tau')
sdf_tau_taz['TAZ'] = sdf_tau_taz['TAZ_1']
sdf_tau_taz = sdf_tau_taz[tau_columns]

# total tourist accommodation units per (TAU type, TAZ)
sdf_tau_taz_grouped = (
    sdf_tau_taz
    .groupby(['TAU_Type', 'TAZ'])['TouristAccommodation_Units']
    .sum()
    .reset_index()
)
# one row per TAZ, one column per TAU type
sdf_tau_taz_grouped_pivot = (
    sdf_tau_taz_grouped
    .pivot(index='TAZ', columns='TAU_Type', values='TouristAccommodation_Units')
    .reset_index()
)

# left-merge onto the TAZ layer so every TAZ is present, then use model field names
sdf_taz_tau = sdf_taz.merge(sdf_tau_taz_grouped_pivot, how='left', on='TAZ')
sdf_taz_tau = sdf_taz_tau.rename(columns={'Hotel/Motel': 'hotelmotel',
                                          'Casino': 'casino',
                                          'Resort': 'resort'})

# placeholder columns, filled with 0 here
sdf_taz_tau['campground'] = 0
sdf_taz_tau['percentHouseSeasonal'] = 0

# output fields only, missing values as 0, everything cast to int
output_columns = ['TAZ', 'hotelmotel', 'casino', 'resort', 'campground', 'percentHouseSeasonal']
sdf_taz_tau = sdf_taz_tau[output_columns].fillna(0).astype(int)

# export to csv
sdf_taz_tau.to_csv('OvernightVisitorZonalData_Summer.csv', index=False)


### Campgrounds

> Purpose: calculate campground occupancy on model day and assign to TAZs

In [4]:
# Get the recreation features whose RECREATION_TYPE is 'Campground' - should be 18 campgrounds
campground_url = 'https://maps.trpa.org/server/rest/services/Recreation/MapServer/1'
sdf_campground =  get_fs_data_spatial_query(campground_url, "RECREATION_TYPE='Campground'")

> Spatial Interpolation of Campground Occupancy Rates

In [None]:
# Get the data - should be 18 campgrounds
campground_url = 'https://maps.trpa.org/server/rest/services/Recreation/MapServer/1'
sdf_campground =  get_fs_data_spatial_query(campground_url, "RECREATION_TYPE='Campground'")

# campground occupancy rate data for 2022
dfCamp = pd.read_csv(os.path.join(data_dir,'Campground_Visitation.csv'))
dfCamp_2022 = dfCamp.loc[dfCamp['Year'] == 2022]

# merge campground data with occupancy rate data on campground name
sdf_campground = sdf_campground.merge(dfCamp_2022, left_on='RECREATION_NAME', right_on='Campground',
                                      how='left', indicator=True)

# keep only columns of interest
sdf_campground = sdf_campground[['RECREATION_NAME', 'Occupancy_Rate','SHAPE']]

# Split the campgrounds BEFORE filtering: the ones without an occupancy rate are
# the points that still need an interpolated value. (Previously this subset was
# taken after the not-null filter and was therefore always empty.)
sdf_campground_nan = sdf_campground[sdf_campground['Occupancy_Rate'].isnull()]
# campgrounds with a reported occupancy rate are the IDW sample points
sdf_campground = sdf_campground[sdf_campground['Occupancy_Rate'].notnull()]

# IDW surface of occupancy rate built from the campgrounds that have data
# set the output cell size
cell_size = 500
# set the power parameter
power = 2
# set the search radius
search_radius = 5000
# set the output raster
out_raster = 'campground_occupancy_rate'
# run the IDW (the tool's first parameter is named in_point_features)
# NOTE(review): a DataFrame is passed as the point input and a bare number as the
# search radius - confirm arcpy.sa.Idw accepts both in this environment
arcpy.sa.Idw(in_point_features=sdf_campground,
             z_field='Occupancy_Rate',
             cell_size=cell_size,
             power=power,
             search_radius=search_radius).save(out_raster)

# TODO: sample the raster at the sdf_campground_nan points to fill their occupancy rate


> Join and merge campground sites with TAZs, then group by TAZ

In [34]:
# Campground features from the recreation layer - should be 18 campgrounds
campground_url = 'https://maps.trpa.org/server/rest/services/Recreation/MapServer/1'
sdf_campground = get_fs_data_spatial_query(campground_url, "RECREATION_TYPE='Campground'")

# 2022 rows of the campground visitation table
dfCamp = pd.read_csv(os.path.join(data_dir, 'Campground_Visitation.csv'))
dfCamp_2022 = dfCamp.loc[dfCamp['Year'].eq(2022)]

# attach visitation data to each campground by name (left join, with merge indicator)
sdf_campground = sdf_campground.merge(
    dfCamp_2022,
    left_on='RECREATION_NAME',
    right_on='Campground',
    how='left',
    indicator=True,
)

# spatially join campgrounds to TAZs; result goes to 'taz_campground' in the workspace
arcpy.SpatialJoin_analysis(
    sdf_campground, sdf_taz, 'taz_campground',
    'JOIN_ONE_TO_ONE', 'KEEP_ALL',
    match_option='HAVE_THEIR_CENTER_IN',
)

# load the join result
sdf_campground_taz = pd.DataFrame.spatial.from_featureclass('taz_campground')

# sites sold = number of sites * occupancy rate
sdf_campground_taz['SitesSold'] = (
    sdf_campground_taz['Total_Sites'] * sdf_campground_taz['Occupancy_Rate']
)

# total sites sold per TAZ
sdf_campground_taz_grouped = (
    sdf_campground_taz.groupby('TAZ')['SitesSold'].sum().reset_index()
)


## Occupancy Rates
> TAZ by hotelmotel,resort,casino,campground,house,seasonal

### Lodging Occupancy


> Purpose: calculate occupancy rates for hotels and VHRs and assign to TAZs

* Placer has Occupancy data at the TOT tax district. Josh will get the spatial file from Placer County GIS. 
* City of South Lake has hotel level occupancy data that needs to be converted to a spatial file. Reports live here: https://www.cityofslt.us/805/Zone-Detail-Reports 
* Douglas County has occupancy rates from the Casino reports: https://gaming.nv.gov/about/abstract/report/
* Washoe County has occupancy rates at the District level from RSCVA (single number by type of occupancy) "F:\Research and Analysis\Visitation\occupancy\Washoe B Occupied Rooms by Market Segment - 2022.xlsx"
* Rest of El Dorado County? is only VHRs, estimating Occupancy Rates based on TOT rates F:\Research and Analysis\Visitation\occupancy

In [1]:
# Get VHR data - this is current, no way to get historical data outside of the City of South Lake Tahoe
# NOTE: get_fs_data_spatial and sr are not defined in this cell, so the Setup
# cells must be run first (the recorded NameError came from running this cell alone)
vhr_url = 'https://maps.trpa.org/server/rest/services/VHR/MapServer/0'
sdf_vhr = get_fs_data_spatial(vhr_url)
# assign the project spatial reference object to the frame
sdf_vhr.spatial.sr = sr

# Get Tourist Accommodation Units data (2022 parcel development records)
tau_url = 'https://maps.trpa.org/server/rest/services/Existing_Development/MapServer/2'
sdf_units_2022 = get_fs_data_spatial_query(tau_url, "YEAR = 2022")
sdf_units_2022.spatial.sr = sr

# occupancy zone feature class (read by name, resolved against the arcpy workspace)
# as a spatial dataframe
sdfOcc = pd.DataFrame.spatial.from_featureclass("Tahoe_OccupancyRate_Zones")
sdfOcc.spatial.sr = sr

# get the occupancy rate table from the geodatabase
occupancy_rate = pd.DataFrame.spatial.from_table("OccupancyRates")

NameError: name 'get_fs_data_spatial' is not defined

In [None]:
# use zip to create a list of tuples of the unique combinations of Zone_ID, Timeframe, and Temporal_Scale
zone_id = list(zip(occupancy_rate['Zone_ID'], occupancy_rate['Timeframe'], occupancy_rate['Temporal_Scale']))
for zone in zone_id:
    print(zone)

In [None]:
# Fill NaN with 0 in every float64 column.
# The result is assigned back to the column: calling fillna(..., inplace=True) on
# occupancy_rate[field] is chained assignment, which pandas warns about and which
# does not update the parent frame under copy-on-write.
for field in occupancy_rate.columns:
    if occupancy_rate[field].dtype == 'float64':
        occupancy_rate[field] = occupancy_rate[field].fillna(0)

In [None]:
# # occupancy rate table
# occupancy_rate = pd.read_excel(os.path.join(data_dir,"OccupancyRate_JS.xlsx"))
# occupancy_rate.Zone_ID.unique()
# # fill numeric columns with 0
# # occupancy_rate.fillna(0, inplace=True)                                                    
# # set timeframe to str
# occupancy_rate['Timeframe'] = occupancy_rate['Timeframe'].astype(str)
# # if field is float64 fill NaN with 0
# for field in occupancy_rate.columns:
#     if occupancy_rate[field].dtype == 'float64':
#         occupancy_rate[field].fillna(0, inplace=True)
# # convert to int
# occupancy_rate.Report_RoomsAvailable = occupancy_rate.Report_RoomsAvailable.astype(int)
# occupancy_rate.Report_RoomsRented = occupancy_rate.Report_RoomsRented.astype(int)
# # to csv
# occupancy_rate.to_csv('occupancy_rate.csv', index=False)
# # export to geodatabase
# occupancy_rate.spatial.to_table("Tahoe_OccupancyRates", overwrite=True, format="TABULAR")

In [None]:

# Timeframe as text, truncated to its first 10 characters (drops a trailing 00:00:00)
occupancy_rate['Timeframe'] = occupancy_rate['Timeframe'].astype(str).str[:10]

# keep the rows for the timeframes 'Q4 21-22', 'Q3 2022' and '2022-08-01' ...
model_timeframes = ['Q4 21-22', 'Q3 2022', '2022-08-01']
in_model_timeframe = occupancy_rate['Timeframe'].isin(model_timeframes)
df = occupancy_rate.loc[in_model_timeframe]

# ... and drop the VHR room type
is_vhr = df['RoomType'].isin(['VHR'])
df = df.loc[~is_vhr]
df['Zone_ID'].unique()

In [None]:
# occupancy zone feature class as a spatial dataframe
sdfOcc = pd.DataFrame.spatial.from_featureclass("Tahoe_OccupancyRate_Zones")
sdfOcc.spatial.sr = sr

# rows for the timeframes 'Q4 21-22', 'Q3 2022' and '2022-08-01', excluding the VHR
# room type; .copy() so the cast below does not write into a slice of occupancy_rate
df = occupancy_rate.loc[occupancy_rate.Timeframe.isin(['Q4 21-22', 'Q3 2022','2022-08-01']) & ~occupancy_rate.RoomType.isin(['VHR'])].copy()

# cast the join keys to string on both sides so they share one dtype.
# (Previously the astype(str) result was discarded, so no cast took place.)
# NOTE(review): assumes both id columns hold the same textual ids - confirm
df['Zone_ID'] = df['Zone_ID'].astype(str)
sdfOcc['OccupancyRate_ZoneID'] = sdfOcc['OccupancyRate_ZoneID'].astype(str)

# merge occupancy rate data to occupancy zones; the outer join with indicator shows
# which zones/rates failed to match
sdf = pd.merge(sdfOcc, df, left_on='OccupancyRate_ZoneID', right_on='Zone_ID', how='outer', indicator=True)

# export sdf to feature class
sdf.spatial.to_featureclass(location=os.path.join('Workspace.gdb', 'OccupancyRate_Zones'), overwrite=True)

# check value counts of merge
sdf._merge.value_counts()

> Filter Occupancy Rates to August timeframe and join to zones

In [None]:
# reload the occupancy rate table from the geodatabase
occupancy_rate = pd.DataFrame.spatial.from_table("OccupancyRates")

# work on a copy so the cast below does not modify occupancy_rate itself
df = occupancy_rate.copy()
# (disabled) filter for the timeframes 'Q4 21-22', 'Q3 2022' and '2022-08-01'
# df = occupancy_rate.loc[occupancy_rate['Timeframe'].isin(['Q4 21-22', 'Q3 2022','2022-08-01'])]

# cast Zone_ID to string (previously the astype(str) result was discarded)
df['Zone_ID'] = df['Zone_ID'].astype(str)

# (disabled) filter out rows with VHR room type
# df = df.loc[~df['RoomType'].isin(['VHR'])]
df.Zone_ID.unique()


> Merge CA parcels and VHR parcels

In [None]:
# active VHR records only
sdf_vhr = sdf_vhr[sdf_vhr['Status'] == 'Active']
# left-join the VHR records onto the 2022 parcels by APN, recording match status in _merge
sdf_units_2022 = sdf_units_2022.merge(sdf_vhr, on='APN', how='left', indicator=True)
# VHR = 'Yes' where the parcel matched a VHR record, otherwise 'No'
sdf_units_2022['VHR'] = (sdf_units_2022['_merge'] == 'both').map({True: 'Yes', False: 'No'})

> Join parcels with units to occupancy rate zone

In [None]:
# Build an IDW occupancy-rate raster ('tau_occupancy_rate') from sdf_tau.
# NOTE(review): this cell looks unfinished -
#   * z_field 'Occupancy_Rate' is not among the columns kept for sdf_tau (tau_columns);
#   * the keyword is written in_features; confirm against the arcpy.sa.Idw signature;
#   * a DataFrame is passed as the tool input.
# set the output cell size
cell_size = 30
# set the power parameter
power = 2
# set the search radius
search_radius = 5000
# set the output raster
out_raster = 'tau_occupancy_rate'
# run the IDW and save the result under out_raster
arcpy.sa.Idw(in_features=sdf_tau, 
             z_field='Occupancy_Rate', 
             cell_size=cell_size, 
             power=power, 
             search_radius=search_radius).save(out_raster)

In [None]:
# overlay units with occupancy zones: one output row per unit parcel (JOIN_ONE_TO_ONE),
# unmatched parcels kept (KEEP_ALL), matched with the HAVE_THEIR_CENTER_IN option
# NOTE(review): sdf_units_2022 is a DataFrame, not a feature class path - confirm
# arcpy accepts it as the target
arcpy.SpatialJoin_analysis(sdf_units_2022, 'OccupancyRate_Zones', "SpJoin_Units_OccupancyRates", "JOIN_ONE_TO_ONE", "KEEP_ALL", "", "HAVE_THEIR_CENTER_IN", "", "")
# read in output of spatial join as sdf
sdf_units_occ = pd.DataFrame.spatial.from_featureclass('SpJoin_Units_OccupancyRates')
# assign the project spatial reference object
sdf_units_occ.spatial.sr = sr

> Join parcels to TAZ

In [None]:
# overlay taz with units
# Spatial Join: one output row per unit parcel, unmatched parcels kept, matched with
# the HAVE_THEIR_CENTER_IN option; output feature class is "SpJoin_Units_TAZ"
# NOTE(review): both inputs are DataFrames - confirm arcpy accepts them
arcpy.SpatialJoin_analysis(sdf_units_2022, sdf_taz, "SpJoin_Units_TAZ", "JOIN_ONE_TO_ONE", "KEEP_ALL", "", "HAVE_THEIR_CENTER_IN", "", "")

> Interpolate Occupancy Rate - Inverse Distance Weighted

In [None]:
# occupancy rate interpolation
# Convert the unit polygons and the VHR features to points held in the in_memory workspace
arcpy.management.FeatureToPoint("SpJoin_Units_TAZ", "in_memory\\TAU_Points", "INSIDE")
arcpy.management.FeatureToPoint("VHR", "in_memory\\VHR_Points", "INSIDE")

# Interpolate occupancy rates with IDW.
# The inputs now point at the in_memory point layers created above (a bare
# "TAU_Points" would be resolved against the geodatabase workspace instead), and the
# tool is called as Idw, matching the arcpy.sa.Idw spelling used elsewhere in this notebook.
# NOTE(review): the search-radius string "NBRTYPE=Standard SMOOTH=0.1" and the 0.1
# cell size are carried over unchanged - confirm they are valid for this tool
arcpy.ddd.Idw("in_memory\\TAU_Points", "Report_OccupancyRate", "in_memory\\TAU_OccupancyRate", "0.1", "2", "NBRTYPE=Standard SMOOTH=0.1", "")
arcpy.ddd.Idw("in_memory\\VHR_Points", "Report_OccupancyRate", "in_memory\\VHR_OccupancyRate", "0.1", "2", "NBRTYPE=Standard SMOOTH=0.1", "")

# convert raster to polygon ("NO_SIMPLIFY" is the keyword; it was misspelled
# "NO SIMPLIFY" in the first call)
# NOTE(review): RasterToPolygon may require an integer raster - confirm the IDW
# output can be converted directly
arcpy.RasterToPolygon_conversion("in_memory\\TAU_OccupancyRate", "in_memory\\TAU_OccupancyRate_Polygon",
                                 "NO_SIMPLIFY", "VALUE")
arcpy.RasterToPolygon_conversion("in_memory\\VHR_OccupancyRate", "in_memory\\VHR_OccupancyRate_Polygon",
                                 "NO_SIMPLIFY", "VALUE")

# convert to spatial dataframes
sdf_tau_occ = pd.DataFrame.spatial.from_featureclass('in_memory\\TAU_OccupancyRate_Polygon')
sdf_vhr_occ = pd.DataFrame.spatial.from_featureclass('in_memory\\VHR_OccupancyRate_Polygon')

# merge occupancy rates to TAUs
# NOTE(review): the raster-derived polygons are unlikely to carry an APN field - confirm
sdf_tau_occ = pd.merge(sdf_tau_occ, sdf_units_occ, on='APN', how='left')

# set VHR_OccupancyRate to 0 if null
# NOTE(review): `sdf` is the occupancy-zone merge from an earlier cell; confirm this
# is the intended frame and that it has a VHR_OccupancyRate column
sdf['VHR_OccupancyRate'] = sdf['VHR_OccupancyRate'].fillna(0)

# TODO: zonal stats for TAUs

# zonal stats for TAUs

> Model Rooms Rented by TAZ

In [None]:
# rooms rented = rooms available * occupancy rate, for both the TAU and VHR frames
for occ_frame in (sdf_tau_occ, sdf_vhr_occ):
    occ_frame['rooms_rented'] = occ_frame['Report_RoomsAvailable'] * occ_frame['Report_OccupancyRate']

# total rooms rented per TAZ
sdf_tau_occ_grouped = sdf_tau_occ.groupby('TAZ')['rooms_rented'].sum().reset_index()
sdf_vhr_occ_grouped = sdf_vhr_occ.groupby('TAZ')['rooms_rented'].sum().reset_index()


### Housing Occupancy


> Purpose: estimate household size in each occupied housing unit
* get total Residential Units aggregated to TAZ
* get total VHRs aggregated to TAZ
* apply occupancy rate from ACS
* ACS 2022 by Block Group - household size

In [None]:
# Household size per block group (census variable B25010_001E)
df_census_household_size = df_census_2022.loc[
    df_census_2022['variable_code'].eq('B25010_001E'),
    ['TRPAID', 'variable_code', 'value'],
]
# attach the block-group value to every TAZ / block-group crosswalk row
df_census_household_size_taz = taz_block_group_crosswalk.merge(
    df_census_household_size, on='TRPAID', how='left'
)
# weight the value by the crosswalk's Residential_Units_Proportion
df_census_household_size_taz['household_size_proportion'] = (
    df_census_household_size_taz['value']
    * df_census_household_size_taz['Residential_Units_Proportion']
)
# sum the weighted values within each TAZ and export
taz_household_size = (
    df_census_household_size_taz
    .groupby('TAZ')['household_size_proportion']
    .sum()
    .reset_index()
)
taz_household_size.to_csv('taz_household_size.csv', index=False)

## School Enrollment
> TAZ by elementary_school_enrollment,middle_school_enrollment,high_school_enrollment,college_enrollment

> Purpose: Collate school enrollment files
* create spatial file 

In [None]:
# Get School Enrollment data - non-spatial table
school_url_table     = 'https://maps.trpa.org/server/rest/services/Demographics/MapServer/32'
df_school_enrollment = get_fs_data(school_url_table)

# Get School data - spatial layer
school_url_spatial = 'https://maps.trpa.org/server/rest/services/Datadownloader_PlanningandJurisdictions/MapServer/14'
sdf_school         = get_fs_data_spatial(school_url_spatial)

# Get TAZ data (re-fetched here, replacing any sdf_taz loaded earlier)
taz_url = 'https://maps.trpa.org/server/rest/services/Transportation_Planning/MapServer/6'
sdf_taz = get_fs_data_spatial(taz_url)


In [None]:
# start with an empty TYPE column
sdf_school['TYPE'] = None
# Classify schools by a case-insensitive keyword in NAME. The rules run in order,
# so a later keyword overrides an earlier one when a name contains both.
# na=False makes a missing NAME count as "no match" instead of producing a NaN mask,
# which .loc cannot use as a boolean indexer.
school_type_keywords = [
    ('elementary', 'Elementary School'),
    ('middle', 'Middle School'),
    ('high', 'High School'),
    ('college', 'College'),
]
for keyword, school_type in school_type_keywords:
    sdf_school.loc[sdf_school['NAME'].str.contains(keyword, case=False, na=False), 'TYPE'] = school_type
# anything that matched none of the keywords is treated as an elementary school
sdf_school.loc[sdf_school['TYPE'].isnull(), 'TYPE'] = 'Elementary School'

In [None]:
# spatially join the school points to the TAZ polygons (inner join)
sdf_school_taz = sdf_school.spatial.join(sdf_taz, how='inner')

# total enrollment per (school type, TAZ)
sdf_school_taz_grouped = (
    sdf_school_taz.groupby(['TYPE', 'TAZ'])['ENROLLMENT'].sum().reset_index()
)
# one row per TAZ, one column per school type
sdf_school_taz_grouped_pivot = (
    sdf_school_taz_grouped
    .pivot(index='TAZ', columns='TYPE', values='ENROLLMENT')
    .reset_index()
)

# left-merge onto the TAZ layer so every TAZ is present, drop the geometry,
# treat missing enrollment as 0, cast everything to int and apply model field names
sdf_taz_school = (
    sdf_taz
    .merge(sdf_school_taz_grouped_pivot, how='left', on='TAZ')
    .drop(columns='SHAPE')
    .fillna(0)
    .astype(int)
    .rename(columns={'Elementary School': 'elementary_school_enrollment',
                     'Middle School': 'middle_school_enrollment',
                     'High School': 'high_school_enrollment',
                     'College': 'college_enrollment'})
)

# export to csv
sdf_taz_school.to_csv('SchoolEnrollment.csv', index=False)


In [None]:
# display the TAZ-level school enrollment table
sdf_taz_school

#### other way 

In [None]:
# enrollment records for the 2022-2023 school year
df_school_enrollment_22 = df_school_enrollment[df_school_enrollment['Year'] == '2022-2023']
# Add a row for LTCC - Lake Tahoe Community College
ltcc = {'School_Name': 'Lake Tahoe Community College', 'Level_': 'College', 'Enrollment': 2909}
df_school_enrollment_22 = pd.concat([df_school_enrollment_22, pd.DataFrame([ltcc])], ignore_index=True)
# Join the school points to the 2022-2023 enrollment table built above.
# The previous call passed left_on without right_on (pandas raises MergeError) and
# joined the unfiltered table, which ignored both the year filter and the LTCC row.
# NOTE(review): assumes the enrollment table also carries a 'SchoolID' column, and
# the LTCC row has no SchoolID so it cannot match a school point - confirm
sdf_school_enroll = pd.merge(sdf_school, df_school_enrollment_22, on='SchoolID', how='left')

In [None]:
# service endpoints for the TAZ polygons and the school points
taz_url = 'https://maps.trpa.org/server/rest/services/Transportation_Planning/MapServer/6'
school_url_spatial = 'https://maps.trpa.org/server/rest/services/Datadownloader_PlanningandJurisdictions/MapServer/14'

# fetch both layers and keep only the fields needed for the join:
# TAZ id + geometry for TAZs, enrollment + geometry for schools
sdf_taz = get_fs_data_spatial(taz_url)[['TAZ', 'SHAPE']]
sdf_school = get_fs_data_spatial(school_url_spatial)[['ENROLLMENT', 'SHAPE']]

# write each frame to the workspace geodatabase as a feature class
for frame, feature_class_name in ((sdf_taz, 'TAZ'), (sdf_school, 'Schools')):
    frame.spatial.to_featureclass(
        location=os.path.join(arcpy.env.workspace, feature_class_name),
        overwrite=True,
        sanitize_columns=False,
    )

In [None]:
# build a FieldMappings object from the 'TAZ' and 'Schools' feature classes
field_mappings = arcpy.FieldMappings()
# add tables to field mappings
field_mappings.addTable('TAZ')
field_mappings.addTable('Schools')


# print names of fields in field mappings (inspection only)
for field in field_mappings.fields:
    print(field.name)

In [None]:
# create field mappings from the two feature classes taking part in the join
field_mappings = arcpy.FieldMappings()
field_mappings.addTable('TAZ')
field_mappings.addTable('Schools')

# locate the field map that controls the ENROLLMENT output field
enrollment = field_mappings.findFieldMapIndex("ENROLLMENT")
fieldmap = field_mappings.getFieldMap(enrollment)

# Get the output field's properties as a field object
field = fieldmap.outputField

# Rename the field and pass the updated field object back into the field map
field.name = "Total_Enrollment"
field.aliasName = "Total Enrollment"
fieldmap.outputField = field

# Set the merge rule to sum (enrollment of all schools joined to a TAZ is added up)
# and then replace the old field map in the mappings object with the updated one
fieldmap.mergeRule = "sum"
field_mappings.replaceFieldMap(enrollment, fieldmap)

# Spatial join that sums enrollment for each TAZ.
# The inputs are the 'TAZ' and 'Schools' feature classes - the same sources the
# field mappings above were built from - rather than the in-memory DataFrames.
arcpy.analysis.SpatialJoin(
    target_features   ='TAZ',
    join_features     ='Schools',
    out_feature_class ="TAZ_School_Enrollment",
    join_operation    ="JOIN_ONE_TO_ONE",
    join_type         ="KEEP_ALL",
    field_mapping     =field_mappings,
    match_option      ="INTERSECT",
)

In [None]:
# read the "TAZ_School_Enrollment" feature class into a spatially enabled DataFrame
sdf_taz_school = pd.DataFrame.spatial.from_featureclass("TAZ_School_Enrollment")

## Forecasts
> Forecast 2040 and 2050 development and population change

In [None]:
## get development units from the parcel development layer
# (no year filter is applied here, so every record the service returns is loaded)
devhistoryURL = "https://maps.trpa.org/server/rest/services/Existing_Development/MapServer/2"
parcel_history = get_fs_data_spatial(devhistoryURL)

# global variables
# years to total residential units for
years = [2012, 2018, 2019, 2020, 2021, 2022, 2023]

In [None]:
# get total residential units by year
def get_totals(parcels, years):
    """Sum residential units for each requested year.

    Parameters
    ----------
    parcels : pandas.DataFrame
        Table containing a 'YEAR' column and a 'Residential_Units' column.
    years : iterable
        Year values to total; output rows follow this order.

    Returns
    -------
    pandas.DataFrame
        One row per entry in ``years`` with columns 'Year' and
        'Residential_Units', on a clean 0..n-1 index. A year with no matching
        rows gets the sum of an empty selection (0 for numeric data).
    """
    # Build all rows first and create the frame once: avoids the quadratic
    # concat-in-a-loop pattern, the duplicated all-zero index it produced, and
    # the object dtype inherited from the empty seed frame.
    records = [
        {
            'Year': year,
            'Residential_Units': parcels.loc[parcels['YEAR'] == year, 'Residential_Units'].sum(),
        }
        for year in years
    ]
    # explicit columns keep the expected schema even when ``years`` is empty
    return pd.DataFrame(records, columns=['Year', 'Residential_Units'])

# get total residential units by year
total = get_totals(parcel_history, years)
# calculate percentage change in residential units year over year
total['Percent_Change'] = total['Residential_Units'].pct_change() * 100
# create a new column for the difference in residential units year over year
total['Difference'] = total['Residential_Units'].diff()

# export to csv
total.to_csv('total_residential_units_by_year.csv', index=False)

# show the table as the cell output (a bare expression only renders when it is
# the last statement of the cell, so it has to come after the export)
total




> Old Data

In [None]:
# load the cumulative accounting unit table into a pandas dataframe
# NOTE(review): absolute, user-specific path — this cell only runs on that machine;
# consider pointing at a location relative to the project data folder
unitsTable = pd.read_csv(r"C:\Users\mbindl\Documents\GitHub\Reporting\data\CumulativeAccounting_2012to2023_Updated.csv", low_memory=False)
# keep only the columns up to and including YEAR
unitsTable = unitsTable.drop(columns=unitsTable.columns[unitsTable.columns.get_loc("YEAR") + 1:])
# yearly residential unit totals from the old table
total = get_totals(unitsTable, years)
# year-over-year change, as a percentage
total['Percent_Change'] = total['Residential_Units'].pct_change() * 100
# year-over-year change, in units
total['Difference'] = total['Residential_Units'].diff()

# export to csv
total.to_csv('total_residential_units_by_year_OG.csv', index=False)


> Methods 

In [1]:
# total_residential_units - base_2018 
# forecast will be for 2040 and 2050 
# rate of development will be based on the current rate of development from the last 12 years (back to 2012)
    # current rate will not get us to full build out and will be adjusted to get to full build out by 2050
# total_residential_units = base_2018 + (rate_of_development * (2040 - 2018))

# forecast max build out will be 2050
#  still going to build out all the residential units and then revisit how conversions of TAUs and CFA will be handled

# GIS exercise of where the new residential units will be built
# 1. get the land use data and see if we can get the residential units on vacant and underbuilt parcels

# For TAUs and CFA we only built out what was in the pipeline

# Total Occupied Units = Total Residential Units - Vacant Units
    # based on block group rate and TAZ crosswalk assigned to Parcel level units
# Occupied Units by Income Level = Total Occupied Units * % of Income Level in Block Group
    # based on block group rate and TAZ crosswalk assigned to Parcel level units

# Lodging Occupancy Rates by Tax Rate Zone
    # Air DNA? for VHR occupancy rates
    # Seasonal Units will be based on the % of seasonal units in the block group?

# Adjusted occupancy rates for Residential units to be based on population change to decennial census
    # double check total persons in the model against the decennial census population and then apply the rate?

# use adjusted ACS numbers to make all the input factors match the same source
    # use the 2022 ACS data at the Basin level for all the input factors
        # Block Group level data will be too noisy and not as accurate as the Basin level data

# forecast growth at the Basin level and show some population growth...
    # out year will be 2050 and show the growth in the model at 0.5% per year
    # show the growth in the model at 1.0% per year? or use the decennial census growth rate?
        # which was 0.04% per year from 2010 to 2020 annualized

# show the growth in the model at 0.004% per year? or use the decennial census growth rate?
    # adding 3000 units of affordable housing in the model and get 6,000 person increase in population

# 

