## Intro

> previous results and methods at 
* https://trpa-agency.github.io/travel_demand_model/base_2018.html
* TravelDemandModel\2018\scripts
> Metadata at 
* TravelDemandModel\2022\metadata\TDM_DataEngineering_Methods.docx

> Files 
* TravelDemandModel\2022\scripts\Lookup_Lists
* TravelDemandModel\2022\data\raw_data
* TravelDemandModel\2022\data\processed_data
* TravelDemandModel\2022\scripts\Workspace.gdb

> Setup

In [5]:
# import packages
import pandas as pd
import pathlib
import os
import arcpy
from utils import *
import numpy as np
import pickle

# pandas behaviour and display settings
pd.set_option('mode.copy_on_write', True)
pd.set_option('mode.chained_assignment', None)
pd.set_option('display.max_columns', 999)
pd.set_option('display.max_rows', 999)

# folder the notebook is running from (the scripts folder)
local_path = pathlib.Path.cwd()
# raw input data lives in a sibling folder of the scripts folder: TravelDemandModel\2022\data\raw_data
data_dir = local_path.parent / 'data' / 'raw_data'
# folder for processed outputs
out_dir = local_path.parent / 'data' / 'processed_data'
# file geodatabase for intermediate results that do not work in the memory workspace
gdb = str(local_path / 'Workspace.gdb')

# geoprocessing environment: work in memory and allow outputs to be overwritten
arcpy.env.workspace = 'memory'
arcpy.env.overwriteOutput = True
# # clear memory workspace
# arcpy.management.Delete('memory')

# NAD 1983 UTM Zone 10N
sr = arcpy.SpatialReference(26910)

# column order of the final parcel output
final_schema = [
    'APN',
    'Residential_Units',
    'TouristAccommodation_Units',
    'CommercialFloorArea_SqFt',
    'RoomsRented_PerDay',
    'VHR_Occupancy_Rate',
    'TAU_Occupancy_Rate',
    'PrimaryResidence_Rate',
    'SecondaryResidence_Rate',
    'HighIncome_Rate',
    'MediumIncome_Rate',
    'LowIncome_Rate',
    'PersonsPerUnit',
    'TAU_TYPE',
    'VHR',
    'BLOCK_GROUP',
    'TAZ',
    'OCCUPANCY_ZONE',
    'JURISDICTION',
    'COUNTY',
    'OWNERSHIP_TYPE',
    'EXISTING_LANDUSE',
    'WITHIN_TRPA_BNDY',
    'PARCEL_ACRES',
    'PARCEL_SQFT',
    'SHAPE',
]

# Pickle checkpoints for the parcel layer (all written to the raw data folder)
# part 1 - spatial join categories, occupancy rates, and parcels
parcel_pickle_part1 = data_dir.joinpath('parcel_pickle1.pkl')
# part 2 - known occupancy rates applied to the parcels
parcel_pickle_part2 = data_dir.joinpath('parcel_pickle2.pkl')
# part 3 - missing occupancy rates filled by spatial interpolation
parcel_pickle_part3 = data_dir.joinpath('parcel_pickle3.pkl')
# part 4 - attribute join with socioeconomic data
parcel_pickle_part4 = data_dir.joinpath('parcel_pickle4.pkl')

# Pickle checkpoints for the supporting tables
occupancy_rates_pickle = data_dir.joinpath('occupancy_rates.pkl')   # occupancy rates
campground_pickle      = data_dir.joinpath('campground.pkl')        # campgrounds
school_pickle          = data_dir.joinpath('school.pkl')            # schools
visitor_pickle         = data_dir.joinpath('visitor.pkl')           # overnight visitors
socioeconomic_pickle   = data_dir.joinpath('socioeconomic.pkl')     # socioeconomic data
employment_pickle      = data_dir.joinpath('employment.pkl')        # employment
summary_pickle         = data_dir.joinpath('summary.pkl')           # summary


> Future Utils

In [6]:
# function to do a spatial join and
# map values from the source to the target
def spatial_join_map(target, source, join_field,  map_field, target_field):
    """Spatially join ``source`` onto ``target`` and copy one joined attribute back.

    The join result is written to ``memory\\temp`` (overwriting any previous
    result), read back as a dataframe, and ``map_field`` is looked up per
    ``join_field`` value to fill ``target[target_field]``.

    Parameters:
        target: target features of the spatial join; must also support
            column access/assignment (``target[join_field]``).
        source: join features providing ``map_field``.
        join_field: key column present in ``target`` and in the join output.
        map_field: column of the join output whose values are copied.
        target_field: column of ``target`` that receives the mapped values.

    Returns:
        ``target`` with ``target_field`` filled (modified in place).
    """
    out_fc = 'memory\\temp'
    # match_option is the 7th Spatial Join parameter (the 6th is field_mapping),
    # so it is passed by keyword instead of positionally
    arcpy.SpatialJoin_analysis(target, source, out_fc,
                               'JOIN_ONE_TO_ONE', 'KEEP_ALL',
                               match_option='HAVE_THEIR_CENTER_IN')
    # get result as a spatial dataframe
    join = pd.DataFrame.spatial.from_featureclass(out_fc)
    join.info()
    # map values: if a key occurs more than once, the last occurrence wins
    lookup = dict(zip(join[join_field], join[map_field]))
    target[target_field] = target[join_field].map(lookup)
    return target

# check for duplicates
def check_dupes(df, col):
    """Flag, report and remove rows of ``df`` that are duplicated on ``col``.

    Side effects on ``df`` (in place):
      * adds a boolean ``is_duplicate`` column that is True for every row
        whose ``col`` value(s) occur more than once;
      * drops the repeated rows, keeping the first occurrence of each key.

    Parameters:
        df: dataframe to check; modified in place.
        col: column name, or list of column names, that identify a duplicate.

    Returns:
        A dataframe holding all rows that were duplicated (every occurrence,
        as they looked before the drop). Empty when there are no duplicates.
    """
    dupe_mask = df.duplicated(subset=col, keep=False)
    df['is_duplicate'] = dupe_mask
    # snapshot the offending rows before they are removed so the caller can inspect them
    dupes = df.loc[dupe_mask].copy()
    # inplace=True returns None, so the result must not be assigned back to df
    df.drop_duplicates(subset=col, keep='first', inplace=True)
    return dupes

# make sure every requested field exists as a column of the data frame
def check_field(df, fields):
    """Add each name in ``fields`` that is not yet a column of ``df``.

    New columns are filled with NaN; existing columns are left untouched.
    ``df`` is modified in place and also returned.
    """
    absent = [name for name in fields if name not in df.columns]
    for name in absent:
        df[name] = np.nan
    return df

# function to run interpolation and join by APN
def interpolate_join(df, sdf):
    """Placeholder for the interpolation-and-join step.

    Not implemented yet: ``df`` is returned unchanged and ``sdf`` is not used.
    """
    # interpolate occupancy rate for VHR and TAU parcels where NULL
    # (planned behaviour only - no interpolation happens here)
    return df

# function to fill missing values
def fill_missing_values(df, sdf):
    """Placeholder for filling missing values.

    Not implemented yet: ``df`` is returned unchanged and ``sdf`` is not used.
    """
    return df

# function to run zonal stats and map values
def zonal_stats_map(target, source, join_field,  map_field, target_field):
    """Run zonal statistics of ``source`` over ``target`` zones and map one statistic back.

    The MEAN statistics table is written to ``memory\\temp`` (overwriting any
    previous result), and ``map_field`` is looked up per ``join_field`` value
    to fill ``target[target_field]``.

    Parameters:
        target: zone data; must also support column access/assignment.
        source: value raster passed to ZonalStatisticsAsTable.
        join_field: zone field; used as the key when mapping values back.
        map_field: column of the statistics table to copy (e.g. ``'MEAN'``).
        target_field: column of ``target`` that receives the mapped values.

    Returns:
        ``target`` with ``target_field`` filled (modified in place).
    """
    out_table = 'memory\\temp'
    # zonal stats
    arcpy.sa.ZonalStatisticsAsTable(target, join_field, source, out_table, 'DATA', 'MEAN')
    # the output is a table without geometry, so it is read as a table
    # rather than as a feature class
    join = pd.DataFrame(arcpy.da.TableToNumPyArray(out_table, '*'))
    join.info()
    # map values
    # NOTE(review): assumes the zone field keeps the name `join_field` in the output table - confirm
    lookup = dict(zip(join[join_field], join[map_field]))
    target[target_field] = target[join_field].map(lookup)
    return target

# serialize an object to a pickle file
def to_pickle(data, filename):
    """Pickle ``data`` into ``filename`` (overwriting it) and print a confirmation."""
    with open(filename, mode='wb') as sink:
        sink.write(pickle.dumps(data))
    print(f'{filename} pickled')

# save to pickle and feature class
def to_pickle_fc(data, filename, fc_location=None):
    """Export ``data`` to a feature class and pickle it.

    Parameters:
        data: dataframe exposing the ``.spatial`` accessor.
        filename: path of the pickle file to write.
        fc_location: optional output location of the feature class. When
            omitted, ``filename`` is used for the feature class as well,
            which is the previous behaviour.
            NOTE(review): in that case one path is handed to two different
            writers (feature class export, then pickle) - pass a dedicated
            ``fc_location`` (e.g. inside the workspace gdb) to keep the two
            outputs apart.

    Returns:
        None.
    """
    data.spatial.to_featureclass(filename if fc_location is None else fc_location)
    with open(filename, 'wb') as f:
        pickle.dump(data, f)
    print(f'{filename} pickled and saved as feature class')

# load a pickled object (typically a dataframe) back from disk
def from_pickle(filename):
    """Read ``filename``, unpickle its content, print a confirmation and return the object."""
    with open(filename, mode='rb') as source:
        payload = pickle.loads(source.read())
    print(f'{filename} unpickled')
    return payload

> Get Data

In [7]:
# ---- service endpoints ----
# TAZ polygons
taz_url            = 'https://maps.trpa.org/server/rest/services/Transportation_Planning/MapServer/6'
# parcel development polygons
units_url          = 'https://maps.trpa.org/server/rest/services/Existing_Development/MapServer/2'
# block group polygons
block_groups_url   = 'https://maps.trpa.org/server/rest/services/Demographics/MapServer/27'
# VHR polygons
vhr_url            = 'https://maps.trpa.org/server/rest/services/VHR/MapServer/0'
# ACS block group table
census_url         = 'https://maps.trpa.org/server/rest/services/Demographics/MapServer/28'
# campground points
campground_url     = 'https://maps.trpa.org/server/rest/services/Recreation/MapServer/1'
# campground visits table
campvisits_url     = 'https://maps.trpa.org/server/rest/services/Transportation_Planning/MapServer/14'
# occupancy zone polygons
occupancyzones_url = 'https://maps.trpa.org/server/rest/services/Transportation_Planning/MapServer/15'
# occupancy rate table
occupancyrate_url  = 'https://maps.trpa.org/server/rest/services/Transportation_Planning/MapServer/13'
# school enrollment table
school_url_table   = 'https://maps.trpa.org/server/rest/services/Transportation_Planning/MapServer/17'
# school points
school_url_spatial = 'https://maps.trpa.org/server/rest/services/Transportation_Planning/MapServer/16'

# ---- downloads ----
# TAZ polygons as a spatial dataframe, tagged with NAD 1983 UTM Zone 10N
sdf_taz = get_fs_data_spatial(taz_url)
sdf_taz.spatial.sr = sr

# parcel development rows for 2022
sdf_units = get_fs_data_spatial_query(units_url, "Year = 2022")
sdf_units.spatial.sr = sr

# 2020 block groups only
sdf_block = get_fs_data_spatial(block_groups_url)
is_2020_block_group = (sdf_block['YEAR'] == 2020) & (sdf_block['GEOGRAPHY'] == 'Block Group')
sdf_block = sdf_block.loc[is_2020_block_group]
sdf_block.spatial.sr = sr

# VHR parcels, restricted to active permits
sdf_vhr = get_fs_data_spatial(vhr_url)
sdf_vhr.spatial.sr = sr
is_active_vhr = sdf_vhr['Status'] == 'Active'
sdf_vhr = sdf_vhr.loc[is_active_vhr]

# ACS table, restricted to the 2022 block group sample
df_census = get_fs_data(census_url)
is_2022_block_group = (df_census['year_sample'] == 2022) & (df_census['sample_level'] == 'block group')
df_census_2022 = df_census.loc[is_2022_block_group]

# campground points
# NOTE(review): this is the only spatial layer that is not assigned `sr` - confirm that is intended
sdf_campground = get_fs_data_spatial_query(campground_url, "RECREATION_TYPE='Campground'")

# campground visits for 2022
dfCamp = get_fs_data_query(campvisits_url, "Year = 2022")

# occupancy zones
sdf_occ = get_fs_data_spatial(occupancyzones_url)
sdf_occ.spatial.sr = sr

# occupancy rates
df_occ = get_fs_data(occupancyrate_url)

# school enrollment - incomplete data: missing meyers elementary, bijou, sierra house,
# LTCC and Sierra Nevada College
df_school_enrollment = get_fs_data_query(school_url_table, "Year = '2021-2022'")

# school points
sdf_school = get_fs_data_spatial(school_url_spatial)
sdf_school.spatial.sr = sr

## Occupancy

### Part 1

> general spatial joins and categorization

In [8]:
# the CSLT_ALL occupancy zone is excluded from the spatial join
sdf_occ = sdf_occ.loc[sdf_occ['OccupancyRate_ZoneID'] != 'CSLT_ALL']

# join parcels (by their centers) to TAZ, Block Group and Occupancy Rate Zone polygons;
# each result is written to the current (memory) workspace under the given name
parcel_joins = (
    (sdf_taz,   "Existing_Development_TAZ"),
    (sdf_block, "Existing_Development_BlockGroup"),
    (sdf_occ,   "Existing_Development_OccupancyZone"),
)
for join_layer, out_name in parcel_joins:
    arcpy.SpatialJoin_analysis(sdf_units, join_layer, out_name,
                               "JOIN_ONE_TO_ONE", "KEEP_ALL", "", "HAVE_THEIR_CENTER_IN")


'memory\\Existing_Development_OccupancyZone'

In [9]:
# List of Parcels APN with TAU Types
tau_lookup = pd.read_csv('Lookup_Lists/lookup_tau_type.csv')
#sro_lookup = pd.read_csv('Lookup_Lists/lookup_sro.csv')

# make sure every field of the final schema exists on the parcel layer
# (check_field adds the missing columns to sdf_units itself); work on a copy from here on
sdfParcel = check_field(sdf_units, final_schema).copy()

# VHR = Yes when the parcel APN appears in the active VHR layer.
# A membership test is used instead of a left merge: a merge repeats a parcel
# once per matching VHR record and forces a '_x' suffix clean-up on shared columns,
# while only the match indicator was ever kept from the VHR layer.
sdfParcel['VHR'] = 'No'
sdfParcel.loc[sdfParcel['APN'].isin(sdf_vhr['APN']), 'VHR'] = 'Yes'

# setup TAU_Type
sdfParcel['TAU_TYPE'] = 'N/A'

# filter parcels so only APNs in the lookup are included
sdfTAU = sdfParcel[sdfParcel['APN'].isin(tau_lookup['APN'])]
# get TAU_Type from lookup
sdfTAU['TAU_TYPE'] = sdfTAU['APN'].map(tau_lookup.set_index('APN')['TAU_Type'])

# any row with TouristAccommodation_Units > 0 that has no TAU type yet defaults to 'HotelMotel'
sdfParcel.loc[(sdfParcel['TouristAccommodation_Units'] > 0) & (sdfParcel['TAU_TYPE']=='N/A'), 'TAU_TYPE'] = 'HotelMotel'
# parcels listed in the lookup take the TAU type from the lookup (assignment aligns on the row index)
sdfParcel.loc[sdfParcel['APN'].isin(sdfTAU['APN']), 'TAU_TYPE'] = sdfTAU['TAU_TYPE']

# get results of spatial joins as spatial dataframes
sdf_units_taz   = pd.DataFrame.spatial.from_featureclass("Existing_Development_TAZ", sr=sr)
sdf_units_block = pd.DataFrame.spatial.from_featureclass("Existing_Development_BlockGroup", sr=sr)
sdf_units_occ   = pd.DataFrame.spatial.from_featureclass("Existing_Development_OccupancyZone", sr=sr)

# map APN -> joined value to fill in TAZ, Block Group and Occupancy Zone
sdfParcel['TAZ']           = sdfParcel.APN.map(dict(zip(sdf_units_taz.APN,   sdf_units_taz.TAZ)))
sdfParcel['BLOCK_GROUP']   = sdfParcel.APN.map(dict(zip(sdf_units_block.APN, sdf_units_block.TRPAID)))
sdfParcel['OCCUPANCY_ZONE']= sdfParcel.APN.map(dict(zip(sdf_units_occ.APN,   sdf_units_occ.OccupancyRate_ZoneID)))

# VHR parcels in the CSLT jurisdiction all belong to the 'CSLT_ALL' occupancy zone
sdfParcel.loc[(sdfParcel['JURISDICTION'] == 'CSLT') & (sdfParcel['VHR'] == 'Yes'), 'OCCUPANCY_ZONE'] = 'CSLT_ALL'

# columns to keep
sdfParcel = sdfParcel[final_schema]

# export to pickle
sdfParcel.to_pickle(parcel_pickle_part1)

  if (arr.astype(int) == arr).all():
  if (arr.astype(int) == arr).all():
  if (arr.astype(int) == arr).all():
  if (arr.astype(int) == arr).all():
  if (arr.astype(int) == arr).all():
  if (arr.astype(int) == arr).all():


### Occupancy Rates Table Fix

> Fill in missing data
* Washoe Rooms Available, Washoe Reported Occupancy Rates, Washoe Quarterly VHRs added to Monthly Rows
* Rest of El Dorado County VHR zones need Rooms Available & Rooms Rented
    * Why are we using CSLT rate data instead of IDW interpolated data?
* Weight the rates and calculate rooms rented per day

In [None]:
# make a copy of occupancy rates table and parcel layer
dfOcc     = df_occ.copy()
sdfParcel = pd.read_pickle(parcel_pickle_part1)

# columns of interest (not used further in this cell)
columns = ['Zone_ID', 'Period', 'RoomType', 'Report_OccRate','TRPA_OccRate']

# dictionary to convert the reported time frames into readable periods
timeframe_dict = {
    '2022-06-01': 'June',
    '2022-08-01': 'August',
    '2022-09-01': 'September',
    'Q4 21-22'  : 'April-June',
    'Q1 22-23'  : 'July-September',
    'Q2 2022'   : 'April-June',
    'Q3 2022'   : 'July-September'
}

# Period field based on Timeframe and timeframe_dict (unmapped time frames become NaN)
dfOcc['Period'] = dfOcc['Timeframe'].map(timeframe_dict)

# reusable row selectors; none of the columns they read are modified below
is_monthly   = dfOcc['Temporal_Scale'] == 'Monthly'
is_vhr       = dfOcc['RoomType'] == 'VHR'
is_tau       = dfOcc['RoomType'].isin(['HotelMotel', 'Casino', 'Resort'])
is_30_days   = dfOcc['Period'].isin(['June', 'September'])
is_31_days   = dfOcc['Period'].isin(['July', 'August'])
is_washoe    = dfOcc['Zone_ID'] == 'Washoe County'
is_el_dorado = dfOcc['Zone_ID'] == 'Rest of El Dorado County'

## Fill in Missing Data for Washoe County ##

# get total WA taus and vhrs from the parcel layer
tauWA = sdfParcel.loc[(sdfParcel.COUNTY == 'WA'), 'TouristAccommodation_Units'].sum()
vhrWA = sdfParcel.loc[(sdfParcel.COUNTY == 'WA')&(sdfParcel.VHR == 'Yes'), 'APN'].count()

# rooms available per month = units * days in the month
# HotelMotel, Casino, Resort in Washoe County use the total TAUs from the parcel layer
dfOcc.loc[is_washoe & is_monthly & is_tau & is_30_days, 'Report_RoomsAvailable'] = tauWA * 30
dfOcc.loc[is_washoe & is_monthly & is_tau & is_31_days, 'Report_RoomsAvailable'] = tauWA * 31
# VHRs in Washoe County use the total VHR parcels from the parcel layer
dfOcc.loc[is_washoe & is_monthly & is_vhr & is_31_days, 'Report_RoomsAvailable'] = vhrWA * 31
dfOcc.loc[is_washoe & is_monthly & is_vhr & is_30_days, 'Report_RoomsAvailable'] = vhrWA * 30

# Washoe VHR rooms rented are also reported per quarter: a third of the quarterly
# total is the monthly share (.iloc[0] raises if the quarterly row is missing)
waVHRZoneQ2 = dfOcc.loc[is_washoe & is_vhr & (dfOcc.Timeframe == 'Q2 2022')]
extraVHRQ2 = int((waVHRZoneQ2['Report_RoomsRented'] / 3).round(0).iloc[0])
waVHRZoneQ3 = dfOcc.loc[is_washoe & is_vhr & (dfOcc.Timeframe == 'Q3 2022')]
extraVHRQ3 = int((waVHRZoneQ3['Report_RoomsRented'] / 3).round(0).iloc[0])

# add the monthly share to the Washoe monthly VHR rows that fall within that quarter.
# The month names live in Period (Timeframe holds values such as '2022-06-01'),
# so the rows are selected on Period.
# NOTE(review): September also falls within Q3 but was not in the original month list - confirm
washoe_vhr_q2 = is_washoe & is_monthly & is_vhr & (dfOcc['Period'] == 'June')
washoe_vhr_q3 = is_washoe & is_monthly & is_vhr & dfOcc['Period'].isin(['July', 'August'])
dfOcc.loc[washoe_vhr_q2, 'Report_RoomsRented'] = dfOcc['Report_RoomsRented'] + extraVHRQ2
dfOcc.loc[washoe_vhr_q3, 'Report_RoomsRented'] = dfOcc['Report_RoomsRented'] + extraVHRQ3

# Washoe County occupancy rate = rooms rented / rooms available
dfOcc.loc[is_washoe, 'Report_OccRate'] = dfOcc['Report_RoomsRented']/dfOcc['Report_RoomsAvailable']

## Fill in Missing Data for El Dorado County ##

# get total VHRs in El Dorado County from the parcel layer
vhrEL = sdfParcel.loc[(sdfParcel.JURISDICTION == 'EL') & (sdfParcel.VHR == 'Yes'), 'APN'].count()

# rooms available for VHRs in El Dorado County using total VHRs from the parcel layer
dfOcc.loc[is_el_dorado & is_monthly & is_vhr & is_31_days, 'Report_RoomsAvailable'] = vhrEL * 31
dfOcc.loc[is_el_dorado & is_monthly & is_vhr & is_30_days, 'Report_RoomsAvailable'] = vhrEL * 30

# rooms rented for VHRs in El Dorado County = reported occupancy rate * rooms available
dfOcc.loc[is_el_dorado & is_monthly & is_vhr,
          'Report_RoomsRented'] = (dfOcc['Report_OccRate'] * dfOcc['Report_RoomsAvailable']).fillna(0).astype(int)

## Calculate Weighted Average Occupancy Rate ##

# df copy
df = dfOcc.copy()

# weight of each period, based on the number of days it contributes
weights = {
    'June'          : 8/20,
    'August'        : 3/20,
    'September'     : 9/20,
    'April-June'    : 8/20,
    'July-September': 12/20
}

# apply the weights to the reported occupancy rates
for key,value in weights.items():
    df.loc[df['Period'] == key, 'TRPA_OccRate'] = df['Report_OccRate'] * value

# rooms rented per day = rooms rented / days in the period;
# rows with another (or no) period, or without rooms rented, get 0
period_days = {'June': 30, 'September': 30, 'August': 31, 'April-June': 91, 'July-September': 92}
df['RoomsRentedPerDay'] = (df['Report_RoomsRented'] / df['Period'].map(period_days)).fillna(0).astype(int)

# filter by Temporal_Scale
df_monthly   = df.loc[df['Temporal_Scale'] == 'Monthly']
df_quarterly = df.loc[df['Temporal_Scale'] == 'Quarterly']

# summarise monthly and quarterly rows separately per zone and room type:
# mean of Report_OccRate and RoomsRentedPerDay, sum of the rest (weighted rates add up)
group_keys = ['Zone_ID', 'RoomType', 'Temporal_Scale']
agg_spec   = {'RoomsRentedPerDay': 'mean', 'Report_RoomsAvailable': 'sum',
              'Report_RoomsRented': 'sum', 'Report_OccRate': 'mean',
              'TRPA_OccRate': 'sum'}
dfMonthly   = df_monthly.groupby(group_keys).agg(agg_spec).reset_index()
dfQuarterly = df_quarterly.groupby(group_keys).agg(agg_spec).reset_index()

# concat the two dataframes into the final occupancy rate dataframe
dfOccFinal = pd.concat([dfMonthly, dfQuarterly]).reset_index(drop=True)

# cast RoomsRentedPerDay as int
dfOccFinal['RoomsRentedPerDay'] = dfOccFinal['RoomsRentedPerDay'].astype(int)
# drop rows where Zone_ID is Washoe County and Temporal_Scale is Quarterly
df = dfOccFinal.loc[~((dfOccFinal['Zone_ID'] == 'Washoe County') & (dfOccFinal['Temporal_Scale'] == 'Quarterly'))].reset_index(drop=True)
df.info()
# save to pickle
df.to_pickle(occupancy_rates_pickle)

### Part 2


* Spatial join to apply Lodging Occupancy Rates to the parcel layer,
* select parcels where the Lodging Occupancy Rate is Null,
* run interpolation,
* apply interpolated values to parcels where the occupancy rate is null

> Filter Occupancy Rate table to Timeframe and Room Type, Merge with Occupancy Zone Feature Class, and Export to Feature Class

In [None]:
# load the part 1 parcel layer and the cleaned occupancy rates table
sdfParcel = pd.read_pickle(parcel_pickle_part1)
dfOcc     = pd.read_pickle(occupancy_rates_pickle)

# split the occupancy rates by room type: tourist accommodation vs. vacation home rentals
tau_room_types = ['HotelMotel', 'Casino', 'Resort']
dfOccTAU = dfOcc.loc[dfOcc['RoomType'].isin(tau_room_types)]
dfOccVHR = dfOcc.loc[dfOcc['RoomType'] == 'VHR']

# output feature classes in the workspace gdb
tau_occ_zones = os.path.join(gdb, 'OccupancyRate_Zones_TAU')
vhr_occ_zones = os.path.join(gdb, 'OccupancyRate_Zones_VHR')

# attach the rates to the occupancy zone polygons and export each set to the gdb
zone_merge_args = dict(left_on='OccupancyRate_ZoneID', right_on='Zone_ID', how='left')
sdfOccTAU = pd.merge(sdf_occ, dfOccTAU, **zone_merge_args)
sdfOccVHR = pd.merge(sdf_occ, dfOccVHR, **zone_merge_args)
sdfOccTAU.spatial.to_featureclass(location=tau_occ_zones, overwrite=True)
sdfOccVHR.spatial.to_featureclass(location=vhr_occ_zones, overwrite=True)

# parcels that need a rate: those with tourist accommodation units and those flagged as VHR
sdfTAU = sdfParcel.loc[sdfParcel['TouristAccommodation_Units'] > 0]
sdfVHR = sdfParcel.loc[sdfParcel['VHR'] == 'Yes']

# spatial join (INTERSECT) of each parcel subset to its occupancy rate zones
spjoin_tau = arcpy.analysis.SpatialJoin(sdfTAU, tau_occ_zones, 'OccupancyRate_Zones_TAU_Parcels',
                                        "JOIN_ONE_TO_ONE", "KEEP_ALL", None, "INTERSECT", None, None)
spjoin_vhr = arcpy.analysis.SpatialJoin(sdfVHR, vhr_occ_zones, 'OccupancyRate_Zones_VHR_Parcels',
                                        "JOIN_ONE_TO_ONE", "KEEP_ALL", None, "INTERSECT", None, None)

# read the join results back as spatial dataframes
sdf_parcel_tau_rates = pd.DataFrame.spatial.from_featureclass("OccupancyRate_Zones_TAU_Parcels", sr=sr)
sdf_parcel_vhr_rates = pd.DataFrame.spatial.from_featureclass("OccupancyRate_Zones_VHR_Parcels", sr=sr)

# APN -> rate lookups from the join results
vhr_rate_by_apn = dict(zip(sdf_parcel_vhr_rates.APN, sdf_parcel_vhr_rates.trpa_occ_rate))
tau_rate_by_apn = dict(zip(sdf_parcel_tau_rates.APN, sdf_parcel_tau_rates.trpa_occ_rate))

# only the VHR / TAU subsets are mapped; the assignment aligns on the row index,
# so every other parcel is left as NaN at this point
sdfParcel['VHR_Occupancy_Rate'] = sdfVHR.APN.map(vhr_rate_by_apn)
sdfParcel['TAU_Occupancy_Rate'] = sdfTAU.APN.map(tau_rate_by_apn)

# missing rates become 0 and both columns are stored as float
for rate_field in ('VHR_Occupancy_Rate', 'TAU_Occupancy_Rate'):
    sdfParcel[rate_field] = sdfParcel[rate_field].fillna(0).astype(float)

# checkpoint
sdfParcel.to_pickle(parcel_pickle_part2)

### Part 3

> Generate Spatial Interpolated Occupancy Rate Surfaces and Fill in parcel level missing occupancy rates with interpolated values

In [None]:
# Part 3: interpolate an occupancy rate surface (IDW) from parcels that have a rate
# and use its zonal mean to fill parcels whose rate is still 0.

# from pickle
sdfParcel = pd.read_pickle(parcel_pickle_part2)

# Set the extent environment using a feature class
# (this is a session-wide arcpy setting and it is not reset in this cell)
arcpy.env.extent = os.path.join(gdb,"OccupancyRate_Zones_TAU")
# set the input feature class (points created by FeatureToPoint further down)
tau_fc = os.path.join(gdb,'TAU_points')
vhr_fc = os.path.join(gdb,'VHR_points')
# set the output raster
tau_raster = os.path.join(gdb,'tau_occupancy_rate')
vhr_raster = os.path.join(gdb,'vhr_occupancy_rate')
# set the output cell size
# presumably in the units of the spatial reference (metres for UTM) - TODO confirm
cell_size = 30
# set the power parameter
power = 2
# set the search radius
# NOTE(review): passed to Idw as a plain number - confirm how arcpy interprets it
search_radius = 10000

# A rate of 0 is treated as "missing" because Part 2 filled NaN rates with 0.
# select rows where TAU_TYPE is not null but TAU_Occupancy_Rate is null
tauParcel_NULL = sdfParcel.loc[(sdfParcel['TAU_TYPE'].isin(['HotelMotel','Casino','Resort'])) & 
                                  (sdfParcel['TAU_Occupancy_Rate']==0)]
vhrParcel_NULL = sdfParcel.loc[(sdfParcel['VHR'] == 'Yes') & 
                                  (sdfParcel['VHR_Occupancy_Rate']==0)] 
# get not null parcels for TAU and VHR
tauParcel_notNULL = sdfParcel.loc[(sdfParcel['TAU_TYPE'].isin(['HotelMotel','Casino','Resort'])) & 
                                  (sdfParcel['TAU_Occupancy_Rate']!=0)]
vhrParcel_notNULL = sdfParcel.loc[(sdfParcel['VHR'] == 'Yes') & 
                                  (sdfParcel['VHR_Occupancy_Rate']!=0)]
# NULL parcels
# to feature class (these polygons are the zones for the zonal statistics below)
tauParcel_NULL.spatial.to_featureclass(location=os.path.join(gdb,"TAU_NULL_occ"), overwrite=True)
vhrParcel_NULL.spatial.to_featureclass(location=os.path.join(gdb,"VHR_NULL_occ"), overwrite=True)
# not NULL parcels
# to feature class
tauParcel_notNULL.spatial.to_featureclass(location=os.path.join(gdb,"TAU_occ"), overwrite=True)
vhrParcel_notNULL.spatial.to_featureclass(location=os.path.join(gdb,"VHR_occ"), overwrite=True)

# feature to point for TAU and VHR ("INSIDE" option)
arcpy.management.FeatureToPoint(tauParcel_NULL,    os.path.join(gdb,'TAU_NULL_points'),"INSIDE")
arcpy.management.FeatureToPoint(vhrParcel_NULL,    os.path.join(gdb, 'VHR_NULL_points'), "INSIDE")
arcpy.management.FeatureToPoint(tauParcel_notNULL, os.path.join(gdb,'TAU_points'), "INSIDE")
arcpy.management.FeatureToPoint(vhrParcel_notNULL, os.path.join(gdb,'VHR_points'), "INSIDE")

# run the IDW for TAU parcels with rates (input: the TAU_points created above)
arcpy.sa.Idw(tau_fc, 
            z_field='TAU_Occupancy_Rate', 
            cell_size=cell_size, 
            power=power, 
            search_radius=search_radius).save(tau_raster)
# and for VHR parcels with rates
arcpy.sa.Idw(vhr_fc,
            z_field='VHR_Occupancy_Rate',
            cell_size=cell_size,
            power=power,
            search_radius=search_radius).save(vhr_raster)

# Set the local variables for ZonalStatisticsAsTable
# (tauRaster / vhrRaster point at the same paths as tau_raster / vhr_raster above)
zoneField   = "APN"
tauZoneData = os.path.join(gdb, 'TAU_NULL_occ')
vhrZoneData = os.path.join(gdb, 'VHR_NULL_occ')
tauRaster   = os.path.join(gdb, 'tau_occupancy_rate')
vhrRaster   = os.path.join(gdb, 'vhr_occupancy_rate')
tauTable    = os.path.join(gdb, 'zonalstat_TAU_Occupancy')
vhrTable    = os.path.join(gdb, 'zonalstat_VHR_Occupancy')

# Execute ZonalStatisticsAsTable: mean interpolated rate per parcel without a rate
tauZSaT = arcpy.sa.ZonalStatisticsAsTable(tauZoneData, zoneField, tauRaster, 
                                            tauTable, "DATA", "MEAN")
vhrZSaT = arcpy.sa.ZonalStatisticsAsTable(vhrZoneData, zoneField, vhrRaster,
                                            vhrTable, "DATA", "MEAN")

# convert zonal stats tables to dataframes
tauZonalStats = arcpy.da.TableToNumPyArray(tauZSaT, '*')
vhrZonalStats = arcpy.da.TableToNumPyArray(vhrZSaT, '*')
dfTAU = pd.DataFrame(tauZonalStats)
dfVHR = pd.DataFrame(vhrZonalStats)

# Create a temporary column with the new mapped values
# NOTE(review): the tables are keyed by lower-case 'apn' - presumably the column name was
# changed when the parcels were exported with to_featureclass; confirm
sdfParcel['New_TAU_Occupancy_Rate'] = sdfParcel['APN'].map(dict(zip(dfTAU['apn'], dfTAU['MEAN'])))
sdfParcel['New_VHR_Occupancy_Rate'] = sdfParcel['APN'].map(dict(zip(dfVHR['apn'], dfVHR['MEAN'])))

# Combine the new column with the existing column: the interpolated value is used
# wherever one exists, otherwise (new value is NaN) the existing value is kept
sdfParcel['TAU_Occupancy_Rate'] = sdfParcel['New_TAU_Occupancy_Rate'].combine_first(sdfParcel['TAU_Occupancy_Rate'])
sdfParcel['VHR_Occupancy_Rate'] = sdfParcel['New_VHR_Occupancy_Rate'].combine_first(sdfParcel['VHR_Occupancy_Rate'])

# Drop the temporary column
sdfParcel.drop(columns=['New_TAU_Occupancy_Rate'], inplace=True)
sdfParcel.drop(columns=['New_VHR_Occupancy_Rate'], inplace=True)

### Why isnt the zonal stats working for these parcels?? ###

# APNs that still have a rate of 0 after interpolation, as lists
# (the TAU list selects on TouristAccommodation_Units > 0, not on TAU_TYPE as above)
tau_apn_list = sdfParcel.loc[(sdfParcel['TAU_Occupancy_Rate'] == 0) & 
                             (sdfParcel['TouristAccommodation_Units'] > 0)]['APN'].tolist()
vhr_apn_list = sdfParcel.loc[(sdfParcel['VHR_Occupancy_Rate'] == 0) & 
                             (sdfParcel['VHR'] == 'Yes')]['APN'].tolist()

# # classify the occupancy rates for those parcels
# NOTE(review): 0.592337 is a hard-coded fallback used for both TAU and VHR; its origin is
# not documented here - TODO record the source or derive it from the data
sdfParcel.loc[sdfParcel['APN'].isin(tau_apn_list), 'TAU_Occupancy_Rate'] = 0.592337
sdfParcel.loc[sdfParcel['APN'].isin(vhr_apn_list), 'VHR_Occupancy_Rate'] = 0.592337

# pickle and save to feature class
outfc = 'sdf_units_attributed_occupancy_interpolated'
# export to feature class
# (no overwrite=True here, unlike the exports above - presumably relies on
# arcpy.env.overwriteOutput; confirm)
sdfParcel.spatial.to_featureclass(location=os.path.join(gdb, outfc), sanitize_columns=False)
# export to pickle
sdfParcel.to_pickle(parcel_pickle_part3)

In [None]:
# create csv with APN field and List fiedl using tau_apn_list and vhr_apn_list
# dictionary of APN and "TUA" or "VHR"
apn_dict = {apn: 'TAU' for apn in tau_apn_list}
apn_dict.update({apn: 'VHR' for apn in vhr_apn_list})
# create a dataframe from the dictionary
df_apn = pd.DataFrame(list(apn_dict.items()), columns=['APN', 'List'])
# save to csv
df_apn.to_csv('missing_interpolation_zstats_apn_list.csv', index=False)

In [None]:
# display the occupancy rate table as downloaded (the fixes above were applied to a copy)
df_occ

### Campgrounds

> Campground Occupancy

In [None]:
# Bayview Campground is excluded from the campground table
dfCamp = dfCamp[dfCamp['Campground'].ne('Bayview Campground')]

# attach the campground table to the campground points by campground name
dfCampOcc = pd.merge(sdf_campground, dfCamp,
                     left_on='RECREATION_NAME', right_on='Campground',
                     how='left', indicator=True)

# find the TAZ each campground falls in
arcpy.SpatialJoin_analysis(dfCampOcc, sdf_taz, 'taz_campground',
                           'JOIN_ONE_TO_ONE', 'KEEP_ALL',
                           match_option='HAVE_THEIR_CENTER_IN')

# read the join result back as a spatial dataframe
sdf_campground_taz = pd.DataFrame.spatial.from_featureclass('taz_campground')

# sites sold = number of sites * occupancy rate
sdf_campground_taz['SitesSold'] = sdf_campground_taz['Total_Sites'].mul(sdf_campground_taz['Occupancy_Rate'])

# per TAZ: total sites, total sites sold and the mean occupancy rate
campground_summary = {'Total_Sites': 'sum', 'SitesSold': 'sum', 'Occupancy_Rate': 'mean'}
sdf_campground_taz_grouped = sdf_campground_taz.groupby('TAZ', as_index=False).agg(campground_summary)

# checkpoint
sdf_campground_taz_grouped.to_pickle(campground_pickle)


> Interpolation to fill NaN if necessary

In [None]:
# merge campground data with occupancy rate data on campground name
dfCampOcc = sdf_campground.merge(dfCamp, left_on='RECREATION_NAME', right_on='Campground', 
                                      how='left', indicator=True)

# keep only columns of interest - taken from the merged frame, which is where the name,
# the occupancy rate and the geometry come together.
# sdf_campground itself is left untouched so this cell can be re-run.
sdf_campground_occ = dfCampOcc[['RECREATION_NAME', 'Occupancy_Rate','SHAPE']]

# split into campgrounds with and without occupancy rate data
# (the NaN subset has to be taken from the unfiltered frame, otherwise it is always empty)
has_rate = sdf_campground_occ['Occupancy_Rate'].notnull()
sdf_campground_known = sdf_campground_occ[has_rate]
sdf_campground_nan   = sdf_campground_occ[~has_rate]

# IDW to get an occupancy rate surface from the campgrounds with a rate
# set the output cell size
cell_size = 500
# set the power parameter
power = 2
# set the search radius
search_radius = 5000
# set the output raster
out_raster = 'campground_occupancy_rate'
# run the IDW; the input points are passed positionally, as in the parcel IDW calls,
# because the tool has no `in_features` keyword
arcpy.sa.Idw(sdf_campground_known,
             z_field='Occupancy_Rate', 
             cell_size=cell_size, 
             power=power, 
             search_radius=search_radius).save(out_raster)

# sdf_campground_nan holds the campground points that still need a rate from the surface
# TODO: sample out_raster at these points (not implemented yet)

## Overnight Visitation

In [None]:
# inputs: attributed parcels (part 4), campground summary per TAZ, and the TAZ layer
sdfParcel    = pd.read_pickle(parcel_pickle_part4)
dfCampground = pd.read_pickle(campground_pickle)
sdfTAZ       = sdf_taz.copy()
overnight_fields = ['taz', 
                    'hotelmotel', # Hotel/Motel total rooms available by TAU_TYPE = hotelmotel
                    'resort',     # Resort total rooms is coming from ### We dont have occupany rates for these ###
                    'casino',     # Casino total rooms available by TAU_TYPE = casino
                    'campground', # Campground total sites avaialble
                    'percentHouseSeasonal', # Percent of houses that are seasonal ### total units- (probability unit is seasonal) / total units ###
                    # 'beach'      # NA
                    ]

# one row per TAZ
df_overnight = pd.DataFrame(columns=overnight_fields)
df_overnight['taz'] = sdfTAZ['TAZ'].astype(int)

# campground sites sold per TAZ
df_overnight['campground'] = df_overnight['taz'].map(dict(zip(dfCampground['TAZ'], dfCampground['SitesSold'])))

# tourist accommodation units per TAZ for each TAU type.
# The groupby result is indexed by TAZ, so it is looked up through the taz column;
# assigning it directly would align it with the row index instead of the TAZ id.
# NOTE(review): assumes sdfParcel['TAZ'] holds the same ids as the integer taz column - confirm dtype
tau_type_by_field = {'hotelmotel': 'HotelMotel', 'casino': 'Casino', 'resort': 'Resort'}
for field, tau_type in tau_type_by_field.items():
    units_by_taz = (sdfParcel.loc[sdfParcel['TAU_TYPE'] == tau_type]
                             .groupby('TAZ')['TouristAccommodation_Units'].sum())
    df_overnight[field] = df_overnight['taz'].map(units_by_taz)
# df_overnight.percentHouseSeasonal = sdfParcel.groupby('TAZ')['Seasonal'].mean()

# TAZs without a value get 0; astype returns a new frame, so the result is kept
# (casting to int truncates fractional campground sites sold)
df_overnight = df_overnight.fillna(0).astype(int)
df_overnight.to_pickle(visitor_pickle)

## School Enrollment

In [None]:
## df_school_enrollment is missing meyers elementary, bijou, sierra house, and LTCC and Sierra Nevada College
# keep only the 2022-2023 school year
df_school_enrollment_22 = df_school_enrollment[df_school_enrollment['Year'] == '2022-2023']
# Add a row for LTCC - Lake Tahoe Community College
ltcc = {'School_Name': 'Lake Tahoe Community College', 'Level_': 'College', 'Enrollment': 2909}
df_school_enrollment_22 = pd.concat([df_school_enrollment_22, pd.DataFrame([ltcc])], ignore_index=True)
# join school spatial to the 2022-2023 school table built above
# (the original merged the unfiltered table, which ignored both the year filter and the LTCC row,
#  and passed left_on without a right-hand key, which pd.merge rejects)
# NOTE(review): assumes the enrollment table also has a 'SchoolID' column - confirm the key name.
# The LTCC row added above carries no SchoolID, so it will not match a school until one is assigned.
sdf_school_enroll = pd.merge(sdf_school, df_school_enrollment_22, on='SchoolID', how='left')

In [None]:
# Classify each school by its name, total enrollment by school type within each TAZ,
# and export one row per TAZ to pickle and csv.

# get a copy of the school data
sdf_school = sdf_school.copy()
# start with no school type assigned
sdf_school['TYPE'] = None
# keyword searched in NAME (case-insensitive) -> school type.
# Applied in this order, so a later match overrides an earlier one.
name_keywords = {'elementary': 'Elementary School',
                 'middle':     'Middle School',
                 'high':       'High School',
                 'college':    'College'}
for keyword, school_type in name_keywords.items():
    # na=False: a missing NAME counts as "no match" rather than producing an NA mask that .loc rejects
    name_matches = sdf_school['NAME'].str.contains(keyword, case=False, na=False)
    sdf_school.loc[name_matches, 'TYPE'] = school_type
# schools matched by none of the keywords default to 'Elementary School'
# (presumably this is how magnet schools and academies are meant to be counted - confirm)
sdf_school.loc[sdf_school['TYPE'].isnull(), 'TYPE'] = 'Elementary School'

# spatial join TAZs to School points, getting the TAZ for each school
sdf_school_taz = sdf_school.spatial.join(sdf_taz, how='inner')

# group by TYPE and sum of Enrollment within TAZ
sdf_school_taz_grouped = (sdf_school_taz.groupby(['TYPE', 'TAZ'])
                                        .agg({'ENROLLMENT': 'sum'})
                                        .reset_index())

# one row per TAZ with one column per school type.
# reindex guarantees all four type columns exist even when a type has no schools,
# otherwise the aggregation further down raises KeyError on the missing column.
school_types = list(name_keywords.values())
sdf_school_taz_grouped_pivot = (sdf_school_taz_grouped.pivot(index='TAZ',
                                                             columns='TYPE',
                                                             values='ENROLLMENT')
                                                      .reindex(columns=school_types)
                                                      .reset_index())

# merge to sdf_taz to get all tazs - the join only gets us the tazs with schools
df_school = pd.merge(sdf_taz, sdf_school_taz_grouped_pivot, how='left', on='TAZ')

# rename columns
df_school.rename(columns={'Elementary School':'elementary_school_enrollment',
                          'Middle School':'middle_school_enrollment',
                          'High School':'high_school_enrollment',
                          'College':'college_enrollment'}, inplace=True)

# group by TAZ, sum of enrollment by school type
schools_final = df_school.groupby('TAZ').agg({'elementary_school_enrollment':'sum',
                                              'middle_school_enrollment':'sum',
                                              'high_school_enrollment':'sum',
                                              'college_enrollment':'sum'}).reset_index()

# fill na with 0 and cast every field to integer
schools_final = schools_final.fillna(0).astype(int)
# to pickle
schools_final.to_pickle(school_pickle)
# to csv
schools_final.to_csv(os.path.join(out_dir,'SchoolEnrollment.csv'), index=False)

## Socio Econ

In [10]:
# Block-group occupancy and seasonal-vacancy rates from the census table.
# Codes: B25002_002E = Occupied, B25002_003E = Vacant, B25004_006E = Vacant Seasonal
occupancy_codes = ['B25002_003E','B25002_002E', 'B25004_006E']
is_occupancy_row = df_census_2022['variable_code'].isin(occupancy_codes)
df_census_occupancy = df_census_2022.loc[is_occupancy_row, ['TRPAID', 'variable_code', 'value']]
# wide format: one row per TRPAID, one column per variable code
df_census_occupancy = df_census_occupancy.pivot(index='TRPAID', columns='variable_code', values='value').reset_index()

vacant_units          = df_census_occupancy['B25002_003E']
occupied_units        = df_census_occupancy['B25002_002E']
vacant_seasonal_units = df_census_occupancy['B25004_006E']
# total units = vacant units + occupied units
df_census_occupancy['total_units'] = vacant_units + occupied_units
# share of all units that are occupied
df_census_occupancy['occupancy_rate'] = occupied_units / df_census_occupancy['total_units']
# share of all units that are vacant for seasonal use
df_census_occupancy['seasonal_rate'] = vacant_seasonal_units / df_census_occupancy['total_units']


In [11]:
# Household size per block group.
# B25010_001E is the ACS average household size of occupied housing units (not a household count)
household_size_code = 'B25010_001E'
is_household_size = df_census_2022['variable_code'] == household_size_code
df_census_household_size = (df_census_2022.loc[is_household_size, ['TRPAID', 'variable_code', 'value']]
                                          .pivot(index='TRPAID', columns='variable_code', values='value')
                                          .reset_index())
# expose the value under a readable column name
df_census_household_size['household_size'] = df_census_household_size[household_size_code]

In [12]:
# Census values summed into the broader income categories of the lookup list, per block group.
code_lookup = pd.read_csv('Lookup_Lists/income_census_codes.csv')
# variable code -> broader income category
category_by_code = code_lookup.set_index('variable_code')['category']
# keep only the census rows whose variable code appears in the lookup
in_lookup = df_census_2022['variable_code'].isin(code_lookup['variable_code'])
df_census_income = df_census_2022[in_lookup]
df_census_income['income_category'] = df_census_income['variable_code'].map(category_by_code)
# sum the values per block group and category, then spread the categories into columns
df_census_income = (df_census_income.groupby(['TRPAID','income_category'])['value']
                                    .sum()
                                    .reset_index()
                                    .pivot(index='TRPAID', columns='income_category', values='value')
                                    .reset_index())

In [13]:
# Combine the occupancy, household-size and income tables into one block-group table (df_census_all).

# TRPAID is a 16 digit ID. Store it as a zero-padded string on every table that gets merged,
# so the merge keys share one dtype. The original skipped df_census_occupancy, the left side
# of the first merge.
for census_table in (df_census_occupancy, df_census_household_size, df_census_income):
    trpaid = census_table['TRPAID']
    if pd.api.types.is_float_dtype(trpaid):
        # go through a nullable integer first so a float ID does not turn into text ending in '.0'
        trpaid = trpaid.astype('Int64')
    census_table['TRPAID'] = trpaid.astype(str).str.zfill(16)

# merge all the census data together
df_census_occupancy_all = pd.merge(df_census_occupancy, df_census_household_size, on='TRPAID', how='left')
df_census_all = pd.merge(df_census_occupancy_all, df_census_income, on='TRPAID', how='left')
# rename columns of df_census_all
column_rename = {
    'B25002_003E': 'vacant_units',
    'B25002_002E': 'occupied_units',
    'B25004_006E': 'seasonal_units',
    'High Income': 'high_income',
    'Low Income': 'low_income',
    'Medium Income': 'middle_income',
}
df_census_all.rename(columns=column_rename, inplace=True)

# the raw household size column is already available as 'household_size'
df_census_all.drop(columns=['B25010_001E'], inplace=True)
# calculate each income category as a proportion of occupied units
for income_level in ('high', 'middle', 'low'):
    df_census_all[f'{income_level}_income_proportion'] = (
        df_census_all[f'{income_level}_income'] / df_census_all['occupied_units']
    )

In [15]:
# Attach the block-group census rates to the parcels, replace the secondary-residence rate
# with a VHR-adjusted seasonal rate, and save the result as pickle part 4.
sdfParcel = pd.read_pickle(parcel_pickle_part3)

# census table keyed by TRPAID, matched against the parcel BLOCK_GROUP
census_by_block_group = df_census_all.set_index('TRPAID')
# parcel field -> census column it is filled from
census_sources = {'PrimaryResidence_Rate':   'occupancy_rate',
                  'SecondaryResidence_Rate': 'seasonal_rate',
                  'HighIncome_Rate':         'high_income_proportion',
                  'MediumIncome_Rate':       'middle_income_proportion',
                  'LowIncome_Rate':          'low_income_proportion',
                  'PersonsPerUnit':          'household_size'}
for parcel_field, census_field in census_sources.items():
    sdfParcel[parcel_field] = sdfParcel['BLOCK_GROUP'].map(census_by_block_group[census_field])

# --- seasonal rate adjusted for vacation home rentals (VHR), per block group ---
# parcels flagged as VHR
vhrs = sdfParcel.loc[sdfParcel['VHR']=='Yes']
# residential units and mean census rates for all parcels
totalRes = (sdfParcel.groupby('BLOCK_GROUP')
                     .agg({'Residential_Units':'sum', 'PrimaryResidence_Rate':'mean', 'SecondaryResidence_Rate':'mean'})
                     .reset_index())
# residential units on VHR parcels only
totalVHR = (vhrs.groupby('BLOCK_GROUP')
                .agg({'Residential_Units':'sum'})
                .reset_index()
                .rename(columns={'Residential_Units':'VHR_Units'}))

# every block group, with 0 VHR units where it has none
totalResVHR = pd.merge(totalRes, totalVHR, on='BLOCK_GROUP', how='left')
totalResVHR['VHR_Units'] = totalResVHR['VHR_Units'].fillna(0)

residential_units = totalResVHR['Residential_Units']
totalResVHR['non_vhr_units'] = residential_units - totalResVHR['VHR_Units']
# seasonal units implied by the census rate, before taking out the VHRs
totalResVHR['non_adjusted_seasonal_units'] = totalResVHR['SecondaryResidence_Rate'] * residential_units
totalResVHR['non_primary_residence_units'] = residential_units-(totalResVHR['PrimaryResidence_Rate'] * residential_units)
totalResVHR['adjusted_seasonal_units']     = totalResVHR['non_adjusted_seasonal_units'] - totalResVHR['VHR_Units']
# Manual override for block group 3200500170022020 because of a lag in the data:
# the census reports 100% occupancy, possibly related to the beach club development
is_lagging_block_group = totalResVHR['BLOCK_GROUP'] == '3200500170022020'
totalResVHR.loc[is_lagging_block_group, 'adjusted_seasonal_units'] = 0
# NOTE(review): this rate is relative to non_primary_residence_units, not Residential_Units -
# confirm that whatever consumes SecondaryResidence_Rate expects that base
totalResVHR['adjusted_seasonal_rate'] = totalResVHR['adjusted_seasonal_units'] / totalResVHR['non_primary_residence_units']

# overwrite the parcel secondary-residence rate with the adjusted rate of its block group
adjusted_rate_by_block_group = totalResVHR.set_index('BLOCK_GROUP')['adjusted_seasonal_rate']
sdfParcel['SecondaryResidence_Rate'] = sdfParcel['BLOCK_GROUP'].map(adjusted_rate_by_block_group)

# export to pickle part 4
sdfParcel.to_pickle(parcel_pickle_part4)
sdfParcel.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 61259 entries, 0 to 61258
Data columns (total 26 columns):
 #   Column                      Non-Null Count  Dtype   
---  ------                      --------------  -----   
 0   APN                         61259 non-null  string  
 1   Residential_Units           61185 non-null  Int32   
 2   TouristAccommodation_Units  61175 non-null  Int32   
 3   CommercialFloorArea_SqFt    61259 non-null  Float64 
 4   RoomsRented_PerDay          0 non-null      float64 
 5   VHR_Occupancy_Rate          61259 non-null  float64 
 6   TAU_Occupancy_Rate          61259 non-null  float64 
 7   PrimaryResidence_Rate       61087 non-null  float64 
 8   SecondaryResidence_Rate     61087 non-null  Float64 
 9   HighIncome_Rate             61087 non-null  float64 
 10  MediumIncome_Rate           61087 non-null  float64 
 11  LowIncome_Rate              61087 non-null  float64 
 12  PersonsPerUnit              61087 non-null  float64 
 13  TAU_TYPE        

## Employment 

In [None]:
# top line employment data for NV from 2018 lives here: ????
# we got employment data from NV at the Tahoe Basin level by NAICS code....

# get the employment data
# TODO: the path below is still an empty placeholder, so this read cannot succeed until the
# location of the NV employment file is filled in
nv_employ = pd.read_csv('')

In [None]:
# WE HAVE GOOD DATA FOR CASINO EMPLOYMENT on the South Shore ## 
# for employment data we have multiple years of CA EDD data
# california employment development department data for 2018 and 2022 was transformed to a feature class and spatial joined to TAZs and Block Group
# exported to a csv
# stacked data by temporal scale
# grouped by TAZ and NAICS code, and summed employment
# F:\GIS\PROJECTS\ResearchAnalysis\Employment\Data\EDD_Grouped
# F:\GIS\PROJECTS\ResearchAnalysis\Employment\
# then looking at difference of total and trends over time (month-month) and year over year
#
# LODES data https://maps.trpa.org/server/rest/services/LTinfo_Climate_Resilience_Dashboard/MapServer/142

# compare 2018 to 2022 by block group 

# checking trends of each. 
#  - what is the trend of employment by NAICS code
#  - what is the trend of employment by TAZ
#  - what is the trend of employment by block group
#  - what is the trend of employment by zip code
# 
# CBP data for 2018 and 2022
# data is mostly in the service. or in Vector.sde>Census>Jobs
# look at comparisons of trends by same geography and temporal scale

# workflow is to get the data, clean it, join it to the spatial data, then group by the spatial data and sum the employment
# 
# establish trends for CA for the three datasources...compare the trends and see if they are similar
# 

### NAICS codes are one order higher in LODES data, CA EDD and CBP data have the same granularity of NAICS codes
### LODES is by year so the trend might be different if there is a seasonal component to the data

# we'll have two of the three datasets analyzed for Nevada and all three in California.
    # where we have all three datasets we'll compare the trends and see if they are similar
    # we'll look at the trends for each dataset and see if they are similar
    # we'll look at the trends for each geography and see if they are similar
    # we'll look at the trends for each temporal scale and see if they are similar

# For Nevada we have block level data for 2018 so if consistent with 2022 we can use that as a proxy for 2022

# we subtract out any known employment from the 2018 data (e.g. Lakeside Inn) and compare the trends
# generate adjustment factors by sector and apply those adjustments to the 2018 data that was aggregated to the TAZ level.

## TAZ Summary

> Aggregations

### Get Data Again...

In [16]:
# Working copies of the in-memory frames plus the pickled intermediates used for the TAZ summary.
sdfTAZ = sdf_taz.copy()
# campground table without Bayview Campground
is_not_bayview = dfCamp['Campground'] != 'Bayview Campground'
dfCamp = dfCamp.copy().loc[is_not_bayview]

# pickled intermediates
sdfParcel    = pd.read_pickle(parcel_pickle_part4)
dfVisitor    = pd.read_pickle(visitor_pickle)
dfCampground = pd.read_pickle(campground_pickle)
dfSchool     = pd.read_pickle(school_pickle)
dfOccupancy  = pd.read_pickle(occupancy_rates_pickle)
# not loaded yet:
# sdfSocio  = pd.read_pickle(socioeconomic_pickle)
# sdfEmploy = pd.read_pickle(employment_pickle)


In [17]:
# export the occupancy rates table as a csv (written to data_dir, not out_dir)
occupancy_rates_csv = os.path.join(data_dir, 'occupancy_rates.csv')
dfOccupancy.to_csv(occupancy_rates_csv, index=False)

### Explore which fields we need

In [18]:
# every csv currently in the processed data directory
csv_files = list(out_dir.glob('*.csv'))

# load the existing summary files
dfOverNight  = pd.read_csv(out_dir / 'OvernightVisitorZonalData_Summer.csv')
dfEmployment = pd.read_csv(out_dir / 'Employment.csv')
dfSchoolEnrl = pd.read_csv(out_dir / 'SchoolEnrollment.csv')
dfSocioEcon  = pd.read_csv(out_dir / 'SocioEcon_Summer.csv')
dfVisitor    = pd.read_csv(out_dir / 'VisitorOccupancyRates_Summer.csv')
dfInputs     = pd.read_csv(out_dir / 'inputs_summarized.csv')

# column names of each summary file, in the same order as the frames
summary_frames = (dfOverNight, dfEmployment, dfSchoolEnrl, dfSocioEcon, dfVisitor, dfInputs)
(overnight_fields, employment_fields, school_fields,
 socio_fields, visitor_fields, inputs_fields) = (frame.columns.tolist() for frame in summary_frames)
# show the lists
overnight_fields, employment_fields, school_fields, socio_fields, visitor_fields, inputs_fields


(['taz',
  'hotelmotel',
  'resort',
  'casino',
  'campground',
  'percentHouseSeasonal'],
 ['TAZ', 'emp_other', 'emp_rec', 'emp_retail', 'emp_srvc', 'emp_gaming'],
 ['TAZ',
  'elementary_school_enrollment',
  'middle_school_enrollment',
  'high_school_enrollment',
  'college_enrollment'],
 ['taz',
  'total_residential_units',
  'census_occ_rate',
  'total_occ_units',
  'occ_units_low_inc',
  'occ_units_med_inc',
  'occ_units_high_inc',
  'persons_per_occ_unit',
  'total_persons',
  'emp_retail',
  'emp_srvc',
  'emp_rec',
  'emp_game',
  'emp_other'],
 ['taz', 'hotelmotel', 'resort', 'casino', 'campground', 'house', 'seasonal'],
 ['category',
  'RTP_20_base_year_2018',
  'RTP_17_base_year_2014',
  'RTP_24_base_year_2022'])

In [19]:
# Column schemas for the summary tables, written out by hand with a note on each field.

# overnight visitor zonal data
overnight_fields = ['taz', 
                    'hotelmotel', # Hotel/Motel total rooms available by TAU_TYPE = hotelmotel
                    'resort',     # Resort total rooms is coming from ### We don't have occupancy rates for these ###
                    'casino',     # Casino total rooms available by TAU_TYPE = casino
                    'campground', # Campground total sites available
                    'percentHouseSeasonal', # Percent of houses that are seasonal ### total units- (probability unit is seasonal) / total units ###
                    'beach'      # NA
                    ]

# employment by sector (this list keys on 'TAZ' in upper case, unlike the lower-case 'taz' used elsewhere in this cell)
employment_fields = ['TAZ', 
                    'emp_other', # Other employment equals total employees in NAICS codes
                    'emp_rec',   # Recreation employment
                    'emp_retail',# Retail employment
                    'emp_srvc',  # Service employment
                    'emp_gaming' # Gaming employment
                    ]

# school enrollment
# NOTE(review): the existing SchoolEnrollment.csv uses 'TAZ' (upper case) for this key - confirm which is wanted
school_fields     = ['taz',
                    'elementary_school_enrollment', # Elementary school enrollment
                    'middle_school_enrollment',     # Middle school enrollment
                    'high_school_enrollment',       # High school enrollment
                    'college_enrollment'            # College enrollment
                    ]

# socio-economic data (gaming employment is 'emp_game' here but 'emp_gaming' in employment_fields)
socio_fields      = ['taz',
                    'total_residential_units', # Total residential units in parcels
                    'census_occ_rate',         # Census occupancy rate 
                    'total_occ_units',         # Total occupied units 
                    'occ_units_low_inc',       # Low income occupied units
                    'occ_units_med_inc',       # Medium income occupied units
                    'occ_units_high_inc',      # High income occupied units
                    'persons_per_occ_unit',    # Persons per occupied unit
                    'total_persons',           # Total persons
                    'emp_retail',              # Retail employment
                    'emp_srvc',                # Service employment
                    'emp_rec',                 # Recreation employment
                    'emp_game',                # Gaming employment
                    'emp_other'                # Other employment
                    ]

# visitor occupancy rates
visitor_fields    = ['taz', 
                    'hotelmotel',    # Hotel/Motel rooms
                    'resort',        # Resort rooms
                    'casino',        # Casino rooms
                    'campground',    # Campground sites sold?
                    'house',         # House units
                    'seasonal'       # Seasonal units
                    ]

### Input File Creation

In [20]:
# Parcel-level unit and person estimates, then totals and rates per TAZ.
residential_units = sdfParcel['Residential_Units']
sdfParcel['OccupiedUnits']   = residential_units * sdfParcel['PrimaryResidence_Rate']
sdfParcel['UnoccupiedUnits'] = residential_units - sdfParcel['OccupiedUnits']
sdfParcel['SeasonalUnits']   = residential_units * sdfParcel['SecondaryResidence_Rate']
# occupied units split by income rate
income_unit_fields = (('HighUnits',   'HighIncome_Rate'),
                      ('MediumUnits', 'MediumIncome_Rate'),
                      ('LowUnits',    'LowIncome_Rate'))
for unit_field, rate_field in income_unit_fields:
    sdfParcel[unit_field] = sdfParcel['OccupiedUnits'] * sdfParcel[rate_field]
# people living in the occupied units
sdfParcel['People'] = sdfParcel['OccupiedUnits'] * sdfParcel['PersonsPerUnit']

# per TAZ: unit and people totals, mean persons per unit and mean TAU occupancy rate
taz_aggregations = {'Residential_Units':  'sum',
                    'OccupiedUnits':      'sum',
                    'SeasonalUnits':      'sum',
                    'HighUnits':          'sum',
                    'MediumUnits':        'sum',
                    'LowUnits':           'sum',
                    'PersonsPerUnit':     'mean',
                    'People':             'sum',
                    'TAU_Occupancy_Rate': 'mean'}
sdfParcel_grouped = sdfParcel.groupby('TAZ').agg(taz_aggregations).reset_index()

# seasonal and occupied units as a share of residential units, stored as plain floats
# (TAZs without residential units are left as NaN here, not filled with 0)
for rate_field, unit_field in (('SeasonalRate', 'SeasonalUnits'), ('OccupiedRate', 'OccupiedUnits')):
    unit_share = sdfParcel_grouped[unit_field] / sdfParcel_grouped['Residential_Units']
    sdfParcel_grouped[rate_field] = unit_share.astype(float)
sdfParcel_grouped


Unnamed: 0,TAZ,Residential_Units,OccupiedUnits,SeasonalUnits,HighUnits,MediumUnits,LowUnits,PersonsPerUnit,People,TAU_Occupancy_Rate,SeasonalRate,OccupiedRate
0,9,200,48.757396,177.505009,7.810651,0.0,40.946746,2.11,102.878107,0.067268,0.887525,0.243787
1,10,22,10.80403,21.406706,2.462073,1.814346,6.527611,2.320714,28.883211,0.027761,0.973032,0.491092
2,11,226,55.095858,200.58066,8.826036,0.0,46.269822,2.11,116.25226,0.011901,0.887525,0.243787
3,12,1134,652.090031,571.718523,138.388804,130.24434,383.456887,2.382725,1564.955131,0.002472,0.504161,0.575035
4,13,428,247.532058,344.535556,48.295129,45.950468,153.286461,2.661333,649.057862,0.015333,0.80499,0.578346
5,14,308,165.388961,231.548044,76.422265,4.004926,84.96177,2.615717,432.315125,0.0,0.751779,0.536977
6,15,283,145.977848,277.787927,33.733122,25.672996,86.57173,2.7,394.14019,0.000695,0.981583,0.515823
7,16,4,2.063291,3.926331,0.476793,0.362869,1.223629,2.7,5.570886,0.0,0.981583,0.515823
8,17,0,0.0,0.0,0.0,0.0,0.0,2.601667,0.0,0.0,,
9,18,314,173.038849,280.03299,42.469065,65.96259,64.607194,2.75,475.856835,0.003419,0.891825,0.551079


In [None]:
# Percent house seasonal, explored on the parcels that are not VHRs:
#   seasonal units / (residential units - occupied units), averaged per TAZ.
# Census codes for reference: vacant seasonal B25004_006 (as used in the Socio Econ section),
# vacant total B25002_003

# keep only the parcels that are not VHRs
sdfParcel_NoVHR = sdfParcel.loc[sdfParcel['VHR'] == 'No']
no_vhr_units = sdfParcel_NoVHR['Residential_Units']
sdfParcel_NoVHR['OccupiedUnits'] = no_vhr_units * sdfParcel_NoVHR['PrimaryResidence_Rate']
sdfParcel_NoVHR['SeasonalUnits'] = no_vhr_units * sdfParcel_NoVHR['SecondaryResidence_Rate']

# seasonal units as a share of the units that are not occupied
percent_seasonal = sdfParcel_NoVHR['SeasonalUnits'] / (no_vhr_units - sdfParcel_NoVHR['OccupiedUnits'])
# plain floats, missing values set to 0
percent_seasonal = percent_seasonal.astype(float).fillna(0)
# negative shares are set to 0
sdfParcel_NoVHR['PercentHouseSeasonal'] = percent_seasonal.mask(percent_seasonal < 0, 0)

# mean of PercentHouseSeasonal per TAZ, missing values set to 0
sdfParcel_grouped_percent = (sdfParcel_NoVHR.groupby('TAZ')
                                            .agg({'PercentHouseSeasonal':'mean'})
                                            .reset_index())
sdfParcel_grouped_percent['PercentHouseSeasonal'] = (
    sdfParcel_grouped_percent['PercentHouseSeasonal'].astype(float).fillna(0)
)

# column means across the TAZs
sdfParcel_grouped_percent.mean()


> Generate 'OvernightVisitorZonalData_Summer.csv'

In [21]:
# create the overnight visitor dataframe, pickle and csv
overnight_fields = ['taz', 
                    'hotelmotel', # Hotel/Motel total rooms available by TAU_TYPE = hotelmotel
                    'resort',     # Resort total rooms is coming from ### We dont have occupancy rates for these ###
                    'casino',     # Casino total rooms available by TAU_TYPE = casino
                    'campground', # Campground total sites available
                    'percentHouseSeasonal', # Percent of houses that are seasonal ### total units- (probability unit is seasonal) / total units ###
                    # 'beach'      # NA
                    ]

df_overnight = pd.DataFrame(columns=overnight_fields)
df_overnight['taz'] = sdfTAZ['TAZ'].astype(int)

# campground total sites, looked up by TAZ
df_overnight['campground'] = df_overnight['taz'].map(dict(zip(dfCampground['TAZ'], dfCampground['Total_Sites'])))

# Tourist accommodation units per TAZ for each lodging type.
# The groupby result is indexed by TAZ id, so it has to be looked up through the taz column;
# assigning it straight to the column aligns it on the row index and lands values on the wrong rows.
lodging_columns = {'hotelmotel': 'HotelMotel', 'casino': 'Casino', 'resort': 'Resort'}
for column, tau_type in lodging_columns.items():
    units_by_taz = (sdfParcel.loc[sdfParcel['TAU_TYPE'] == tau_type]
                             .groupby('TAZ')['TouristAccommodation_Units']
                             .sum())
    df_overnight[column] = df_overnight['taz'].map(units_by_taz)

# seasonal share of residential units per TAZ
df_overnight['percentHouseSeasonal'] = df_overnight['taz'].map(dict(zip(sdfParcel_grouped['TAZ'], sdfParcel_grouped['SeasonalRate'])))

# fill NA with 0
df_overnight = df_overnight.fillna(0)
# The original called astype(int) on the whole frame and threw the result away. Casting every
# column would truncate percentHouseSeasonal (a 0-1 rate) to 0, so only the lodging unit
# counts are cast.
count_columns = list(lodging_columns)
df_overnight[count_columns] = df_overnight[count_columns].astype(int)
# NOTE(review): visitor_pickle is also the target of the visitor occupancy rate table - confirm
# which table that pickle is meant to hold
df_overnight.to_pickle(visitor_pickle)
df_overnight.to_csv(os.path.join(out_dir,'OvernightVisitorZonalData_Summer.csv'), index=False)

> Generate 'SocioEcon_Summer.csv'

In [22]:
# create the socio-economic dataframe, pickle and csv
socio_fields      = ['taz',
                    'total_residential_units', # Total residential units in parcels
                    'census_occ_rate',         # Census occupancy rate 
                    'total_occ_units',         # Total occupied units 
                    'occ_units_low_inc',       # Low income occupied units
                    'occ_units_med_inc',       # Medium income occupied units
                    'occ_units_high_inc',      # High income occupied units
                    'persons_per_occ_unit',    # Persons per occupied unit
                    'total_persons',           # Total persons
                    'emp_retail',              # Retail employment
                    'emp_srvc',                # Service employment
                    'emp_rec',                 # Recreation employment
                    'emp_game',                # Gaming employment
                    'emp_other'                # Other employment
                    ]

df_socio = pd.DataFrame(columns=socio_fields)
df_socio['taz'] = sdfTAZ['TAZ'].astype(int)

# socio field -> column of the per-TAZ parcel summary it is filled from
parcel_sources = {'total_residential_units': 'Residential_Units',
                  'census_occ_rate':         'OccupiedRate',
                  'total_occ_units':         'OccupiedUnits',
                  'occ_units_low_inc':       'LowUnits',
                  'occ_units_med_inc':       'MediumUnits',
                  'occ_units_high_inc':      'HighUnits',
                  'persons_per_occ_unit':    'PersonsPerUnit',
                  'total_persons':           'People'}
for socio_field, parcel_field in parcel_sources.items():
    df_socio[socio_field] = df_socio['taz'].map(dict(zip(sdfParcel_grouped['TAZ'], sdfParcel_grouped[parcel_field])))

# The emp_* fields are not populated yet and are written as 0 by the fill below.
# When they are wired up from dfEmployment, note that Employment.csv names the gaming
# column 'emp_gaming' while this schema uses 'emp_game'.

df_socio = df_socio.fillna(0)
# write the csv first so it is produced even if the pickle path needs attention
df_socio.to_csv(os.path.join(out_dir,'SocioEcon_Summer.csv'), index=False)
# The original pickled this table to visitor_pickle, overwriting the visitor data.
# NOTE(review): socioeconomic_pickle is the name used by the commented-out read in the
# "Get Data Again" cell - confirm it is defined alongside the other pickle paths.
df_socio.to_pickle(socioeconomic_pickle)

> Generate 'Employment.csv'

In [23]:
# create the employment dataframe, pickle and csv
employment_fields = ['TAZ', 
                    'emp_other', # Other employment equals total employees in NAICS codes
                    'emp_rec',   # Recreation employment
                    'emp_retail',# Retail employment
                    'emp_srvc',  # Service employment
                    'emp_gaming' # Gaming employment
                    ]
# create the employment dataframe
df_employ = pd.DataFrame(columns=employment_fields)
# The key column of this schema is 'TAZ' (upper case). The original filled a lower-case 'taz'
# column instead, which left 'TAZ' empty (exported as 0 for every row) and appended a stray
# 'taz' column - and because Employment.csv is also the input, a re-run then lost the TAZ ids.
df_employ['TAZ'] = sdfTAZ['TAZ'].astype(int)
# every other field is looked up by TAZ from the employment table read from Employment.csv
for employment_field in employment_fields[1:]:
    df_employ[employment_field] = df_employ['TAZ'].map(dict(zip(dfEmployment['TAZ'], dfEmployment[employment_field])))
# fill NA with 0
df_employ = df_employ.fillna(0)
# save to pickle and csv
df_employ.to_pickle(employment_pickle)
df_employ.to_csv(os.path.join(out_dir,'Employment.csv'), index=False)


> Generate 'VisitorOccupancyRates_Summer.csv'

In [24]:
# create the visitor dataframe, pickle and csv
visitor_occupany_fields  = ['taz', 
                            'hotelmotel',    # Hotel/Motel rooms
                            'resort',        # Resort rooms
                            'casino',        # Casino rooms
                            'campground',    # Campground sites sold?
                            'house',         # House units
                            'seasonal'       # Seasonal units
                            ]
# create the visitor dataframe
df_visitor_occ = pd.DataFrame(columns=visitor_occupany_fields)
# set the field values
df_visitor_occ['taz'] = sdfTAZ['TAZ'].astype(int)

# Mean TAU occupancy rate per TAZ for each lodging type.
# The groupby result is indexed by TAZ id, so it has to be looked up through the taz column;
# assigning it straight to the column aligns it on the row index and lands values on the wrong rows.
lodging_columns = {'hotelmotel': 'HotelMotel', 'resort': 'Resort', 'casino': 'Casino'}
for column, tau_type in lodging_columns.items():
    rate_by_taz = (sdfParcel.loc[sdfParcel['TAU_TYPE'] == tau_type]
                            .groupby('TAZ')['TAU_Occupancy_Rate']
                            .mean())
    df_visitor_occ[column] = df_visitor_occ['taz'].map(rate_by_taz)

# NOTE(review): this fills campground with Total_Sites (a site count) in a table of occupancy
# rates - confirm whether a campground occupancy rate was intended here
df_visitor_occ['campground'] = df_visitor_occ['taz'].map(dict(zip(dfCampground['TAZ'], dfCampground['Total_Sites'])))
# occupied and seasonal shares of residential units per TAZ
df_visitor_occ['house']      = df_visitor_occ['taz'].map(dict(zip(sdfParcel_grouped['TAZ'], sdfParcel_grouped['OccupiedRate'])))
df_visitor_occ['seasonal']   = df_visitor_occ['taz'].map(dict(zip(sdfParcel_grouped['TAZ'], sdfParcel_grouped['SeasonalRate'])))
# fill NA with 0
df_visitor_occ = df_visitor_occ.fillna(0)
# save to pickle and csv
# NOTE(review): visitor_pickle is also the target of the overnight visitor table - confirm
# which table that pickle is meant to hold
df_visitor_occ.to_pickle(visitor_pickle)
df_visitor_occ.to_csv(os.path.join(out_dir,'VisitorOccupancyRates_Summer.csv'), index=False)

#### Basin Summary
> update and compare to 'RTP_20_base_year_2018', 'RTP_17_base_year_2014' in TravelDemandModel\2022\data\processed_data\inputs_summarized.csv


In [25]:
# Basin-wide 2022 values for each summary category, written next to the earlier base years.

# totals that several categories share
residential_total = sdfParcel_grouped['Residential_Units'].sum()
occupied_total    = sdfParcel_grouped['OccupiedUnits'].sum()
people_total      = sdfParcel_grouped['People'].sum()
# seasonal units as a share of all residential units
seasonal_share    = sdfParcel_grouped['SeasonalUnits'].sum() / residential_total
# parcels whose TAU type is a lodging type
is_lodging = sdfParcel['TAU_TYPE'].isin(['HotelMotel','Resort','Casino'])
# enrollment across all four school levels
total_enrollment = (dfSchool['elementary_school_enrollment'] + dfSchool['middle_school_enrollment']
                    + dfSchool['high_school_enrollment'] + dfSchool['college_enrollment']).sum()

# category -> 2022 value
# NOTE(review): 'house(VHR) rate', 'seasonal rate' and 'percentHouseSeasonal' all receive the
# same seasonal share - confirm that is intended
value_dict = {
    'lodging occupancy rate'   : sdfParcel.loc[is_lodging]['TAU_Occupancy_Rate'].mean(),
    'campground occupancy rate': dfCamp['Occupancy_Rate'].mean(),
    'house(VHR) rate'          : seasonal_share,
    'seasonal rate'            : seasonal_share,
    'lodging unit'             : sdfParcel['TouristAccommodation_Units'].sum(),
    'campground'               : dfCampground['Total_Sites'].sum(),
    'percentHouseSeasonal'     : seasonal_share,
    'school enrollment'        : total_enrollment,
    'employment'               : 0, # need to get this data
    'residential unit'         : sdfParcel['Residential_Units'].sum(),
    'total persons'            : people_total,
    'census occupancy rate'    : occupied_total / residential_total,
    'low income res unit'      : sdfParcel_grouped['LowUnits'].sum(),
    'medium income res unit'   : sdfParcel_grouped['MediumUnits'].sum(),
    'high income res unit'     : sdfParcel_grouped['HighUnits'].sum(),
    'total occupied unit'      : occupied_total,
    'persons per occupied unit': people_total / occupied_total,
}

# start from the copy of the summary file that holds the earlier base years
dfInputs = pd.read_csv(os.path.join(out_dir,'inputs_summarized copy.csv'))
# fill the 2022 column by category
dfInputs['RTP_24_base_year_2022'] = dfInputs['category'].map(value_dict)
# round to 2 decimal places and drop the index column that came in with the csv
dfInputs = dfInputs.round(2).drop(columns=['Unnamed: 0'])
# save to csv and pickle
dfInputs.to_csv(os.path.join(out_dir,'inputs_summarized.csv'), index=False)
dfInputs.to_pickle(summary_pickle)
dfInputs


Unnamed: 0,category,RTP_20_base_year_2018,RTP_17_base_year_2014,RTP_24_base_year_2022
0,lodging occupancy rate,0.61,0.64,0.48
1,campground occupancy rate,,0.78,0.59
2,house(VHR) rate,0.38,0.44,0.71
3,seasonal rate,0.38,0.44,0.71
4,lodging unit,11107.0,10575.0,11610.0
5,campground,2104.0,2465.0,1964.0
6,percentHouseSeasonal,0.71,0.79,0.71
7,school enrollment,8667.0,9652.0,9089.0
8,employment,28053.5,26367.0,0.0
9,residential unit,47645.0,47540.0,48799.0


## Forecasts

In [None]:
# total_residential_units - base_2018 
# forecast will be for 2040 and 2050 
# rate of development will be based on the current rate of development from the last 12 years (back to 2012)
    # current rate will not get us to full build out and will be adjusted to get to full build out by 2050
# total_residential_units = base_2018 + (rate_of_development * (2040 - 2018))

# forecast max build out will be 2050
#  still going to build out all the residential units and then revisit how conversions of TAUs and CFA will be handled

# GIS exercise of where the new residential units will be built
# 1. get the land use data and see if we can get the residential units on vacant and underbuilt parcels

# For TAUs and CFA we only built out what was in the pipeline

# Total Occupied Units = Total Residential Units - Vacant Units
    # based on block group rate and TAZ crosswalk assigned to Parcel level units
# Occupied Units by Income Level = Total Occupied Units * % of Income Level in Block Group
    # based on block group rate and TAZ crosswalk assigned to Parcel level units

# Lodging Occupancy Rates by Tax Rate Zone
    # Air DNA? for VHR occupancy rates
    # Seasonal Units will be based on the % of seasonal units in the block group?

# Adjusted occupancy rates for Residential units to be based on population change to decennial census
    # double check total persons in the model against the decennial census population and then apply the rate?

# use adjusted ACS numbers to make all the input factors match the same source
    # use the 2022 ACS data at the Basin level for all the input factors
        # Block Group level data will be too noisy and not as accurate as the Basin level data

# forecast growth at the Basin level and show some population growth...
    # out year will be 2050 and show the growth in the model at 0.5% per year
    # show the growth in the model at 1.0% per year? or use the decennial census growth rate?
        # which was 0.04% per year from 2010 to 2020 annualized

# show the growth in the model at 0.004% per year? or use the decennial census growth rate?
    # adding 3000 units of affordable housing in the model and get 6,000 person increase in population

# 
