## Intro

> Setup

In [None]:
# import packages
import pandas as pd
import pathlib
import os
import arcpy
from utils import *
import numpy as np
import pickle

# pandas options
pd.options.mode.copy_on_write = True
pd.options.mode.chained_assignment = None
pd.options.display.max_columns = 999
pd.options.display.max_rows = 999

# current working directory
local_path = pathlib.Path().absolute()
# set data path as a subfolder of the current working directory TravelDemandModel\2022\
data_dir = local_path.parents[0] / 'data/raw_data'
# folder to save processed data
out_dir  = local_path.parents[0] / 'data/processed_data'
# workspace gdb for stuff that doesnt work in memory
gdb = os.path.join(local_path,'Workspace.gdb')
# set environement workspace to in memory 
arcpy.env.workspace = 'memory'
# clear memory workspace
arcpy.management.Delete('memory')
# overwrite true
arcpy.env.overwriteOutput = True
# Set spatial reference to NAD 1983 UTM Zone 10N
sr = arcpy.SpatialReference(26910)

# schema for the final output
final_schema = ['APN', 'Residential_Units', 'TouristAccommodation_Units', 'CommercialFloorArea_SqFt',
                'RoomsRented_PerDay', 'VHR_Lodging_Occupancy_Rate','TAU_Lodging_Occupancy_Rate', 
                'PrimaryResidence_Rate', 'SecondaryResidence_Rate',
                'HighIncome_Rate',	'MediumIncome_Rate', 'LowIncome_Rate', 'PersonsPerUnit',
                'TAU_TYPE', 'VHR', 'BLOCK_GROUP', 'TAZ', 'OCCUPANCY_ZONE', 
                'JURISDICTION', 'COUNTY', 'OWNERSHIP_TYPE','EXISTING_LANDUSE', 'WITHIN_TRPA_BNDY', 
                'PARCEL_ACRES', 'PARCEL_SQFT', 'SHAPE']

# pickle variables for each part
# part 1 - spatial join categories, occupancy rates, and parcels
sdfParcel_pickle_part1 = out_dir / 'sdfParcel_pickle1.pkl'
# part 2 - spatial join categories, occupancy rates, and parcels
sdfParcel_pickle_part2 = out_dir / 'sdfParcel_pickle2.pkl'
# part 3 - spatial join categories, occupancy rates, and parcels
sdfParcel_pickle_part3 = out_dir / 'sdfParcel_pickle3.pkl'
# part 4 - spatial join categories, occupancy rates, and parcels
sdfParcel_pickle_part4 = out_dir / 'sdfParcel_pickle4.pkl'
# part 5 - spatial join categories, occupancy rates, and parcels
sdfParcel_pickle_part5 = out_dir / 'sdfParcel_pickle5.pkl'
# part 6 - spatial join categories, occupancy rates, and parcels
sdfParcel_pickle_part6 = out_dir / 'sdfParcel_pickle6.pkl'
# pickle variables for occupancry rates
occupancy_rates_pickle = out_dir / 'occupancy_rates.pkl'
# campground pickles
campground_pickle = out_dir / 'campground.pkl'

> Future Utils

In [None]:
# function to do a spatial join and
# map values from the source to the target
def spatial_join_map(target, source, join_field,  map_field, target_field):
    # spatial join
    arcpy.SpatialJoin_analysis(target, source, 'memory\\temp', 
                               'JOIN_ONE_TO_ONE', 'KEEP_ALL','HAVE_THEIR_CENTER_IN')
    # get result as a spatial dataframe
    join = pd.DataFrame.spatial.from_featureclass('memory\\temp')
    join.info()
    # map values
    target[target_field] = target[join_field].map(dict(zip(join[join_field], join[map_field])))
    return target

# check for duplicates
def check_dupes(df, col):
    df['is_duplicate'] = df.duplicated(subset=col, keep=False)
    df.is_duplicate.value_counts()
    df.loc[df['is_duplicate'] == True]
    df = df.drop_duplicates(subset=col, keep='first', inplace=True)
    return df[df.duplicated([col], keep=False)]

# check if field exists in data frame and final_schema and if not add it
def check_field(df, fields):
    for field in fields:
        if field not in df.columns:
            df[field] = np.nan
    return df

# function to run interpolation and join by APN
def interpolate_join(df, sdf):
    # interpolate occupancy rate for VHR and TAU parcels where NULL
    return df

# function to fill missing values
def fill_missing_values(df, sdf):
    return df

# function to run zonal stats and map values
def zonal_stats_map(target, source, join_field,  map_field, target_field):
    # zonal stats
    arcpy.sa.ZonalStatisticsAsTable(target, join_field, source, 'memory\\temp', 'DATA', 'MEAN')
    # get result as a spatial dataframe
    join = pd.DataFrame.spatial.from_featureclass('memory\\temp')
    join.info()
    # map values
    target[target_field] = target[join_field].map(dict(zip(join[join_field], join[map_field]))
    return target

# save to pickle
def to_pickle(data, filename):
    with open(filename, 'wb') as f:
        pickle.dump(data, f)
    print(f'{filename} pickled')

# save to pickle and feature class
def to_pickle_fc(data, filename):
    data.spatial.to_featureclass(filename)
    with open(filename, 'wb') as f:
        pickle.dump(data, f)
    print(f'{filename} pickled and saved as feature class')

# get a pickled file as a dataframe
def from_pickle(filename):
    with open(filename, 'rb') as f:
        data = pickle.load(f)
    print(f'{filename} unpickled')
    return data

> Get Data

In [None]:
# TAZ feature layer polygons
taz_url = 'https://maps.trpa.org/server/rest/services/Transportation_Planning/MapServer/6'
# get as spatial dataframe
sdf_taz = get_fs_data_spatial(taz_url)
# set spatial reference to NAD 1983 UTM Zone 10N
sdf_taz.spatial.sr = sr

# parcel development layer polygons
units_url = 'https://maps.trpa.org/server/rest/services/Existing_Development/MapServer/2'
# query 2022 rows
sdf_units = get_fs_data_spatial_query(units_url, "Year = 2022")
sdf_units.spatial.sr = sr

# block group feature layer polygons
block_groups_url = 'https://maps.trpa.org/server/rest/services/Demographics/MapServer/27'
sdf_block = get_fs_data_spatial(block_groups_url)
sdf_block = sdf_block.loc[(sdf_block['YEAR'] == 2020) & (sdf_block['GEOGRAPHY'] == 'Block Group')]
sdf_block.spatial.sr = sr

# vhr feature layer polygons 
vhr_url = 'https://maps.trpa.org/server/rest/services/VHR/MapServer/0'
sdf_vhr = get_fs_data_spatial(vhr_url)
sdf_vhr.spatial.sr = sr
# filter vhr layer to active status
sdf_vhr = sdf_vhr.loc[sdf_vhr['Status'] == 'Active']

# ACS 2022 bolock group table
census_url = 'https://maps.trpa.org/server/rest/services/Demographics/MapServer/28'
df_census = get_fs_data(census_url)
df_census_2022 = df_census.loc[(df_census['year_sample'] == 2022) & (df_census['sample_level'] == 'block group')]

# campground points feature layer
campground_url = 'https://maps.trpa.org/server/rest/services/Recreation/MapServer/1'
sdf_campground =  get_fs_data_spatial_query(campground_url, "RECREATION_TYPE='Campground'")

# campground visits table
campvisits_url = 'https://maps.trpa.org/server/rest/services/Transportation_Planning/MapServer/14'
dfCamp = get_fs_data_query(campvisits_url, "Year = 2022")

# occupancy zone feature layer polygons
occupancyzones_url = 'https://maps.trpa.org/server/rest/services/Transportation_Planning/MapServer/15'
sdf_occ = get_fs_data_spatial(occupancyzones_url)
sdf_occ.spatial.sr = sr

# occupancy rate table
occupancyrate_url = 'https://maps.trpa.org/server/rest/services/Transportation_Planning/MapServer/13'
df_occ = get_fs_data(occupancyrate_url)

# school enrollment table
school_url_table     = 'https://maps.trpa.org/server/rest/services/Transportation_Planning/MapServer/17'
df_school_enrollment = get_fs_data_query(school_url_table, "Year = '2021-2022'")

# school feature layer points
school_url_spatial    = 'https://maps.trpa.org/server/rest/services/Transportation_Planning/MapServer/16'
sdf_school            =  get_fs_data_spatial(school_url_spatial)
sdf_school.spatial.sr = sr

### Occupancy Rate Data Engineering

#### Fill in missing data
* Washoe Rooms Available, Washoe Reported Occupancy Rates, Washoe Quarterly VHRs added to Monthly Rows
* Rest of El Dorado County VHR zones need Rooms Available & Rooms Rented
    * Why are we using CSLT rate data instead of IDW interpolated data?
#### Weight the rates and calculate rooms rented per day

In [183]:
# make a copy of occupancy rates table source data
dfOcc = df_occ.copy()

# filter to columns 
columns = ['Zone_ID', 'Period', 'RoomType', 'Report_OccRate','TRPA_OccRate']

# dictinary to convert the time frames to make things cleaner
timeframe_dict = {
    '2022-06-01': 'June',
    '2022-08-01': 'August',
    '2022-09-01': 'September',
    'Q4 21-22'  : 'April-June',
    'Q1 22-23'  : 'July-September',
    'Q2 2022'   : 'April-June',
    'Q3 2022'   : 'July-September'
}

# Period field based on Timeframe and timeframe_dict
dfOcc['Period'] = dfOcc['Timeframe'].map(timeframe_dict)

## Fill in Missing Data for Washoe County ##

# get total WA taus and vhrs from the parcel layer
tauWA = sdfParcel.loc[(sdfParcel.COUNTY == 'WA'), 'TouristAccommodation_Units'].sum()
vhrWA = sdfParcel.loc[(sdfParcel.COUNTY == 'WA')&(sdfParcel.VHR == 'Yes'), 'APN'].count()

# Calculate Rooms available for HotelMotel, Casino, Resort in Washoe County using total TAUs from the parcel layer
dfOcc.loc[(dfOcc['Zone_ID'] == 'Washoe County') & (dfOcc['Temporal_Scale'] == 'Monthly') &
                (dfOcc.RoomType.isin(['HotelMotel', 'Casino', 'Resort'])) & (dfOcc['Period'].isin(['June', 'September'])), 
                'Report_RoomsAvailable'] = tauWA * 30
dfOcc.loc[(dfOcc['Zone_ID'] == 'Washoe County') & (dfOcc['Temporal_Scale'] == 'Monthly') &
              (dfOcc.RoomType.isin(['HotelMotel', 'Casino', 'Resort'])) & (dfOcc['Period'].isin(['July','August'])), 
              'Report_RoomsAvailable'] = tauWA * 31

# caclulate Rooms available for VHRs in Washoe County using total VHRs from the parcel layer
dfOcc.loc[(dfOcc['Zone_ID'] == 'Washoe County') & (dfOcc['Temporal_Scale'] == 'Monthly') & 
              (dfOcc.RoomType == 'VHR') & (dfOcc['Period'].isin(['July', 'August'])), 
              'Report_RoomsAvailable'] = vhrWA * 31
dfOcc.loc[(dfOcc['Zone_ID'] == 'Washoe County') & (dfOcc['Temporal_Scale'] == 'Monthly') & 
              (dfOcc.RoomType == 'VHR') & (dfOcc['Period'].isin(['June', 'September'])), 
              'Report_RoomsAvailable'] = vhrWA * 30

# if the Zone_ID is Washoe County and VHR and Timeframe is the Q3 or Q2 then /3 to get monthly rooms available for that quarter/row
waVHRZoneQ2 = dfOcc.loc[(dfOcc['Zone_ID'] == 'Washoe County') & (dfOcc.RoomType == 'VHR') & (dfOcc.Timeframe == 'Q2 2022')]
extraVHRQ2 = int((waVHRZoneQ2['Report_RoomsRented'] / 3).round(0).iloc[0])
waVHRZoneQ3 = dfOcc.loc[(dfOcc['Zone_ID'] == 'Washoe County') & (dfOcc.RoomType == 'VHR') & (dfOcc.Timeframe == 'Q3 2022')]
extraVHRQ3 = int((waVHRZoneQ3['Report_RoomsRented'] / 3).round(0).iloc[0])

# add the extra VHR rooms rented to the monthly rows that fall within that quarter Zone_ID is Washoe County and Temporal_Scale is Monthly
dfOcc.loc[(dfOcc['Zone_ID'] == 'Washoe County') & (dfOcc['Temporal_Scale'] == 'Monthly') 
              & (dfOcc.RoomType =='VHR') & (dfOcc.Timeframe == 'June'), 
              'Report_RoomsRented'] = dfOcc['Report_RoomsRented'] + extraVHRQ2
dfOcc.loc[(dfOcc['Zone_ID'] == 'Washoe County') & (dfOcc['Temporal_Scale'] == 'Monthly')
              & (dfOcc.RoomType =='VHR') & (dfOcc.Timeframe.isin(['July', 'August'])),
              'Report_RoomsRented'] = dfOcc['Report_RoomsRented'] + extraVHRQ3

# if the Zone_ID is Washoe County set Report_OccRate to Report_RoomsRented by Report _RoomsAvailable
dfOcc.loc[dfOcc['Zone_ID'] == 'Washoe County', 'Report_OccRate'] = dfOcc['Report_RoomsRented']/dfOcc['Report_RoomsAvailable']

## Fill in Missing Data for El Dorado County ##

# get total VHRs in El Dorado County from the parcel layer
vhrEL = sdfParcel.loc[(sdfParcel.JURISDICTION == 'EL') & (sdfParcel.VHR == 'Yes'), 'APN'].count()

# caclulate Rooms available for VHRs in El Dorado County using total VHRs from the parcel layer
dfOcc.loc[(dfOcc['Zone_ID'] == 'Rest of El Dorado County') & (dfOcc['Temporal_Scale'] == 'Monthly') & 
              (dfOcc.RoomType == 'VHR') & (dfOcc['Period'].isin(['July', 'August'])), 
              'Report_RoomsAvailable'] = vhrEL * 31
dfOcc.loc[(dfOcc['Zone_ID'] == 'Rest of El Dorado County') & (dfOcc['Temporal_Scale'] == 'Monthly') &
              (dfOcc.RoomType == 'VHR') & (dfOcc['Period'].isin(['June', 'September'])),
              'Report_RoomsAvailable'] = vhrEL * 30

# calculate Rooms Rented for VHRs in El Dorado County using Report_OccRate and Report_RoomsAvailable
dfOcc.loc[(dfOcc['Zone_ID'] == 'Rest of El Dorado County') & (dfOcc['Temporal_Scale'] == 'Monthly') & (dfOcc.RoomType == 'VHR'), 
              'Report_RoomsRented'] = (dfOcc['Report_OccRate'] * dfOcc['Report_RoomsAvailable']).fillna(0).astype(int)

## Calculate Weighted Average Occupancy Rate ##

# df copy
df = dfOcc.copy()

# Define the weights for each month based on the number of days they contribute
weights = {
    'June'          : 8/20,
    'August'        : 3/20,
    'September'     : 9/20,
    'April-June'    : 8/20,
    'July-September': 12/20
}

# calculate the weighted occupancy rates
for key,value in weights.items():
    # Apply weights to the occupancy rates
    df.loc[df['Period'] == key, 'TRPA_OccRate'] = df['Report_OccRate'] * value

# Calculate RoomsRentedPerDay based on the period
df['RoomsRentedPerDay'] = df.apply(lambda row: row['Report_RoomsRented'] / 30 if row['Period'] in ['June', 'September'] else
                                   (row['Report_RoomsRented'] / 31 if row['Period'] == 'August' else
                                    (row['Report_RoomsRented'] / 91 if row['Period'] == 'April-June' else
                                     (row['Report_RoomsRented'] / 92 if row['Period'] == 'July-September' else 0))), axis=1).fillna(0).astype(int)

# filter by Temporal_Scale
df_monthly   = df.loc[df['Temporal_Scale'] == 'Monthly']
df_quarterly = df.loc[df['Temporal_Scale'] == 'Quarterly']

# group by for montthly and quarterly and mean for Report_OccRate and sum for TRPA_OccRate
dfMonthly   = df_monthly.groupby(['Zone_ID', 'RoomType', 'Temporal_Scale']).agg({'RoomsRentedPerDay': 'mean','Report_RoomsAvailable':'sum',
                                                                                 'Report_RoomsRented':'sum', 'Report_OccRate': 'mean', 
                                                                                 'TRPA_OccRate': 'sum'}).reset_index()

dfQuarterly = df_quarterly.groupby(['Zone_ID', 'RoomType', 'Temporal_Scale']).agg({'RoomsRentedPerDay': 'mean','Report_RoomsAvailable':'sum',
                                                                                   'Report_RoomsRented':'sum', 'Report_OccRate': 'mean', 
                                                                                   'TRPA_OccRate': 'sum'}).reset_index()

# concat the two dataframes into the final occupancy rate dataframe
dfOccFinal = pd.concat([dfMonthly, dfQuarterly]).reset_index(drop=True)

# cast RoomsRentedPerDay as int 
dfOccFinal['RoomsRentedPerDay'] = dfOccFinal['RoomsRentedPerDay'].astype(int)
# drop rows where Zone_ID is Washoe County and Temporal_Scale is Quarterly
df = dfOccFinal.loc[~((dfOccFinal['Zone_ID'] == 'Washoe County') & (dfOccFinal['Temporal_Scale'] == 'Quarterly'))].reset_index(drop=True)
df.info()
# save to pickle
df.to_pickle(out_dir / 'occupancy_rates.pkl')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27 entries, 0 to 26
Data columns (total 8 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Zone_ID                27 non-null     object 
 1   RoomType               27 non-null     object 
 2   Temporal_Scale         27 non-null     object 
 3   RoomsRentedPerDay      27 non-null     int32  
 4   Report_RoomsAvailable  27 non-null     int64  
 5   Report_RoomsRented     27 non-null     int64  
 6   Report_OccRate         27 non-null     float64
 7   TRPA_OccRate           27 non-null     float64
dtypes: float64(2), int32(1), int64(2), object(3)
memory usage: 1.7+ KB


In [184]:
df

Unnamed: 0,Zone_ID,RoomType,Temporal_Scale,RoomsRentedPerDay,Report_RoomsAvailable,Report_RoomsRented,Report_OccRate,TRPA_OccRate
0,CSLT_ALL,VHR,Monthly,169,28695,15457,0.538273,0.5258
1,CSLT_Zone1,HotelMotel,Monthly,1386,320069,126244,0.394281,0.387592
2,CSLT_Zone2,HotelMotel,Monthly,433,110877,39481,0.357057,0.347694
3,CSLT_Zone3,HotelMotel,Monthly,205,48412,18705,0.385772,0.369246
4,CSLT_Zone4,HotelMotel,Monthly,62,35887,5746,0.16012,0.16099
5,CSLT_Zone5,HotelMotel,Monthly,49,22113,4545,0.205531,0.203785
6,Rest of Douglas County,HotelMotel,Monthly,481,189918,44173,0.788152,0.780477
7,Rest of Douglas County,VHR,Monthly,82,21485,7586,0.446855,0.429239
8,Rest of El Dorado County,VHR,Monthly,379,64246,34601,0.538273,0.5258
9,Stateline Casino Core,Casino,Monthly,1731,199937,157602,0.788152,0.780477


### Occupancy Rate Data Engineering

#### Fill in missing data
* Washoe Rooms Available, Washoe Reported Occupancy Rates, Washoe Quarterly VHRs added to Monthly Rows
* Rest of El Dorado County VHR zones need Rooms Available & Rooms Rented
    * Why are we using CSLT rate data instead of IDW interpolated data?
#### Weight the rates and calculate rooms rented per day

In [None]:
# make a copy of occupancy rates table source data
dfOccCopy = df_occ.copy()

# filter to columns 
columns = ['Zone_ID', 'Period', 'RoomType', 'Report_OccRate','TRPA_OccRate']

# dictinary to convert the time frames to make things cleaner
timeframe_dict = {
    '2022-06-01': 'June',
    '2022-08-01': 'August',
    '2022-09-01': 'September',
    'Q4 21-22'  : 'April-June',
    'Q1 22-23'  : 'July-September',
    'Q2 2022'   : 'April-June',
    'Q3 2022'   : 'July-September'
}

# Period field based on Timeframe and timeframe_dict
dfOccCopy['Period'] = dfOccCopy['Timeframe'].map(timeframe_dict)

## Fill in Missing Data for Washoe County ##
# get total WA taus and vhrs from the parcel layer
tauWA = sdfParcel.loc[(sdfParcel.COUNTY == 'WA'), 'TouristAccommodation_Units'].sum()
vhrWA = sdfParcel.loc[(sdfParcel.COUNTY == 'WA')&(sdfParcel.VHR == 'Yes'), 'APN'].count()

# Calculate Rooms available for HotelMotel, Casino, Resort in Washoe County using total TAUs from the parcel layer
dfOccCopy.loc[(dfOccCopy['Zone_ID'] == 'Washoe County') & (dfOccCopy['Temporal_Scale'] == 'Monthly') &
                (dfOccCopy.RoomType.isin(['HotelMotel', 'Casino', 'Resort'])) & (dfOccCopy['Period'].isin(['June', 'September'])), 
                'Report_RoomsAvailable'] = tauWA * 30
dfOccCopy.loc[(dfOccCopy['Zone_ID'] == 'Washoe County') & (dfOccCopy['Temporal_Scale'] == 'Monthly') &
              (dfOccCopy.RoomType.isin(['HotelMotel', 'Casino', 'Resort'])) & (dfOccCopy['Period'].isin(['July','August'])), 
              'Report_RoomsAvailable'] = tauWA * 31

# caclulate Rooms available for VHRs in Washoe County using total VHRs from the parcel layer
dfOccCopy.loc[(dfOccCopy['Zone_ID'] == 'Washoe County') & (dfOccCopy['Temporal_Scale'] == 'Monthly') & 
              (dfOccCopy.RoomType == 'VHR') & (dfOccCopy['Period'].isin(['July', 'August'])), 
              'Report_RoomsAvailable'] = vhrWA * 31
dfOccCopy.loc[(dfOccCopy['Zone_ID'] == 'Washoe County') & (dfOccCopy['Temporal_Scale'] == 'Monthly') & 
              (dfOccCopy.RoomType == 'VHR')& (dfOccCopy['Period'].isin(['June', 'September'])), 
              'Report_RoomsAvailable'] = vhrWA * 30

# if the Zone_ID is Washoe County and VHR and Timeframe is the Q3 or Q2 then /3 to get monthly rooms available for that quarter/row
waVHRZoneQ2 = dfOccCopy.loc[(dfOccCopy['Zone_ID'] == 'Washoe County') & (dfOccCopy.RoomType == 'VHR') & (dfOccCopy.Timeframe == 'Q2 2022')]
extraVHRQ2 = int((waVHRZoneQ2['Report_RoomsRented'] / 3).round(0).iloc[0])
waVHRZoneQ3 = dfOccCopy.loc[(dfOccCopy['Zone_ID'] == 'Washoe County') & (dfOccCopy.RoomType == 'VHR') & (dfOccCopy.Timeframe == 'Q3 2022')]
extraVHRQ3 = int((waVHRZoneQ3['Report_RoomsRented'] / 3).round(0).iloc[0])

# add the extra VHR rooms rented to the monthly rows that fall within that quarter Zone_ID is Washoe County and Temporal_Scale is Monthly
dfOccCopy.loc[(dfOccCopy['Zone_ID'] == 'Washoe County') & (dfOccCopy['Temporal_Scale'] == 'Monthly') 
              & (dfOccCopy.RoomType =='VHR') & (dfOccCopy.Timeframe == 'June'), 
              'Report_RoomsRented'] = dfOccCopy['Report_RoomsRented'] + extraVHRQ2
dfOccCopy.loc[(dfOccCopy['Zone_ID'] == 'Washoe County') & (dfOccCopy['Temporal_Scale'] == 'Monthly')
              & (dfOccCopy.RoomType =='VHR') & (dfOccCopy.Timeframe.isin(['July', 'August'])),
              'Report_RoomsRented'] = dfOccCopy['Report_RoomsRented'] + extraVHRQ3

# drop row where the Zone_ID is Washoe County and Temporal_Scale is Quarterly
dfOccCopy = dfOccCopy.loc[~((dfOccCopy['Zone_ID'] == 'Washoe County') & (dfOccCopy['Temporal_Scale'] == 'Quarterly'))]

# if the Zone_ID is Washoe County set Report_OccRate to Report_RoomsRented by Report _RoomsAvailable
dfOccCopy.loc[dfOccCopy['Zone_ID'] == 'Washoe County', 'Report_OccRate'] = dfOccCopy['Report_RoomsRented']/dfOccCopy['Report_RoomsAvailable']

## Fill in Missing Data for El Dorado County ##
# get total VHRs in El Dorado County from the parcel layer
vhrEL = sdfParcel.loc[(sdfParcel.JURISDICTION == 'EL') & (sdfParcel.VHR == 'Yes'), 'APN'].count()

# caclulate Rooms available for VHRs in El Dorado County using total VHRs from the parcel layer
dfOccCopy.loc[(dfOccCopy['Zone_ID'] == 'Rest of El Dorado County') & (dfOccCopy['Temporal_Scale'] == 'Monthly') & 
              (dfOccCopy.RoomType == 'VHR') & (dfOccCopy['Period'].isin(['July', 'August'])), 
              'Report_RoomsAvailable'] = vhrEL * 31
dfOccCopy.loc[(dfOccCopy['Zone_ID'] == 'Rest of El Dorado County') & (dfOccCopy['Temporal_Scale'] == 'Monthly') &
              (dfOccCopy.RoomType == 'VHR') & (dfOccCopy['Period'].isin(['June', 'September'])),
              'Report_RoomsAvailable'] = vhrEL * 30

# calculate Rooms Rented for VHRs in El Dorado County using Report_OccRate and Report_RoomsAvailable
dfOccCopy.loc[(dfOccCopy['Zone_ID'] == 'Rest of El Dorado County') & (dfOccCopy['Temporal_Scale'] == 'Monthly') & 
              (dfOccCopy.RoomType == 'VHR'), 'Report_RoomsRented'] = (dfOccCopy['Report_OccRate'] * dfOccCopy['Report_RoomsAvailable']).astype(int)

In [None]:
# df
df = dfOccCopy.copy()

# filter to columns 
columns = ['Zone_ID', 'Period', 'RoomType', 'Report_OccRate','TRPA_OccRate']

# Define the weights for each month based on the number of days they contribute
weights = {
    'June'          : 8/20,
    'August'        : 3/20,
    'September'     : 9/20,
    'April-June'    : 8/20,
    'July-September': 12/20
}

# calculate the weighted occupancy rates
for key,value in weights.items():
    # Apply weights to the occupancy rates
    df.loc[df['Period'] == key, 'TRPA_OccRate'] = df['Report_OccRate'] * value

# Calculate RoomsRentedPerDay based on the period
df['RoomsRentedPerDay'] = df.apply(lambda row: row['Report_RoomsRented'] / 30 if row['Period'] in ['June', 'September'] else
                                   (row['Report_RoomsRented'] / 31 if row['Period'] == 'August' else
                                    (row['Report_RoomsRented'] / 91 if row['Period'] == 'April-June' else
                                     (row['Report_RoomsRented'] / 92 if row['Period'] == 'July-September' else 0))), axis=1).fillna(0).astype(int)

# filter by Temporal_Scale
df_monthly   = df.loc[df['Temporal_Scale'] == 'Monthly']
df_quarterly = df.loc[df['Temporal_Scale'] == 'Quarterly']

# group by for montthly and quarterly and mean for Report_OccRate and sum for TRPA_OccRate
dfMonthly   = df_monthly.groupby(['Zone_ID', 'RoomType', 'Temporal_Scale']).agg({'RoomsRentedPerDay': 'mean','Report_RoomsAvailable':'sum',
                                                                                 'Report_RoomsRented':'sum', 'Report_OccRate': 'mean', 
                                                                                 'TRPA_OccRate': 'sum'}).reset_index()

dfQuarterly = df_quarterly.groupby(['Zone_ID', 'RoomType', 'Temporal_Scale']).agg({'RoomsRentedPerDay': 'mean','Report_RoomsAvailable':'sum',
                                                                                   'Report_RoomsRented':'sum', 'Report_OccRate': 'mean', 
                                                                                   'TRPA_OccRate': 'sum'}).reset_index()

# concat the two dataframes into the final occupancy rate dataframe
dfOccFinal = pd.concat([dfMonthly, dfQuarterly]).reset_index(drop=True)

# cast RoomsRentedPerDay as int 
dfOccFinal['RoomsRentedPerDay'] = dfOccFinal['RoomsRentedPerDay'].astype(int)

# save to pickle
dfOccFinal.to_pickle(out_dir / 'occupancy_rates.pkl')

### Part1

> general spatial joins and categorization

In [None]:
# spatial join to get TAZ
arcpy.SpatialJoin_analysis(sdf_units, sdf_taz, "Existing_Development_TAZ", 
                           "JOIN_ONE_TO_ONE", "KEEP_ALL", "", "HAVE_THEIR_CENTER_IN")
# spatial join to get Block Group
arcpy.SpatialJoin_analysis(sdf_units, sdf_block, "Existing_Development_BlockGroup", 
                           "JOIN_ONE_TO_ONE", "KEEP_ALL", "", "HAVE_THEIR_CENTER_IN")
# spatail join to get Occupancy Rate Zone
sdf_occ = sdf_occ.loc[sdf_occ['OccupancyRate_ZoneID'] != 'CSLT_ALL']
arcpy.SpatialJoin_analysis(sdf_units, sdf_occ, "Existing_Development_OccupancyZone", 
                           "JOIN_ONE_TO_ONE", "KEEP_ALL", "", "HAVE_THEIR_CENTER_IN")


In [None]:
# List of Parcels APN with TAU Types
tau_lookup = pd.read_csv('Lookup_Lists/lookup_tau_type.csv')
#sro_lookup = pd.read_csv('Lookup_Lists/lookup_sro.csv')

# check if fields exist in the dataframes
sdfParcel   = check_field(sdf_units, final_schema)

# merge parcel 2022 with parcel VHR
sdfParcel = sdfParcel.merge(sdf_vhr, on='APN', how='left', indicator=True)

# calculate VHR = Yes if VHR is in the parcel
sdfParcel['VHR'] = 'No'
sdfParcel.loc[sdfParcel['_merge'] == 'both', 'VHR'] = 'Yes'

# setup TAU_Type
sdfParcel['TAU_TYPE'] = 'N/A'

# filter parcels so only APNs in the lookup are included
sdfTAU = sdfParcel[sdfParcel['APN'].isin(tau_lookup['APN'])]
# get TAU_Type from lookup
sdfTAU['TAU_TYPE'] = sdfTAU['APN'].map(tau_lookup.set_index('APN')['TAU_Type'])

# any row with ToursitAccommodation_Units > 0 and TAU_Type is null, set TAU_Type to 'HotelMotel'
sdfParcel.loc[(sdfParcel['TouristAccommodation_Units'] > 0) & (sdfParcel['TAU_TYPE']=='N/A'), 'TAU_TYPE'] = 'HotelMotel'
# for the rows in df that match rows by APN in dfTAU set TAU_Type to the value in dfTAU
sdfParcel.loc[sdfParcel['APN'].isin(sdfTAU['APN']), 'TAU_TYPE'] = sdfTAU['TAU_TYPE']

# remove _x from column names
sdfParcel.columns = sdfParcel.columns.str.replace('_x', '')

# get results of spatial joins as spatial dataframes
sdf_units_taz   = pd.DataFrame.spatial.from_featureclass("Existing_Development_TAZ", sr=sr)  
sdf_units_block = pd.DataFrame.spatial.from_featureclass("Existing_Development_BlockGroup", sr=sr)
sdf_units_occ   = pd.DataFrame.spatial.from_featureclass("Existing_Development_OccupancyZone", sr=sr)

# map dictionary to sdf_units dataframe to fill in TAZ and Block Group fields
sdfParcel['TAZ']           = sdfParcel.APN.map(dict(zip(sdf_units_taz.APN,   sdf_units_taz.TAZ)))
sdfParcel['BLOCK_GROUP']   = sdfParcel.APN.map(dict(zip(sdf_units_block.APN, sdf_units_block.TRPAID)))
sdfParcel['OCCUPANCY_ZONE']= sdfParcel.APN.map(dict(zip(sdf_units_occ.APN,   sdf_units_occ.OccupancyRate_ZoneID)))

# if df.JURISDICTION == "CSLT" and VHR == "Yes" then set OCCUPANCY_ZONE to "CSLT_ALL"
sdfParcel.loc[(sdfParcel['JURISDICTION'] == 'CSLT') & (sdfParcel['VHR'] == 'Yes'), 'OCCUPANCY_ZONE'] = 'CSLT_ALL'

# columns to keep
sdfParcel = sdfParcel[final_schema]

In [None]:
# pickle and save to feature class
outfc = 'sdf_units_attributed'
# export to feature class
sdfParcel.spatial.to_featureclass(location=os.path.join(gdb, outfc), sanitize_columns=False)
# export to pickle
sdfParcel.to_pickle(sdfParcel_pickle_part1)

### Part 2


* Spatial join to apply Lodging Occupany Rates to parcel layer, 
* select parcels where Lodging Occupancy Rate is Null, 
* run interpolation, 
* apply interpoleted values to parcels where occupancy rate is null

In [None]:
# read in the pickled parcel dataframe
sdfParcel = pd.read_pickle(sdfParcel_pickle_part1)
# read in the pickled occupancy rates table
dfOcc = pd.read_pickle(occupancy_rates_pickle)

> Filter Occupancy Rate table to Timeframe and Room Type, Merge with Occupancy Zone Feature Class, and Export to Feature Class

In [None]:

# filter occupancy rate table by RoomType
dfOccTAU = dfOcc.loc[dfOcc['RoomType'].isin(['HotelMotel', 'Casino', 'Resort'])]    
dfOccVHR = dfOcc.loc[dfOcc['RoomType'] == 'VHR']
# specify the output feature classes
tau_occ_zones = os.path.join(gdb,'OccupancyRate_Zones_TAU')
vhr_occ_zones = os.path.join(gdb,'OccupancyRate_Zones_VHR')
# merge occupancy rate data to occupancy zones
sdfTAU= pd.merge(sdf_occ, dfOccTAU, left_on='OccupancyRate_ZoneID', right_on='Zone_ID', how='left')
# export sdf to feature class
sdfTAU.spatial.to_featureclass(location=tau_occ_zones, overwrite=True)
# merge occupancy rate data to occupancy zones
sdfVHR = pd.merge(sdf_occ, dfOccVHR, left_on='OccupancyRate_ZoneID', right_on='Zone_ID', how='left')
# export sdf to feature class put in memory workspace
sdfVHR.spatial.to_featureclass(location=vhr_occ_zones, overwrite=True)

> Join TAU and VHR parcels to occupancy rate zones

In [None]:
# filter rows where VHR = Yes and rows where TouristAccommodation_Units > 0
sdfTAU = sdfParcel.loc[sdfParcel['TouristAccommodation_Units'] > 0]
sdfVHR = sdfParcel.loc[sdfParcel['VHR'] == 'Yes']
# specify the output feature classes
tau_occ_zones = os.path.join('Workspace.gdb','OccupancyRate_Zones_TAU')
vhr_occ_zones = os.path.join('Workspace.gdb','OccupancyRate_Zones_VHR')

# spatial join TAU to occupancy rate zones with TAU values
spjoin_tau = arcpy.analysis.SpatialJoin(sdfTAU, tau_occ_zones, 'OccupancyRate_Zones_TAU_Parcels', 
                                        "JOIN_ONE_TO_ONE", "KEEP_ALL", None, "INTERSECT", None, None)
# spatial join VHR to occupancy rate zones with VHR values
spjoin_vhr = arcpy.analysis.SpatialJoin(sdfVHR, vhr_occ_zones, 'OccupancyRate_Zones_VHR_Parcels', 
                                        "JOIN_ONE_TO_ONE", "KEEP_ALL", None, "INTERSECT", None, None)

# get results of spatial joins as spatial dataframes
sdf_parcel_tau_rates   = pd.DataFrame.spatial.from_featureclass("OccupancyRate_Zones_TAU_Parcels", sr=sr)  
sdf_parcel_vhr_rates = pd.DataFrame.spatial.from_featureclass("OccupancyRate_Zones_VHR_Parcels", sr=sr)

# map dictionary for TAU and VHR parcels respectively
sdfParcel['VHR_Lodging_Occupancy_Rate']   = sdfVHR.APN.map(dict(zip(sdf_parcel_vhr_rates.APN,   sdf_parcel_vhr_rates.trpa_occ_rate)))
sdfParcel['TAU_Lodging_Occupancy_Rate']   = sdfTAU.APN.map(dict(zip(sdf_parcel_tau_rates.APN,   sdf_parcel_tau_rates.trpa_occ_rate)))

# cast VHR_Lodging_Occupancy_Rate and TAU_Lodging_Occupancy_Rate as float and fill na as 0
sdfParcel['VHR_Lodging_Occupancy_Rate'] = sdfParcel['VHR_Lodging_Occupancy_Rate'].fillna(0).astype(float)
sdfParcel['TAU_Lodging_Occupancy_Rate'] = sdfParcel['TAU_Lodging_Occupancy_Rate'].fillna(0).astype(float)

In [None]:
# pickle and save to feature class
outfc = 'sdf_units_attributed_occupancy'
# export to feature class
sdfParcel.spatial.to_featureclass(location=os.path.join(gdb, outfc), sanitize_columns=False)
# export to pickle
sdfParcel.to_pickle(sdfParcel_pickle_part2)

### Part 3

> Fill in parcel level missing occupancy rates with interpolated values

In [None]:
# from pickle
sdfParcel = pd.read_pickle(sdfParcel_pickle_part2)
sdfParcel.info()

> Generate Spatial Interpolated Occupancy Rate Surfaces

In [None]:
# Set the extent environment using a feature class
arcpy.env.extent = os.path.join(gdb,"OccupancyRate_Zones_TAU")
# set the input feature class
tau_fc = os.path.join(gdb,'TAU_points')
vhr_fc = os.path.join(gdb,'VHR_points')
# set the output raster
tau_raster = os.path.join(gdb,'tau_occupancy_rate')
vhr_raster = os.path.join(gdb,'vhr_occupancy_rate')
# set the output cell size
cell_size = 30
# set the power parameter
power = 2
# set the search radius
search_radius = 10000

# select rows where TAU_TYPE is not null but TAU_Occupancy_Rate is null
tauParcel_NULLocc = sdfParcel.loc[(sdfParcel['TAU_TYPE'].isin(['HotelMotel','Casino','Resort'])) & (sdfParcel['TAU_Lodging_Occupancy_Rate']==0)]
tauParcel_NULLocc.spatial.to_featureclass(location=os.path.join(gdb,"TAU_NULL_occ"), overwrite=True)
vhrParcel_NULLocc = sdfParcel.loc[(sdfParcel['VHR'] == 'Yes') & (sdfParcel['VHR_Lodging_Occupancy_Rate']==0)] 
vhrParcel_NULLocc.spatial.to_featureclass(location=os.path.join(gdb,"VHR_NULL_occ"), overwrite=True)

# get not null parcels for TAU and VHR
tauParcel_notNULL = sdfParcel.loc[(sdfParcel['TAU_TYPE'].isin(['HotelMotel','Casino','Resort'])) & (sdfParcel['TAU_Lodging_Occupancy_Rate']!=0)]
tauParcel_notNULL.spatial.to_featureclass(location=os.path.join(gdb,"TAU_occ"), overwrite=True)
vhrParcel_notNULL = sdfParcel.loc[(sdfParcel['VHR'] == 'Yes') & (sdfParcel['VHR_Lodging_Occupancy_Rate']!=0)]
vhrParcel_notNULL.spatial.to_featureclass(location=os.path.join(gdb,"VHR_occ"), overwrite=True)

# feature to point for TAU and VHR
arcpy.management.FeatureToPoint(tauParcel_NULLocc, os.path.join(gdb,'TAU_NULL_points'),"INSIDE")
arcpy.management.FeatureToPoint(vhrParcel_NULLocc, os.path.join(gdb, 'VHR_NULL_points'), "INSIDE")
arcpy.management.FeatureToPoint(tauParcel_notNULL, os.path.join(gdb,'TAU_points'), "INSIDE")
arcpy.management.FeatureToPoint(vhrParcel_notNULL, os.path.join(gdb,'VHR_points'), "INSIDE")

# run the IDW for TAU parcels with rates
arcpy.sa.Idw(tau_fc, 
            z_field='TAU_Lodging_Occupancy_Rate', 
            cell_size=cell_size, 
            power=power, 
            search_radius=search_radius).save(tau_raster)
# and for VHR parcels with rates
arcpy.sa.Idw(vhr_fc,
            z_field='VHR_Lodging_Occupancy_Rate',
            cell_size=cell_size,
            power=power,
            search_radius=search_radius).save(vhr_raster)

# Set the local variables for ZonalStatisticsAsTable
zoneField = "APN"
tauZoneData = os.path.join(gdb, 'TAU_NULL_occ')
vhrZoneData = os.path.join(gdb, 'VHR_NULL_occ')
tauValueRaster = os.path.join(gdb,'tau_occupancy_rate')
vhrValueRaster = os.path.join(gdb,'vhr_occupancy_rate')
tauTable = os.path.join(gdb, "zonalstat_TAU_Occupancy")
vhrTable = os.path.join(gdb, "zonalstat_VHR_Occupancy")

# Execute ZonalStatisticsAsTable
tauZSaT = arcpy.sa.ZonalStatisticsAsTable(tauZoneData, zoneField, tauValueRaster, 
                                            tauTable, "DATA", "MEAN")
vhrZSaT = arcpy.sa.ZonalStatisticsAsTable(vhrZoneData, zoneField, vhrValueRaster,
                                            vhrTable, "DATA", "MEAN")

# convert to dataframe
tauZonalStats = arcpy.da.TableToNumPyArray(tauZSaT, '*')
vhrZonalStats = arcpy.da.TableToNumPyArray(vhrZSaT, '*')
dfTAU = pd.DataFrame(tauZonalStats)
dfVHR = pd.DataFrame(vhrZonalStats)

# Create a temporary column with the new mapped values
sdfParcel['New_TAU_Lodging_Occupancy_Rate'] = sdfParcel['APN'].map(dict(zip(dfTAU['apn'], dfTAU['MEAN'])))
sdfParcel['New_VHR_Lodging_Occupancy_Rate'] = sdfParcel['APN'].map(dict(zip(dfVHR['apn'], dfVHR['MEAN'])))

# Combine the new column with the existing column, preserving existing values where the new values are NaN
sdfParcel['TAU_Lodging_Occupancy_Rate'] = sdfParcel['New_TAU_Lodging_Occupancy_Rate'].combine_first(sdfParcel['TAU_Lodging_Occupancy_Rate'])
sdfParcel['VHR_Lodging_Occupancy_Rate'] = sdfParcel['New_VHR_Lodging_Occupancy_Rate'].combine_first(sdfParcel['VHR_Lodging_Occupancy_Rate'])

# Drop the temporary column
sdfParcel.drop(columns=['New_TAU_Lodging_Occupancy_Rate'], inplace=True)
sdfParcel.drop(columns=['New_VHR_Lodging_Occupancy_Rate'], inplace=True)

In [None]:
### Why isnt the zonal stats working for these parcels?? ###
# # are there any sdfParcel rows where TAU_Lodging_Occupancy_Rate is 0 and ToursitAccommodation_Units > 0
# sdfParcel.loc[(sdfParcel['TAU_Lodging_Occupancy_Rate'] == 0) & (sdfParcel['TouristAccommodation_Units'] > 0)]
# those APNs to list
apn_list = sdfParcel.loc[(sdfParcel['TAU_Lodging_Occupancy_Rate'] == 0) & (sdfParcel['TouristAccommodation_Units'] > 0)]['APN'].tolist()
# classify the occupancy rates for those parcels
sdfParcel.loc[sdfParcel['APN'].isin(apn_list), 'TAU_Lodging_Occupancy_Rate'] = 0.592337

In [None]:
vhr_apn_list = sdfParcel.loc[(sdfParcel['VHR_Lodging_Occupancy_Rate'] == 0) & (sdfParcel['VHR'] == 'Yes')]['APN'].tolist()
# sdfParcel.loc[sdfParcel['APN'].isin(vhr_apn_list), 'VHR_Lodging_Occupancy_Rate'] = 0.592337

In [None]:
# pickle and save to feature class
outfc = 'sdf_units_attributed_occupancy_interpolated'
# export to feature class
sdfParcel.spatial.to_featureclass(location=os.path.join(gdb, outfc), sanitize_columns=False)
# export to pickle
sdfParcel.to_pickle(sdfParcel_pickle_part3)

#### Campgrounds - seperate deal...

> Campground Occupancy

In [135]:
# merge campground data with occupancy rate data on campground name
dfCampOcc = sdf_campground.merge(dfCamp, left_on='RECREATION_NAME', right_on='Campground', 
                                      how='left', indicator=True)

# spatial join TAZ data to campground data
arcpy.SpatialJoin_analysis(dfCampOcc, sdf_taz, 'taz_campground', 
                           'JOIN_ONE_TO_ONE', 'KEEP_ALL', 
                           match_option='HAVE_THEIR_CENTER_IN')

# read in output of spatial join as sdf
sdf_campground_taz = pd.DataFrame.spatial.from_featureclass('taz_campground')

# get sites sold by multiplying the number of sites by the occupancy rate
sdf_campground_taz['SitesSold'] = sdf_campground_taz['Total_Sites'] * sdf_campground_taz['Occupancy_Rate']

# group by TAZ and sum of sites sold within TAZ
sdf_campground_taz_grouped = sdf_campground_taz.groupby('TAZ').agg(
                                                {'SitesSold': 'sum'}).reset_index()

# sdf_campground to pickle
sdf_campground_taz_grouped.to_pickle(out_dir / 'taz_campground_occupancy.pkl')


In [150]:
# list of campground names
campground_names1 = sdf_campground['RECREATION_NAME'].tolist()
campground_names2 = dfCamp['Campground'].tolist()

# check for names missing in one list
print(set(campground_names2).difference(campground_names1))

set()


In [153]:
# coutns of campgrounds in each list
print(len(campground_names1))
print(len(campground_names2))
# check if unique
print(len(set(campground_names1)))
print(len(set(campground_names2)))


18
20
18
18


In [165]:
dfCamp

Unnamed: 0,OBJECTID,Campground,Month,Year,Land_Owner,Total_Sites,Sites_Sold,Visitation_Total,Occupancy_Rate,Observed_Estimate,Data_Source,Notes
0,1,Lake Forest Campground,Model Day,2022,TCPUD,21,210.0,241.0,0.213,Observed,TCPUD,"Observed only model days in June, Aug, Sep"
1,2,Zephyr Cove RV Campground,June,2022,USFS/Private,140,3165.0,8738.0,0.703333,Observed,USFS,Unscaled
2,3,Zephyr Cove RV Campground,August,2022,USFS/Private,140,3618.0,9588.0,0.778065,Observed,USFS,Unscaled
3,4,Zephyr Cove RV Campground,September,2022,USFS/Private,140,2580.0,6559.0,0.573333,Observed,USFS,Unscaled
4,5,Nevada Beach Campground,Model Day,2022,USFS,52,1943.0,2311.0,0.88,Estimate from 2018,,Used 2018 value
5,6,Tahoe Valley Campground,Model Day,2022,Private,400,,,0.48,Estimate from 2018,,Used 2018 value
6,7,Fallen Leaf Campground,Model Day,2022,USFS,196,8348.0,11892.0,0.79,Estimate from 2018,,Used 2018 value
7,8,Camp Richardson Campground,Model Day,2022,USFS/Private,330,,,0.88,Estimate from 2018,,Used 2018 value
8,9,Camp Shelley,Model Day,2022,USFS/Private,25,,,0.79,Estimate from 2018,,Used 2018 value
9,10,Eagle Point Campground,Model Day,2022,CA SP,96,,13410.0,0.66,Estimate from 2018,,Used 2018 value


In [164]:
# get rows where the campground name is Zephyr Cove Resort RV Campground
dfCamp.loc[dfCamp['Campground'] == 'Zephyr Cove RV Campground']


Unnamed: 0,OBJECTID,Campground,Month,Year,Land_Owner,Total_Sites,Sites_Sold,Visitation_Total,Occupancy_Rate,Observed_Estimate,Data_Source,Notes
1,2,Zephyr Cove RV Campground,June,2022,USFS/Private,140,3165.0,8738.0,0.703333,Observed,USFS,Unscaled
2,3,Zephyr Cove RV Campground,August,2022,USFS/Private,140,3618.0,9588.0,0.778065,Observed,USFS,Unscaled
3,4,Zephyr Cove RV Campground,September,2022,USFS/Private,140,2580.0,6559.0,0.573333,Observed,USFS,Unscaled


In [163]:
dfCamp.Campground.value_counts()

Campground
Zephyr Cove RV Campground            3
Lake Forest Campground               1
Emerald Bay Boat Camp                1
Campground By The Lake               1
Tahoe State Recreation Area          1
William Kent Campground              1
Kaspian Campground                   1
General Creek Campground             1
Meeks Bay Campground                 1
D.L. Bliss Lower Pines Campground    1
Bayview Campground                   1
Eagle Point Campground               1
Camp Shelley                         1
Camp Richardson Campground           1
Fallen Leaf Campground               1
Tahoe Valley Campground              1
Nevada Beach Campground              1
Luther Pass Campground               1
Name: count, dtype: int64

In [152]:
# which campgrounds are in the occupancy rate table but not in the campground table
campgrounds = set(campground_names1).difference(campground_names2)
print(campgrounds)

set()


In [138]:
dfCampOcc.describe()

Unnamed: 0,OBJECTID_x,created_date,last_edited_date,OBJECTID_y,Year,Total_Sites,Sites_Sold,Visitation_Total,Occupancy_Rate
count,20.0,0,0,20.0,20.0,20.0,10.0,14.0,20.0
mean,127.35,NaT,NaT,10.5,2022.0,112.2,2482.6,6512.714286,0.578137
min,14.0,NaT,NaT,1.0,2022.0,0.0,0.0,0.0,0.0
25%,84.0,NaT,NaT,5.75,2022.0,22.5,661.0,2420.0,0.48
50%,152.5,NaT,NaT,10.5,2022.0,87.5,2261.5,5531.0,0.595
75%,167.25,NaT,NaT,15.25,2022.0,167.5,3121.75,9375.5,0.742016
max,189.0,NaT,NaT,20.0,2022.0,400.0,8348.0,20471.0,0.88
std,52.75891,,,5.91608,0.0,109.047358,2435.188708,5882.760124,0.233805


In [136]:
sdf_campground_taz_grouped

Unnamed: 0,TAZ,SitesSold
0,20,124.6
1,80,192.0
2,87,7.2
3,104,290.4
4,106,154.84
5,113,19.75
6,116,74.96
7,117,0.0
8,118,39.6
9,125,29.2


> Interpolation to fill nan if neccesary

In [None]:
# merge campground data with occupancy rate data on campground name
dfCampOcc = sdf_campground.merge(dfCamp, left_on='RECREATION_NAME', right_on='Campground', 
                                      how='left', indicator=True)

# keep only columns of interest
sdf_campground = sdf_campground[['RECREATION_NAME', 'Occupancy_Rate','SHAPE']]

# filter sdf_campground to only campgrounds with occupancy rate data
sdf_campground = sdf_campground[sdf_campground['Occupancy_Rate'].notnull()]

# IDW to get the occupancy rate for each campground
# set the output cell size
cell_size = 500
# set the power parameter
power = 2
# set the search radius
search_radius = 5000
# set the output raster
out_raster = 'campground_occupancy_rate'
# run the IDW
arcpy.sa.Idw(in_features=sdf_campground, 
             z_field='Occupancy_Rate', 
             cell_size=cell_size, 
             power=power, 
             search_radius=search_radius).save(out_raster)

# spatial join to campground points with NaN occupancy rate
sdf_campground_nan = sdf_campground[sdf_campground['Occupancy_Rate'].isnull()]
# spatial join to campground points with NaN occupancy rate

In [None]:
sdf_campground_nan

In [None]:
# apply weighting by model days (same as occupancy zone rate weighting)

## Overnight Visitation

In [None]:
sdf_units.groupby('TAZ','TAU_TYPE').agg({'TouristAccommodation_Units': 'sum'}).reset_index()

In [None]:
# TAUs 6190 were rented on model day....2,200 VHRs were rented on model day

# get the number of occupied units by multiplying the number of units by the occupancy rate

# multiply the number of TAUs by the occupancy rate then add up by zone? or by TAZ?

# 

In [None]:
# percentHouseSeasonal


## School Enrollment

In [None]:
# set Type to Null
sdf_school['TYPE'] = None
# set SchoolType to 'elementary' if it contains 'elementary' or 'magnet' or 'academy'
sdf_school.loc[sdf_school['NAME'].str.contains('elementary', case=False), 'TYPE'] = 'Elementary School'
# set SchoolType to 'middle' if it contains 'middle'
sdf_school.loc[sdf_school['NAME'].str.contains('middle', case=False), 'TYPE'] = 'Middle School'
# set SchoolType to 'high' if it contains 'high'
sdf_school.loc[sdf_school['NAME'].str.contains('high', case=False), 'TYPE'] = 'High School'
# set SchoolType to 'college' if it contains 'college'
sdf_school.loc[sdf_school['NAME'].str.contains('college', case=False), 'TYPE'] = 'College'
# set SchoolType to 'other' if it it does not contain any of the above
sdf_school.loc[sdf_school['TYPE'].isnull(), 'TYPE'] = 'Elementary School'

In [None]:
# spatial join TAZs to School points
sdf_school_taz = sdf_school.spatial.join(sdf_taz, how='inner')
# group by TYPE and sum of Enrollment within TAZ 
sdf_school_taz_grouped = sdf_school_taz.groupby(['TYPE', 'TAZ']).agg(
                                                {'ENROLLMENT': 'sum'}).reset_index()
# unstack by TYPE as columns and TAZ as a column
sdf_school_taz_grouped_pivot = sdf_school_taz_grouped.pivot(index='TAZ', 
                                                            columns='TYPE', 
                                                            values='ENROLLMENT').reset_index()
# merge to sdf_taz to get all tazs
sdf_taz_school = pd.merge(sdf_taz, sdf_school_taz_grouped_pivot, how='left', on='TAZ')

# drop SHAPE column
sdf_taz_school = sdf_taz_school.drop(columns='SHAPE')
# fill NA with 0 for all rows
sdf_taz_school = sdf_taz_school.fillna(0)
# cast all fields to int
sdf_taz_school = sdf_taz_school.astype(int)
# rename columns
sdf_taz_school.rename(columns={'Elementary School':'elementary_school_enrollment',
                               'Middle School':'middle_school_enrollment',
                               'High School':'high_school_enrollment',
                               'College':'college_enrollment'}, inplace=True)

# export to csv
sdf_taz_school.to_csv(os.path.join('SchoolEnrollment.csv'), index=False)

## Socio Econ

In [None]:
#Get Census data
census_url = 'https://maps.trpa.org/server/rest/services/Demographics/MapServer/28'
df_census = get_fs_data(census_url)
df_census_2022 = df_census[(df_census['year_sample'] == 2022) & (df_census['sample_level'] == 'block group')]

In [None]:
# Get relevant census variables and calculate rates at block group level
# Get Occupancy Data - B25002_003E = Vacant, B25002_002E = Occupied , B25004_006E = Vacant Seasonal
occupancy_codes = ['B25002_003E','B25002_002E', 'B25004_006E']
df_census_occupancy = df_census_2022[df_census_2022['variable_code'].isin(occupancy_codes)]
df_census_occupancy = df_census_occupancy[['TRPAID', 'variable_code', 'value']]
# pivot to wide format so we can calculate percentages and totals
df_census_occupancy = df_census_occupancy.pivot(index='TRPAID', columns='variable_code', values='value').reset_index()
# vacant units + occupied units = total units
df_census_occupancy['total_units'] = df_census_occupancy['B25002_003E'] + df_census_occupancy['B25002_002E']
# occupancy rate = occupied units / total units
df_census_occupancy['occupancy_rate'] = df_census_occupancy['B25002_002E'] / df_census_occupancy['total_units']
# seasonal rate = seasonal units / total units
df_census_occupancy['seasonal_rate'] = df_census_occupancy['B25004_006E'] / df_census_occupancy['total_units']


In [None]:
# Get Household Size Data - B25010_001E = Total Households
df_census_household_size = df_census_2022[df_census_2022['variable_code'] == 'B25010_001E']
df_census_household_size = df_census_household_size[['TRPAID', 'variable_code', 'value']]
df_census_household_size = df_census_household_size.pivot(index='TRPAID', columns='variable_code', values='value').reset_index()
df_census_household_size['household_size'] = df_census_household_size['B25010_001E']

In [None]:
# List of Codes by the category they fall into - Census categroy to broader category
code_lookup = pd.read_csv('Lookup_Lists/occupancy_census_codes.csv')
#Filter census so only variable codes in the code lookup are included
df_census_income = df_census_2022[df_census_2022['variable_code'].isin(code_lookup['variable_code'])]
#Create a new column that has a value from code lookup based on the variable code
df_census_income['income_category'] = df_census_income['variable_code'].map(code_lookup.set_index('variable_code')['category'])
#group by block group and income category and sum the values
df_census_income = df_census_income.groupby(['TRPAID','income_category'])['value'].sum().reset_index()
df_census_income = df_census_income.pivot(index='TRPAID', columns='income_category', values='value').reset_index()

In [None]:
# TRPAID is a 16 digit ID, but it is imported as a float. Convert to string and to retain leading zeros
df_census_household_size['TRPAID']= df_census_household_size['TRPAID'].astype(str).str.zfill(16)
df_census_income['TRPAID']= df_census_income['TRPAID'].astype(str).str.zfill(16)
# merge all the census data together
df_census_occupancy_all = pd.merge(df_census_occupancy, df_census_household_size, on='TRPAID', how='left')
df_census_all = pd.merge(df_census_occupancy_all, df_census_income, on='TRPAID', how='left')
# rename columns of df_census_all
column_rename = {
    'B25002_003E': 'vacant_units',
    'B25002_002E': 'occupied_units',
    'B25004_006E': 'seasonal_units',
    'High Income': 'high_income',
    'Low Income': 'low_income',
    'Medium Income': 'middle_income',
}
df_census_all.rename(columns=column_rename, inplace=True)

df_census_all.drop(columns=['B25010_001E'], inplace=True)
# calculate proportions of income categories
df_census_all['high_income_proportion'] = df_census_all['high_income'] / df_census_all['occupied_units']
df_census_all['middle_income_proportion'] = df_census_all['middle_income'] / df_census_all['occupied_units']
df_census_all['low_income_proportion'] = df_census_all['low_income'] / df_census_all['occupied_units']

In [None]:
#Generate a block group level number of units and vhrs so we can get to adjusted seasonal rates
df_units_block_grouped = sdf_units_block.groupby('TRPAID').agg({'Residential_Units': 'sum'}).reset_index()
df_vhr_grouped     = sdf_vhr_block.groupby('TRPAID').agg({'APN': 'count'}).reset_index()
# merge the two dataframes
df_units_vhr = pd.merge(df_units_block_grouped, df_vhr_grouped, on='TRPAID', how='left')
#rename APN to VHR
df_units_vhr.rename(columns={'APN':'VHR'}, inplace=True)
# fill in missing values with 0
df_units_vhr = df_units_vhr.fillna(0)
df_units_vhr['non_vhr_units'] = df_units_vhr['Residential_Units'] - df_units_vhr['VHR']
# join this to the census data 
df_census_vhr = pd.merge(df_census_all, df_units_vhr, on='TRPAID', how='left')
# calculate the non-adjusted number of seasonal units and then subtract the number of VHRs
df_census_vhr['non_adjusted_seasonal_units'] = df_census_vhr['seasonal_rate']*df_census_vhr['Residential_Units']
df_census_vhr['adjusted_seasonal_units'] = df_census_vhr['non_adjusted_seasonal_units'] - df_census_vhr['VHR']
# Manually adjust the seasonal units for block group 3200500170022020 because of a lag in the data
# The census reports 100% occupancy but I think it has to do with the beach club development
df_census_vhr.loc[df_census_vhr['TRPAID'] == '3200500170022020', 'adjusted_seasonal_units'] = 0
# calculate the adjusted seasonal rate
df_census_vhr['adjusted_seasonal_rate'] = df_census_vhr['adjusted_seasonal_units'] / df_census_vhr['Residential_Units']
# rename a final database
df_census_final = df_census_vhr


In [None]:
# make a parcel layer with all of the census rates applied by merging df_census_final with sdf_units_block
sdf_units_census_values = pd.merge(sdf_units_block, df_census_final, on='TRPAID', how='left')
#This can eventually be grouped at the TAZ level or joined to the master parcel dataframe

## Employment 

In [None]:
# top line employment data for NV from 2018 lives here: ????
# we got employment data from NV at the Tahoe Basin level by NAICS code....

# get the employment data
nv_employ = pd.read_csv('')

In [None]:
# WE HAVE GOOD DATA FOR CASINO EMPLOYMENT on the South Shore ## 
# for employment data we have multiple years of CA EDD data
# california employment development department data for 2018 and 2022 was transformed to a feature class and spatial joined to TAZs and Block Group
# exported to a csv
# stacekd data by temporal scale
# grouped by TAZ and NAICS code, and summed employment
# F:\GIS\PROJECTS\ResearchAnalysis\Employment\Data\EDD_Grouped
# F:\GIS\PROJECTS\ResearchAnalysis\Employment\
# then looking at difference of total and trends over time (month-month) and year over year
#
# LODES data https://maps.trpa.org/server/rest/services/LTinfo_Climate_Resilience_Dashboard/MapServer/142

# compare 2018 to 2022 by block group 

# checking trends of each. 
#  - what is the trend of employment by NAICS code
#  - what is the trend of employment by TAZ
#  - what is the trend of employment by block group
#  - what is the trend of employment by zip code
# 
# CBP data for 2018 and 2022
# data is mostly in the service. or in Vector.sde>Census>Jobs
# look at comparisons of trends by same geography and temporal scale

# workflow is to get the data, clean it, join it to the spatial data, then group by the spatial data and sum the employment
# 
# establish trends for CA for the three datasources...compare the trends and see if they are similar
# 

### NAICS codes are one order higher in LODES data, CA EDD and CBP data have the same granularity of NAICS codes
### LODES is by year so the trend might be different if there is a sesaonal component to the data

# we'll have two of the three datasets analyzed for Nevada and all three in California.
    # where we have all three datasets we'll compare the trends and see if they are similar
    # we'll look at the trends for each dataset and see if they are similar
    # we'll look at the trends for each geography and see if they are similar
    # we'll look at the trends for each temporal scale and see if they are similar

# For Nevada we have block level data for 2018 so if consistent with 2022 we can use that as a proxy for 2022

# we subtract out any known employment from the 2018 data (e.g. Lakeside Inn) and compare the trends
# generate adjustment factors by sector and apply those adjustments to the 2018 data that was aggregated to the TAZ level.

## TAZ Summary

> Scaling Factor Data Engineering

> Needs

* Place.ai data by Jurisdiction ?

* Model day(s) weighting logic
    * will we need July data? 

* for quarterly data by zone
    * 

> Aggregations

## Basin Summary Category
* lodging occupancy rate
* campground occupancy rate
* house(VHR) rate
* seasonal rate
* lodging unit
* campground
* percentHouseSeasonal
* school enrollment
* employment
* residential unit
* total persons
* census occupancy rate
* low income res unit
* medium income res unit
* high income res unit
* total occupied unit
* persons per occupied unit


## Forecasts