## Intro

> Setup

In [2]:
import pandas as pd
import pathlib
import os
import arcpy
from utils import *
import numpy as np

pd.options.mode.copy_on_write = True
pd.options.mode.chained_assignment = None
pd.options.display.max_columns = 999
pd.options.display.max_rows = 999

In [4]:
# current working directory
local_path = pathlib.Path().absolute()
# set data path as a subfolder of the current working directory TravelDemandModel\2022\data\
data_dir = local_path.parents[0] / '2022/data'
# set workspace
arcpy.env.workspace = os.path.join(local_path, 'Workspace.gdb')
# overwrite true
arcpy.env.overwriteOutput = True
# Set spatial reference to NAD 1983 UTM Zone 10N
sr = arcpy.SpatialReference(26910)
# Set the extent environment using a feature class
arcpy.env.extent = "Tahoe_OccupancyRate_Zones"
# setup in momory workspace


In [5]:
# globals
final_schema = ['APN', 'Residential_Units', 'TouristAccommodation_Units', 'CommercialFloorArea_SqFt',
                'Lodging_Occupancy_Rate', 'PrimaryResidence_Rate', 'SecondaryResidence_Rate',
                'HighIncome_Rate',	'MediumIncome_Rate', 'LowIncome_Rate', 'PersonsPerUnit',
                'TAU_TYPE', 'VHR', 'BLOCK_GROUP', 'TAZ', 'OCCUPANCY_ZONE', 
                'JURISDICTION', 'COUNTY', 'OWNERSHIP_TYPE','EXISTING_LANDUSE', 'WITHIN_TRPA_BNDY', 
                'PARCEL_ACRES', 'PARCEL_SQFT', 'SHAPE']

> Get Data

In [6]:
# Get TAZ data
taz_url = 'https://maps.trpa.org/server/rest/services/Transportation_Planning/MapServer/6'
sdf_taz = get_fs_data_spatial(taz_url)
#set spatial reference
sdf_taz.spatial.sr = sr

# Get Unit Data
units_url = 'https://maps.trpa.org/server/rest/services/Existing_Development/MapServer/2'
sdf_units = get_fs_data_spatial_query(units_url, "Year = 2022")
sdf_units.spatial.sr = sr

# Get Block Group Data
block_groups_url = 'https://maps.trpa.org/server/rest/services/Demographics/MapServer/27'
sdf_block = get_fs_data_spatial(block_groups_url)
sdf_block = sdf_block.loc[(sdf_block['YEAR'] == 2020) & (sdf_block['GEOGRAPHY'] == 'Block Group')]
sdf_block.spatial.sr = sr

# Get VHR Data
vhr_url = 'https://maps.trpa.org/server/rest/services/VHR/MapServer/0'
sdf_vhr = get_fs_data_spatial(vhr_url)
sdf_vhr.spatial.sr = sr
# filter vhr layer to active status
sdf_vhr = sdf_vhr.loc[sdf_vhr['Status'] == 'Active']

census_url = 'https://maps.trpa.org/server/rest/services/Demographics/MapServer/28'
df_census = get_fs_data(census_url)
df_census_2022 = df_census.loc[(df_census['year_sample'] == 2022) & (df_census['sample_level'] == 'block group')]

# Get the data - should be 18 campgrounds
campground_url = 'https://maps.trpa.org/server/rest/services/Recreation/MapServer/1'
sdf_campground =  get_fs_data_spatial_query(campground_url, "RECREATION_TYPE='Campground'")

# campground occupancy rate data
dfCamp = pd.read_csv(os.path.join(data_dir,'Campground_Visitation.csv'))
dfCamp_2022 = dfCamp.loc[dfCamp['Year'] == 2022]

# occupancy feature class as a spatial dataframe
sdf_occ = pd.DataFrame.spatial.from_featureclass("Tahoe_OccupancyRate_Zones")
sdf_occ.spatial.sr = sr

# get table from geodatabase
# occupancy_rate = pd.DataFrame.spatial.from_table("OccupancyRates")
occupancy_rate = pd.read_csv(os.path.join(data_dir, 'Occupancy_Rate_Export_JS.csv'))

# Get School Enrollment data
school_url_table     = 'https://maps.trpa.org/server/rest/services/Demographics/MapServer/32'
df_school_enrollment = get_fs_data(school_url_table)

# Get School Enrollment data - spatial
school_url_spatial    = 'https://maps.trpa.org/server/rest/services/Datadownloader_PlanningandJurisdictions/MapServer/14'
sdf_school            =  get_fs_data_spatial(school_url_spatial)
sdf_school.spatial.sr = sr

> Future Utils

In [36]:
# function to do a spatial join and
# map values from the source to the target
def spatial_join_map(target, source, join_field,  map_field, target_field):
    # spatial join
    arcpy.SpatialJoin_analysis(target, source, 'memory\\temp', 
                               'JOIN_ONE_TO_ONE', 'KEEP_ALL','HAVE_THEIR_CENTER_IN')
    # get result as a spatial dataframe
    join = pd.DataFrame.spatial.from_featureclass('memory\\temp')
    join.info()
    # map values
    target[target_field] = target[join_field].map(dict(zip(join[join_field], join[map_field])))
    return target

# check for duplicates
def check_dupes(df, col):
    df['is_duplicate'] = df.duplicated(subset=col, keep=False)
    df.is_duplicate.value_counts()
    df.loc[df['is_duplicate'] == True]
    df = df.drop_duplicates(subset=col, keep='first', inplace=True)
    return df[df.duplicated([col], keep=False)]

# check if field exists in data frame and final_schema and if not add it
def check_field(df, fields):
    for field in fields:
        if field not in df.columns:
            df[field] = np.nan
    return df



> general spatial joins

In [7]:
# spatial join to get TAZ
arcpy.SpatialJoin_analysis(sdf_units, sdf_taz, "Existing_Development_TAZ", 
                           "JOIN_ONE_TO_ONE", "KEEP_ALL", "", "HAVE_THEIR_CENTER_IN")
# spatial join to get Block Group
arcpy.SpatialJoin_analysis(sdf_units, sdf_block, "Existing_Development_BlockGroup", 
                           "JOIN_ONE_TO_ONE", "KEEP_ALL", "", "HAVE_THEIR_CENTER_IN")
# spatail join to get Occupancy Rate Zone
arcpy.SpatialJoin_analysis(sdf_units, sdf_occ, "Existing_Development_OccupancyZone", 
                           "JOIN_ONE_TO_ONE", "KEEP_ALL", "", "HAVE_THEIR_CENTER_IN")

'c:\\Users\\mbindl\\Documents\\GitHub\\Transportation\\TravelDemandModel\\data_engineering\\Workspace.gdb\\Existing_Development_OccupancyZone'

In [8]:
# List of Parcels APN with TAU Types
tau_lookup = pd.read_csv('Lookup_Lists/lookup_tau_type.csv')

sdfUnits = check_field(sdf_units, final_schema)

# merge parcel 2022 with parcel VHR
df = sdfUnits.merge(sdf_vhr, on='APN', how='left', indicator=True)

# calculate VHR = Yes if VHR is in the parcel
df['VHR'] = 'No'
df.loc[df['_merge'] == 'both', 'VHR'] = 'Yes'

# setup TAU_Type
df['TAU_TYPE'] = 'N/A'

# filter parcels so only APNs in the lookup are included
dfTAU = df[df['APN'].isin(tau_lookup['APN'])]
# get TAU_Type from lookup
dfTAU['TAU_TYPE'] = dfTAU['APN'].map(tau_lookup.set_index('APN')['TAU_Type'])

# any row with ToursitAccommodation_Units > 0 and TAU_Type is null, set TAU_Type to 'HotelMotel'
df.loc[(df['TouristAccommodation_Units'] > 0) & (df['TAU_TYPE']=='N/A'), 'TAU_TYPE'] = 'HotelMotel'
# for the rows in df that match rows by APN in dfTAU set TAU_Type to the value in dfTAU
df.loc[df['APN'].isin(dfTAU['APN']), 'TAU_TYPE'] = dfTAU['TAU_TYPE']

# remove _x from column names
df.columns = df.columns.str.replace('_x', '')

# get results of spatial joins as spatial dataframes
sdf_units_taz   = pd.DataFrame.spatial.from_featureclass("Existing_Development_TAZ", sr=sr)  
sdf_units_block = pd.DataFrame.spatial.from_featureclass("Existing_Development_BlockGroup", sr=sr)
sdf_units_occ   = pd.DataFrame.spatial.from_featureclass("Existing_Development_OccupancyZone", sr=sr)

### add logic to handle VHR="Yes"&JURISDICTION='CSLT' == OCCUPANCY_ZONE = CSLT_All elseif JURISDICTION='CSLT' == OCCUPANCY_ZONE = CSLT_Zone1 etc...

# map dictionary to sdf_units dataframe to fill in TAZ and Block Group fields
df['TAZ']           = df.APN.map(dict(zip(sdf_units_taz.APN,   sdf_units_taz.TAZ)))
df['BLOCK_GROUP']   = df.APN.map(dict(zip(sdf_units_block.APN, sdf_units_block.TRPAID)))
df['OCCUPANCY_ZONE']= df.APN.map(dict(zip(sdf_units_occ.APN,   sdf_units_occ.OccupancyRate_ZoneID)))

# columns to keep
df = df[final_schema]
df.info()

# export to feature class
# df.spatial.to_featureclass(location=os.path.join(arcpy.env.workspace, 'SDF'), sanitize_columns=False)

NameError: name 'check_field' is not defined

## Occupancy Rates

##### Notes
* get interopolated values weighted after spatial join
* merge by apn

In [None]:
# get table from geodatabase
# occupancy_rate = pd.DataFrame.spatial.from_table("OccupancyRates")
occupancy_rate = pd.read_csv(os.path.join(data_dir, 'Occupancy_Rate_Export.csv'))
df = occupancy_rate
df.info()

> Filter Occupancy Rate table to Timeframe and Room Type, Merge with Occupancy Zone Feature Class, and Export to Feature Class

In [None]:
dfOccTAU = dfOut.loc[dfOut['RoomType'] == 'TAU']
dfOccVHR = dfOut.loc[dfOut['RoomType'] == 'VHR']

# merge occupancy rate data to occupancy zones
sdf = pd.merge(sdf_occ, dfOccTAU, left_on='OccupancyRate_ZoneID', right_on='Zone_ID', how='left')
# export sdf to feature class
sdf.spatial.to_featureclass(location=os.path.join('Workspace.gdb', 'OccupancyRate_Zones_TAU'), overwrite=True)

# merge occupancy rate data to occupancy zones
sdf = pd.merge(sdf_occ, dfOccVHR, left_on='OccupancyRate_ZoneID', right_on='Zone_ID', how='left')
# export sdf to feature class
sdf.spatial.to_featureclass(location=os.path.join('Workspace.gdb', 'OccupancyRate_Zones_VHR'), overwrite=True)

> Join TAU and VHR parcels to respective occupancy rate zones

In [None]:
# filter rows where VHR = Yes
dfVHR = df.loc[df['VHR'] == 'Yes']
# filter rows where TUA > 0
dfTAU = df.loc[df['TouristAccommodation_Units'] > 0]

# spatial join VHR to occupancy rate zones with VHR values
spjoin_vhr = arcpy.analysis.SpatialJoin(dfVHR, "OccupancyRate_Zones_VHR", "OccupancyRate_Zones_VHR_Parcels", 
                                        "JOIN_ONE_TO_ONE", "KEEP_ALL", None, "INTERSECT", None, None)

# spatial join VHR to occupancy rate zones with VHR values
spjoin_tau = arcpy.analysis.SpatialJoin(dfTAU, "OccupancyRate_Zones_TAU", "OccupancyRate_Zones_TAU_Parcels", 
                                        "JOIN_ONE_TO_ONE", "KEEP_ALL", None, "INTERSECT", None, None)

# feature to point in memmory
vhr_points = arcpy.management.FeatureToPoint("OccupancyRate_Zones_VHR_Parcels", r"memory/vhr_points", "INSIDE")
tau_points = arcpy.management.FeatureToPoint("OccupancyRate_Zones_TAU_Parcels", r"memory/tau_points", "INSIDE")

> Fill in parcel level missing occupancy rates with interpolated values

In [None]:
# spatial join occupancy rate polygons to parcels with missing values
# first i need to fill in the known occupancy rate values
# then get parcels with a vhr or tau and fill in with missing values
# or should i fill in the zone data with interpolated values...?
# get parcels with missing values
vhr_missing_occ = dfVHR.loc[dfVHR['OCCUPANCY_ZONE'].isnull()]
tau_missing_occ = dfTAU.loc[dfTAU['OCCUPANCY_ZONE'].isnull()]

# spatial join VHR to occupancy rate interpolation polygons
spjoin_vhr_missing = arcpy.analysis.SpatialJoin(sdf_units_2022_vhr_missing, "vhr_occupancy_rate_poly", "OccupancyRate_Zones_VHR_Parcels_Missing", 
                                        "JOIN_ONE_TO_ONE", "KEEP_ALL", None, "INTERSECT", None, None)
# spatial join TAU to occupancy rate interpolation polygons
spjoin_tau_missing = arcpy.analysis.SpatialJoin(sdf_units_2022_tau_missing, "tau_occupancy_rate_poly", "OccupancyRate_Zones_TAU_Parcels_Missing", 
                                        "JOIN_ONE_TO_ONE", "KEEP_ALL", None, "INTERSECT", None, None)

# merge to original data
sdf_units_2022_vhr = pd.concat([spjoin_vhr, spjoin_vhr_missing])
sdf_units_2022_tau = pd.concat([spjoin_tau, spjoin_tau_missing])

# fill in missing report_occ_rate values
sdf_units_2022_vhr['report_occ_rate_x'] = sdf_units_2022_vhr['report_occ_rate_y']
sdf_units_2022_tau['report_occ_rate_x'] = sdf_units_2022_tau['report_occ_rate_y']

In [None]:
columns_to_keep = ['APN', 'Residential_Units', 'TouristAccommodation_Units',
                    'CommercialFloorArea_SqFt', 'YEAR',
                    'JURISDICTION', 'COUNTY', 'OWNERSHIP_TYPE',
                    'EXISTING_LANDUSE', 'TAZ', 'TAU_Type','VHR',
                    'WITHIN_TRPA_BNDY', 'PARCEL_ACRES', 'PARCEL_SQFT', 'SHAPE',
                    'report_occ_rate_x']

# remove _x from column names
sdf_units_2022_vhr.columns = sdf_units_2022_vhr.columns.str.replace('_x', '')

In [None]:
# get the occupancy rate featuer class as spatial dataframe
sdf_occ_rate_tau = pd.DataFrame.spatial.from_featureclass("OccupancyRate_Zones_TAU_Parcels")
# get the occupancy rate featuer class as spatial dataframe
sdf_occ_rate_vhr = pd.DataFrame.spatial.from_featureclass("OccupancyRate_Zones_VHR_Parcels")

In [None]:
# filter sdf_units_2022 to rows where TAU is null or 0
sdf_units_2022_no_tau = sdf_occ_rate_tau.loc[(sdf_occ_rate_tau['report_occ_rate'] == 0)|(sdf_occ_rate_tau['report_occ_rate'].isnull())]

# filter sdf_units_2022 to rows where VHR is null or 0
sdf_units_2022_no_vhr = sdf_occ_rate_vhr.loc[(sdf_occ_rate_vhr['VHR'] == 'Yes')&(sdf_occ_rate_vhr['report_occ_rate'] == 0)|(sdf_occ_rate_vhr['report_occ_rate'].isnull())]

In [None]:
# spatial join to occupancy rate raster
sdf_units_2022_no_tau = pd.DataFrame.spatial.from_featureclass("OccupancyRate_Zones_TAU_Parcels")
sdf_units_2022_no_vhr = pd.DataFrame.spatial.from_featureclass("OccupancyRate_Zones_VHR_Parcels")

# merge occupancy rate data to occupancy zones


> Generate Spatial Interpolated Occupancy Rate Surfaces

In [None]:
# Set the extent environment using a feature class
arcpy.env.extent = "OccupancyRate_Zones_TAU"

# make feature layer from in memory points
arcpy.management.MakeFeatureLayer("in_memory/vhr_points", "vhr_points_lyr")
arcpy.management.MakeFeatureLayer("in_memory/tau_points", "tau_points_lyr")
# filter out rows where occupancy rate is null or 0
arcpy.management.SelectLayerByAttribute("vhr_points_lyr", "NEW_SELECTION", "report_occ_rate IS NOT NULL OR report_occ_rate > 0")
arcpy.management.SelectLayerByAttribute("tau_points_lyr", "NEW_SELECTION", "report_occ_rate IS NOT NULL OR report_occ_rate > 0")

# set the output cell size
cell_size = 30
# set the power parameter
power = 2
# set the search radius
search_radius = 10000

# set the output raster
out_raster = 'tau_occupancy_rate'
# run the IDW for TAUs
arcpy.sa.Idw("tau_points_lyr", 
             z_field='report_occ_rate', 
             cell_size=cell_size, 
             power=power, 
             search_radius=search_radius).save(out_raster)

# set the output raster
out_raster = 'vhr_occupancy_rate'
# run the IDW for VHRs
arcpy.sa.Idw("vhr_points_lyr", 
             z_field='report_occ_rate', 
             cell_size=cell_size, 
             power=power, 
             search_radius=search_radius).save(out_raster)

In [None]:
# interpolate between 
arcpy.sa.Sample(
    in_rasters="tau_occupancy_rate",
    in_location_data="OccupancyRate_Zones_TAU_Parcels",
    out_table="tau_interpolated_occupancy_rate",
    resampling_type="BILINEAR",
    unique_id_field="OBJECTID",
    process_as_multidimensional="CURRENT_SLICE",
    acquisition_definition=None,
    statistics_type="MEAN",
    percentile_value=None,
    buffer_distance=None,
    layout="ROW_WISE",
    generate_feature_class="FEATURE_CLASS"
)
# interpolate VHR occupancy rate
arcpy.sa.Sample(
    in_rasters="vhr_occupancy_rate",
    in_location_data="OccupancyRate_Zones_VHR_Parcels",
    out_table="vhr_interpolated_occupancy_rate",
    resampling_type="BILINEAR",
    unique_id_field="OBJECTID",
    process_as_multidimensional="CURRENT_SLICE",
    acquisition_definition=None,
    statistics_type="MEAN",
    percentile_value=None,
    buffer_distance=None,
    layout="ROW_WISE",
    generate_feature_class="FEATURE_CLASS"
)

# spatial join null parcels with raster interpolated values
vhr_null = arcpy.management.MakeFeatureLayer("OccupancyRate_Zones_VHR_Parcels", "vhr_null_lyr")
tau_null = arcpy.management.MakeFeatureLayer("OccupancyRate_Zones_TAU_Parcels", "tau_null_lyr")

arcpy.management.SelectLayerByAttribute("vhr_points_lyr", "NEW_SELECTION", "report_occ_rate IS NULL OR report_occ_rate = 0")
arcpy.management.SelectLayerByAttribute("tau_points_lyr", "NEW_SELECTION", "report_occ_rate IS NULL OR report_occ_rate = 0")

# spatial join
arcpy.analysis.SpatialJoin("vhr_null_lyr", "vhr_interpolated_occupancy_rate", "OccupancyRate_Zones_VHR_Parcels_Null", 
                           "JOIN_ONE_TO_ONE", "KEEP_ALL", None, "INTERSECT", None, None)
arcpy.analysis.SpatialJoin("tau_null_lyr", "tau_interpolated_occupancy_rate", "OccupancyRate_Zones_TAU_Parcels_Null",
                            "JOIN_ONE_TO_ONE", "KEEP_ALL", None, "INTERSECT", None, None)

# get spatial enabled dataframes
sdf_vhr_null = pd.DataFrame.spatial.from_featureclass("OccupancyRate_Zones_VHR_Parcels_Null")
sdf_tau_null = pd.DataFrame.spatial.from_featureclass("OccupancyRate_Zones_TAU_Parcels_Null")

# merge null values with original data
sdf_vhr_null = pd.merge(sdf_vhr_null, sdf_units_2022_vhr, on='APN', how='left')
sdf_tau_null = pd.merge(sdf_tau_null, sdf_units_2022_tau, on='APN', how='left')

# set occupancy rate to the interpolated value
sdf_vhr_null['Lodging_Occupancy_Rate'] = sdf_vhr_null['MEAN']
sdf_tau_null['Lodging_Occupancy_Rate'] = sdf_tau_null['MEAN']

In [None]:
# get rows of missing values for report_occ_rate
dfTau = pd.DataFrame.spatial.from_featureclass("tau_interpolated_occupancy_rate")
dfTau.spatial.sr = sr
dfTau = dfTau.loc[dfTau['report_occ_rate'].isnull()]
# get rows of missing values for report_occ_rate
dfVhr = pd.DataFrame.spatial.from_featureclass("vhr_interpolated_occupancy_rate")
dfVhr.spatial.sr = sr
dfVhr = dfVhr.loc[dfVhr['report_occ_rate'].isnull()]

# spatial join
spjoin_tau = arcpy.analysis.SpatialJoin(sdf_units, "tau_interpolated_occupancy_rate", "OccupancyRate_Zones_TAU_Parcels", 
                                        "JOIN_ONE_TO_ONE", "KEEP_ALL", None, "INTERSECT", None, None)
spjoin_vhr = arcpy.analysis.SpatialJoin(sdf_units, "vhr_interpolated_occupancy_rate", "OccupancyRate_Zones_VHR_Parcels",
                                        "JOIN_ONE_TO_ONE", "KEEP_ALL", None, "INTERSECT", None, None)

# move values
sdf_units.loc[sdf_units['APN'].isin(spjoin_tau['APN']), 'Lodging_Occupancy_Rate'] = spjoin_tau['report_occ_rate']
sdf_units.loc[sdf_units['APN'].isin(spjoin_vhr['APN']), 'Lodging_Occupancy_Rate'] = spjoin_vhr['report_occ_rate']


> Calculate Weighted Occupance Rate

In [None]:
# For Washoe County

# if the Zone_ID is Washoe County set Report_OccRate to Report_RoomsRented by Report _RoomsAvailable
df.loc[df['Zone_ID'] == 'Washoe County', 'Report_OccRate'] = df['Report_RoomsRented']/df['Report_RoomsAvailable']

# if the Zone_ID is Washoe County and Temporal_Scale is Quarterly /3 and then add that to the three rows where the Zone_ID is Washoe County and Temporal_Scale is Monthly
df['ExtraUnits']   = 0
df.loc[(df['Zone_ID'] == 'Washoe County') & (df['Temporal_Scale'] == 'Quarterly'), 'ExtraUnits'] = df['Report_RoomsRented']/3
df.loc[(df['Zone_ID'] == 'Washoe County') & (df['Temporal_Scale'] == 'Monthly'), 'Report_RoomsAvailable'] = df['Report_RoomsRented'] + df['ExtraUnits']

# drop row where the Zone_ID is Washoe County and Temporal_Scale is Quarterly
df = df.loc[~((df['Zone_ID'] == 'Washoe County') & (df['Temporal_Scale'] == 'Quarterly'))]

# drop ExtraUnits column
df.drop(columns=['ExtraUnits'], inplace=True)






In [9]:
# df
df = occupancy_rate

# filter to columns 
columns = ['Zone_ID', 'Period', 'RoomType', 'Report_OccRate','TRPA_OccRate']

# dictinary to convert the time frames to make things cleaner
timeframe_dict = {
    '2022-06-01': 'June',
    '2022-08-01': 'August',
    '2022-09-01': 'September',
    'Q4 21-22'  : 'April-June',
    'Q1 22-23'  : 'July-September',
    'Q2 2022'   : 'April-June',
    'Q3 2022'   : 'July-September'
}

# Define the weights for each month based on the number of days they contribute
weights = {
    'June'          : 8/20,
    'August'        : 3/20,
    'September'     : 9/20,
    'April-June'    : 8/20,
    'July-September': 12/20
}

# Period field based on Timeframe and timeframe_dict
df['Period'] = df['Timeframe'].map(timeframe_dict)

# calculate the weighted occupancy rates
for key,value in weights.items():
    # Apply weights to the occupancy rates
    df.loc[df['Period'] == key, 'TRPA_OccRate'] = df['Report_OccRate'] * value

# Calculate RoomsRentedPerDay based on the period
df['RoomsRentedPerDay'] = df.apply(lambda row: row['Report_RoomsRented'] / 30 if row['Period'] in ['June', 'September'] else
                                   (row['Report_RoomsRented'] / 31 if row['Period'] == 'August' else
                                    (row['Report_RoomsRented'] / 91 if row['Period'] == 'April-June' else
                                     (row['Report_RoomsRented'] / 92 if row['Period'] == 'July-September' else 0))), axis=1).fillna(0).astype(int)

# filter by Temporal_Scale
df_monthly   = df.loc[df['Temporal_Scale'] == 'Monthly']
df_quarterly = df.loc[df['Temporal_Scale'] == 'Quarterly']

# group by for montthly and quarterly and mean for Report_OccRate and sum for TRPA_OccRate
dfMonthly   = df_monthly.groupby(['Zone_ID', 'RoomType', 'Temporal_Scale']).agg({'RoomsRentedPerDay': 'mean','Report_RoomsAvailable':'sum',
                                                                                 'Report_RoomsRented':'sum', 'Report_OccRate': 'mean', 
                                                                                 'TRPA_OccRate': 'sum'}).reset_index()

dfQuarterly = df_quarterly.groupby(['Zone_ID', 'RoomType', 'Temporal_Scale']).agg({'RoomsRentedPerDay': 'mean','Report_RoomsAvailable':'sum',
                                                                                   'Report_RoomsRented':'sum', 'Report_OccRate': 'mean', 
                                                                                   'TRPA_OccRate': 'sum'}).reset_index()

# concat the two dataframe
dfOut = pd.concat([dfMonthly, dfQuarterly])

# cast RoomsRentedPerDay as int 
dfOut['RoomsRentedPerDay'] = dfOut['RoomsRentedPerDay'].astype(int)

In [12]:
dfOut

Unnamed: 0,Zone_ID,RoomType,Temporal_Scale,RoomsRentedPerDay,Report_RoomsAvailable,Report_RoomsRented,Report_OccRate,TRPA_OccRate
0,CSLT_ALL,VHR,Monthly,169,28695,15457,0.538273,0.5258
1,CSLT_Zone1,HotelMotel,Monthly,1386,320069,126244,0.394281,0.387592
2,CSLT_Zone2,HotelMotel,Monthly,433,110877,39481,0.357057,0.347694
3,CSLT_Zone3,HotelMotel,Monthly,205,48412,18705,0.385772,0.369246
4,CSLT_Zone4,HotelMotel,Monthly,62,35887,5746,0.16012,0.16099
5,CSLT_Zone5,HotelMotel,Monthly,49,22113,4545,0.205531,0.203785
6,Rest of Douglas County,HotelMotel,Monthly,481,189918,44173,0.788152,0.780477
7,Rest of Douglas County,VHR,Monthly,82,21485,7586,0.446855,0.429239
8,Rest of El Dorado County,VHR,Monthly,0,0,0,0.538273,0.5258
9,Stateline Casino Core,Casino,Monthly,1731,199937,157602,0.788152,0.780477


In [10]:
# filter RoomType to HotelMoteland Casino
df1 = dfOut.loc[dfOut['RoomType'].isin(['VHR'])]
df1.RoomsRentedPerDay.sum()

1397

In [11]:
# filter RoomType to VHR
df2 = dfOut.loc[~dfOut['RoomType'].isin(['VHR'])] 
df2.RoomsRentedPerDay.sum()

5004

> Campground Occupancy

In [None]:
# merge campground data with occupancy rate data on campground name
sdf_campground = sdf_campground.merge(dfCamp_2022, left_on='RECREATION_NAME', right_on='Campground', 
                                      how='left', indicator=True)

# keep only columns of interest
sdf_campground = sdf_campground[['RECREATION_NAME', 'Occupancy_Rate','SHAPE']]

# filter sdf_campground to only campgrounds with occupancy rate data
sdf_campground = sdf_campground[sdf_campground['Occupancy_Rate'].notnull()]

# IDW to get the occupancy rate for each campground
# set the output cell size
cell_size = 500
# set the power parameter
power = 2
# set the search radius
search_radius = 5000
# set the output raster
out_raster = 'campground_occupancy_rate'
# run the IDW
arcpy.sa.Idw(in_features=sdf_campground, 
             z_field='Occupancy_Rate', 
             cell_size=cell_size, 
             power=power, 
             search_radius=search_radius).save(out_raster)

# spatial join to campground points with NaN occupancy rate
sdf_campground_nan = sdf_campground[sdf_campground['Occupancy_Rate'].isnull()]
# spatial join to campground points with NaN occupancy rate


In [None]:
# merge campground data with occupancy rate data on campground name
sdf_campground = sdf_campground.merge(dfCamp_2022, left_on='RECREATION_NAME', right_on='Campground', 
                                      how='left', indicator=True)

# spatial join TAZ data to campground data
arcpy.SpatialJoin_analysis(sdf_campground, sdf_taz, 'taz_campground', 
                           'JOIN_ONE_TO_ONE', 'KEEP_ALL', 
                           match_option='HAVE_THEIR_CENTER_IN')

# read in output of spatial join as sdf
sdf_campground_taz = pd.DataFrame.spatial.from_featureclass('taz_campground')

# get sites sold by multiplying the number of sites by the occupancy rate
sdf_campground_taz['SitesSold'] = sdf_campground_taz['Total_Sites'] * sdf_campground_taz['Occupancy_Rate']

# group by TAZ and sum of sites sold within TAZ
sdf_campground_taz_grouped = sdf_campground_taz.groupby('TAZ').agg(
                                                {'SitesSold': 'sum'}).reset_index()


In [None]:
# apply weighting by model days (same as occupancy zone rate weighting)




## Overnight Visitation

In [15]:
sdf_units.groupby('TAZ','TAU_TYPE').agg({'TouristAccommodation_Units': 'sum'}).reset_index()

ValueError: No axis named TAU_TYPE for object type DataFrame

In [None]:
# TAUs 6190 were rented on model day....2,200 VHRs were rented on model day

# get the number of occupied units by multiplying the number of units by the occupancy rate

# multiply the number of TAUs by the occupancy rate then add up by zone? or by TAZ?

# 

In [None]:
# 

## School Enrollment

In [None]:
# set Type to Null
sdf_school['TYPE'] = None
# set SchoolType to 'elementary' if it contains 'elementary' or 'magnet' or 'academy'
sdf_school.loc[sdf_school['NAME'].str.contains('elementary', case=False), 'TYPE'] = 'Elementary School'
# set SchoolType to 'middle' if it contains 'middle'
sdf_school.loc[sdf_school['NAME'].str.contains('middle', case=False), 'TYPE'] = 'Middle School'
# set SchoolType to 'high' if it contains 'high'
sdf_school.loc[sdf_school['NAME'].str.contains('high', case=False), 'TYPE'] = 'High School'
# set SchoolType to 'college' if it contains 'college'
sdf_school.loc[sdf_school['NAME'].str.contains('college', case=False), 'TYPE'] = 'College'
# set SchoolType to 'other' if it it does not contain any of the above
sdf_school.loc[sdf_school['TYPE'].isnull(), 'TYPE'] = 'Elementary School'

In [None]:
# spatial join TAZs to School points
sdf_school_taz = sdf_school.spatial.join(sdf_taz, how='inner')
# group by TYPE and sum of Enrollment within TAZ 
sdf_school_taz_grouped = sdf_school_taz.groupby(['TYPE', 'TAZ']).agg(
                                                {'ENROLLMENT': 'sum'}).reset_index()
# unstack by TYPE as columns and TAZ as a column
sdf_school_taz_grouped_pivot = sdf_school_taz_grouped.pivot(index='TAZ', 
                                                            columns='TYPE', 
                                                            values='ENROLLMENT').reset_index()
# merge to sdf_taz to get all tazs
sdf_taz_school = pd.merge(sdf_taz, sdf_school_taz_grouped_pivot, how='left', on='TAZ')

# drop SHAPE column
sdf_taz_school = sdf_taz_school.drop(columns='SHAPE')
# fill NA with 0 for all rows
sdf_taz_school = sdf_taz_school.fillna(0)
# cast all fields to int
sdf_taz_school = sdf_taz_school.astype(int)
# rename columns
sdf_taz_school.rename(columns={'Elementary School':'elementary_school_enrollment',
                               'Middle School':'middle_school_enrollment',
                               'High School':'high_school_enrollment',
                               'College':'college_enrollment'}, inplace=True)

# export to csv
sdf_taz_school.to_csv(os.path.join('SchoolEnrollment.csv'), index=False)

## Socio Econ

> 

## Employment 

In [None]:
# top line employment data for NV from 2018 lives here: ????
# we got employment data from NV at the Tahoe Basin level by NAICS code....

# get the employment data
nv_employ = pd.read_csv('')

In [None]:
# WE HAVE GOOD DATA FOR CASINO EMPLOYMENT on the South Shore ## 
# for employment data we have multiple years of CA EDD data
# california employment development department data for 2018 and 2022 was transformed to a feature class and spatial joined to TAZs and Block Group
# exported to a csv
# stacekd data by temporal scale
# grouped by TAZ and NAICS code, and summed employment
# F:\GIS\PROJECTS\ResearchAnalysis\Employment\Data\EDD_Grouped
# F:\GIS\PROJECTS\ResearchAnalysis\Employment\
# then looking at difference of total and trends over time (month-month) and year over year
#
# LODES data https://maps.trpa.org/server/rest/services/LTinfo_Climate_Resilience_Dashboard/MapServer/142

# compare 2018 to 2022 by block group 

# checking trends of each. 
#  - what is the trend of employment by NAICS code
#  - what is the trend of employment by TAZ
#  - what is the trend of employment by block group
#  - what is the trend of employment by zip code
# 
# CBP data for 2018 and 2022
# data is mostly in the service. or in Vector.sde>Census>Jobs
# look at comparisons of trends by same geography and temporal scale

# workflow is to get the data, clean it, join it to the spatial data, then group by the spatial data and sum the employment
# 
# establish trends for CA for the three datasources...compare the trends and see if they are similar
# 

### NAICS codes are one order higher in LODES data, CA EDD and CBP data have the same granularity of NAICS codes
### LODES is by year so the trend might be different if there is a sesaonal component to the data

# we'll have two of the three datasets analyzed for Nevada and all three in California.
    # where we have all three datasets we'll compare the trends and see if they are similar
    # we'll look at the trends for each dataset and see if they are similar
    # we'll look at the trends for each geography and see if they are similar
    # we'll look at the trends for each temporal scale and see if they are similar

# For Nevada we have block level data for 2018 so if consistent with 2022 we can use that as a proxy for 2022

# we subtract out any known employment from the 2018 data (e.g. Lakeside Inn) and compare the trends
# generate adjustment factors by sector and apply those adjustments to the 2018 data that was aggregated to the TAZ level.


## TAZ Summary

> Scaling Factor Data Engineering

> Needs

* Place.ai data by Jurisdiction ?

* Model day(s) weighting logic
    * will we need July data? 

* for quarterly data by zone
    * 

> Aggregations

## Forecasts