In [5]:
import pandas as pd
from utils import *
import re

In [3]:
# Paths of the two CSV exports read by this notebook (relative to the notebook folder).
PROPERTY_CSV = 'data/Tahoe_Regional_Planning_Agency_property_file_v3_2025_02_25_00_32_01.csv'
MONTHLY_CSV = 'data/Tahoe_Regional_Planning_Agency_monthly_file_v3_2025_02_25_00_32_06.csv'

# One table of properties and one table of monthly records, loaded as-is.
property_data = pd.read_csv(PROPERTY_CSV)
monthly_data = pd.read_csv(MONTHLY_CSV)

In [6]:
def clean_column_names(df):
    """Sanitise the column labels of ``df`` in place and return ``df``.

    Every character that is not an ASCII letter, digit or underscore is
    replaced with ``_``; leading digits are then stripped so a name never
    starts with a number.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose ``columns`` attribute is overwritten with the cleaned names.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in (mutated, not copied).
    """
    cleaned_names = []
    for col in df.columns:
        # str() lets non-string labels (e.g. integer column names) through;
        # re.sub would otherwise raise TypeError on them.
        sanitized = re.sub(r"[^a-zA-Z0-9_]", "_", str(col))
        stripped = sanitized.lstrip("0123456789")
        # A label made only of digits would strip down to an empty name;
        # keep the digits behind an underscore instead.
        cleaned_names.append(stripped if stripped else f"_{sanitized}")
    df.columns = cleaned_names
    return df

In [None]:
# Boundary layer pulled from the TRPA "Boundaries" feature service (layer 4).
# NOTE(review): get_fs_data_spatial is not defined in this notebook; presumably it
# comes from `from utils import *` — confirm what type it returns.
tahoe_boundary_url = 'https://maps.trpa.org/server/rest/services/Boundaries/FeatureServer/4'
tahoe_boundary = get_fs_data_spatial(tahoe_boundary_url)

In [None]:
# create a feature layer in memory from the property data
import arcpy
import re
def clean_column_names(df):
    """Sanitise the column labels of ``df`` in place and return ``df``.

    Note: this redefines ``clean_column_names`` from an earlier cell; once this
    cell has run, this definition is the one in effect.

    Every character that is not an ASCII letter, digit or underscore is
    replaced with ``_``; leading digits are then stripped so a name never
    starts with a number.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose ``columns`` attribute is overwritten with the cleaned names.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in (mutated, not copied).
    """
    cleaned_names = []
    for col in df.columns:
        # str() lets non-string labels (e.g. integer column names) through;
        # re.sub would otherwise raise TypeError on them.
        sanitized = re.sub(r"[^a-zA-Z0-9_]", "_", str(col))
        stripped = sanitized.lstrip("0123456789")
        # A label made only of digits would strip down to an empty name;
        # keep the digits behind an underscore instead.
        cleaned_names.append(stripped if stripped else f"_{sanitized}")
    df.columns = cleaned_names
    return df
# Start from an empty in-memory workspace so the feature class can be recreated.
arcpy.management.Delete(r"in_memory")
arcpy.env.workspace = "in_memory"

# Names of the in-memory feature class and of the layer built on top of it.
feature_class = "property_data"
feature_layer = "property_layer"

# Field names are derived from the DataFrame columns, so sanitise them first.
property_data = clean_column_names(property_data)

# Spatial reference for the points (WGS 1984, EPSG 4326).
spatial_reference = arcpy.SpatialReference(4326)

# Every column except the two coordinate columns becomes an attribute field;
# the coordinates are written as the point geometry instead.
attribute_columns = [name for name in property_data.columns if name not in ('Latitude', 'Longitude')]

# Empty POINT feature class that the rows are inserted into below.
arcpy.management.CreateFeatureclass(arcpy.env.workspace, feature_class, "POINT", spatial_reference=spatial_reference)

# One field per attribute column: object-dtype columns become TEXT, everything else DOUBLE.
for name in attribute_columns:
    if property_data[name].dtype == object:
        field_type = "TEXT"
    else:
        field_type = "DOUBLE"
    arcpy.management.AddField(feature_class, name, field_type)

# Write one point feature per DataFrame row; the geometry token comes first,
# followed by the attribute values in the same order as the fields above.
with arcpy.da.InsertCursor(feature_class, ["SHAPE@XY"] + attribute_columns) as cursor:
    for _, record in property_data.iterrows():
        xy = (record['Longitude'], record['Latitude'])
        cursor.insertRow([xy] + [record[name] for name in attribute_columns])

# Expose the feature class as a layer so later cells can run selections on it.
arcpy.management.MakeFeatureLayer(feature_class, feature_layer)

# Print the feature layer name
print(f"Feature layer created: {feature_layer}")

In [None]:
# List every field of the layer with its type and length, to check the schema
# produced by the AddField calls.
desc = arcpy.Describe(feature_layer)
layer_fields = desc.fields
for field in layer_fields:
    print(f"Name: {field.name}, Type: {field.type}, Length: {field.length}")

In [None]:
# Rebuild Property_ID as a 500-character TEXT field: copy its values into a
# temporary field, drop the original, then rename the temporary field back.
id_field = "Property_ID"
widened_field = "Property_ID_Fixed"

arcpy.management.AddField(feature_layer, widened_field, "TEXT", field_length=500)
arcpy.management.CalculateField(feature_layer, widened_field, "!Property_ID!", "PYTHON3")
arcpy.management.DeleteField(feature_layer, [id_field])
arcpy.management.AlterField(feature_layer, widened_field, new_field_name=id_field)


In [None]:


# Select the property points that intersect the Tahoe boundary, then write the
# selected properties to a new in-memory feature class.
# NOTE(review): tahoe_boundary is whatever get_fs_data_spatial() returned —
# confirm it is an input type SelectLayerByLocation accepts.
tahoe_subset_fc = "in_memory/properties_in_tahoe_1"
arcpy.management.SelectLayerByLocation(feature_layer, "INTERSECT", tahoe_boundary)
arcpy.management.CopyFeatures(feature_layer, tahoe_subset_fc)

In [None]:

import numpy as np

fc = "in_memory/properties_in_tahoe_1"

if not arcpy.Exists(fc):
    print("Feature class not found in memory!")
else:
    # Only plain attribute fields are exported; geometry and raster fields are left out.
    fields = [
        candidate.name
        for candidate in arcpy.ListFields(fc)
        if candidate.type not in ("Geometry", "Raster")
    ]

    # Read the attribute table as a structured NumPy array ...
    arr = arcpy.da.FeatureClassToNumPyArray(fc, fields)

    # ... and turn its records into DataFrame rows, one column per array field.
    properties_in_tahoe = pd.DataFrame(arr.tolist(), columns=arr.dtype.names)

    print(properties_in_tahoe.head())  # Preview the first few rows


In [7]:
# Sanitise the monthly table's column names (same rules as the property table).
# The join to properties_in_tahoe below is disabled; a later cell merges
# monthly_data with property_data on Property_ID instead.
monthly_data = clean_column_names(df=monthly_data)
#tahoe_monthly_data = pd.merge(properties_in_tahoe, monthly_data, on='Property_ID', how='inner')

In [8]:
# Save the monthly table (with cleaned column names) without the row index.
monthly_csv_path = 'data/tahoe_monthly_data_tabular.csv'
monthly_data.to_csv(monthly_csv_path, index=False)

In [None]:
# Report how many features the feature class referenced by `fc`
# (properties_in_tahoe_1) contains.
result = arcpy.management.GetCount(fc)
print(result)

In [None]:

# Source (in-memory subset) and target (feature class inside AirDNA.gdb).
input_fc = "in_memory/properties_in_tahoe_1"
output_fc = "F:/GIS/PROJECTS/ResearchAnalysis/VHR/AirDNA.gdb/properties_in_tahoe_1"

# NOTE(review): Append writes into an existing target, but the cell that creates
# output_fc with CopyFeatures comes after this one — confirm the intended run order.
count = int(arcpy.management.GetCount(input_fc)[0])
if count <= 0:
    # Nothing to append.
    print("The in-memory feature class is empty.")
else:
    # "NO_TEST" is passed as the schema type argument, as in the original call.
    arcpy.management.Append(input_fc, output_fc, "NO_TEST")
    print("Features appended successfully.")

In [None]:
# Persist the in-memory Tahoe subset as a feature class in
# F:\GIS\PROJECTS\ResearchAnalysis\VHR\AirDNA.gdb
tahoe_subset_in_memory = "in_memory/properties_in_tahoe_1"
tahoe_subset_on_disk = "F:/GIS/PROJECTS/ResearchAnalysis/VHR/AirDNA.gdb/properties_in_tahoe_1"
arcpy.management.CopyFeatures(tahoe_subset_in_memory, tahoe_subset_on_disk)



In [9]:
# Summarise the monthly table (row count, column names and dtypes) after the
# column names have been cleaned.
monthly_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2418795 entries, 0 to 2418794
Data columns (total 35 columns):
 #   Column                         Dtype  
---  ------                         -----  
 0   Property_ID                    object 
 1   Property_Type                  object 
 2   Listing_Type                   object 
 3   Bedrooms                       float64
 4   Reporting_Month                object 
 5   Occupancy_Rate                 float64
 6   Active_Listing_Nights          int64  
 7   Currency                       object 
 8   Revenue__USD_                  float64
 9   Revenue_Potential__USD_        float64
 10  ADR__USD_                      float64
 11  Cleaning_Fee_Total__USD_       float64
 12  Number_of_Reservations         int64  
 13  Reservation_Days               int64  
 14  Available_Days                 int64  
 15  Blocked_Days                   int64  
 16  Country                        object 
 17  State                          object 
 18  Ci

In [11]:
# Read the final property feature class from the staging geodatabase.
# Raw string: the Windows path is full of backslashes, and sequences such as
# "\G" or "\p" are invalid escapes in a normal string literal (they trigger a
# warning today and are slated to become errors). The resulting path is unchanged.
property_fc = r"F:\GIS\PROJECTS\ResearchAnalysis\VHR\Data\VHR_Staging.gdb\properties_final"
# NOTE(review): the `.spatial` accessor is not part of pandas itself; presumably
# it is registered by a package pulled in through `from utils import *` — confirm.
# This replaces the property_data loaded from CSV earlier in the notebook.
property_data = pd.DataFrame.spatial.from_featureclass(property_fc)

  if (arr.astype(int) == arr).all():
  if (arr.astype(int) == arr).all():


In [12]:
# Inner join on Property_ID: only properties present in both tables are kept,
# with one output row per matching monthly record.
tahoe_monthly_data = property_data.merge(monthly_data, how='inner', on='Property_ID')

In [24]:
# This could be analyzed at different geographies; here the geography is ZONING_ID.
# Sum active listing nights and reservation days per reporting month, zoning ID,
# jurisdiction and real-estate property type.
zoning_group_keys = ['Reporting_Month', 'ZONING_ID', 'JURISDICTION', 'Real_Estate_Property_Type']
tahoe_monthly_data_grouped_zoning = (
    tahoe_monthly_data
    .groupby(zoning_group_keys)
    .agg({'Active_Listing_Nights': 'sum', 'Reservation_Days': 'sum'})
    .reset_index()
)

# Occupancy rate = reservation days / active listing nights.
# Groups with zero active listing nights get 0 instead of a division by zero.
zoning_listing_nights = tahoe_monthly_data_grouped_zoning['Active_Listing_Nights']
zoning_reservation_days = tahoe_monthly_data_grouped_zoning['Reservation_Days']
tahoe_monthly_data_grouped_zoning['Occupancy_Rate'] = np.where(
    zoning_listing_nights == 0,
    0,
    zoning_reservation_days / zoning_listing_nights,
)


In [23]:
# This could be analyzed at different geographies; here the geography is TAZ.
# Sum active listing nights and reservation days per reporting month, TAZ,
# jurisdiction and real-estate property type.
taz_group_keys = ['Reporting_Month', 'TAZ', 'JURISDICTION', 'Real_Estate_Property_Type']
tahoe_monthly_data_grouped_taz = (
    tahoe_monthly_data
    .groupby(taz_group_keys)
    .agg({'Active_Listing_Nights': 'sum', 'Reservation_Days': 'sum'})
    .reset_index()
)

# Occupancy rate = reservation days / active listing nights.
# Groups with zero active listing nights get 0 instead of a division by zero.
taz_listing_nights = tahoe_monthly_data_grouped_taz['Active_Listing_Nights']
taz_reservation_days = tahoe_monthly_data_grouped_taz['Reservation_Days']
tahoe_monthly_data_grouped_taz['Occupancy_Rate'] = np.where(
    taz_listing_nights == 0,
    0,
    taz_reservation_days / taz_listing_nights,
)


In [25]:
# Write both grouped tables to CSV (TAZ first, then zoning), without the row index.
for grouped_frame, grouped_csv_path in (
    (tahoe_monthly_data_grouped_taz, 'data/tahoe_monthly_data_grouped_taz.csv'),
    (tahoe_monthly_data_grouped_zoning, 'data/tahoe_monthly_data_grouped_zoning.csv'),
):
    grouped_frame.to_csv(grouped_csv_path, index=False)

In [29]:
# Monthly rows for the City of South Lake Tahoe with a reporting month after 2022-12-01.
# Both conditions are built on tahoe_monthly_data and combined before indexing, so
# the boolean mask always has the same index as the frame it filters (previously
# the date mask from the full frame was applied to the already-filtered subset
# and relied on implicit index alignment).
# NOTE(review): Reporting_Month is compared as a string; this orders correctly
# only for ISO-style dates (YYYY-MM-DD) — confirm the format.
# NOTE(review): there is no upper bound, so any months after 2023 present in the
# data are included despite the "2023" in the names — confirm this is intended.
is_cslt = tahoe_monthly_data['JURISDICTION'] == 'City of South Lake Tahoe'
is_after_2022 = tahoe_monthly_data['Reporting_Month'] > '2022-12-01'
CSLT_monthly_data_active_2023 = tahoe_monthly_data.loc[is_cslt & is_after_2022]
# NOTE(review): unlike the other exports in this notebook, the row index is
# written to the file here (no index=False) — confirm whether consumers need it.
CSLT_monthly_data_active_2023.to_csv('data/CSLT_Data_2023.csv')

In [32]:
# Total active listing nights and reservation days per property (and zoning ID)
# over the filtered City of South Lake Tahoe rows.
cslt_totals_by_property = (
    CSLT_monthly_data_active_2023
    .groupby(['Property_ID', 'ZONING_ID'])
    .agg({'Active_Listing_Nights': 'sum', 'Reservation_Days': 'sum'})
    .reset_index()
)

# Keep only properties with at least one active listing night, then export them.
has_active_nights = cslt_totals_by_property['Active_Listing_Nights'] > 0
CSLT_monthly_data_active_2023_grouped = cslt_totals_by_property.loc[has_active_nights]
CSLT_monthly_data_active_2023_grouped.to_csv('data/active_CSLT_Properties.csv')