In [1]:
# import packages
import pandas as pd
import pathlib
from pathlib import Path
import os
import arcpy
from utils import *
import numpy as np
import pickle
# external connection packages
from sqlalchemy.engine import URL
from sqlalchemy import create_engine

# --- pandas behaviour and display settings ---
pd.set_option("mode.copy_on_write", True)
pd.set_option("mode.chained_assignment", None)
pd.set_option("display.max_columns", 999)
pd.set_option("display.max_rows", 999)

# --- local folders ---
# scratch file geodatabase on the local machine
workspace = r"C:\Users\mbindl\Desktop\Workspace.gdb"
# directory the notebook is executed from
local_path = Path().absolute()
# NOTE(review): the original carried a truncated comment here ("get bonus_condit") - intent unclear
# input data folder: a sibling of the working directory (one level up, then 'data')
data_dir = local_path.parents[0] / 'data'
# processed outputs are written below the data folder
out_dir  = local_path.parents[0] / 'data/processed_data'
# on-disk geodatabase for operations that do not work in the memory workspace
gdb = workspace

# --- arcpy environment ---
# default to the in-memory workspace
# (arcpy.management.Delete('memory') can be run manually to clear it)
arcpy.env.workspace = 'memory'
# let geoprocessing tools overwrite existing outputs
arcpy.env.overwriteOutput = True
# spatial reference: NAD 1983 UTM Zone 10N (WKID 26910)
sr = arcpy.SpatialReference(26910)

# --- enterprise geodatabase connection files ---
# network folder that holds the .sde connection files
filePath = "F:/GIS/PARCELUPDATE/Workspace/"
# one connection-file path per database
sdeBase, sdeCollect, sdeTabular, sdeEdit = (
    os.path.join(filePath, sde_name)
    for sde_name in ("Vector.sde", "Collection.sde", "Tabular.sde", "Edit.sde")
)

# --- pickle checkpoints ---
# part 1 - spatial joins and new categorical fields
parcel_pickle_part1    = data_dir / 'parcel_pickle1.pkl'
# part 2 - forecasting applied
parcel_pickle_part2    = data_dir / 'parcel_pickle2.pkl'


In [None]:
# import packages
import pandas as pd
from pathlib import Path
from arcgis import GeoAccessor, GeoSeriesAccessor

# Summarise parcels by jurisdiction into "built 1975 or before" vs "built after 1975"
# counts and percentages, then export the table as a CSV.

# current working directory
local_path = Path().absolute()
# folder to save processed data (one level above the working directory)
out_dir  = local_path.parents[0] / 'data'
# path to the parcel master feature class
parcel_master = Path("F:/GIS/DB_CONNECT/Vector.sde") / "sde.SDE.Parcel_Master"
# load the feature class as a spatially enabled DataFrame
sdfParcels    = pd.DataFrame.spatial.from_featureclass(parcel_master)

# Keep only rows whose YEAR_BUILT is an all-digit string, not blank and not '0'.
# NaN becomes the text 'nan' after astype(str), which fails the isnumeric test.
year_text = sdfParcels['YEAR_BUILT'].astype(str)
sdf = sdfParcels.loc[
    year_text.str.isnumeric() &
    (year_text.str.strip() != '') &
    (sdfParcels['YEAR_BUILT'] != '0')
].copy()

# Convert 'YEAR_BUILT' to integer. Plain column assignment replaces the column,
# so the integer dtype sticks; `.loc[:, col] = ...` writes into the existing
# column in place and may leave its original dtype unchanged.
sdf['YEAR_BUILT'] = sdf['YEAR_BUILT'].astype(int)

# Flag columns (1/0) so a groupby-sum yields parcel counts per period
sdf['Before1975'] = (sdf['YEAR_BUILT'] <= 1975).astype(int)
sdf['After1975'] = (sdf['YEAR_BUILT'] > 1975).astype(int)

# Group by jurisdiction and sum the counts
df = sdf.groupby('JURISDICTION')[['Before1975', 'After1975']].sum().reset_index()

# Percentage columns. Round before casting to int: a bare astype(int) truncates,
# which can make the two percentages of a jurisdiction add up to 99.
total_built = df['Before1975'] + df['After1975']
df['Before1975_percent'] = (df['Before1975'] / total_built * 100).round().astype(int)
df['After1975_percent'] = (df['After1975'] / total_built * 100).round().astype(int)

# Rename columns to be more readable with % sign
df = df.rename(columns={
    'JURISDICTION':'Jurisdiction',
    'Before1975': 'Built 1975 or Before',
    'After1975': 'After 1975',
    'Before1975_percent': 'Built 1975 or Before (%)',
    'After1975_percent': 'Built After 1975 (%)'
})

# Jurisdiction names
jurisdictions = {
    'CC': 'Carson City County',
    'DG': 'Douglas County',
    'CSLT': 'City of South Lake Tahoe',
    'EL': 'El Dorado County',
    'PL': 'Placer County',
    'WA': 'Washoe County'
}
# Replace jurisdiction codes with full names; a code missing from the lookup
# keeps its original value instead of silently becoming NaN in the export.
df['Jurisdiction'] = df['Jurisdiction'].map(jurisdictions).fillna(df['Jurisdiction'])
df.to_csv(out_dir / 'TahoeParcels_Built1975byJurisdiction.csv', index=False)

In [None]:
# Path to the parcel/corridor spatial-join CSV export.
# Raw string is required: in a normal string literal "\U" starts an 8-hex-digit
# unicode escape, so "C:\Users\..." is a SyntaxError.
spjn_parcel_corridor = r"C:\Users\mbindl\Documents\GitHub\Transportation\RegionalTransportationPlan\2023\data\SpJn_Parcel_Corridor.csv"

In [None]:
# location of the parcel/corridor spatial-join export (CSV)
spjn_parcel_corridor = r"C:\Users\mbindl\Documents\GitHub\Transportation\RegionalTransportationPlan\2023\data\SpJn_Parcel_Corridor.csv"
# read the export into a DataFrame, then print its column / dtype summary
df = pd.read_csv(filepath_or_buffer=spjn_parcel_corridor)
df.info()

In [None]:
# Corridor totals in long format: one row per (corridor, measure) pair, where the
# measures are residential units, tourist accommodation units and commercial floor area.
(
    df.melt(
        id_vars=['CORRIDOR_NAME'],
        value_vars=[
            'Residential_Units',
            'TouristAccommodation_Units',
            'CommercialFloorArea_SqFt',
        ],
        var_name='Unit Type',
        value_name='Value',
    )
    .groupby(['CORRIDOR_NAME', 'Unit Type'])
    .sum()
)


In [None]:
# Wide-format corridor totals: one row per corridor, one summed column per measure.
pivot_df = pd.pivot_table(
    df,
    index='CORRIDOR_NAME',
    values=[
        'Residential_Units',
        'TouristAccommodation_Units',
        'CommercialFloorArea_SqFt',
    ],
    aggfunc='sum',
    fill_value=0,
)
# move CORRIDOR_NAME from the index back into a regular column
pivot_df = pivot_df.reset_index()

pivot_df

In [None]:
# Share of parcels per corridor that fall in each residential-unit size class.

# Keep only parcels with a positive, non-missing Residential_Units value.
# .copy() makes the filtered frame independent of `df` before a column is added.
df_filtered = df[df['Residential_Units'].notna() & (df['Residential_Units'] > 0)].copy()

# Right-closed bin edges: (0, 1], (1, 20], (20, inf).
# The earlier edges [1, 2, 20, inf] with right=False produced [1, 2), [2, 20), [20, inf),
# which put a parcel with exactly 20 units under '>20 Residential Units'.
bins = [0, 1, 20, float('inf')]
labels = ['1 Residential Unit', '2-20 Residential Units', '>20 Residential Units']  # one label per bin

# Add a new column to categorize parcels based on their Residential_Units
df_filtered['Residential_Unit_Category'] = pd.cut(
    df_filtered['Residential_Units'], bins=bins, labels=labels, right=True
)

# Count parcels per corridor and category. observed=False keeps a zero-count column
# for every category, so each corridor row always shows all three classes.
category_counts_by_corridor = (
    df_filtered
    .groupby(['CORRIDOR_NAME', 'Residential_Unit_Category'], observed=False)
    .size()
    .unstack(fill_value=0)
)

# Convert counts to row-wise percentages (each corridor sums to 100)
category_percentages_by_corridor = category_counts_by_corridor.div(category_counts_by_corridor.sum(axis=1), axis=0) * 100

category_percentages_by_corridor

In [None]:
# Percentage of parcels in each existing land use type.

# drop parcels that have no EXISTING_LANDUSE value
df_filtered_landuse = df.dropna(subset=['EXISTING_LANDUSE'])

# number of parcels per land use type
landuse_counts = df_filtered_landuse['EXISTING_LANDUSE'].value_counts()

# denominator: every parcel that has a land use value
total_parcels_landuse = df_filtered_landuse.shape[0]
landuse_percentages = landuse_counts.div(total_parcels_landuse).mul(100)

landuse_percentages


In [None]:
# Split of parcels among the three residential land use types only.

# keep parcels whose land use is one of the three residential types
landuse_filtered = df.loc[
    df['EXISTING_LANDUSE'].isin(['Single Family Residential', 'Condominium', 'Multi-Family Residential'])
]

# number of parcels per residential type
landuse_counts_filtered = landuse_filtered['EXISTING_LANDUSE'].value_counts()

# percentages are relative to the combined total of these three types
total_filtered = landuse_counts_filtered.sum()
landuse_percentages_filtered = landuse_counts_filtered.div(total_filtered).mul(100)

# show the result
print(landuse_percentages_filtered)
