In [4]:
# import packages
import pandas as pd
import pathlib
from pathlib import Path
import os
import arcpy
from utils import *
import numpy as np
import pickle
# external connection packages
from sqlalchemy.engine import URL
from sqlalchemy import create_engine

# pandas behaviour and display settings
pd.set_option("mode.copy_on_write", True)       # opt in to copy-on-write semantics
pd.set_option("mode.chained_assignment", None)  # turn off chained-assignment warnings
pd.set_option("display.max_columns", 999)       # display up to 999 columns
pd.set_option("display.max_rows", 999)          # display up to 999 rows

# file geodatabase on the local machine used as the on-disk workspace
workspace = r"C:\Users\mbindl\Desktop\Workspace.gdb"
# absolute path of the directory the notebook is running from
local_path = Path.cwd()

# input data: the 'data' folder that sits beside the current working directory
data_dir = local_path.parent / 'data'
# processed outputs: 'processed_data' subfolder inside that data folder
out_dir = data_dir / 'processed_data'

# gdb for operations that do not work in the memory workspace
# (alternative kept for reference: gdb = os.path.join(local_path, 'Workspace.gdb'))
gdb = workspace

# arcpy environment: use the 'memory' workspace and allow outputs to be overwritten
# (to clear the memory workspace first: arcpy.management.Delete('memory'))
arcpy.env.workspace = 'memory'
arcpy.env.overwriteOutput = True

# spatial reference built from WKID 26910 (NAD 1983 UTM Zone 10N)
sr = arcpy.SpatialReference(26910)

# Database connection files (.sde) used to get parcels from the database
# network folder that holds the connection files
filePath = "F:/GIS/PARCELUPDATE/Workspace/"
# one full connection-file path per database, built in the order listed
sdeBase, sdeCollect, sdeTabular, sdeEdit = (
    os.path.join(filePath, sde_file)
    for sde_file in ("Vector.sde", "Collection.sde", "Tabular.sde", "Edit.sde")
)

# Pickle file paths (both live directly in data_dir)
# part 1 - spatial joins and new categorical fields
parcel_pickle_part1 = data_dir.joinpath('parcel_pickle1.pkl')
# part 2 - forecasting applied
parcel_pickle_part2 = data_dir.joinpath('parcel_pickle2.pkl')


In [None]:
# Path to the parcel/corridor spatial-join CSV.
# Must be a raw string: in a normal string literal the "\U" in "C:\Users" starts a
# unicode escape and the cell fails with a SyntaxError before anything runs.
spjn_parcel_corridor = r"C:\Users\mbindl\Documents\GitHub\Transportation\RegionalTransportationPlan\2023\data\SpJn_Parcel_Corridor.csv"

In [17]:
# location of the parcel/corridor spatial-join CSV (raw string keeps the backslashes literal)
spjn_parcel_corridor = r"C:\Users\mbindl\Documents\GitHub\Transportation\RegionalTransportationPlan\2023\data\SpJn_Parcel_Corridor.csv"
# load the CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer=spjn_parcel_corridor)
# print the column names, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 61232 entries, 0 to 61231
Data columns (total 47 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   Join_Count                      61232 non-null  int64  
 1   TARGET_FID                      61232 non-null  int64  
 2   APN                             61232 non-null  object 
 3   PPNO                            61232 non-null  float64
 4   APO_ADDRESS                     61232 non-null  object 
 5   Residential_Units               61232 non-null  int64  
 6   TouristAccommodation_Units      61232 non-null  int64  
 7   CommercialFloorArea_SqFt        61232 non-null  float64
 8   YEAR                            61232 non-null  int64  
 9   JURISDICTION                    61232 non-null  object 
 10  COUNTY                          61232 non-null  object 
 11  OWNERSHIP_TYPE                  59607 non-null  object 
 12  COUNTY_LANDUSE_DESCRIPTION      

In [19]:
# Long-format totals: one row per (corridor, unit type) with the summed value
# for Residential Units, Tourist Accommodation Units and Commercial Floor Area
(
    df.melt(
        id_vars=['CORRIDOR_NAME'],
        value_vars=['Residential_Units', 'TouristAccommodation_Units', 'CommercialFloorArea_SqFt'],
        var_name='Unit Type',
        value_name='Value',
    )
    .groupby(['CORRIDOR_NAME', 'Unit Type'])
    .sum()
)

# get total Residential Units, Tourist Units, and Commercial Units by Corridor


Unnamed: 0_level_0,Unnamed: 1_level_0,Value
CORRIDOR_NAME,Unit Type,Unnamed: 2_level_1
Meyers Y,CommercialFloorArea_SqFt,1782245.0
Meyers Y,Residential_Units,12794.0
Meyers Y,TouristAccommodation_Units,472.0
NV SR28,CommercialFloorArea_SqFt,1181361.0
NV SR28,Residential_Units,7951.0
NV SR28,TouristAccommodation_Units,1002.0
SR89 28,CommercialFloorArea_SqFt,1294408.0
SR89 28,Residential_Units,11726.0
SR89 28,TouristAccommodation_Units,1224.0
SR89_Rec,CommercialFloorArea_SqFt,19903.0


In [25]:
# Wide-format totals: one row per corridor, one summed column per unit type
unit_columns = ['Residential_Units', 'TouristAccommodation_Units', 'CommercialFloorArea_SqFt']
pivot_df = df.pivot_table(
    index='CORRIDOR_NAME',
    values=unit_columns,
    aggfunc='sum',
    fill_value=0,
)
# move CORRIDOR_NAME from the index back to a regular column
pivot_df = pivot_df.reset_index()

pivot_df

Unnamed: 0,CORRIDOR_NAME,CommercialFloorArea_SqFt,Residential_Units,TouristAccommodation_Units
0,Meyers Y,1782245,12794,472
1,NV SR28,1181361,7951,1002
2,SR89 28,1294408,11726,1224
3,SR89_Rec,19903,2635,112
4,US50 ES,208395,2137,153
5,US50 SS,2000648,12173,8842


In [31]:
# Share of residential parcels per corridor, split by how many units each parcel holds.

# Keep only parcels with at least one residential unit. NaN compares False against
# "> 0", so missing values are dropped by the same test. The explicit .copy() makes
# df_filtered an independent frame, so adding a column below never touches df.
df_filtered = df[df['Residential_Units'] > 0].copy()

# Right-closed bins (0, 1], (1, 20], (20, inf) so each label matches its interval:
#   exactly 1 unit -> '1 Residential Unit'
#   2 through 20   -> '2-20 Residential Units'  (20 is included here)
#   more than 20   -> '>20 Residential Units'
# The previous left-closed edges [1, 2, 20, inf] placed 20-unit parcels in '>20'.
bins = [0, 1, 20, float('inf')]
labels = ['1 Residential Unit', '2-20 Residential Units', '>20 Residential Units']

# Categorise every parcel by its Residential_Units value
df_filtered['Residential_Unit_Category'] = pd.cut(
    df_filtered['Residential_Units'], bins=bins, labels=labels, right=True
)

# Parcel counts per corridor and category. observed=False keeps categories that have
# no parcels in a corridor (shown as 0) and states the categorical-groupby behaviour
# explicitly instead of relying on the pandas default.
category_counts_by_corridor = (
    df_filtered
    .groupby(['CORRIDOR_NAME', 'Residential_Unit_Category'], observed=False)
    .size()
    .unstack(fill_value=0)
)

# Convert counts to row-wise percentages (each corridor sums to 100)
category_percentages_by_corridor = category_counts_by_corridor.div(
    category_counts_by_corridor.sum(axis=1), axis=0
) * 100

category_percentages_by_corridor

Residential_Unit_Category,1 Residential Unit,2-20 Residential Units,>20 Residential Units
CORRIDOR_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Meyers Y,96.837503,2.976467,0.186029
NV SR28,97.972436,1.987808,0.039756
SR89 28,95.502347,4.460094,0.037559
SR89_Rec,97.421875,2.578125,0.0
US50 ES,97.55142,2.399608,0.048972
US50 SS,88.684751,11.088296,0.226953


In [34]:
# Drop parcels that have no EXISTING_LANDUSE value
df_filtered_landuse = df.dropna(subset=['EXISTING_LANDUSE'])

# Number of parcels for each land use type
landuse_counts = df_filtered_landuse['EXISTING_LANDUSE'].value_counts()

# Share of each land use type, as a percentage of all parcels that have a land use
total_parcels_landuse = df_filtered_landuse.shape[0]
landuse_percentages = landuse_counts.div(total_parcels_landuse).mul(100)

landuse_percentages


EXISTING_LANDUSE
Single Family Residential    55.619185
Open Space                   14.835589
Condominium                  10.314715
Vacant                        9.019917
Multi-Family Residential      3.934773
Commercial                    2.801196
Condominium Common Area       2.320895
Public Service                0.463507
Tourist Accommodation         0.434958
Recreation                    0.255265
Name: count, dtype: float64

In [35]:
# Restrict the parcels to the three residential land use types
residential_landuse_types = ['Single Family Residential', 'Condominium', 'Multi-Family Residential']
is_residential_landuse = df['EXISTING_LANDUSE'].isin(residential_landuse_types)
landuse_filtered = df.loc[is_residential_landuse]

# Number of parcels for each of those land use types
landuse_counts_filtered = landuse_filtered['EXISTING_LANDUSE'].value_counts()

# Share of each type as a percentage of the three types combined
total_filtered = landuse_counts_filtered.sum()
landuse_percentages_filtered = landuse_counts_filtered.div(total_filtered).mul(100)

# Display the results
print(landuse_percentages_filtered)


EXISTING_LANDUSE
Single Family Residential    79.605326
Condominium                  14.763004
Multi-Family Residential      5.631670
Name: count, dtype: float64
