# Data Engineering Notebook

## Setup

> TO DO
* Add ADU, Bonus Unit, and Allocation fields to Parcel_History_Attributed
* Clean up unnecessary fields in Parcel_History_Attributed
* Add VHR/Bedrooms to the data
* Add CFA research from Ken to the data
* Check totals year over year
* Add in way to model TAUs as Residential units where City converted Hotels>Apartments

#### Terminology 

> Data engineering can consist of ***collection, cleaning, transformation, processing, and automating and monitoring tasks***
* Collection - e.g., getting data from a REST service as a DataFrame
* Cleaning - categorizing 
* Transformation - categorizing, standardizing
* Processing - algorithm, pivot, groupby, merge
* Automating - schedule task, Apache Airflow

> Planning Jargon
* ADU - Accessory Dwelling Unit
* Existing Development Right - refers to residential, commercial, or tourist development currently built in the Lake Tahoe Basin

### Packages

In [None]:
import pandas as pd
import os
import pathlib
import arcpy
from arcgis.features import FeatureLayer, GeoAccessor, GeoSeriesAccessor
from arcgis.mapping import show_styles, display_colormaps
from arcgis.gis import GIS
from utils import *
from datetime import datetime
import logging
import sys
import pickle
import datetime
from time import strftime  
# auto reload imports
%load_ext autoreload
%autoreload 2

In [None]:

def fieldJoinCalc_multikey(updateFC, updateFieldsList_key, updateFieldsList_value, sourceFC, sourceFieldsList_key, sourceFieldsList_value):
    """Copy field values from sourceFC into updateFC for rows that share a composite key.

    Args:
        updateFC: table / feature class whose rows are updated in place.
        updateFieldsList_key: key field names in updateFC (any number, order matters).
        updateFieldsList_value: field names in updateFC that receive the values.
        sourceFC: table / feature class the values are read from.
        sourceFieldsList_key: key field names in sourceFC, positionally matching
            updateFieldsList_key.
        sourceFieldsList_value: field names in sourceFC, positionally matching
            updateFieldsList_value.

    Raises:
        ValueError: if the key lists or the value lists differ in length.

    Rows with a None in any key field are skipped on both sides. When several
    source rows share a key, the last one read wins.
    """
    from time import strftime

    if len(updateFieldsList_key) != len(sourceFieldsList_key):
        raise ValueError("update and source key field lists must have the same length")
    if len(updateFieldsList_value) != len(sourceFieldsList_value):
        raise ValueError("update and source value field lists must have the same length")

    n_keys = len(sourceFieldsList_key)
    print ("Started data transfer: " + strftime("%Y-%m-%d %H:%M:%S"))

    def _join_key(row):
        # Key parts are compared as text (as the original str() concatenation did), but kept
        # in a tuple so that e.g. ("12", "3") and ("1", "23") can never collide.
        parts = row[:n_keys]
        if any(part is None for part in parts):
            return None
        return tuple(str(part) for part in parts)

    # Build the lookup inside a `with` so the search cursor is released promptly.
    valueDict = {}
    with arcpy.da.SearchCursor(sourceFC, list(sourceFieldsList_key) + list(sourceFieldsList_value)) as sourceRows:
        for sourceRow in sourceRows:
            keyValue = _join_key(sourceRow)
            if keyValue is not None:
                valueDict[keyValue] = sourceRow[n_keys:]

    total_count = 0
    with arcpy.da.UpdateCursor(updateFC, list(updateFieldsList_key) + list(updateFieldsList_value)) as updateRows:
        for updateRow in updateRows:
            keyValue = _join_key(updateRow)
            if keyValue is None or keyValue not in valueDict:
                continue
            total_count += 1
            if (total_count % 1000) == 0:
                print (f"Updating row {total_count}")
            # overwrite every value field (everything after the key fields) in one go
            updateRow[n_keys:] = valueDict[keyValue]
            updateRows.updateRow(updateRow)
    del valueDict
    print (f"Finished data transfer: {total_count} rows updated " + strftime("%Y-%m-%d %H:%M:%S"))

### Global Variables

In [None]:
# --- pandas behaviour and display settings ---
pd.set_option("mode.copy_on_write", True)
pd.set_option("mode.chained_assignment", None)
pd.set_option("display.max_columns", 999)
pd.set_option("display.max_rows", 999)
pd.set_option("display.float_format", '{:,.2f}'.format)

# --- arcpy environment ---
# spatial reference: NAD 1983 UTM Zone 10N (EPSG 26910)
sr = arcpy.SpatialReference(26910)
# intermediate outputs go to the in-memory workspace and may be overwritten
arcpy.env.workspace = 'memory'
arcpy.env.overwriteOutput = True
arcpy.env.outputCoordinateSystem = sr
# # Set the extent environment using a feature class
# arcpy.env.extent = "TRPA_Boundary"

# --- folders ---
# current working directory
local_path = pathlib.Path().absolute()
# every data folder hangs off the parent of the working directory
_parent_dir = local_path.parents[0]
# raw input data
data_dir   = _parent_dir / 'Reporting/data/raw_data'
# folder to save processed data
out_dir    = _parent_dir / 'Reporting/data/processed_data'
# workspace gdb for stuff that doesnt work in memory
gdb        = _parent_dir / 'Reporting/data/Workspace.gdb'

In [None]:
print(local_path)
print(data_dir)
print(out_dir)
print(gdb)

### Map Setup

In [None]:
# Set up the GIS object
## portal URL = "https://maps.trpa.org/portal/home/"
## AGOL URL   = "https://www.arcgis.com"
# getpass is not imported in the Packages cell, so import it here; it prompts
# for the password without echoing it.
import getpass

gis = GIS(
    url="https://maps.trpa.org/portal/home/",
    # credentials are prompted for interactively when the cell runs
    username=input("Enter username:"),
    password=getpass.getpass("Enter password:"),
)

In [None]:
# make a map object for the "Lake Tahoe" location at zoom level 10
# NOTE(review): the name `map` shadows Python's built-in map() for the rest of the session;
# consider renaming once it is confirmed no later cell relies on this name.
map = gis.map("Lake Tahoe", zoomlevel=10)

### Get Data

> Sources
* https://www.laketahoeinfo.org/WebServices/List
* https://maps.trpa.org/server/rest/services/
* sdeBase, sdeCollect, sdeTabular

In [None]:
# feature classes from sde
# NOTE: sdeCollect / sdeBase are assigned in the "network path to connection files" cell
# further down; that cell has to be run before this one.
# The dataset and feature-class names are passed as separate components: a component that
# starts with a backslash makes os.path.join throw away the connection-file path on Windows.
sde_ParcelAtt    = os.path.join(sdeCollect, "SDE.Parcel", "SDE.Parcel_History_Attributed")
sde_ParcelMaster = os.path.join(sdeBase, "sde.SDE.Parcels", "sde.SDE.Parcel_Master")

## get parcel data
sdfParcel     = get_fs_data_spatial("https://maps.trpa.org/server/rest/services/Existing_Development/MapServer/2")
sdfParcel23   = get_fs_data_spatial_query("https://maps.trpa.org/server/rest/services/Existing_Development/MapServer/2", "YEAR = 2023") 
# get spatial data to join to
sdfDistrict   = get_fs_data_spatial("https://maps.trpa.org/server/rest/services/Planning/MapServer/1")
sdfPlan       = get_fs_data_spatial("https://maps.trpa.org/server/rest/services/Boundaries/MapServer/0")
sdfTownCenter = get_fs_data_spatial("https://maps.trpa.org/server/rest/services/Boundaries/MapServer/1")
sdfTCbuffer   = get_fs_data_spatial("https://maps.trpa.org/server/rest/services/Planning/MapServer/4")
sdfCSLT       = get_fs_data_spatial("https://maps.trpa.org/server/rest/services/Boundaries/MapServer/2")
sdfCounty     = get_fs_data_spatial("https://maps.trpa.org/server/rest/services/Boundaries/MapServer/3")
sdfTRPA       = get_fs_data_spatial("https://maps.trpa.org/server/rest/services/Boundaries/MapServer/4")

# set spatial reference on every frame pulled from the services above
for service_frame in (sdfParcel, sdfParcel23, sdfDistrict, sdfPlan, sdfTownCenter,
                      sdfTCbuffer, sdfCSLT, sdfCounty, sdfTRPA):
    service_frame.spatial.sr = sr

In [None]:
# network path to connection files
filePath = "F:/GIS/PARCELUPDATE/Workspace/"
# database connection files (.sde) inside that folder
sdeBase    = os.path.join(filePath, "Vector.sde")
sdeCollect = os.path.join(filePath, "Collection.sde")
sdeTabular = os.path.join(filePath, "Tabular.sde")
# feature classes from sde: <connection file>\<feature dataset>\<feature class>
# Parcel_History_Attributed is the only one read from the Collection connection;
# everything else comes from the Vector connection.
sde_ParcelAtt        = sdeCollect + "\\SDE.Parcel\\SDE.Parcel_History_Attributed"
sde_LocalPlan        = sdeBase + "\\sde.SDE.Planning\\sde.SDE.LocalPlan"
sde_CSLT             = sdeBase + "\\sde.SDE.Jurisdictions\\sde.SDE.CSLT"
sde_CurrentParcels   = sdeBase + "\\sde.SDE.Parcels\\sde.SDE.Parcel_Master"
sde_District         = sdeBase + "\\sde.SDE.Planning\\sde.SDE.District"
sde_TownCenter       = sdeBase + "\\sde.SDE.Planning\\sde.SDE.TownCenter"
sde_TownCenterBuffer = sdeBase + "\\sde.SDE.Planning\\sde.SDE.TownCenter_Buffer"
sde_TRPAboundary     = sdeBase + "\\sde.SDE.Jurisdictions\\sde.SDE.TRPA_bdy"
sde_BonusUnitboundary= sdeBase + "\\sde.SDE.Planning\\sde.SDE.Bonus_unit_boundary"
sde_UrbanArea        = sdeBase + "\\sde.SDE.Jurisdictions\\sde.SDE.UrbanAreas"
sde_Zip              = sdeBase + "\\sde.SDE.Jurisdictions\\sde.SDE.Postal_ZIP"
sde_TAZ              = sdeBase + "\\sde.SDE.Transportation\\sde.SDE.Transportation_Analysis_Zone"
# NOTE(review): this one is named sdf_ (not sde_) although it is a path like the others,
# and its feature-class prefix is upper-case "SDE." — confirm both are intended.
sdf_County           = sdeBase + "\\sde.SDE.Jurisdictions\\SDE.Counties"

In [None]:
## LT Info Data
# Each call reads one laketahoeinfo.org web service straight into a DataFrame.
# NOTE(review): the trailing GUID in every URL looks like a service access token that is
# committed with the notebook — consider loading it from configuration instead.
# Verified Development Rights
dfDevRight  = pd.read_json("https://www.laketahoeinfo.org/WebServices/GetParcelDevelopmentRightsForAccela/JSON/e17aeb86-85e3-4260-83fd-a2b32501c476")
# Deed Restrictions as a DataFrame
# NOTE(review): this is the only URL without the "www." host prefix — confirm it resolves the same way.
dfDeed      = pd.read_json("https://laketahoeinfo.org/WebServices/GetDeedRestrictedParcels/JSON/e17aeb86-85e3-4260-83fd-a2b32501c476")
# IPES LTinfo as a DataFrame
dfIPES      = pd.read_json("https://www.laketahoeinfo.org/WebServices/GetParcelIPESScores/JSON/e17aeb86-85e3-4260-83fd-a2b32501c476")
# Development Rights Transacted and Banked as a DataFrame
dfDevRights = pd.read_json("https://www.laketahoeinfo.org/WebServices/GetTransactedAndBankedDevelopmentRights/JSON/e17aeb86-85e3-4260-83fd-a2b32501c476")
# All Parcels as a DataFrame
dfLTParcel  = pd.read_json("https://www.laketahoeinfo.org/WebServices/GetAllParcels/JSON/e17aeb86-85e3-4260-83fd-a2b32501c476")

In [None]:
## parcel development history service (Existing_Development, layer 2)
devhistoryURL = "https://maps.trpa.org/server/rest/services/Existing_Development/MapServer/2"
# per-year pulls of parcel history; only the 2012 query is currently enabled
# df23 = get_fs_data_query(devhistoryURL, "Year = 2023")
# df22 = get_fs_data_query(devhistoryURL, "Year = 2022")
# df21 = get_fs_data_query(devhistoryURL, "Year = 2021")
# df20 = get_fs_data_query(devhistoryURL, "Year = 2020")
# df19 = get_fs_data_query(devhistoryURL, "Year = 2019")
# df18 = get_fs_data_query(devhistoryURL, "Year = 2018")
df12 = get_fs_data_query(devhistoryURL, "Year = 2012")

In [None]:
# set spatial reference (same assignment as in the Get Data cell, minus sdfParcel23)
for service_frame in (sdfParcel, sdfDistrict, sdfPlan, sdfTownCenter,
                      sdfTCbuffer, sdfCSLT, sdfCounty, sdfTRPA):
    service_frame.spatial.sr = sr

In [None]:
# web service url
permitTable = "https://maps.trpa.org/server/rest/services/Permit_Records/MapServer/1"
# get permit data as a dataframe
dfTRPAPermit = get_fs_data(permitTable)

## Permit Data Engineering

#### TRPA Permit Data

***Get Data***
> TRPA permit data is exported from Accela nightly, stored in the Collection.sde enterprise geodatabase, and published to the TRPA server as the web service below

In [None]:
## TRPA Permit Data Engineering
dfTRPAPermit.info()

***Transformation***

In [None]:
# Work on a copy: the exploration cells further down still read the raw frame by its
# original column names (Detailed_Description, Accela_CAPType_Name, Record_Status), so
# nothing done here may leak back into dfTRPAPermit.
# (renamecolumns comes from utils; whether it renames in place is not visible here.)
df = dfTRPAPermit.copy()

# final fields for all permit dataframes
fields = ['APN', 'Address', 'Jurisdiction', 'Permit_ID', 
          'Permit_Type','Permit_Category', 'Permit_Status',  'Description',
          'Applied_Date', 'Issued_Date', 'PreGrade_Date', 'Finaled_Date'
          ]

# source column -> standard column
column_mapping = {
'Accela_ID' : 'Permit_ID',
'Detailed_Description' : 'Description',
'Record_Status' : 'Permit_Status',
'Accela_CAPType_Name' : 'Permit_Type',
'File_Date' : 'Applied_Date'
}

# rename columns based on dictionary
df = renamecolumns(df, column_mapping, False)

# add any standard field the source does not provide as an empty column
for field in fields:
    if field not in df.columns:
        df[field] = None
# limit to the final fields
df = df[fields]
# add jurisdiction value (item assignment: attribute assignment only works for existing columns)
df["Jurisdiction"] = "TRPA"
df.info()


***Processing***

In [None]:
# print each distinct Detailed_Description value on its own line
distinct_descriptions = dfTRPAPermit["Detailed_Description"].unique()
for description in distinct_descriptions:
    print(description)

In [None]:
# print each distinct Accela_CAPType_Name (permit type) value on its own line
distinct_permit_types = dfTRPAPermit["Accela_CAPType_Name"].unique()
for permittype in distinct_permit_types:
    print(permittype)

In [None]:
# print each distinct Record_Status value on its own line
distinct_statuses = dfTRPAPermit["Record_Status"].unique()
for status in distinct_statuses:
    print(status)

In [None]:
# Lookup table shared by the three TRPA lookups below.
# Built with os.path.join: the former literal "resources\Value_Lookups.csv" relied on "\V"
# being an invalid escape sequence, which newer Python versions warn about.
value_lookup = os.path.join("resources", "Value_Lookups.csv")
# one lookup per target field, restricted to the rows where Jurisdiction == 'TRPA'
trpa_reportingcategory_lookup = import_lookup_dictionary(value_lookup,'key','value','Jurisdiction','TRPA','FieldName','Reporting_Category')
trpa_permittype_lookup        = import_lookup_dictionary(value_lookup,'key','value','Jurisdiction','TRPA','FieldName','Permit_Type')
trpa_permitstatus_lookup      = import_lookup_dictionary(value_lookup,'key','value','Jurisdiction','TRPA','FieldName','Permit_Status')

In [None]:
# Update fields from lookup dictionaries
# NOTE(review): the TRPA transformation cell limits df to `fields`, which lists
# 'Permit_Category' but not 'Reporting_Category' — as written the first line would raise
# KeyError. Confirm which column the reporting-category lookup is meant to read.
# If the lookups are plain dicts, values missing from them become NaN after .map().
df['Reporting_Category'] = df['Reporting_Category'].map(trpa_reportingcategory_lookup)
df['Permit_Type'] = df['Permit_Type'].map(trpa_permittype_lookup)
df['Permit_Status'] = df['Permit_Status'].map(trpa_permitstatus_lookup)

#### City of South Lake Tahoe Permit Data

***Get Data***

In [None]:
## City of South Lake Tahoe Permit data was sent over by Ryan Malhoski on 4/9/2021
# NOTE(review): the file name is stamped 040924 — confirm whether the year above should be 2024.
# Path built with os.path.join; the old literal relied on "\P" being an invalid escape sequence.
dfCSLTPermit = read_file(os.path.join("data", "PermitData_CSLT_040924.csv"))

In [None]:
dfCSLTPermit.info()

***Transformation***

In [None]:
# The raw frame already has an 'Address' column; remove it so that
# 'Location Address' can take that name in the rename below.
df = dfCSLTPermit.drop(columns='Address')

# final fields for all permit dataframes
fields = [
    'APN', 'Address', 'Jurisdiction',
    'Permit_ID', 'Permit_Type', 'Permit_Status', 'Description',
    'Applied_Date', 'Issued_Date', 'Finaled_Date',
]

# CSLT column -> standard column
column_mapping = {
    'Parcel ID': 'APN',
    'Location Address': 'Address',
    'Permit Number': 'Permit_ID',
    'Note Text': 'Description',
    'Status': 'Permit_Status',
    'Permit Type': 'Permit_Type',
    'Permit Issue Date': 'Applied_Date',
    'Certificate Issue Date': "Finaled_Date",
}

# rename columns based on dictionary
df = renamecolumns(df, column_mapping, False)

# create an empty column for every standard field the source lacks
absent_fields = [name for name in fields if name not in df.columns]
for name in absent_fields:
    df[name] = None

# keep only the standard fields, in the standard order
df = df[fields]
# add jurisdiction value
df["Jurisdiction"] = "CSLT"
df.info()

In [None]:
# APN is in PPNO format in the CSLT data, and also contains the EL old naming convention (-0)
# TODO (not implemented yet): format to xxx-xxx-xxx, drop the 100 and 500 series,
#       and drop values that start with letters
# For now only space characters are removed — str.replace(' ', '') removes every space,
# not just trailing ones.
df.APN = df.APN.str.replace(' ', '') 


***Processing***

In [None]:
# potential values for Permit Type
# 
# get unique permit types
for permittype in dfCSLTPermit["Permit Type"].unique():
    print(permittype)

#### El Dorado County Permit Data
>  there are two files, one for all TRPA files and one for all files in our geographic area, including TRPA files and EDC files. 

***Get Data***

In [None]:
## El Dorado Permit data representing all files in our geographic area
## exported by Ken Kasman on 4/1/2021 from their Trakit database
# NOTE(review): the file name is stamped 040124 — confirm whether the year above should be 2024.
# Path built with os.path.join; the old literal relied on "\P" being an invalid escape sequence.
dfElDoPermit = read_file(os.path.join("data", "PermitData_ElDorado_040124.csv"))
dfElDoPermit.info()

***Transformation***

In [None]:
# Work on a copy of the raw frame (nothing is dropped here, unlike the CSLT cell):
# the next cell still reads dfElDoPermit["PERMITTYPE"], so the raw column names must survive.
# (renamecolumns comes from utils; whether it renames in place is not visible here.)
df = dfElDoPermit.copy()

# final fields for all permit dataframes
fields = ['APN', 'Address', 'Jurisdiction', 
          'Permit_ID', 'Permit_Type','Permit_Status','Description',
          'Applied_Date', 'Issued_Date', 'Finaled_Date'
          ]

# El Dorado column -> standard column
# NOTE(review): 'Permit Number' is the only key that is not upper-case like the other
# El Dorado columns — confirm it matches the export's actual header.
column_mapping = {
            'SITE_APN' : 'APN',
            'SITE_ADDR':'Address',
            'Permit Number' : 'Permit_ID',
            'DESCRIPTION' : 'Description',
            'STATUS' : 'Permit_Status',
            'PERMITTYPE' : 'Permit_Type',
            'APPLIED' : 'Applied_Date',
            'ISSUED'  : 'Issued_Date',
            'FINALED' : "Finaled_Date"
            }

# rename columns based on dictionary
df = renamecolumns(df, column_mapping, False)

# add any standard field the source does not provide as an empty column
for field in fields:
    if field not in df.columns:
        df[field] = None
# limit to the final fields
df = df[fields]
# add jurisdiction value (item assignment: attribute assignment only works for existing columns)
df["Jurisdiction"] = "EL"
df.info()

In [None]:
for permittype in dfElDoPermit["PERMITTYPE"].unique():
    print(permittype)

In [None]:
# get lookup dictionary
lookupTable = read_file("resources/lookup_reporting_category.csv")
lookupTable["Reporting Category"].unique()


***Processing***

#### Placer County Permit Data

***Get Data***

In [None]:
## Placer Permit Data comes in monthly via email, and gets saved to the folder below.
## This cell merges every Excel workbook in that folder into a single dataframe and exports it to csv.

# folder with the monthly workbooks
folder_path = r"F:\Research and Analysis\Local Jurisdiction MOU data collection\Placer MOU Files\Placer"
# extensions handed to pd.read_excel; anything else in the folder is ignored
excel_suffixes = (".xlsx", ".xlsm", ".xls")
# List to hold the DataFrames
dfs = []

# sorted() makes the row order of the merged file reproducible between runs
for file_name in sorted(os.listdir(folder_path)):
    # skip Office lock files (~$name.xlsx) and anything that is not a workbook
    if file_name.startswith("~$") or not file_name.lower().endswith(excel_suffixes):
        continue
    # Construct the full file path
    file_path = os.path.join(folder_path, file_name)
    # Read the workbook into a DataFrame and append it to the list
    # (no scratch `df` name, so the permit dataframe of the same name is not overwritten)
    dfs.append(pd.read_excel(file_path))

if not dfs:
    raise FileNotFoundError(f"No Excel workbooks found in {folder_path}")

# Concatenate all DataFrames into a single DataFrame
final_df = pd.concat(dfs, ignore_index=True)
# Add today's date at the end of the file name _MMDDYY
today = pd.Timestamp.today().strftime("%m%d%y")
# Export the final DataFrame to a CSV file
final_df.to_csv(os.path.join("data", "PermitData_Placer_" + today + ".csv"), index=False)

In [None]:
## Placer Permit data explained above. 
# Reads one dated export of the merged file; update the date stamp to pick up a newer merge.
# Path built with os.path.join; the old literal relied on "\P" being an invalid escape sequence.
dfPlacerPermit = read_file(os.path.join("data", "PermitData_Placer_040924.csv"))

In [None]:
dfPlacerPermit.info()

In [None]:
dfPlacerPermit.head()

***Transformation***
> hyperlink to Placer Accela record can be built using SERV_PROD_CODE, B1_PER_ID1, B1_PER_ID2, B1_PER_ID3
* https://permits.placer.ca.gov/CitizenAccess/Cap/CapDetail.aspx?Module=TRPA&TabName=TRPA&capID1=16CAP&capID2=00000&capID3=0036O&agencyCode=PLACERCO

In [None]:
# create lookup dictionary
lookupTable = read_file("resources/PL_lookup_reporting_category.csv")
lookupTable["Reporting Category"].unique()


***Processing***

#### Merge

In [None]:
# merge the processed dfs (rows stacked, axis=0)
# NOTE(review): dfTRPA, dfCSLT, dfEL and dfPL are not assigned in any of the cells visible
# above — each jurisdiction's transformation leaves its result in the scratch name `df`.
# Bind those results to these names before running this cell.
df = pd.concat([dfTRPA, dfCSLT, dfEL, dfPL], axis=0)

#### Load

In [None]:
df.to_csv("data\PermitData.csv")

## Cumulative Accounting Data Engineering

#### Existing Development Rights

> Spatial Joins

In [None]:
### SPATIAL JOINS ### THIS TAKES A LONG TIME ~3 HOURS ###

# One row per join, run top to bottom against the all-years parcel layer:
#   (join features, output name in the arcpy workspace, join operation, match option)
# NOTE(review): the inputs here are spatially enabled DataFrames — confirm the tool accepts
# them directly rather than feature classes/layers.
join_specs = [
    # 2023 parcel data joined to all years of parcel data
    (sdfParcel23,   "Join_23",               "JOIN_ONE_TO_ONE",  "WITHIN"),
    # Plan Area
    (sdfPlan,       "Join_PlanArea",         "JOIN_ONE_TO_MANY", "HAVE_THEIR_CENTER_IN"),
    # District
    (sdfDistrict,   "Join_District",         "JOIN_ONE_TO_ONE",  "HAVE_THEIR_CENTER_IN"),
    # Town Center
    (sdfTownCenter, "Join_TownCenter",       "JOIN_ONE_TO_ONE",  "HAVE_THEIR_CENTER_IN"),
    # Town Center Buffer
    (sdfTCbuffer,   "Join_TownCenterBuffer", "JOIN_ONE_TO_ONE",  "HAVE_THEIR_CENTER_IN"),
    # CSLT
    (sdfCSLT,       "Join_CSLT",             "JOIN_ONE_TO_ONE",  "HAVE_THEIR_CENTER_IN"),
    # County
    (sdfCounty,     "Join_County",           "JOIN_ONE_TO_ONE",  "HAVE_THEIR_CENTER_IN"),
    # TRPA Boundary
    (sdfTRPA,       "Join_TRPA",             "JOIN_ONE_TO_ONE",  "HAVE_THEIR_CENTER_IN"),
]

for join_features, out_name, join_operation, match_option in join_specs:
    arcpy.SpatialJoin_analysis(sdfParcel, join_features, out_name,
                               join_operation, "KEEP_ALL", "", match_option)

> To Pickles

In [None]:
# read each spatial-join output back as a spatial dataframe in the notebook's spatial reference
def _load_join(fc_name):
    return pd.DataFrame.spatial.from_featureclass(fc_name, sr=sr)

sdf_parcel_plan       = _load_join("Join_PlanArea")
sdf_parcel_district   = _load_join("Join_District")
sde_parcel_towncenter = _load_join("Join_TownCenter")
sde_parcel_tcbuffer   = _load_join("Join_TownCenterBuffer")
sde_parcel_cslt       = _load_join("Join_CSLT")
sde_parcel_county     = _load_join("Join_County")
sde_parcel_trpa       = _load_join("Join_TRPA")
sde_parcel_23         = _load_join("Join_23")

# pickle every frame to data_dir under its variable name, so the joins need not be re-run
frames_to_pickle = {
    "sdf_parcel_plan": sdf_parcel_plan,
    "sdf_parcel_district": sdf_parcel_district,
    "sde_parcel_towncenter": sde_parcel_towncenter,
    "sde_parcel_tcbuffer": sde_parcel_tcbuffer,
    "sde_parcel_cslt": sde_parcel_cslt,
    "sde_parcel_county": sde_parcel_county,
    "sde_parcel_trpa": sde_parcel_trpa,
    "sde_parcel_23": sde_parcel_23,
}
for stem, frame in frames_to_pickle.items():
    frame.to_pickle(data_dir / f"{stem}.pkl")

> Map Values

In [None]:
# get pickled dataframes
# get results of spatial joins as spatial dataframes
sdf_parcel_plan       = pd.read_pickle(data_dir/"sdf_parcel_plan.pkl")
sdf_parcel_district   = pd.read_pickle(data_dir/"sdf_parcel_district.pkl")
sde_parcel_towncenter = pd.read_pickle(data_dir/"sde_parcel_towncenter.pkl")
sde_parcel_tcbuffer   = pd.read_pickle(data_dir/"sde_parcel_tcbuffer.pkl")
sde_parcel_cslt       = pd.read_pickle(data_dir/"sde_parcel_cslt.pkl")
sde_parcel_county     = pd.read_pickle(data_dir/"sde_parcel_county.pkl")
sde_parcel_trpa       = pd.read_pickle(data_dir/"sde_parcel_trpa.pkl")
sde_parcel_23         = pd.read_pickle(data_dir/"sde_parcel_23.pkl")

In [None]:
# create unique key for the spatial-join lookups: "<APN>_<YEAR>"
# - YEAR is cast to text first: the services are queried and filtered with numeric years
#   elsewhere in this notebook, and str + number concatenation raises a TypeError in pandas.
# - The former `for df in dfs:` loop is gone: `dfs` is the list of Placer permit workbooks
#   built in the permit section, not these parcel frames.
# NOTE(review): the keys only line up if YEAR has the same dtype in every frame
# (e.g. "2023" vs "2023.0") — check df.dtypes if lookups come back empty.
for parcel_frame in (sdf_parcel_plan, sdf_parcel_district, sde_parcel_towncenter,
                     sde_parcel_tcbuffer, sde_parcel_cslt, sde_parcel_county,
                     sde_parcel_trpa, sde_parcel_23, sdfParcel):
    parcel_frame['APN_YEAR'] = parcel_frame['APN'] + "_" + parcel_frame['YEAR'].astype(str)


In [None]:
# loop through the dataframes and get a list of columns for each
for df in [sdf_parcel_plan, sdf_parcel_district, sde_parcel_towncenter, 
           sde_parcel_tcbuffer, sde_parcel_cslt, sde_parcel_county, 
           sde_parcel_trpa, sde_parcel_23]:
    print(df.columns)


In [None]:

# Set up sdfParcel with the 2023 parcel values, then overwrite selected fields with the
# values from the dedicated spatial joins. Every lookup is keyed on APN_YEAR; when a key
# occurs more than once in a join result, the last row wins (dict semantics).

# 1) fields taken from the 2023 join, stored there with a "_1" suffix
#    (COUNTY_LANDUSE_DESCRIPTION used to be mapped from APN against APN_YEAR keys,
#     which can never match and left the column empty)
parcel23_fields = [
    'JURISDICTION', 'COUNTY', 'COUNTY_LANDUSE_DESCRIPTION', 'EXISTING_LANDUSE',
    'OWNERSHIP_TYPE', 'YEAR_BUILT', 'PLAN_ID', 'PLAN_NAME', 'ZONING_ID',
    'ZONING_DESCRIPTION', 'TOWN_CENTER', 'LOCATION_TO_TOWNCENTER', 'TAZ',
    'PARCEL_ACRES', 'PARCEL_SQFT', 'WITHIN_BONUSUNIT_BNDY', 'WITHIN_TRPA_BNDY',
]
for field in parcel23_fields:
    lookup = dict(zip(sde_parcel_23.APN_YEAR, sde_parcel_23[field + "_1"]))
    sdfParcel[field] = sdfParcel.APN_YEAR.map(lookup)

# 2) fields overwritten from the plan, district, town-center and town-center-buffer joins
#    (target field in sdfParcel, join result, source column in that join result)
join_overrides = [
    ('PLAN_ID',                sdf_parcel_plan,       'PLAN_ID_1'),
    ('PLAN_NAME',              sdf_parcel_plan,       'PLAN_NAME_1'),
    ('ZONING_ID',              sdf_parcel_district,   'ZONING_ID_1'),
    ('ZONING_DESCRIPTION',     sdf_parcel_district,   'ZONING_DESCRIPTION_1'),
    ('TOWN_CENTER',            sde_parcel_towncenter, 'Name'),
    ('LOCATION_TO_TOWNCENTER', sde_parcel_tcbuffer,   'BUFFER_NAME'),
]
for target_field, joined, source_column in join_overrides:
    lookup = dict(zip(joined.APN_YEAR, joined[source_column]))
    sdfParcel[target_field] = sdfParcel.APN_YEAR.map(lookup)

# Disabled in the original cell and still disabled: PLAN_TYPE, JURISDICTION / COUNTY from
# the CSLT and county joins, and the WITHIN_* flags from the TRPA boundary join.

# sdfParcel to pickle
sdfParcel.to_pickle(data_dir/"sdfParcel.pkl")

In [None]:
# to feature class
sdfParcel.spatial.to_featureclass(location=os.path.join(gdb,"sdfParcel"), sanitize_columns=False)

In [None]:
# get total parcel count by year
parcel_count = sdfParcel.groupby('YEAR').size()
parcel_count

In [None]:
sdfParcel.info()

In [None]:
# feature classes from sde
sde_ParcelAtt    = sdeCollect+"\\SDE.Parcel\\SDE.Parcel_History_Attributed"
# get data frame from feature class
sdfParcel_SDE = pd.DataFrame.spatial.from_featureclass(sde_ParcelAtt)

In [None]:
# get total Residenatial units by year
residential_units = sdfParcel_SDE.groupby('YEAR')['Residential_Units'].sum()
residential_units

In [None]:
# residential units summed per year and zoning district,
# pivoted so that rows are YEAR and columns are ZONING_ID
residential_units = (
    sdfParcel_SDE.groupby(['YEAR', 'ZONING_ID'])['Residential_Units']
    .sum()
    .unstack()
)
# change from one year (row) to the next, per zoning district
differences = residential_units.diff(axis=0)

In [None]:
differences.to_csv(data_dir / "Unit_Differences_by_Year_Zone.csv")

In [None]:
# filter to years
sdfParcel_SDE = sdfParcel_SDE[sdfParcel_SDE.YEAR.isin([2012,2018,2019,2020,2021,2022,2023])]
# send to pickle
sdfParcel_SDE.to_pickle(data_dir / "sdfParcel_SDE.pkl")

In [None]:
res_units = sdfParcel_SDE.groupby('YEAR')['Residential_Units'].sum()
res_units

In [None]:
sdfParcel_SDE.JURISDICTION.value_counts()

In [None]:
sdfParcel.JURISDICTION.value_counts()

In [None]:

# residential_units

In [None]:
residential_units

> Edit Operations

In [None]:
### THIS TAKES 2 HOURS TO RUN ###

# feature classes from sde
sde_ParcelAtt    = sdeCollect + "\\SDE.Parcel\\SDE.Parcel_History_Attributed"
# read in staging feature class
sdfParcelFC = os.path.join(gdb, "sdfParcel")

# rows are matched on APN + YEAR; each field below is copied from the staging
# feature class into Parcel_History_Attributed, one pass per field
join_keys = ['APN', 'YEAR']
join_fields = [
    'JURISDICTION', 'COUNTY', 'COUNTY_LANDUSE_DESCRIPTION', 'EXISTING_LANDUSE',
    'OWNERSHIP_TYPE', 'YEAR_BUILT', 'PLAN_ID', 'PLAN_NAME', 'ZONING_ID',
    'ZONING_DESCRIPTION', 'TOWN_CENTER', 'LOCATION_TO_TOWNCENTER', 'TAZ',
    'PARCEL_ACRES', 'PARCEL_SQFT',
    # 'WITHIN_BONUSUNIT_BNDY', 'WITHIN_TRPA_BNDY',   # disabled in the original cell
]

# start an edit session
edit = arcpy.da.Editor(sdeCollect)  
edit.startEditing(False, False)
edit.startOperation()
try:
    for field in join_fields:
        fieldJoinCalc_multikey(sde_ParcelAtt, join_keys, [field],
                               sdfParcelFC,   join_keys, [field])
except Exception:
    # a failure part-way through must not leave the session open or save a partial update
    edit.abortOperation()
    edit.stopEditing(False)
    raise
else:
    # close the edit session and save the edits
    edit.stopOperation()
    edit.stopEditing(True)


# # use field join multikey to join the data all fields! ### THIS DOESNT WORK YET ### 
# fieldJoinCalc_multikey(sde_ParcelAtt, ['APN', 'YEAR'],['JURISDICTION','COUNTY','COUNTY_LANDUSE_DESCRIPTION','EXISTING_LANDUSE',
#                                            '            OWNERSHIP_TYPE','YEAR_BUILT','PLAN_ID','PLAN_NAME','ZONING_ID','ZONING_DESCRIPTION',
#                                            '            TOWN_CENTER','LOCATION_TO_TOWNCENTER','TAZ','PARCEL_ACRES','PARCEL_SQFT',
#                                                         'WITHIN_BONUSUNIT_BNDY','WITHIN_TRPA_BNDY'], 
#                        sdfParcelFC,   ['APN', 'YEAR'],['JURISDICTION','COUNTY','COUNTY_LANDUSE_DESCRIPTION','EXISTING_LANDUSE',
#                                                         'OWNERSHIP_TYPE','YEAR_BUILT','PLAN_ID','PLAN_NAME','ZONING_ID','ZONING_DESCRIPTION',
#                                                         'TOWN_CENTER','LOCATION_TO_TOWNCENTER','TAZ','PARCEL_ACRES','PARCEL_SQFT',
#                                                         'WITHIN_BONUSUNIT_BNDY','WITHIN_TRPA_BNDY'])


> QA

In [None]:
# get the change in development units year over year
# group by year and county and sum residential units
sdfParcelYear = parcel_history.groupby(['COUNTY', 'YEAR']).agg({'Residential_Units':'sum'}).reset_index()
# sdfParcelYear['NET_CHANGE_RES'] = sdfParcel.groupby('COUNTY', 'YEAR').diff()
sdfParcelYear

In [None]:
# get the plan name and id
df.groupby(['PLAN_ID', 'YEAR']).agg({'Residential_Units':'sum'}).reset_index()

In [None]:
df.pivot_table(index='APN', columns='YEAR', values=['Residential_Units','CommercialFloorArea_SqFt', 'TouristAccommodation_Units'], aggfunc='sum').reset_index()

In [None]:
# display all rows
pd.set_option('display.max_rows', None)

# group by PLAN_ID and sum the three development-unit fields
df1 = df.groupby('PLAN_ID').agg({'Residential_Units':'sum', 'TouristAccommodation_Units':'sum','CommercialFloorArea_SqFt':'sum'}).reset_index()

# sort_values returns a new frame, so the result has to be kept
# (the bare call used before sorted nothing and displayed nothing)
df1 = df1.sort_values('Residential_Units', ascending=False)

# add total row (numeric columns only; non-numeric columns are left empty in that row)
df1.loc['Total'] = df1.sum(numeric_only=True, axis=0)

# show the result as the cell output
df1

> Exports

In [None]:
# # export to CSV
# df = parcel_history

# columns to keep
columns_to_keep = ['APN', 'Residential_Units', 'TouristAccommodation_Units',
                    'CommercialFloorArea_SqFt', 'YEAR',
                    'JURISDICTION', 'COUNTY', 
                    # 'ADU', 'RBU', 'Allocation','Deed_Restricted_Units',
                    'OWNERSHIP_TYPE','EXISTING_LANDUSE',
                    # 'WITHIN_TRPA_BNDY'
                    'PARCEL_ACRES', 'PARCEL_SQFT']

# add integer placeholder columns for RBU, ADU, Allocation, and Deed Restricted Units
# NOTE(review): these four are commented out of columns_to_keep above, so they are
# dropped again by the selection below — enable them there once they should be exported.
df['ADU'] = 0
df['RBU'] = 0
df['Allocation'] = 0
df['Deed_Restricted_Units'] = 0

# keep only the columns in the list
df = df[columns_to_keep]
# # filter to 2023
# df = df[df.YEAR == 2023]

# export to csv with date stamp in name
# (path built with os.path.join; the old literal relied on "\D" being an invalid escape sequence)
today = pd.Timestamp.today().strftime("%m%d%y")
df.to_csv(os.path.join("data", "DevelopmentHistory_2023_" + today + ".csv"))

In [None]:
## get 2022 development units
devhistoryURL = "https://maps.trpa.org/server/rest/services/Existing_Development/MapServer/2"
parcel_history = get_fs_data_spatial(devhistoryURL)

parcel_history.info()

In [None]:
## parcel development history (all years) from the Existing_Development service
devhistoryURL = "https://maps.trpa.org/server/rest/services/Existing_Development/MapServer/2"
parcel_history = get_fs_data_spatial(devhistoryURL)

# cumulative accounting unit table as a pandas dataframe
unitsTable = pd.read_csv("data/CumulativeAccounting_2012to2023_Updated.csv", low_memory=False)
# drop every column positioned after YEAR
year_position = unitsTable.columns.get_loc("YEAR")
columns_after_year = unitsTable.columns[year_position + 1:]
unitsTable.drop(columns_after_year, axis=1, inplace=True)
# commercial floor area as a number; anything unparseable becomes 0
cfa_numeric = pd.to_numeric(unitsTable['CommercialFloorArea_SqFt'], errors='coerce')
unitsTable['CommercialFloorArea_SqFt'] = cfa_numeric.fillna(0)


In [None]:
# check for duplicate APNs within each reporting year
years = [2012, 2018, 2019, 2020, 2021, 2022, 2023]
for year in years:
    print(year)
    # APNs of the parcels recorded for this year
    apns_for_year = sdfParcel.loc[sdfParcel['YEAR'] == year].APN
    # every occurrence after the first counts as a duplicate
    duplicateAPNs = apns_for_year[apns_for_year.duplicated()].tolist()
    # the duplicates themselves, then how many there are
    print(duplicateAPNs)
    print(len(duplicateAPNs))


In [None]:
# global variables
years = [2012, 2018, 2019, 2020, 2021, 2022, 2023]
version = "_v6_"

# merge parcel history and units table by year and
# export to feature class
def merge_and_export(parcel_history, unitsTable, years):
    """Attribute parcel history with the units table and export one feature class per year.

    For each year, the rows of ``parcel_history`` and ``unitsTable`` with that
    YEAR are left-merged on APN, so every parcel row is kept. The three unit
    fields are taken from the units table (the ``_y`` columns), coerced to
    numeric, and set to 0 where missing. The result is written to
    ``C:/GIS/Scratch.gdb`` as ``Parcel_History_Attributed{version}{year}``,
    using the module-level ``version`` tag.

    Args:
        parcel_history: spatially enabled dataframe (``.spatial`` is used for
            the export) with APN, YEAR, and the three unit columns.
        unitsTable: dataframe with APN, YEAR, and the three unit columns.
        years: iterable of years to process.

    Returns:
        None. Output is the exported feature classes.
    """
    # imported here because the notebook's visible import cell does not import re
    import re

    unit_fields = ['Residential_Units', 'TouristAccommodation_Units', 'CommercialFloorArea_SqFt']
    for year in years:
        print(year)
        # filter both inputs to the current year
        parcel_history_year = parcel_history.loc[parcel_history['YEAR'] == year]
        unitsTable_year = unitsTable.loc[unitsTable['YEAR'] == year]
        # keep every parcel; unmatched parcels get NaN in the units-table (_y) columns
        df = pd.merge(parcel_history_year, unitsTable_year, on='APN', how='left', indicator=True)
        # take the unit values from the units table, as numbers, with 0 for missing
        for field in unit_fields:
            df[field] = pd.to_numeric(df[field + '_y'], errors='coerce').fillna(0)
        # YEAR_y is NaN for parcels with no units-table match, so stamp the year
        # being processed instead of copying YEAR_y
        df['YEAR'] = year
        # Sanitize column names
        df.columns = [re.sub(r'[^a-zA-Z0-9_]', '_', col) for col in df.columns]
        # set output feature class name
        outfc = f"Parcel_History_Attributed{version}{year}"
        # export updated parcel history to feature class filtered by year
        df.spatial.to_featureclass(location=os.path.join("C:/GIS/Scratch.gdb", outfc), overwrite=True, sanitize_columns=False)

# identify parcels that did not join from the merge
def get_unjoined(parcel_history, unitsTable, years):
    """Write, per year, the APNs that appear in only one of the two tables.

    For each year the units table (left, ``_x`` columns) is outer-merged on APN
    with parcel history (right, ``_y`` columns). The merge outcome counts are
    printed, and the rows that did not match on both sides are saved to
    ``data\\Parcel_History_Attributed_APN_Merge{version}{year}.csv``, using the
    module-level ``version`` tag.

    Args:
        parcel_history: dataframe with APN, YEAR, and the three unit columns.
        unitsTable: dataframe with APN, YEAR, and the three unit columns.
        years: iterable of years to process.

    Returns:
        None. Output is printed diagnostics and one csv per year.
    """
    review_columns = ['APN', 'YEAR', '_merge',
                      'CommercialFloorArea_SqFt_x', 'Residential_Units_x', 'TouristAccommodation_Units_x',
                      'CommercialFloorArea_SqFt_y', 'Residential_Units_y', 'TouristAccommodation_Units_y']
    for year in years:
        # Filter parcel_history for the current year
        parcel_history_filtered = parcel_history.loc[parcel_history['YEAR'] == year]

        # Merge with unitsTable for the same year
        units_by_year = unitsTable.loc[unitsTable.YEAR == year]
        merged_data = units_by_year.merge(parcel_history_filtered, on='APN', how='outer', indicator=True)

        # Print year and merge value counts
        print(year)
        print(merged_data._merge.value_counts())

        # YEAR_x comes from unitsTable and is NaN for rows found only in
        # parcel_history; both inputs were filtered to `year`, so stamp it directly
        merged_data = merged_data.rename(columns={'YEAR_x': 'YEAR'})
        merged_data['YEAR'] = year
        # keep only the rows that did not match on both sides
        merged_data = merged_data.loc[merged_data._merge != 'both']
        merged_data.info()
        merged_data = merged_data[review_columns]
        merged_data.info()
        # Save to CSV; version and year are formatted separately (formatting them
        # as a tuple put "('_v6_', 2012)" into the file name)
        merged_data.to_csv(f"data\\Parcel_History_Attributed_APN_Merge{version}{year}.csv", index=False)

# check for duplicates in parcel_history
def check_duplicates(parcel_history, unitsTable, years):
    """Print the duplicated APNs per year for parcel history and the units table.

    For each year this prints the year, the list of repeated APNs in
    ``parcel_history``, and the list of repeated APNs in ``unitsTable``. The
    first occurrence of an APN is not counted as a duplicate. For 2018 the
    duplicated parcel history rows are also written to
    ``data\\Parcel_History_Duplicates_2018.csv``.

    Args:
        parcel_history: dataframe with APN and YEAR columns.
        unitsTable: dataframe with APN and YEAR columns.
        years: iterable of years to check.

    Returns:
        None.
    """
    for year in years:
        print(year)
        # filter once and reuse the subset for both the list and the csv
        parcels_year = parcel_history.loc[parcel_history['YEAR'] == year]
        duplicate_mask = parcels_year.APN.duplicated()
        # make a list of duplicate APNs
        duplicateAPNs = parcels_year.APN[duplicate_mask].tolist()
        # print out duplicate APNs
        print(duplicateAPNs)
        # save 2018 duplicates to csv
        if year == 2018:
            # raw string: "\P" is not a valid escape sequence; the path is unchanged
            parcels_year.loc[duplicate_mask].to_csv(r"data\Parcel_History_Duplicates_2018.csv")

        # make a list of duplicate APNs in unitsTable
        units_year = unitsTable.loc[unitsTable['YEAR'] == year]
        duplicateAPNsCA = units_year.APN[units_year.APN.duplicated()].tolist()
        # print out duplicate APNs
        print(duplicateAPNsCA)

# compare total Residential Units, Commercial Floor Area, and Tourist Accommodation Units by year, between parcel_history and unitsTable
def compare_totals(parcel_history, unitsTable, years):
    """Print the per-year unit totals of parcel history next to those of the units table.

    Args:
        parcel_history: dataframe with YEAR and the three unit columns.
        unitsTable: dataframe with YEAR and the three unit columns.
        years: iterable of years to report.

    Returns:
        None. For each year the year is printed, followed by the parcel history
        and units-table sums for each of the three unit columns.
    """
    # (column, parcel-history label, units-table label) in print order
    report_fields = (
        ('Residential_Units',
         'Residential Units in Parcel_History \n',
         'Residential Units in updated table \n'),
        ('CommercialFloorArea_SqFt',
         'Commercial Floor Area in Parcel_History \n',
         'Commercial Floor Area in updated table \n'),
        ('TouristAccommodation_Units',
         'Tourist Accommodation Units in Parcel_History \n',
         'Tourist Accommodation Units in updated table \n'),
    )
    for year in years:
        # rows of each table for the current year
        history_rows = parcel_history.loc[parcel_history['YEAR'] == year]
        table_rows = unitsTable.loc[unitsTable['YEAR'] == year]

        # sum everything before printing anything: parcel history first, then the units table
        history_totals = [history_rows[column].sum() for column, _, _ in report_fields]
        table_totals = [table_rows[column].sum() for column, _, _ in report_fields]

        # print totals
        print(year)
        for (_, history_label, table_label), history_sum, table_sum in zip(report_fields, history_totals, table_totals):
            print(history_label + str(history_sum))
            print(table_label + str(table_sum))

# identify rows where the Residential Units, Commercial Floor Area, and Tourist Accommodation Units are different between parcel_history and unitsTable
def find_different_rows(parcel_history, unitsTable, years):
    """Print, per year, the APNs whose unit values differ between the two tables.

    The tables are outer-merged on APN for each year; ``_x`` columns come from
    ``parcel_history`` and ``_y`` columns from ``unitsTable``. A row is printed
    when any of the three unit fields is not equal across the tables. Missing
    values never compare equal, so APNs present in only one table are listed.

    Args:
        parcel_history: dataframe with APN, YEAR, and the three unit columns.
        unitsTable: dataframe with APN, YEAR, and the three unit columns.
        years: iterable of years to compare.

    Returns:
        None.
    """
    for year in years:
        print(year)
        # rows of each table for the current year
        history_rows = parcel_history.loc[parcel_history['YEAR'] == year]
        table_rows = unitsTable.loc[unitsTable['YEAR'] == year]
        # # remove any commas from CommercialFloorArea_SqFt in unitsTable_year using .loc
        # unitsTable_year.loc[:, 'CommercialFloorArea_SqFt'] = unitsTable_year['CommercialFloorArea_SqFt'].str.replace(',', '').astype(float)
        # outer merge so APNs missing from either side are retained
        merged = pd.merge(history_rows, table_rows, right_on='APN', left_on='APN', how='outer', indicator=True)
        # narrow to the key, the years, and the unit fields from both sides
        merged = merged[['APN', 'YEAR_x','YEAR_y', 'Residential_Units_x', 'CommercialFloorArea_SqFt_x', 'TouristAccommodation_Units_x', 'Residential_Units_y', 'CommercialFloorArea_SqFt_y', 'TouristAccommodation_Units_y']]
        # one mismatch flag per unit field
        res_differs = merged['Residential_Units_x'] != merged['Residential_Units_y']
        cfa_differs = merged['CommercialFloorArea_SqFt_x'] != merged['CommercialFloorArea_SqFt_y']
        tau_differs = merged['TouristAccommodation_Units_x'] != merged['TouristAccommodation_Units_y']
        # print the rows where at least one field differs
        print(merged.loc[res_differs | cfa_differs | tau_differs])

In [None]:
# list the duplicated APNs per year in parcel_history and unitsTable
check_duplicates(parcel_history, unitsTable, years)

In [None]:
# run the checks on the loaded tables: unjoined APNs (csv), duplicates, totals, and differing rows
# (the feature class export is currently disabled)
# merge_and_export(parcel_history, unitsTable, years)
get_unjoined(parcel_history, unitsTable, years)
check_duplicates(parcel_history, unitsTable, years)
compare_totals(parcel_history, unitsTable, years)
find_different_rows(parcel_history, unitsTable, years)

In [None]:
# analyze the changes in parcel history by year
years = [2012, 2018, 2019, 2020, 2021, 2022, 2023]
df = sdfUnits
for year in years:
    print(year)
    # filter parcel_history by year
    parcel_history_year = df.loc[df['YEAR'] == year]
    # get sum of Residential Units in parcel_history
    resTotal = parcel_history_year['Residential_Units'].sum()
    cfaTotal = parcel_history_year['CommercialFloorArea_SqFt'].sum()
    tauTotal = parcel_history_year['TouristAccommodation_Units'].sum()
    # print totals
    print('Residential Units in Parcel_History \n' + str(resTotal))
    print('Commercial Floor Area in Parcel_History \n'+ str(cfaTotal))
    print('Tourist Accommodation Units in Parcel_History \n'+ str(tauTotal))

# find all the rows for APNs that are duplicated within a year
# rows are collected per (year, APN) rather than assigned back to df: overwriting
# df inside the loop left only one APN's rows for every later iteration
duplicate_rows = {}
for year in years:
    print(year)
    # rows and duplicated APNs for this year
    year_rows = df.loc[df['YEAR'] == year]
    duplicateAPNs = year_rows.APN[year_rows.APN.duplicated()].tolist()
    # loop through the duplicate APNs
    for apn in duplicateAPNs:
        # rows for this APN in this year, kept for inspection
        duplicate_rows[(year, apn)] = year_rows.loc[year_rows['APN'] == apn]
    

In [None]:
# get total residential units by year
def get_totals(parcels, years):
    """Return the total Residential_Units for each requested year.

    Args:
        parcels: dataframe with YEAR and Residential_Units columns.
        years: iterable of years; one output row is produced per year, in order.

    Returns:
        DataFrame with columns ``Year`` and ``Residential_Units`` and a 0..n-1
        index. A year with no matching rows gets the sum of an empty selection
        (0). An empty ``years`` gives an empty frame with the same columns.
    """
    # build all rows first and create the frame once; concatenating onto an empty
    # frame inside the loop repeated index 0 on every row and let the empty
    # frame's object dtype leak into the result
    rows = [
        {
            'Year': year,
            'Residential_Units': parcels.loc[parcels['YEAR'] == year, 'Residential_Units'].sum(),
        }
        for year in years
    ]
    return pd.DataFrame(rows, columns=['Year', 'Residential_Units'])

# get total residential units by year
total = get_totals(parcel_history, years)
# calculate percentage change in residential units year over year
total['Percent_Change'] = (total['Residential_Units'].pct_change())*100
# create a new column for the difference in residential units year over year
total['Difference'] = total['Residential_Units'].diff()

# export to csv
total.to_csv('total_residential_units_by_year.csv', index=False)
# display the table; a bare expression only renders when it is the last line of the cell
total


***Transformation***

***Processing***

#### Deed Restrictions
> Deed restricted unit research needs to be merged with LTinfo housing deed restrictions and parcel unit data from 2022

***Get Data***

In [None]:
# researched deed restricted housing units (second argument is presumably the sheet index -- confirm against read_excel)
# raw string: "\H" is not a valid escape sequence; the path is unchanged
dfDeedUnits  = read_excel(r"data\Housing_Deed_Restrcitions.xlsx", 0)
# deed restricted parcels from the LT Info web service
# NOTE(review): the URL ends in what looks like an access token -- confirm it is OK to keep in the notebook
dfDeedLTinfo = pd.read_json("https://laketahoeinfo.org/WebServices/GetDeedRestrictedParcels/JSON/e17aeb86-85e3-4260-83fd-a2b32501c476")

In [None]:
# save the researched deed restricted units as csv
# raw string: "\D" is not a valid escape sequence; the path is unchanged
dfDeedUnits.to_csv(r"data\DeedRestricted_HousingUnits.csv", index=False)

In [None]:
# total of the Units column in the researched deed restricted units
dfDeedUnits.Units.sum()

In [None]:
# print column names, dtypes, and non-null counts for the LT Info deed restrictions
dfDeedLTinfo.info()

In [None]:
# list the distinct deed restriction types
# (not displayed: only the last expression of a cell is rendered)
dfDeedLTinfo['DeedRestrictionType'].unique()

# keep only the Affordable, Moderate Income, and Achievable housing records
housing_types = ['Affordable Housing', 'Moderate Income Housing', 'Achievable Housing']
is_housing_type = dfDeedLTinfo['DeedRestrictionType'].isin(housing_types)
dfDeedLTinfo = dfDeedLTinfo[is_housing_type]

# number of records that remain
len(dfDeedLTinfo)

In [None]:
# print column names, dtypes, and non-null counts for parcelUnits22
parcelUnits22.info()

In [None]:
# outer-join the researched units with the LT Info records on APN;
# the _merge column records which source(s) each row came from
dfDeedUnitsMerge = dfDeedUnits.merge(dfDeedLTinfo, on='APN', how='outer', indicator=True)

In [None]:
# count rows by join outcome (both / left_only / right_only)
dfDeedUnitsMerge._merge.value_counts()

In [None]:
# flag every LT Info record that shares its APN and deed restriction type with another record
ltinfo_dupe_mask = dfDeedLTinfo.duplicated(subset=['APN','DeedRestrictionType'], keep=False)
# write the flagged records, ordered by APN, for review
ltinfo_dupes = dfDeedLTinfo[ltinfo_dupe_mask].sort_values('APN')
ltinfo_dupes.to_csv("HousingDeedRestrictions_LTinfo_Duplicates.csv")

In [None]:
# identify rows that repeat the same APN, deed restriction type, and unit count
# (keep=False shows every copy, not just the repeats)
dfDeedUnits[dfDeedUnits.duplicated(subset=['APN', 'Deed_Restriction_Type','Units'], keep=False)]

In [None]:
# identify merged rows whose APN appears more than once, sorted by APN
dfDeedUnitsMerge[dfDeedUnitsMerge.duplicated(subset=['APN'], keep=False)].sort_values(by='APN')

In [None]:
# write the full outer-merge result to csv (the dataframe index is written as the first column)
dfDeedUnitsMerge.to_csv("HousingDeedRestrictions_All.csv")

In [None]:
# attach parcel unit data to the researched deed restricted units (every deed row is kept)
dfDeedUnits_ParcelUnits  = dfDeedUnits.merge(parcelUnits22, on='APN', how='left')
# attach parcel unit data to the LT Info deed restrictions (every LT Info row is kept)
dfDeedLTinfo_ParcelUnits = dfDeedLTinfo.merge(parcelUnits22, left_on='APN', right_on='APN', how='left')


In [None]:
# print column names, dtypes, and non-null counts for the LT Info / parcel units join
dfDeedLTinfo_ParcelUnits.info()

In [None]:
# total Residential_Units across the joined LT Info rows
# (an APN that appears on several deed records contributes its units once per record)
dfDeedLTinfo_ParcelUnits.Residential_Units.sum()

#### ADU Tracking
> ADU permit tracking from TRPA and other jurisdictions. There is a need to establish a system of record for this information (LT Info). This is similar to the Residential Bonus Unit data and there’s crossover on some of these, where a bonus unit was used to create an ADU, but you can have an ADU without requiring a bonus unit, and you can use a bonus unit without it being an ADU… 

***Get Data***

In [None]:
# ADU permit tracking workbook (second argument is presumably the sheet index -- confirm against read_excel)
# raw string: "\A" is not a valid escape sequence in a normal string; the path is unchanged
dfADU = read_excel(r"data\ADU Tracking.xlsx", 0)

In [None]:
# display the ADU tracking table
dfADU

#### Allocations
> This file includes all of the allocations that have been tracked in LT Info, and adds in whether the subject parcel has been issued a BMP/SCC certificate and/or whether Air Quality/Mobility Mitigation fees (for added VMT) or Water Quality Mitigation fees (for added coverage) have been paid. 

In [None]:
# allocation tracking workbook (second argument is presumably the sheet index -- confirm against read_excel)
# raw string: "\A" is not a valid escape sequence in a normal string; the path is unchanged
allocations = read_excel(r"data\Allocation_Tracking.xlsx", 0)

#### Transactions with Inactive APNs

In [None]:
# transactions recorded against inactive APNs
# raw string: "\T" is not a valid escape sequence (and a lowercase "\t" would become a tab); the path is unchanged
inactiveParcels = read_file(r"data\Transactions_InactiveParcels.csv")

## QA Process

> Process to compare against assessor parcel data signifying development


In [None]:
# create parcels feature class of missing parcels for Residential Units

# service URLs for the parcel master and VHR layers
parcelURL = "https://maps.trpa.org/server/rest/services/Parcels/MapServer/0"
vhrURL    = "https://maps.trpa.org/server/rest/services/VHR/MapServer/0"
# get the parcel data as a spatial dataframe
# NOTE(review): the VHR download is commented out, so this cell defines vhrURL but does not create sdfVHR
sdfParcel = get_fs_data_spatial(parcelURL)
# sdfVHR    = get_fs_data_spatial(vhrURL)

# # keep only the columns needed
# sdfParcel = sdfParcel[['APN','EXISTING_LANDUSE','YEAR_BUILT','BEDROOMS','UNITS','SHAPE']]
# sdfVHR    = sdfVHR[['APN','SHAPE']]

In [None]:
# Gets feature service data as spatially enabled dataframe
def get_fs_data_spatial(service_url):
    """Query a feature layer and return the result as a spatially enabled dataframe.

    Args:
        service_url: URL of the feature layer (for example a MapServer layer endpoint).

    Returns:
        The ``sdf`` of the layer's query result, run with the default query parameters.
    """
    return FeatureLayer(service_url).query().sdf

In [None]:
# merge the parcel and VHR data
# (sdfVHR must already be loaded, e.g. with get_fs_data_spatial(vhrURL))
# the VHR join gets its own indicator column so that the next merge can use
# the default "_merge" name; reusing "_merge" in both merges raises a ValueError
sdf = pd.merge(sdfParcel, sdfVHR, on='APN', how='left', indicator='_merge_vhr')
# merge in the 2023 parcel history rows; "_merge" shows which parcels have a 2023 record
parcelDev2023 = parcel_history.loc[parcel_history['YEAR'] == 2023]
sdf = pd.merge(sdf, parcelDev2023, on='APN', how='left', indicator=True)
sdf.info()
# # keep fields needed for QA
# sdf = sdf[['APN','EXISTING_LANDUSE','YEAR_BUILT','BEDROOMS','UNITS','WITHIN_TRPABNDY','_merge','SHAPE']]
# # export to feature class
# sdf.spatial.to_featureclass(location=os.path.join(arcpy.env.workspace, 'Parcel_Review'), overwrite=True, sanitize_columns=False)