TYC_toSDE.ipynb
Created: February 21, 2025
Last Updated: December 1, 2025
Sarah Newsome, Tahoe Regional Planning Agency
Evelyn Malamut, Tahoe Regional Planning Agency
This Python script was developed to update the SDE with yearly TYC data.

This script uses Python 3.13.7 and was designed to run in the ArcGIS Pro Python environment "arcgispro-py3-plotly", a clone of the default ArcGIS Pro Python environment with plotly installed as an additional library.

In [None]:
# Import Data
import pandas as pd
import arcpy
import os
from datetime import datetime
import pathlib
import numpy as np
from arcgis import GIS
from arcgis.features import FeatureLayer
from arcgis.geometry import SpatialReference
from functools import reduce
from sqlalchemy.engine import URL
from sqlalchemy import create_engine

# GIS connection created with default arguments
gis = GIS()

# Scratch geodatabase: default workspace for every intermediate and final output
workspace = r"C:/GIS/Scratch.gdb"
arcpy.env.workspace = workspace

# Network folder that holds the .sde connection files.
# Raw string: "\G" and "\D" are not valid escape sequences, so the non-raw
# form triggers a SyntaxWarning on Python 3.12+ (the path value is unchanged).
filePath = r"F:\GIS\DB_CONNECT"
# Vector SDE connection file
sdeVector = os.path.join(filePath, "Vector.sde")
# Monitoring feature dataset inside the Vector SDE
sdemonitoring = os.path.join(sdeVector, "sde.SDE.Monitoring")
# Existing Yellow_Cress feature class: source of site attributes and the
# target of the (still commented-out) append step
tycdata_path = os.path.join(sdemonitoring, "sde.SDE.Yellow_Cress")

# Excel workbook holding the yearly TYC counts
tycdatafile_path = r"F:\Research and Analysis\Vegetation\TYC\TYC_Appendix_C.xlsx"
sheet_name = 'TYC_Appendix_C'  # Name of the sheet to read

# Survey year to load; must match a column header in the worksheet
data_year = 2025

# Final output: feature class with projected points (all intermediates and
# the final output are written to the scratch geodatabase)
final_fc = "NewTYC_points_Projected"

In [None]:
# Load the TYC worksheet into a DataFrame
df = pd.read_excel(tycdatafile_path, sheet_name=sheet_name)  # , header=[0, 1])

# Discard the row labelled 0
df = df.drop(index=0)

# Keep only the site-name column and the column for the year being loaded
df2 = df[[' SITE NAME', data_year]].copy()

# Remove the rows labelled 66 through 73
# NOTE(review): presumably non-site rows at the bottom of the sheet — confirm
# against the workbook whenever its layout changes
df2 = df2.drop(index=list(range(66, 74)))

# Reshape to long format: one row per site with the year and its count
df_long = pd.melt(
    df2,
    id_vars=[' SITE NAME'],
    var_name='YEAR_OF_COUNT',
    value_name='COUNT_VALUE',
)



In [None]:
# Spatial reference object for NAD 1983 UTM Zone 10N (WKID 26910)
sr = arcpy.SpatialReference(26910)
# Let geoprocessing tools overwrite existing outputs
arcpy.env.overwriteOutput = True

# Build a SQLAlchemy engine for TRPA SQL Server data.
# The only database name currently supported is 'sde'.
def get_conn(db):
    """Return a SQLAlchemy engine for the requested SQL Server database.

    Credentials are read from the DB_USER and DB_PASSWORD environment
    variables of the machine running the script.

    Args:
        db: Database name, matched case-insensitively. Only 'sde' is
            supported.

    Returns:
        A SQLAlchemy engine (usable with pd.read_sql) when db is 'sde',
        otherwise None.

    Raises:
        RuntimeError: If db is 'sde' and DB_USER or DB_PASSWORD is not set.
    """
    # driver is the ODBC driver for SQL Server
    driver = 'ODBC Driver 17 for SQL Server'
    # SQL Server host name
    sql_12 = 'sql12'

    # make it case insensitive
    db = db.lower()
    if db != 'sde':
        return None

    db_user = os.environ.get('DB_USER')
    db_password = os.environ.get('DB_PASSWORD')
    # Fail early with a clear message rather than sending the literal
    # text "None" to the server as the user name or password.
    if not db_user or not db_password:
        raise RuntimeError(
            "DB_USER and DB_PASSWORD environment variables must be set "
            "to connect to the 'sde' database"
        )

    # The driver name contains spaces, so it is wrapped in braces as ODBC
    # connection strings expect: DRIVER={ODBC Driver 17 for SQL Server}
    connection_string = (
        f"DRIVER={{{driver}}};SERVER={sql_12};DATABASE={db};"
        f"UID={db_user};PWD={db_password}"
    )
    connection_url = URL.create("mssql+pyodbc", query={"odbc_connect": connection_string})
    return create_engine(connection_url)

# Attribute fields copied from the existing Yellow_Cress feature class
tyc_fields = ["SITE_NAME", "LATITUDE", "LONGITUDE", "OWNER", "LTINFO_HYPERLINK"]

# Read one [site name, latitude, longitude, owner, hyperlink] record per
# feature. Coordinates come from the LATITUDE/LONGITUDE attribute fields,
# not from the geometry, so no SHAPE field or on-the-fly projection is
# requested.
with arcpy.da.SearchCursor(tycdata_path, tyc_fields) as cursor:
    tycdata = [list(row) for row in cursor]

# Create a pandas DataFrame
tyc_df = pd.DataFrame(tycdata, columns=tyc_fields)


In [None]:

# Give the site-name column the same name it has in the SDE data
df_long = df_long.rename(columns={' SITE NAME': 'SITE_NAME'})

# STATUS flags whether a site was surveyed: a count of 'NS' becomes
# 'No Survey', anything else becomes 'Surveyed'
df_long['STATUS'] = np.where(df_long['COUNT_VALUE'] == 'NS', 'No Survey', 'Surveyed')

# Turn 'NS' (no survey) into None so it is stored as <NULL>, consistent with
# SDE; arcpy can only handle None for missing values
df_long['COUNT_VALUE'] = df_long['COUNT_VALUE'].replace('NS', None) 

# Trim surrounding whitespace so site names match between the two sources
df_long['SITE_NAME'] = df_long['SITE_NAME'].astype(str).str.strip()
tyc_df['SITE_NAME'] = tyc_df['SITE_NAME'].str.strip()

# Left-join the SDE site attributes onto the counts by SITE_NAME, then
# remove duplicated rows
merged_df = df_long.merge(tyc_df, on='SITE_NAME', how='left').drop_duplicates()

# Convert all text columns to the pandas string dtype
for text_col in ('SITE_NAME', 'STATUS', 'OWNER', 'LTINFO_HYPERLINK'):
    merged_df[text_col] = merged_df[text_col].astype('string')

# Missing coordinates are set to zero to avoid errors with arcpy
for coord_col in ('LATITUDE', 'LONGITUDE'):
    merged_df[coord_col] = merged_df[coord_col].replace(np.nan, 0)

# Placeholder text for sites that have no hyperlink or owner
placeholders = {
    'LTINFO_HYPERLINK': "Missing Link",
    'OWNER': "Missing Owner",
}
for text_col, placeholder in placeholders.items():
    merged_df[text_col] = merged_df[text_col].fillna(placeholder)


In [None]:
# Disabled alternative: store missing counts as 0 instead of leaving them null
#merged_df['COUNT_VALUE'] = merged_df['COUNT_VALUE'].replace(np.nan, 0) 
# Sites without an owner get a placeholder value
merged_df['OWNER'] = merged_df['OWNER'].fillna(value="Missing Owner")

In [None]:
# Map pandas dtype names to ArcGIS field types.
# 'int64' is pandas' default integer dtype; without an entry for it an
# integer column would fall through to the TEXT default below.
# 'object' maps to LONG, so every object-dtype column in merged_df must hold
# integers or None.
type_mapping = {
    'int32': 'LONG',
    'int64': 'LONG',
    'float64': 'DOUBLE',
    'object': 'LONG',
    'string': 'TEXT',
    #'datetime64[ns]': 'DATE'
}

# Set up geodatabase and output table name
gdb_path = workspace
output_table = "tyc_temp"
output_path = os.path.join(gdb_path, output_table)

# Delete existing table if it exists
if arcpy.Exists(output_path):
    arcpy.management.Delete(output_path)
    print(f"Deleted existing table: {output_table}")

# Create the table in the geodatabase
arcpy.management.CreateTable(gdb_path, output_table)

# Add one field per DataFrame column, typed from the column's dtype
for col_name, dtype in merged_df.dtypes.items():
    arcgis_type = type_mapping.get(str(dtype), 'TEXT')  # Default to TEXT if dtype is unknown
    if arcgis_type == 'TEXT':
        arcpy.management.AddField(output_path, col_name, arcgis_type, field_length=255)
    else:
        arcpy.management.AddField(output_path, col_name, arcgis_type)

# Insert data into the table. arcpy can only handle None for missing values,
# so any remaining NaN / pd.NA is converted to None before the row is written.
with arcpy.da.InsertCursor(output_path, merged_df.columns.tolist()) as cursor:
    for _, row in merged_df.iterrows():
        cursor.insertRow([None if pd.isna(value) else value for value in row.tolist()])

print(f"Table '{output_table}' created and populated in {gdb_path}")

In [None]:
# Build a point feature class from the scratch table, reading LONGITUDE as x
# and LATITUDE as y in the NAD 1983 coordinate system (WKID 4269)
xy_points = "NewTYC_points"
arcpy.management.XYTableToPoint(
    in_table=output_path,
    out_feature_class=xy_points,
    x_field="LONGITUDE",
    y_field="LATITUDE",
    coordinate_system=arcpy.SpatialReference(4269),
)

# Reproject the points to UTM Zone 10N (WKID 26910)
arcpy.management.Project(xy_points, "NewTYC_points_Projected", 26910)

Append to SDE: Workflow in progress!

In [None]:
# ## UPDATE SDE
# inputfc= "NewTYC_points_Projected"

# # disconnect all users
# print("\nDisconnecting all users...")
# arcpy.DisconnectUser(sdeVector, "ALL")
 
# # unregister the sde feature class as versioned
# print ("\nUnregistering feature dataset as versioned...")
# arcpy.UnregisterAsVersioned_management(sdemonitoring,"NO_KEEP_EDIT","COMPRESS_DEFAULT")
# print ("\nFinished unregistering feature dataset as versioned.")

# arcpy.management.Append(inputfc, tycdata_path,"NO_TEST")

# # disconnect all users
# print("\nDisconnecting all users...")
# arcpy.DisconnectUser(sdeVector, "ALL")

# # register SDE feature class as versioned
# arcpy.RegisterAsVersioned_management(sdemonitoring, "NO_EDITS_TO_BASE")
