# Use raw CSVs created in R to update SDE.Stream with CSCI scores

Stream_Data_Update.ipynb
Created: October 26th, 2023
Last Updated: December 1, 2025
Sarah Newsome, Tahoe Regional Planning Agency
Evelyn Malamut, Tahoe Regional Planning Agency
This Python script was developed to update SDE with yearly bioassessment data.

This script uses Python 3.13.7 and was designed to be used with ArcGIS Pro python environment "arcgispro-py3-plotly", which refers to the default cloned Python environment with plotly installed as an additional library.

## Setup

In [None]:
import arcpy
from datetime import datetime
import os
import sys
from sqlalchemy.engine import URL
from sqlalchemy import create_engine
import sqlalchemy as sa
import pandas as pd
from arcgis import GIS
from arcgis.features import FeatureSet, GeoAccessor, GeoSeriesAccessor, FeatureLayer
import pandas as pd
import numpy as np
import requests

# GIS() is called with no portal URL or credentials
gis = GIS()
# # Set Pandas display options to show all rows and columns
# pd.set_option('display.max_rows', None)
# pd.set_option('display.max_columns', None)
# in memory output file path
wk_memory = "memory" + "\\"

# set workspace and sde connections
working_folder = r"F:/Research and Analysis/Fisheries/Streams/Bioassessment/California Stream Condition Index/California Stream Condition Index/2024_CSCI"
arcpy.env.workspace = r"C:/GIS/Scratch.gdb"

# network path to connection files
filePath = r"F:\GIS\DB_CONNECT"

# database file path
sdeBase    = os.path.join(filePath, "Vector.sde")
sdeCollect = os.path.join(filePath, "Collection.sde")

# local variables
# feature dataset that contains the stream feature class
fdata = os.path.join(sdeBase, "sde.SDE.Monitoring")
## Final feature class to append to in Enterprise Geodatabase
# Joined onto fdata instead of embedding a backslash in a plain string:
# "\s" is an invalid escape sequence and triggers a SyntaxWarning on
# Python 3.12+. The resulting path is the same on Windows.
sdeStreams = os.path.join(fdata, "sde.SDE.Stream")

# input CSVs: CSCI scores and station locations
originalcsv = os.path.join(working_folder, "CSCI_Report_2024.csv")
locationcsv = os.path.join(working_folder, "Stations_2024.csv")

# Report (without stopping) any input file that is missing
for csv_path in (originalcsv, locationcsv):
    if not os.path.exists(csv_path):
        print(f"Error: File not found at {csv_path}")

In [None]:
# Function to assign Station type and lat long and LTinfo website to Trend Sites


#Calculate Rating for CSCI value
#Define a function to categorize values based on ranges
def categorize_value(value):
    """Return the rating category for a CSCI score.

    Score ranges:
        [0, 0.6)   -> 'poor'
        [0.6, 0.8) -> 'marginal'
        [0.8, 1.0) -> 'good'
        >= 1.0     -> 'excellent'

    Args:
        value: Numeric CSCI score; may be missing (None or NaN).

    Returns:
        The rating string, or None when the score is missing or negative,
        so that an invalid score is never labelled 'excellent'.
    """
    # Every comparison with NaN is False, so a missing score has to be
    # caught explicitly instead of falling through the range checks.
    if pd.isna(value) or value < 0:
        return None
    if value < 0.6:
        return 'poor'
    if value < 0.8:
        return 'marginal'
    if value < 1.0:
        return 'good'
    return 'excellent'
    
def get_fs_data(service_url):
    """Query a feature service layer and return its attributes as a DataFrame.

    Args:
        service_url: URL of the layer passed to FeatureLayer.

    Returns:
        A pandas DataFrame with one row per feature returned by the query,
        built from each feature's ``attributes``.
    """
    layer = FeatureLayer(service_url)

    # One attribute dictionary per feature returned by the query
    attribute_rows = [feat.attributes for feat in layer.query().features]

    return pd.DataFrame(attribute_rows)

In [None]:
#Create Dictionary Using Rest Service data
# setup
import pandas as pd
from arcgis import GIS
from arcgis.features import FeatureLayer

# Connect to TRPA Enterprise GIS Portal *if it's a service only shared with org
# portal_user = "TRPA_PORTAL_ADMIN"
# portal_pwd = str(os.environ.get('Password'))
# portal_url = "https://maps.trpa.org/portal/"

# setup connection to GIS server this can be GIS() with a public service
gis = GIS()

# Query the LTInfo monitoring layer used as the source of site attributes
service_url = 'https://maps.trpa.org/server/rest/services/LTInfo_Monitoring/MapServer/8'
feature_layer = FeatureLayer(service_url)
query_result = feature_layer.query()

# Spatially Enabled DataFrame built from the query result
sdfStreamHab = query_result.sdf
sdfStreamHab.info()

# Attributes stored per site; SITE_NAME becomes the dictionary key
selected_columns = ['STATION_TYPE', 'LATITUDE', 'LONGITUDE', 'LTINFO']
columnstokeep = ['SITE_NAME'] + selected_columns

# Keep only the lookup columns, then collapse repeated rows
sdfStreamHab = sdfStreamHab.loc[:, columnstokeep]
unique_values = sdfStreamHab.drop_duplicates()

# {SITE_NAME: {'STATION_TYPE': ..., 'LATITUDE': ..., 'LONGITUDE': ..., 'LTINFO': ...}}
sites_by_name = unique_values.set_index('SITE_NAME')
lookup_dict = sites_by_name[selected_columns].to_dict(orient='index')

# Show the dictionary for a visual check
print(lookup_dict)

## Transform CSCI Scores to Point feature class in Enterprise Geodatabase

In [None]:
# Create DataFrames from CSV files
# For 2024 data add locations of new data and label in similar way
# Both reads happen inside the try so the messages below are reachable;
# the error is re-raised so the merge never runs on missing data.
try:
    dfCSCI = pd.read_csv(originalcsv)
    dflocations = pd.read_csv(locationcsv)
except FileNotFoundError:
    print("File not found. Please check the file path.")
    raise
except pd.errors.EmptyDataError:
    print("The CSV file is empty.")
    raise
except pd.errors.ParserError:
    print("Error parsing the CSV file.")
    raise

# To include additional CSV files, concatenate them onto the frames, e.g.
# dfCSCI = pd.concat([dfCSCI, pd.read_csv(another_csv)], ignore_index=True)
# dflocations = pd.concat([dflocations, pd.read_csv(another_csv)], ignore_index=True)

# merge CSCI scores and location data
RawData_df = pd.merge(dfCSCI, dflocations, how='inner', on='StationCode')

# The inner join drops any score whose StationCode has no location row;
# list those stations so they are not lost silently.
has_location = dfCSCI['StationCode'].isin(dflocations['StationCode'])
stations_without_location = dfCSCI.loc[~has_location, 'StationCode'].unique().tolist()
if stations_without_location:
    print(f"Warning: no location found for StationCode(s): {stations_without_location}")

In [None]:
# get year from sample id (the text after the last underscore)
RawData_df['Year'] = RawData_df['SampleID'].str.split("_").str[-1]


def _site_attribute(station, field, fallback=None):
    """Return ``field`` for ``station`` from lookup_dict, or ``fallback`` if the station is not a key."""
    if station in lookup_dict:
        return lookup_dict[station][field]
    return fallback


station_codes = RawData_df['StationCode']

# Station type; stations that are not in the lookup are labelled 'Status'
RawData_df['STATION_TYPE'] = station_codes.map(
    lambda code: _site_attribute(code, 'STATION_TYPE', 'Status'))

# Latitude from the lookup, falling back to New_Lat where the lookup has none
RawData_df['LATITUDE'] = station_codes.map(
    lambda code: _site_attribute(code, 'LATITUDE'))
RawData_df['LATITUDE'] = RawData_df['LATITUDE'].fillna(RawData_df['New_Lat'])

# Longitude from the lookup, falling back to New_Long where the lookup has none
RawData_df['LONGITUDE'] = station_codes.map(
    lambda code: _site_attribute(code, 'LONGITUDE'))
RawData_df['LONGITUDE'] = RawData_df['LONGITUDE'].fillna(RawData_df['New_Long'])

# LTINFO value from the lookup (None for stations not in the lookup)
RawData_df['LTINFO'] = station_codes.map(
    lambda code: _site_attribute(code, 'LTINFO'))

#Use only for threshold?
# Apply the categorization function to create the new field
#RawData_df['Rating'] = RawData_df['CSCI'].apply(categorize_value)


# source column -> output field name
Field_Mapping = {
    'StationCode': 'SITE_NAME',
    'Year': 'YEAR_OF_COUNT',
    'LATITUDE': 'LATITUDE',
    'LONGITUDE': 'LONGITUDE',
    'CSCI': 'COUNT_VALUE',
    'STATION_TYPE': 'STATION_TYPE',
    'LTINFO': 'LTINFO',
}

# rename the mapped columns and drop every column that is not mapped
unmapped_columns = [col for col in RawData_df.columns if col not in Field_Mapping]
df_final = RawData_df.rename(columns=Field_Mapping).drop(columns=unmapped_columns)

# establish duration field
def assign_duration(stationtype):
    """Return the duration label for a station type.

    Args:
        stationtype: Station type value.

    Returns:
        'One-time' when the station type is 'Status', otherwise 'Long-term'.
    """
    return 'One-time' if stationtype == 'Status' else 'Long-term'
# duration label derived from each row's station type
df_final['DURATION'] = df_final['STATION_TYPE'].apply(assign_duration)

# station code is site name, site name is station code.
df_final['STATION_CODE'] = df_final['SITE_NAME']

# export to csv for QA (written without the DataFrame index)
qa_csv_path = os.path.join(working_folder, "StreamCSCI_proccesed.csv")
df_final.to_csv(qa_csv_path, index=False)

In [None]:
# convert CSV to point feature class with NAD1983 as coordinate system
# The output name is held in one variable so the projection step below
# reads the feature class this step writes.
points_fc = "NewCSCI_points"
arcpy.management.XYTableToPoint(
    os.path.join(working_folder, "StreamCSCI_proccesed.csv"),
    points_fc,
    "LONGITUDE", "LATITUDE",
    coordinate_system=arcpy.SpatialReference(4269))

# project to UTM Zone 10N (WKID 26910)
# The input was "NewCSCI_points_test", a name the preceding step does not
# create; it now uses the points written by XYTableToPoint.
arcpy.Project_management(points_fc, "NewStream_CSCI_Projected",
                         arcpy.SpatialReference(26910))


# THIS IS HITTING SDE - BE CAREFUL

In [None]:

# projected point feature class to load into SDE
inputfc = "NewStream_CSCI_Projected"

# disconnect all users
print("\nDisconnecting all users...")
arcpy.DisconnectUser(sdeBase, "ALL")

# unregister the sde feature dataset as versioned
print("\nUnregistering feature dataset as versioned...")
arcpy.UnregisterAsVersioned_management(fdata, "NO_KEEP_EDIT", "COMPRESS_DEFAULT")
print("\nFinished unregistering feature dataset as versioned.")

# From here on the dataset is unversioned. The finally block re-registers
# it even when the append fails, so an error cannot leave the feature
# dataset unversioned; the original exception still propagates.
try:
    arcpy.management.Append(inputfc, sdeStreams, "NO_TEST")
finally:
    # disconnect all users
    print("\nDisconnecting all users...")
    arcpy.DisconnectUser(sdeBase, "ALL")

    # register SDE feature dataset as versioned
    arcpy.RegisterAsVersioned_management(fdata, "NO_EDITS_TO_BASE")