In [1]:
# Import Data
import pandas as pd
import arcpy
import os
from datetime import datetime
import pathlib
import numpy as np
from arcgis import GIS
from arcgis.features import FeatureLayer
from arcgis.geometry import SpatialReference
from functools import reduce
from sqlalchemy.engine import URL
from sqlalchemy import create_engine

# Source workbook with the TYC counts, and the worksheet to load from it
tycdatafile_path = r"F:\Research and Analysis\Vegetation\TYC\TYC 2024.xlsx"
sheet_name = '2023'

# Load the worksheet and discard the row labelled 0
df = pd.read_excel(tycdatafile_path, sheet_name=sheet_name).drop(0)

# Narrow to the site name plus the yearly count columns, then remove the
# rows labelled 64 through 71 (selecting columns returns a new frame, so
# `df` itself is left untouched)
year_columns = [2024, 2023, 2022, 2021, 2020]
df2 = df[[' SITE NAME'] + year_columns].drop(index=list(range(64, 72)))

# Reshape wide -> long: one row per (site, year) pair
df_long = df2.melt(
    id_vars=[' SITE NAME'],
    var_name='YEAR_OF_COUNT',
    value_name='COUNT_VALUE',
)






In [2]:

# Spatial reference: NAD 1983 UTM Zone 10N (WKID 26910)
sr = arcpy.SpatialReference(26910)

# Allow geoprocessing tools to replace outputs that already exist
arcpy.env.overwriteOutput = True

# Use the Scratch geodatabase as the default environment workspace
arcpy.env.workspace = r"F:/Research and Analysis/Workspace/Evelyn/Scratch.gdb"

# Optional (disabled): clear the in-memory workspace
# arcpy.management.Delete('memory')

# Build a SQLAlchemy engine for a SQL Server database hosted on the sql12 server.
# Only the 'sde' database is wired up; any other name yields None.
def get_conn(db):
    """Return a SQLAlchemy engine for ``db``, or ``None`` if ``db`` is unsupported.

    Credentials are read from the ``DB_USER`` and ``DB_PASSWORD`` environment
    variables of the machine running the notebook.

    Parameters
    ----------
    db : str
        Database name, matched case-insensitively. Only ``'sde'`` is handled.

    Returns
    -------
    sqlalchemy.engine.Engine or None
        An engine usable with ``pd.read_sql`` when ``db`` is ``'sde'``;
        ``None`` for every other name.

    Raises
    ------
    RuntimeError
        If ``db`` is supported but ``DB_USER`` or ``DB_PASSWORD`` is unset or
        empty. Without this check the literal text ``None`` would be sent to
        the server as the user name / password.
    """
    # make it case insensitive
    db = db.lower()
    if db != 'sde':
        return None

    # Get database user and password from environment variables on the machine running the script
    db_user = os.environ.get('DB_USER')
    db_password = os.environ.get('DB_PASSWORD')
    missing = [
        name
        for name, value in (('DB_USER', db_user), ('DB_PASSWORD', db_password))
        if not value
    ]
    if missing:
        raise RuntimeError(
            f"Missing required environment variable(s): {', '.join(missing)}"
        )

    # ODBC driver for SQL Server and the server that hosts the database
    driver = 'ODBC Driver 17 for SQL Server'
    sql_12 = 'sql12'

    # The driver name is wrapped in braces, the canonical ODBC form for a
    # value that contains spaces.
    connection_string = (
        f"DRIVER={{{driver}}};SERVER={sql_12};DATABASE={db};"
        f"UID={db_user};PWD={db_password}"
    )
    connection_url = URL.create("mssql+pyodbc", query={"odbc_connect": connection_string})
    return create_engine(connection_url)


# Folder on the network share that holds the .sde connection files
filePath = "F:\\GIS\\DB_CONNECT"

# Path built step by step: Vector.sde connection -> sde.SDE.Monitoring ->
# sde.SDE.Yellow_Cress (the dataset read below)
sdeVector = os.path.join(filePath, "Vector.sde")
sdemonitoring = os.path.join(sdeVector, "sde.SDE.Monitoring")
tycdata_path = os.path.join(sdemonitoring, "sde.SDE.Yellow_Cress")

# Fields requested from the cursor; SHAPE@XY yields an (x, y) pair expressed
# in the spatial reference handed to the cursor (sr)
cursor_fields = ["SITE_NAME", "SHAPE@XY", "LATITUDE", "LONGITUDE", "OWNER", "LTINFO_HYPERLINK", "SHAPE"]

# One record per site: name, projected X/Y, stored lat/lon, owner, hyperlink
tycdata = []
with arcpy.da.SearchCursor(tycdata_path, cursor_fields, spatial_reference=sr) as cursor:
    for site_name, (x_coord, y_coord), latitude, longitude, owner, ltinfo_hyperlink, _shape in cursor:
        tycdata.append(
            [site_name, x_coord, y_coord, latitude, longitude, owner, ltinfo_hyperlink]
        )

# Collect the records into a DataFrame
output_columns = ["SITE_NAME", "X", "Y", "LATITUDE", "LONGITUDE", "OWNER", "LTINFO_HYPERLINK"]
tyc_df = pd.DataFrame(tycdata, columns=output_columns)

# # Extract site names and coordinates
# with arcpy.da.SearchCursor(tycdata_path, ["SITE_NAME", "SHAPE@XY, "OWNER", "LTINFO_HYPERLINK", "SHAPE"]) as cursor:
#     for row in cursor:
#         site_name = row[0]  # Site name
#         longitude, latitude = row[1]  # Extract X (longitude) and Y (latitude)
#         owner = row[2]  # Owner
#         ltinfo_hyperlink = row[3]  # LTINFO Hyperlink
#         tycdata.append([site_name, latitude, longitude, owner, ltinfo_hyperlink])

# Create a pandas DataFrame
# tyc_df = pd.DataFrame(tycdata, columns=["SITE_NAME", "LATITUDE", "LONGITUDE", "OWNER", "LTINFO_HYPERLINK"])


In [None]:

# Standardize the site-name column and store the year as an integer before processing
df_long = df_long.rename(columns={' SITE NAME': 'SITE_NAME'})
df_long['YEAR_OF_COUNT'] = df_long['YEAR_OF_COUNT'].astype(int)

# Change 'NS' (no survey) to a missing value (<NULL>) to be consistent with SDE.
# The dict form is used on purpose: on pandas < 1.4, replace('NS', None) ignores
# the explicit None and forward-fills, silently copying the previous row's count.
df_long['COUNT_VALUE'] = df_long['COUNT_VALUE'].replace({'NS': None})

# Strip whitespace from the join key on both sides to avoid mismatches
df_long['SITE_NAME'] = df_long['SITE_NAME'].astype(str).str.strip()
tyc_df['SITE_NAME'] = tyc_df['SITE_NAME'].str.strip()

# Left-merge the counts with the site attributes on SITE_NAME, then remove duplicates
merged_df = pd.merge(df_long, tyc_df, on='SITE_NAME', how='left')
merged_df = merged_df.drop_duplicates()

# A left merge keeps count rows whose site was not matched; those rows have
# no X/Y. Report them so they are noticed before points are built.
sites_without_xy = sorted(merged_df.loc[merged_df['X'].isna(), 'SITE_NAME'].unique())
if sites_without_xy:
    print(f"Warning: {len(sites_without_xy)} site(s) have no coordinates after the merge: {sites_without_xy}")


In [9]:
# pandas dtype name -> geodatabase field type. Any dtype not listed falls back to TEXT.
type_mapping = {
    'int32': 'LONG',
    # astype(int) yields int64 where the default integer is 64-bit; without this
    # entry such columns would be created as TEXT. LONG is 32-bit, so values
    # must fit that range (true for years).
    'int64': 'LONG',
    'float64': 'DOUBLE',
    'object': 'TEXT',
    'string': 'TEXT',
    #'datetime64[ns]': 'DATE'
}

# Set up geodatabase and output table name
gdb_path = r"F:\Research and Analysis\Workspace\Evelyn\Scratch.gdb"
output_table = "tyc_temp"
output_path = f"{gdb_path}\\{output_table}"

# Delete the existing table, if any, so the cell can be re-run
if arcpy.Exists(output_path):
    arcpy.management.Delete(output_path)
    print(f"Deleted existing table: {output_table}")

# Create the empty table in the geodatabase
arcpy.management.CreateTable(gdb_path, output_table)

# Add one field per DataFrame column, typed from the column dtype
# NOTE(review): object-dtype columns are created as TEXT(255) - confirm this
# matches the target schema for every such column.
for col_name, dtype in merged_df.dtypes.items():
    arcgis_type = type_mapping.get(str(dtype), 'TEXT')  # Default to TEXT if dtype is unknown
    if arcgis_type == 'TEXT':
        arcpy.management.AddField(output_path, col_name, arcgis_type, field_length=255)
    else:
        arcpy.management.AddField(output_path, col_name, arcgis_type)

# Insert the data row by row. pandas missing values (NaN / NA / None) are
# written as None, which the cursor stores as NULL. itertuples() is used
# instead of iterrows() so values are not coerced to a common dtype per row.
with arcpy.da.InsertCursor(output_path, merged_df.columns.tolist()) as cursor:
    for record in merged_df.itertuples(index=False, name=None):
        cursor.insertRow([None if pd.isna(value) else value for value in record])

print(f"Table '{output_table}' created and populated in {gdb_path}")

Deleted existing table: tyc_temp
Table 'tyc_temp' created and populated in F:\Research and Analysis\Workspace\Evelyn\Scratch.gdb


In [10]:


# Build a point feature class from the scratch table, using its X / Y columns
# interpreted in NAD 1983 UTM Zone 10N (WKID 26910)
utm_zone_10n = arcpy.SpatialReference(26910)
arcpy.management.XYTableToPoint(
    in_table=output_path,
    out_feature_class="NewTYC_points",
    x_field="X",
    y_field="Y",
    coordinate_system=utm_zone_10n,
)

# Open question kept from earlier drafts: the spatial reference might need
# both a GCS and a PCS (NAD 1983), e.g. arcpy.SpatialReference(4269, 26910).
#
# Alternative that was tried (disabled): build the points from
# LONGITUDE / LATITUDE and then project them to UTM Zone 10N.
# arcpy.management.XYTableToPoint(output_path,
#                                 "NewTYC_points",
#                                 "LONGITUDE", "LATITUDE")
# arcpy.Project_management("NewTYC_points", "NewTYC_points_Projected", 26910)

In [11]:
# Drop the X and Y fields from the point feature class before it goes to SDE

# Feature class to modify and the fields to remove from it
feature_class = "NewTYC_points"
fields_to_delete = ["X", "Y"]

# Attempt the deletion; failures are reported rather than raised
try:
    arcpy.management.DeleteField(feature_class, fields_to_delete)
except arcpy.ExecuteError:
    # Geoprocessing failure: print the tool's messages at severity 2
    print(arcpy.GetMessages(2))
except Exception as e:
    print(f"An error occurred: {e}")
else:
    print(f"Fields {fields_to_delete} deleted successfully from {feature_class}.")

Fields ['X', 'Y'] deleted successfully from NewTYC_points.


In [None]:
## UPDATE SDE
