In [8]:
#import libraries
import os
import pandas as pd
import arcpy

def get_periphyton_data():
    #Import DAta
    file_path = r"F:\Research and Analysis\Water Quality Monitoring Program\Nearshore\IntegratedAlgaeMonitoring\data\Peri"
    #file_path = local_path.parents[0] / '2023/data/raw_data'
    # Load each CSV file into a DataFrame
    inclinedf = pd.read_csv(os.path.join(file_path, 'InclineWest_Historic.csv'))
    Pinelanddf = pd.read_csv(os.path.join(file_path, 'Pineland_Historic.csv'))
    Rubicondf = pd.read_csv(os.path.join(file_path, 'Rubicon_Historic.csv'))
    Sugarpinedf = pd.read_csv(os.path.join(file_path, 'Sugarpine_Historic.csv'))
    TahoeCitydf = pd.read_csv(os.path.join(file_path, 'TahoeCity_Historic.csv'))
    Zephyrdf = pd.read_csv(os.path.join(file_path, 'Zephyr_Historic.csv'))

    #Combine All Dataframes
    df = pd.concat([inclinedf, Pinelanddf, Rubicondf, Sugarpinedf, TahoeCitydf, Zephyrdf], ignore_index=True)
    return df

df= get_periphyton_data()

def format_peri(df, start_date):
    # Convert 'Sample_Date' to datetime
    df['Sample_Date'] = pd.to_datetime(df['Sample_Date'])
    
    # Filter the DataFrame to include only the data from the specified date and newer
    df = df[df['Sample_Date'] >= start_date]

    df['unit'] = 'Chla_mg_m2'
    # Rename site, date and 'chl' column
    df = df.rename(columns={'site': 'id', 'Sample_Date': 'Date', 'Chl': 'value'})
    
    # Keep id, Date, value, and unit only
    keep_columns = ['id', 'Date', 'value', 'unit']

    # Drop all columns except for 'id', 'Date', 'value', and 'unit'
    df = df[keep_columns]

    # Reset the index
    df = df.reset_index(drop=True)

    #drop rows with missing values
    df = df.dropna()
    
    return df

readydf = format_peri(df, '2016-10-20')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['unit'] = 'Chla_mg_m2'


In [6]:
#check data types
readydf.dtypes

id               object
Date     datetime64[ns]
value           float64
unit             object
dtype: object

In [10]:
type_mapping = {
        'float64': 'DOUBLE',
        'object': 'TEXT',
        'string': 'TEXT',
        'datetime64[ns]': 'TEXT'
    }
# Set up geodatabase and output table name
gdb_path = r"F:\Research and Analysis\Workspace\Sarah\Scratch.gdb"
output_table = "Peri_temp"
output_path = f"{gdb_path}\\{output_table}"

#Delete existing table if it exists
if arcpy.Exists(output_path):
    arcpy.management.Delete(output_path)
    print(f"Deleted existing table: {output_table}")

# Create the table in the geodatabase
arcpy.management.CreateTable(gdb_path, output_table)

# # Identify date columns (assume they are 'object' type but contain dates)
# date_columns = [col for col in df_long.columns if pd.api.types.is_datetime64_any_dtype(df_long[col])]


# Add fields based on DataFrame dtypes
for col_name, dtype in readydf.dtypes.items():
    arcgis_type = type_mapping.get(str(dtype), 'TEXT')  # Default to TEXT if dtype is unknown
    if arcgis_type == 'TEXT':
        arcpy.management.AddField(output_path, col_name, arcgis_type, field_length=255)
    else:
        arcpy.management.AddField(output_path, col_name, arcgis_type)
 
# Insert data into the table
with arcpy.da.InsertCursor(output_path, readydf.columns.tolist()) as cursor:
    for _, row in readydf.iterrows():
        cursor.insertRow(row.tolist())
 
print(f"Table '{output_table}' created and populated in {gdb_path}")

Deleted existing table: Peri_temp
Table 'Peri_temp' created and populated in F:\Research and Analysis\Workspace\Sarah\Scratch.gdb
