In [None]:
import os
import pandas as pd

Calculate the daily average streamflow for stations whose values are recorded at sub-daily intervals.

In [None]:
def process_csv_with_averaging(input_folder, output_folder):
    """Aggregate sub-daily streamflow CSV files into daily means.

    Every ``*.csv`` file in ``input_folder`` is read (skipping its first
    8 rows), reduced to one row per calendar day and written under the same
    file name to ``output_folder``.

    Parameters
    ----------
    input_folder : str
        Folder containing the raw CSV files. Each file must have exactly two
        columns after the skipped rows (timestamp, streamflow); otherwise
        assigning the column names raises ``ValueError``.
    output_folder : str
        Folder that receives the processed files. It is created if missing.

    Notes
    -----
    * Blank or non-numeric streamflow values are treated as missing and are
      excluded from the daily mean. A day with no valid value at all is
      written with the sentinel ``-9999``.
    * Rows whose timestamp cannot be parsed are dropped.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(output_folder, exist_ok=True)

    # Iterate through all CSV files in the input folder
    for file_name in os.listdir(input_folder):
        if not file_name.endswith('.csv'):
            continue

        # Construct the full file path
        file_path = os.path.join(input_folder, file_name)

        # Load the CSV file, skipping the first 8 rows
        df = pd.read_csv(file_path, skiprows=8)

        # Rename the columns to the names used downstream
        df.columns = ['Datetime', 'Streamflow (m3/s)']

        # Blank or non-numeric values become NaN. They are deliberately NOT
        # replaced with -9999 yet: doing so before averaging would mix the
        # sentinel into the daily mean of the valid readings.
        df['Streamflow (m3/s)'] = pd.to_numeric(df['Streamflow (m3/s)'], errors='coerce')

        # Convert the 'Datetime' column to a consistent date string (YYYY-MM-DD)
        df['Datetime'] = pd.to_datetime(df['Datetime'], errors='coerce').dt.strftime('%Y-%m-%d')

        # Unparseable timestamps end up as missing keys; drop them explicitly
        df = df.dropna(subset=['Datetime'])

        # Daily mean over valid readings only (mean skips NaN by default)
        df_grouped = df.groupby('Datetime', as_index=False).agg({'Streamflow (m3/s)': 'mean'})

        # Days without a single valid reading get the missing-data sentinel
        df_grouped['Streamflow (m3/s)'] = df_grouped['Streamflow (m3/s)'].fillna(-9999)

        # Save the processed file to the output folder
        output_file_path = os.path.join(output_folder, file_name)
        df_grouped.to_csv(output_file_path, index=False)

# Source folder with the raw observed-data CSVs and destination folder
# that receives the daily-averaged files
input_folder = '/Users/yubin/Library/CloudStorage/Box-Box/Bias Correction/Africa/Rwanda/Observed data'
output_folder = '/Users/yubin/Library/CloudStorage/Box-Box/Bias Correction/Africa/Rwanda/Hydroserver'

# Run the daily-averaging step over every CSV in the source folder
process_csv_with_averaging(
    input_folder=input_folder,
    output_folder=output_folder,
)

The processed CSV files were later found to contain negative streamflow values, so this cell removes them.

In [None]:
input_folder = '/Users/yubin/Library/CloudStorage/Box-Box/Bias Correction/Africa/Rwanda/Hydroserver'

# Clean every CSV found in the folder; each file is rewritten in place
for file_name in os.listdir(input_folder):
    # Anything that is not a CSV is left untouched
    if not file_name.endswith('.csv'):
        continue

    file_path = os.path.join(input_folder, file_name)

    # Read the file (header on the first row) and parse the dates
    df = pd.read_csv(file_path)
    df['Datetime'] = pd.to_datetime(df['Datetime'])

    # Keep only the rows with a non-negative streamflow
    is_non_negative = df['Streamflow (m3/s)'] >= 0
    df_filtered = df[is_non_negative]

    # One row per "Datetime" value holding the mean of the remaining rows
    df_daily_mean = df_filtered.groupby('Datetime').mean().reset_index()

    # The result goes back to the same path, overwriting the file just read
    output_file_path = file_path
    df_daily_mean.to_csv(output_file_path, index=False)

print("Processing complete!")


