In [3]:
import os
import pandas as pd
import numpy as np
import re
from tqdm import tqdm

# Directory containing the CSV files
input_dir = r"G:\Public\CYGNSS_clip_csv"
output_file = r"G:\Public\CYGNSS_clip_csv\All_cygnss_data_2023.csv"

# Function to extract satellite ID and date from the filename
def extract_info_from_filename(filename):
    match = re.search(r"cyg(\d+)\.ddmi\.s(\d{8})-", filename)
    if match:
        satellite = f"cyg{match.group(1)}"
        date = match.group(2)
        return satellite, date
    return None, None

# Function to calculate the coordinates of the corners of a square
def calculate_square(lat, lon, km_radius):
    earth_radius = 6371
    delta_lat = (km_radius / earth_radius) * (180 / np.pi)
    delta_lon = (km_radius / (earth_radius * np.cos(np.radians(lat)))) * (180 / np.pi)

    return {
        "top_left_lat": lat + delta_lat,
        "top_left_lon": lon - delta_lon,
        "top_right_lat": lat + delta_lat,
        "top_right_lon": lon + delta_lon,
        "bottom_left_lat": lat - delta_lat,
        "bottom_left_lon": lon - delta_lon,
        "bottom_right_lat": lat - delta_lat,
        "bottom_right_lon": lon + delta_lon
    }

# List to hold dataframes
dataframes = []

# Iterate through all files in the directory
for file in tqdm(os.listdir(input_dir)):
    if file.endswith(".csv"):
        file_path = os.path.join(input_dir, file)
        
        # Read the CSV file
        try:
            df = pd.read_csv(file_path)
            # Drop rows with any missing values
            df = df.dropna()

            # Extract satellite and date information
            satellite, date = extract_info_from_filename(file)

            if satellite and date:
                # Add satellite and date information to the dataframe
                df["satellite"] = satellite
                df["date"] = date

                # Recalculate the corner coordinates with km_radius = 1.5
                new_coords = df.apply(lambda row: calculate_square(row["sp_lat"], row["sp_lon"], 1.5), axis=1)
                new_coords_df = pd.DataFrame(new_coords.tolist())
                
                # Merge the new coordinates with the dataframe
                df = pd.concat([df, new_coords_df], axis=1)

                # Append the dataframe to the list
                dataframes.append(df)
        except Exception as e:
            print(f"Error reading file {file}: {e}")

# Concatenate all dataframes
if dataframes:
    merged_df = pd.concat(dataframes, ignore_index=True)

    # Save the merged dataframe to a CSV file
    merged_df.to_csv(output_file, index=False)
    print(f"Merged data saved to {output_file}")
else:
    print("No valid data to merge.")

100%|██████████████████████████████████████████████████████| 2544/2544 [10:50<00:00,  3.91it/s]


Merged data saved to G:\Public\CYGNSS_clip_csv\All_cygnss_data_2023.csv


In [2]:
import pandas as pd
import os

# Path to the merged CSV file
input_csv = r"G:\Public\CYGNSS_clip_csv\Merged_data\All_cygnss_data_2023.csv"
output_dir = r"G:\Public\CYGNSS_clip_csv\Merged_data"

# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Read the merged CSV file
df = pd.read_csv(input_csv)

# Check if the 'date' column exists and format is correct
if 'date' in df.columns:
    try:
        # Convert 'date' column to datetime format
        df['date'] = pd.to_datetime(df['date'], format='%Y%m%d')

        # Extract month from the date
        df['month'] = df['date'].dt.month

        # Remove duplicate columns (if any)
        df = df.loc[:, ~df.columns.duplicated()]

        # Split data into 12 files by month
        for month in range(2, 3):
            month_df = df[df['month'] == month]

            if not month_df.empty:
                # Create the output file name
                output_file = os.path.join(output_dir, f"cygnss_data_month_{month:02}.csv")

                # Save the data for the current month
                month_df.to_csv(output_file, index=False)
                print(f"Data for month {month:02} saved to {output_file}")
            else:
                print(f"No data found for month {month:02}")
    except Exception as e:
        print(f"Error processing the 'date' column: {e}")
else:
    print("The 'date' column is missing from the input CSV.")

  df = pd.read_csv(input_csv)


Data for month 02 saved to G:\Public\CYGNSS_clip_csv\Merged_data\cygnss_data_month_02.csv


In [1]:
import pandas as pd
import os

# Define the folder path and file pattern
folder_path = r"G:\Public\CYGNSS_clip_csv\Merged_data"
file_pattern = "cygnss_data_month_"
columns_to_drop = 9  # Number of columns to delete

# Process each file from month 01 to 12
for month in range(1, 13):
    file_name = f"{file_pattern}{month:02}.csv"
    file_path = os.path.join(folder_path, file_name)
    
    if not os.path.exists(file_path):
        print(f"File not found: {file_name}, skipping...")
        continue

    print(f"Processing file: {file_name}")
    
    # Load the file into a DataFrame
    try:
        df = pd.read_csv(file_path)
        if df.shape[1] <= columns_to_drop:
            print(f"File {file_name} has fewer than {columns_to_drop} columns. Skipping deletion.")
            continue
        
        # Drop the last 9 columns
        df = df.iloc[:, :-columns_to_drop]
        
        # Save the modified DataFrame back to the same file
        df.to_csv(file_path, index=False)
        print(f"Successfully processed and saved: {file_name}")
    except Exception as e:
        print(f"Error processing file {file_name}: {e}")

Processing file: cygnss_data_month_01.csv
Successfully processed and saved: cygnss_data_month_01.csv
Processing file: cygnss_data_month_02.csv
Successfully processed and saved: cygnss_data_month_02.csv
Processing file: cygnss_data_month_03.csv
Successfully processed and saved: cygnss_data_month_03.csv
Processing file: cygnss_data_month_04.csv
Successfully processed and saved: cygnss_data_month_04.csv
Processing file: cygnss_data_month_05.csv
Successfully processed and saved: cygnss_data_month_05.csv
Processing file: cygnss_data_month_06.csv
Successfully processed and saved: cygnss_data_month_06.csv
Processing file: cygnss_data_month_07.csv
Successfully processed and saved: cygnss_data_month_07.csv
Processing file: cygnss_data_month_08.csv
Successfully processed and saved: cygnss_data_month_08.csv
Processing file: cygnss_data_month_09.csv
Successfully processed and saved: cygnss_data_month_09.csv
Processing file: cygnss_data_month_10.csv
Successfully processed and saved: cygnss_data_mon

In [9]:
import os
import pandas as pd

def merge_csv_files(directory, output_file):
    # Get a list of all CSV files in the directory
    csv_files = [os.path.join(directory, file) for file in os.listdir(directory) if file.endswith('.csv')]

    # List to hold individual DataFrames
    data_frames = []

    # Read each CSV and append to the list
    for file in csv_files:
        df = pd.read_csv(file)
        data_frames.append(df)

    # Concatenate all DataFrames
    merged_df = pd.concat(data_frames, ignore_index=True)

    # Save the merged DataFrame to a new CSV file
    merged_df.to_csv(output_file, index=False)

# Directory containing the CSV files
directory = r'G:\Public\CYGNSS_clip_csv\Merged_data\Random_forest_training_without_location'

# Output file path
output_file = r'G:\Public\CYGNSS_clip_csv\Merged_data\Random_forest_training_without_location\merged_water_fraction.csv'

# Merge the CSV files
merge_csv_files(directory, output_file)

print("CSV files merged successfully!")

CSV files merged successfully!


In [7]:
import pandas as pd

# Load the large CSV
file_path = r"G:\Public\CYGNSS_clip_csv\Merged_data\cygnss_data_month_11.csv"  # Replace with your file path
output_file1 = r"G:\Public\CYGNSS_clip_csv\Merged_data\cygnss_data_month_11_a.csv"
output_file2 = r"G:\Public\CYGNSS_clip_csv\Merged_data\cygnss_data_month_11_b.csv"

# Read the CSV
data = pd.read_csv(file_path)

# Find the midpoint
midpoint = len(data) // 2

# Split the data
data_part1 = data.iloc[:midpoint]
data_part2 = data.iloc[midpoint:]

# Save the two parts
data_part1.to_csv(output_file1, index=False)
data_part2.to_csv(output_file2, index=False)

print(f"CSV split into two files:\n- {output_file1}\n- {output_file2}")


CSV split into two files:
- G:\Public\CYGNSS_clip_csv\Merged_data\cygnss_data_month_11_a.csv
- G:\Public\CYGNSS_clip_csv\Merged_data\cygnss_data_month_11_b.csv


In [3]:
import os
import pandas as pd

def merge_csv_files(directory, output_file):
    # Get a list of all CSV files in the directory
    csv_files = [os.path.join(directory, file) for file in os.listdir(directory) if file.endswith('.csv')]

    # List to hold individual DataFrames
    data_frames = []

    # Read each CSV and append to the list
    for file in csv_files:
        try:
            df = pd.read_csv(file, error_bad_lines=False, warn_bad_lines=True, engine='python')
            data_frames.append(df)
        except Exception as e:
            print(f"Error reading {file}: {e}")
            continue

    # Concatenate all DataFrames
    merged_df = pd.concat(data_frames, ignore_index=True)

    # Save the merged DataFrame to a new CSV file
    merged_df.to_csv(output_file, index=False)

# Directory containing the CSV files
directory = r"G:\Public\CYGNSS_clip_csv\Merged_data\Random_forest_training_with_location"

# Output file path
output_file = r"G:\Public\CYGNSS_clip_csv\Merged_data\Random_forest_training_with_location\merged_water_fraction.csv"

# Merge the CSV files
merge_csv_files(directory, output_file)

print("CSV files merged successfully!")



  df = pd.read_csv(file, error_bad_lines=False, warn_bad_lines=True, engine='python')


  df = pd.read_csv(file, error_bad_lines=False, warn_bad_lines=True, engine='python')


  df = pd.read_csv(file, error_bad_lines=False, warn_bad_lines=True, engine='python')


  df = pd.read_csv(file, error_bad_lines=False, warn_bad_lines=True, engine='python')


  df = pd.read_csv(file, error_bad_lines=False, warn_bad_lines=True, engine='python')


  df = pd.read_csv(file, error_bad_lines=False, warn_bad_lines=True, engine='python')


  df = pd.read_csv(file, error_bad_lines=False, warn_bad_lines=True, engine='python')


  df = pd.read_csv(file, error_bad_lines=False, warn_bad_lines=True, engine='python')


  df = pd.read_csv(file, error_bad_lines=False, warn_bad_lines=True, engine='python')


  df = pd.read_csv(file, error_bad_lines=False, warn_bad_lines=True, engine='python')


  df = pd.read_csv(file, error_bad_lines=False, warn_bad_lines=True, engine='python')


  df = pd.read_csv(file, error

CSV files merged successfully!
