In [12]:
import pandas as pd, glob, re, os

# --- Path containing your .txt files ---
folder_path = r"C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1"

# --- Create output folder ---
output_folder = os.path.join(folder_path, "Potholes_clean_csvs")
os.makedirs(output_folder, exist_ok=True)

# --- Get list of all .txt files (sorted so files are processed in a stable order) ---
txt_files = sorted(glob.glob(os.path.join(folder_path, "*.txt")))
print(f"Found {len(txt_files)} .txt files in {folder_path}")

# --- Sort priority per type letter; names without a recognised type sort last (99) ---
order_map = {'P': 1, 'C': 2, 'E': 3}


def extract_type_and_number(name):
    """Return ``(type_letter, number)`` parsed from a point name.

    The first occurrence of one of the letters P, C or E immediately followed
    by digits is used (e.g. ``"Site1_0_P12_S"`` -> ``("P", 12)``).

    Parameters
    ----------
    name : any
        Point name. It is coerced with ``str()`` first so that blank cells
        (NaN) or numeric names do not raise ``TypeError`` in ``re.search``.

    Returns
    -------
    tuple
        ``(letter, int(number))`` on a match, otherwise ``(None, -1)``.
    """
    m = re.search(r'([PCE])(\d+)', str(name))
    return (m.group(1), int(m.group(2))) if m else (None, -1)


def sort_pothole_points(points):
    """Return the rows of ``points`` ordered by type, number, base name and S/F.

    Sort keys, in priority order:
      1. type order from ``order_map`` (P, C, E, then everything else),
      2. the number following the type letter (``-1`` when none was found),
      3. the name with a trailing ``_S`` / ``_F`` removed,
      4. ``0`` for names ending in ``_S``, ``1`` for all other names.

    Parameters
    ----------
    points : pandas.DataFrame
        Must contain a ``Name`` column; all columns are carried through.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns and the original index labels, in
        sorted order. The input frame is not modified.
    """
    if points.empty:
        # Nothing to order; still hand back a frame so a header-only CSV is written.
        return points.copy()

    # Work on string copies of the names so a missing value cannot break the
    # regex / suffix checks; the Name column itself is left untouched.
    names = [str(n) for n in points["Name"]]
    parsed = [extract_type_and_number(n) for n in names]

    # Keys live in their own frame, so no helper columns have to be added to
    # (and later dropped from) the data being saved.
    keys = pd.DataFrame(
        {
            "TypeOrder": [order_map.get(kind, 99) for kind, _ in parsed],
            "Number": [number for _, number in parsed],
            "Base": [re.sub(r'_[SF]$', '', n) for n in names],
            "SF": [0 if n.endswith('_S') else 1 for n in names],  # S before F
        },
        index=pd.RangeIndex(len(names)),
    )
    ranked = keys.sort_values(by=['TypeOrder', 'Number', 'Base', 'SF'])

    # The key frame has a 0..n-1 index, so its sorted index is a list of row
    # positions; positional selection is safe even with duplicate index labels.
    return points.iloc[ranked.index.to_numpy(dtype=int)]


for txt_file in txt_files:
    try:
        # --- Read file (first line is skipped; the second line holds the header) ---
        df = pd.read_csv(txt_file, skiprows=1, sep=',', engine='python')

        # --- Keep the coordinate columns and sort: type, number, then S before F ---
        df = sort_pothole_points(df[["Name", "X", "Y", "Z"]])

        # --- Define output file path ---
        # splitext only strips the real extension (whatever its case), whereas
        # str.replace would rewrite every ".txt" occurring inside the file name.
        base_name = os.path.splitext(os.path.basename(txt_file))[0] + "_clean.csv"
        csv_file = os.path.join(output_folder, base_name)

        # --- Save cleaned CSV ---
        df.to_csv(csv_file, index=False, float_format="%.3f")
        print(f"✅ Saved cleaned CSV → {csv_file}")

    except Exception as e:
        # Best effort: report the failing file and continue with the next one.
        print(f"❌ Error processing {txt_file}: {e}")

print(f"\n🎯 All cleaned CSVs saved in: {output_folder}")


Found 15 .txt files in C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1
✅ Saved cleaned CSV → C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\Potholes_clean_csvs\Site1_0_clean.csv
✅ Saved cleaned CSV → C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\Potholes_clean_csvs\Site1_1_clean.csv
✅ Saved cleaned CSV → C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\Potholes_clean_csvs\Site1_10_clean.csv
✅ Saved cleaned CSV → C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\Potholes_clean_csvs\Site1_11_clean.csv
✅ Saved cleaned CSV → C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\Potholes_clean_csvs\Site1_12_clean.csv
✅ Saved cleaned CSV → C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\Potholes_clean_csvs\Site1_13_clean.csv
✅ Saved cleaned CSV → C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLear

In [14]:
import pandas as pd, glob, re, os

# --- Path containing your .txt files ---
folder_path = r"C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1"  # change as needed

# --- Create output folder inside the same directory ---
output_folder = os.path.join(folder_path, "clean_csvs")
os.makedirs(output_folder, exist_ok=True)

# --- Get list of all .txt files (sorted so files are processed in a stable order) ---
txt_files = sorted(glob.glob(os.path.join(folder_path, "*.txt")))
print(f"Found {len(txt_files)} .txt files in {folder_path}")

# --- Define type ordering; names without a recognised type sort last (99) ---
order_map = {'P': 1, 'C': 2, 'E': 3}


def extract_type_and_number(name):
    """Return ``(type_letter, number)`` parsed from a point name.

    The first occurrence of P, C or E followed by an underscore and digits is
    used (e.g. ``"P_12_S"`` -> ``("P", 12)``).

    NOTE(review): the pattern requires an underscore between the letter and the
    number, so a name shaped like ``Site1_0_P1_S`` would NOT match and would
    fall back to ``(None, -1)``. Confirm against the real point names.

    Parameters
    ----------
    name : any
        Point name; coerced with ``str()`` before matching.

    Returns
    -------
    tuple
        ``(letter, int(number))`` on a match, otherwise ``(None, -1)``.
    """
    m = re.search(r'([PCE])_(\d+)', str(name))
    return (m.group(1), int(m.group(2))) if m else (None, -1)


def sf_flag(name):
    """Return the S/F sort flag of a point name.

    The check is made on the name's *suffix*, matching how the "Base" key is
    derived (only a trailing ``_S`` / ``_F`` is stripped). A substring test
    would flag any name that merely contains ``_S`` somewhere (for example
    ``A_Site_P_1_F``) as an S row and break the S-before-F ordering.

    Parameters
    ----------
    name : any
        Point name; coerced with ``str()`` so missing values do not raise.

    Returns
    -------
    int
        ``0`` for names ending in ``_S``, ``1`` for names ending in ``_F``,
        ``2`` for everything else.
    """
    text = str(name)
    if text.endswith('_S'):
        return 0  # S first
    if text.endswith('_F'):
        return 1  # F second
    return 2  # others (if any)


def sort_site_points(points):
    """Return the rows of ``points`` ordered by type, number, base name and S/F.

    Sort so that:
      1. Type: P -> C -> E (anything else last)
      2. Number ascending
      3. Rows sharing a base name (name without trailing ``_S`` / ``_F``)
         stay together, with S before F.

    Parameters
    ----------
    points : pandas.DataFrame
        Must contain a ``Name`` column; all columns are carried through.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns and the original index labels, in
        sorted order. The input frame is not modified.
    """
    if points.empty:
        # Nothing to order; still hand back a frame so a header-only CSV is written.
        return points.copy()

    names = [str(n) for n in points["Name"]]
    parsed = [extract_type_and_number(n) for n in names]

    # Keys are kept in a separate frame so no helper columns need to be added
    # to (and later dropped from) the data being saved.
    keys = pd.DataFrame(
        {
            "TypeOrder": [order_map.get(kind, 99) for kind, _ in parsed],
            "Number": [number for _, number in parsed],
            "Base": [re.sub(r'(_[SF])$', '', n) for n in names],
            "SF": [sf_flag(n) for n in names],
        },
        index=pd.RangeIndex(len(names)),
    )
    ranked = keys.sort_values(by=['TypeOrder', 'Number', 'Base', 'SF'])

    # The key frame has a 0..n-1 index, so its sorted index is a list of row
    # positions; positional selection is safe even with duplicate index labels.
    return points.iloc[ranked.index.to_numpy(dtype=int)]


for txt_file in txt_files:
    try:
        # --- Read file (first line is skipped; the second line holds the header) ---
        df = pd.read_csv(txt_file, skiprows=1, sep=',', engine='python')

        # --- Keep the coordinate columns and apply the ordering ---
        df = sort_site_points(df[["Name", "X", "Y", "Z"]])

        # --- Define output file path inside new folder ---
        # splitext only strips the real extension (whatever its case), whereas
        # str.replace would rewrite every ".txt" occurring inside the file name.
        base_name = os.path.splitext(os.path.basename(txt_file))[0] + "_clean.csv"
        csv_file = os.path.join(output_folder, base_name)

        # --- Save cleaned CSV ---
        df.to_csv(csv_file, index=False, float_format="%.3f")
        print(f"✅ Saved cleaned CSV → {csv_file}")

    except Exception as e:
        # Best effort: report the failing file and continue with the next one.
        print(f"❌ Error processing {txt_file}: {e}")

print(f"\n🎯 All cleaned CSVs saved in: {output_folder}")


Found 15 .txt files in C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1
✅ Saved cleaned CSV → C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\clean_csvs\Site1_0_clean.csv
✅ Saved cleaned CSV → C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\clean_csvs\Site1_1_clean.csv
✅ Saved cleaned CSV → C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\clean_csvs\Site1_10_clean.csv
✅ Saved cleaned CSV → C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\clean_csvs\Site1_11_clean.csv
✅ Saved cleaned CSV → C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\clean_csvs\Site1_12_clean.csv
✅ Saved cleaned CSV → C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\clean_csvs\Site1_13_clean.csv
✅ Saved cleaned CSV → C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\clean_csvs\Site1_14_clean.csv
✅ Saved