In [12]:
import pandas as pd, glob, re, os

# --- Path containing your .txt files ---
folder_path = r"C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1"

# --- Create output folder ---
output_folder = os.path.join(folder_path, "Potholes_clean_csvs")
os.makedirs(output_folder, exist_ok=True)

# --- Get list of all .txt files ---
txt_files = glob.glob(os.path.join(folder_path, "*.txt"))
print(f"Found {len(txt_files)} .txt files in {folder_path}")

# Sort rank per type letter; names without a recognised type get rank 99
# and therefore end up after every P / C / E row.
order_map = {'P': 1, 'C': 2, 'E': 3}


def extract_type_and_number(name):
    """Return ``(type_letter, number)`` for the first P/C/E + digits run in ``name``.

    ``name`` is coerced to ``str`` so a missing value (NaN) in the Name column
    is treated as "no match" instead of raising inside ``re.search``.
    Returns ``(None, -1)`` when nothing matches.
    """
    m = re.search(r'([PCE])(\d+)', str(name))
    return (m.group(1), int(m.group(2))) if m else (None, -1)


for txt_file in txt_files:
    try:
        # --- Read file (first line is skipped, the next one is the header) ---
        df = pd.read_csv(txt_file, skiprows=1, sep=',', engine='python')
        # .copy() so the helper columns below are added to an independent frame.
        df = df[["Name", "X", "Y", "Z"]].copy()

        # String view of the names, used only to derive the sort keys; the
        # Name column itself is written out unchanged.
        names = [str(n) for n in df['Name']]

        # --- Extract type (P, C, E) and number ---
        # Plain lists (instead of apply + pd.Series) also work for an empty frame.
        parsed = [extract_type_and_number(n) for n in names]
        df['Type'] = [t for t, _ in parsed]
        df['Number'] = [num for _, num in parsed]

        # --- Extract base name (without _S / _F) for proper grouping ---
        df['Base'] = [re.sub(r'_[SF]$', '', n) for n in names]

        # --- Define sort keys ---
        df['SF'] = [0 if n.endswith('_S') else 1 for n in names]  # S before F
        df['TypeOrder'] = df['Type'].map(order_map).fillna(99)

        # --- Sort by site type, number, then ensure S comes before F ---
        df = df.sort_values(by=['TypeOrder', 'Number', 'Base', 'SF']).drop(
            columns=['TypeOrder', 'SF', 'Type', 'Number', 'Base'])

        # --- Define output file path ---
        # splitext swaps only the real extension and works whatever its case;
        # str.replace(".txt", ...) would leave "X.TXT" unchanged.
        stem = os.path.splitext(os.path.basename(txt_file))[0]
        base_name = stem + "_clean.csv"
        csv_file = os.path.join(output_folder, base_name)

        # --- Save cleaned CSV ---
        df.to_csv(csv_file, index=False, float_format="%.3f")
        print(f"‚úÖ Saved cleaned CSV ‚Üí {csv_file}")

    except Exception as e:
        # Best effort: report the failing file and carry on with the rest.
        print(f"‚ùå Error processing {txt_file}: {e}")

print(f"\nüéØ All cleaned CSVs saved in: {output_folder}")


Found 15 .txt files in C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1
‚úÖ Saved cleaned CSV ‚Üí C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\Potholes_clean_csvs\Site1_0_clean.csv
‚úÖ Saved cleaned CSV ‚Üí C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\Potholes_clean_csvs\Site1_1_clean.csv
‚úÖ Saved cleaned CSV ‚Üí C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\Potholes_clean_csvs\Site1_10_clean.csv
‚úÖ Saved cleaned CSV ‚Üí C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\Potholes_clean_csvs\Site1_11_clean.csv
‚úÖ Saved cleaned CSV ‚Üí C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\Potholes_clean_csvs\Site1_12_clean.csv
‚úÖ Saved cleaned CSV ‚Üí C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\Potholes_clean_csvs\Site1_13_clean.csv
‚úÖ Saved cleaned CSV ‚Üí C:/Users/umair.muhammad/Documen

In [14]:
import pandas as pd, glob, re, os

# --- Path containing your .txt files ---
folder_path = r"C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1"  # change as needed

# --- Create output folder inside the same directory ---
output_folder = os.path.join(folder_path, "clean_csvs")
os.makedirs(output_folder, exist_ok=True)

# --- Get list of all .txt files ---
txt_files = glob.glob(os.path.join(folder_path, "*.txt"))
print(f"Found {len(txt_files)} .txt files in {folder_path}")

# --- Define type ordering ---
# Names without a recognised type get rank 99 and sort after P / C / E.
order_map = {'P': 1, 'C': 2, 'E': 3}


def extract_type_and_number(name):
    """Return ``(type_letter, number)`` for the first ``P_<digits>``,
    ``C_<digits>`` or ``E_<digits>`` run in ``name``; ``(None, -1)`` if absent."""
    m = re.search(r'([PCE])_(\d+)', str(name))
    return (m.group(1), int(m.group(2))) if m else (None, -1)


def sf_flag(name):
    """Return the S/F sort rank of ``name``: 0 for S, 1 for F, 2 otherwise.

    ``name`` is coerced to ``str`` like the other helpers in this cell, so a
    missing value no longer raises ``TypeError`` on the ``in`` test.
    """
    name = str(name)
    # NOTE(review): this is a substring test, so '_S' / '_F' anywhere in the
    # name counts, not only a trailing suffix.
    if '_S' in name:
        return 0  # S first
    elif '_F' in name:
        return 1  # F second
    else:
        return 2  # others (if any)


for txt_file in txt_files:
    try:
        # --- Read file (first line is skipped, the next one is the header) ---
        df = pd.read_csv(txt_file, skiprows=1, sep=',', engine='python')
        # .copy() so the helper columns below are added to an independent frame.
        df = df[["Name", "X", "Y", "Z"]].copy()

        names = list(df['Name'])

        # --- Extract type (P, C, E) and number ---
        # Plain lists (instead of apply + pd.Series) also work for an empty frame.
        parsed = [extract_type_and_number(n) for n in names]
        df['Type'] = [t for t, _ in parsed]
        df['Number'] = [num for _, num in parsed]

        # --- Identify S/F flag ---
        df['SF'] = [sf_flag(n) for n in names]

        # --- Create a "Base" key to ensure S and F stay together ---
        # Example: Site1_0_P1_S -> Site1_0_P1
        df['Base'] = [re.sub(r'(_[SF])$', '', str(n)) for n in names]

        df['TypeOrder'] = df['Type'].map(order_map).fillna(99)

        # --- Sort so that:
        #     1. Type: P -> C -> E
        #     2. Number ascending
        #     3. Within each pair, S before F
        df = df.sort_values(by=['TypeOrder', 'Number', 'Base', 'SF']).drop(
            columns=['TypeOrder', 'SF', 'Type', 'Number', 'Base'])

        # --- Define output file path inside new folder ---
        # splitext swaps only the real extension and works whatever its case;
        # str.replace(".txt", ...) would leave "X.TXT" unchanged.
        stem = os.path.splitext(os.path.basename(txt_file))[0]
        base_name = stem + "_clean.csv"
        csv_file = os.path.join(output_folder, base_name)

        # --- Save cleaned CSV ---
        df.to_csv(csv_file, index=False, float_format="%.3f")
        print(f"‚úÖ Saved cleaned CSV ‚Üí {csv_file}")

    except Exception as e:
        # Best effort: report the failing file and carry on with the rest.
        print(f"‚ùå Error processing {txt_file}: {e}")

print(f"\nüéØ All cleaned CSVs saved in: {output_folder}")


Found 15 .txt files in C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1
‚úÖ Saved cleaned CSV ‚Üí C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\clean_csvs\Site1_0_clean.csv
‚úÖ Saved cleaned CSV ‚Üí C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\clean_csvs\Site1_1_clean.csv
‚úÖ Saved cleaned CSV ‚Üí C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\clean_csvs\Site1_10_clean.csv
‚úÖ Saved cleaned CSV ‚Üí C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\clean_csvs\Site1_11_clean.csv
‚úÖ Saved cleaned CSV ‚Üí C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\clean_csvs\Site1_12_clean.csv
‚úÖ Saved cleaned CSV ‚Üí C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\clean_csvs\Site1_13_clean.csv
‚úÖ Saved cleaned CSV ‚Üí C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/site1\clean_csv

In [4]:
import os
import pandas as pd
import re

# -------------------------------------
# Utility: normalize the first token
# -------------------------------------
def normalize_code(code):
    """
    Parse the first token of a label line, e.g. 'Site8_0_Fl_S_4'.

    Steps:
      1. Strip surrounding whitespace.
      2. Rewrite every occurrence of Flushed / FLUSHED / flushed / FL / fl
         to 'Fl' (applied to the whole string, not per token).
      3. Split on '_' or '-'. The first two pieces are taken as site and
         sub-index; the remaining pieces are scanned for a type
         (P, C, E, Fl), an S/F marker and an integer. If a kind appears
         more than once, the last occurrence wins.

    Returns:
        dict with keys 'site', 'sub', 'type', 'num', 'SF', 'raw', where
        'raw' is the stripped, Fl-normalized text; or None when there are
        fewer than four pieces or any of type / S-F / number is missing.
    """
    code = code.strip()

    # Force normalized Fl form
    code = re.sub(r'(Flushed|FLUSHED|flushed|FL|fl)', 'Fl', code)

    parts = re.split(r'[_-]', code)

    # Expected formats like:
    # Site8, 0, Fl, S, 4
    # Site8, 0, P, F, 2
    # SiteV2, 58, C, S, 11
    if len(parts) < 4:
        return None

    site = parts[0]
    sub = parts[1]

    types = ("P", "C", "E", "Fl")

    type_token = None
    sf_token = None
    num_token = None

    for p in parts[2:]:
        if p in types:
            type_token = p
        elif p in ("S", "F"):
            sf_token = p
        elif p.isdecimal():
            # isdecimal(), not isdigit(): isdigit() also accepts characters
            # such as superscript '²' that int() cannot parse, which used to
            # raise ValueError here.
            num_token = int(p)

    # All three labels are required.
    if sf_token is None or type_token is None or num_token is None:
        return None

    return {
        "site": site,
        "sub": sub,
        "type": type_token,
        "num": num_token,
        "SF": sf_token,
        "raw": code
    }


# -------------------------------------------------------
# Build final unified string as per your naming standard
# -------------------------------------------------------
def build_name(row):
    """Assemble the unified label '<site>_<sub>_<type><num>_<SF>' from ``row``.

    ``row`` is anything indexable by the keys 'site', 'sub', 'type', 'num'
    and 'SF'. Example result: 'Site8_0_Fl4_S'.
    """
    site, sub = row['site'], row['sub']
    kind, number = row['type'], row['num']
    marker = row['SF']
    return f"{site}_{sub}_{kind}{number}_{marker}"


# -------------------------------------------------------
# PROCESS ONE TXT -> one CSV with SAME NAME
# -------------------------------------------------------
def process_txt_file(txt_path, out_dir):
    """Convert one label .txt file into a sorted CSV of parsed label codes.

    For every non-blank line, the text before the first comma is passed to
    ``normalize_code``; lines it rejects (returns None) are dropped. The
    surviving records are sorted, given a ``final_name`` column via
    ``build_name`` and written to ``out_dir`` under the input's base name
    with a ``.csv`` extension.

    Args:
        txt_path: path of the input text file.
        out_dir: existing directory that receives the CSV.

    Returns:
        None. Prints a warning and writes nothing when every line is rejected.
    """
    filename = os.path.basename(txt_path)

    cleaned_rows = []

    with open(txt_path, "r") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            # Only the first comma-separated field carries the label code.
            first_token = line.split(",")[0]

            parsed = normalize_code(first_token)
            if parsed is not None:
                cleaned_rows.append(parsed)

    if not cleaned_rows:
        print(f"[WARNING] All lines rejected in {filename}")
        return

    df = pd.DataFrame(cleaned_rows)

    # Sorting rule: P < C < E < Fl
    sort_order = {"P": 1, "C": 2, "E": 3, "Fl": 4}
    df["type_order"] = df["type"].map(sort_order)

    # Within a pair, S (start) goes before F (finish), matching the earlier
    # cleaning cells. Sorting on the raw letter would put 'F' first, so a
    # temporary rank column is used and dropped again to keep the CSV schema.
    # NOTE(review): "sub" is compared as text here ('10' < '2').
    df = (
        df.assign(_sf_rank=df["SF"].map({"S": 0, "F": 1}))
          .sort_values(by=["site", "sub", "type_order", "num", "_sf_rank"])
          .drop(columns="_sf_rank")
    )

    # Build final name
    df["final_name"] = df.apply(build_name, axis=1)

    # Save with SAME filename but CSV extension. splitext handles any
    # extension case; str.replace(".txt", ...) would leave "X.TXT" unchanged.
    out_csv = os.path.join(out_dir, os.path.splitext(filename)[0] + ".csv")
    df.to_csv(out_csv, index=False)

    print(f"[OK] Saved {out_csv}, rows={len(df)}")


# ------------------------------
# MAIN EXECUTION
# ------------------------------
# Directory scanned for label .txt files, and the directory that receives
# one CSV per input file (created if missing).
input_folder = "C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/All_Nome/txt_lables"
output_folder = "C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/All_Nome/txt_lables/cleancsvs"
os.makedirs(output_folder, exist_ok=True)

# Pick the .txt entries (extension compared case-insensitively), then
# convert them one by one in directory-listing order.
txt_entries = [entry for entry in os.listdir(input_folder) if entry.lower().endswith(".txt")]
for file in txt_entries:
    process_txt_file(os.path.join(input_folder, file), output_folder)

print("\nDONE ‚Äî All CSVs saved in cleancsvs/")


[OK] Saved C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/All_Nome/txt_lables/cleancsvs\Site1_14.csv, rows=30
[OK] Saved C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/All_Nome/txt_lables/cleancsvs\Site1_2.csv, rows=22
[OK] Saved C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/All_Nome/txt_lables/cleancsvs\Site1_3.csv, rows=36
[OK] Saved C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/All_Nome/txt_lables/cleancsvs\Site1_4.csv, rows=24
[OK] Saved C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/All_Nome/txt_lables/cleancsvs\Site1_5.csv, rows=20
[OK] Saved C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/All_Nome/txt_lables/cleancsvs\Site1_6.csv, rows=6
[OK] Saved C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/training/All_Nome/txt_lables/cleancsvs\Site1_7.csv, rows=36
[OK] Saved C:/Users/umair.muhammad/Documents/PhD/Research Work/FedLearn/trai

In [8]:
import os
import csv
import re

# Directory scanned for the raw label .txt files.
input_folder = r"C:\Users\umair.muhammad\Documents\PhD\Research Work\FedLearn\training\All_Nome\txt_lables"
# Cleaned CSVs are written to a "cleancsvs" sub-folder of the input folder.
output_folder = os.path.join(input_folder, "cleancsvs")

# Create the output folder up front; exist_ok makes re-running the cell safe.
os.makedirs(output_folder, exist_ok=True)

# NOTE(review): not referenced by the code visible in this cell — find_label
# carries its own label candidates. Confirm whether it is still needed.
LABELS = ["P", "C", "E", "FL"]


# Label patterns in priority order (Fl first, then P, C, E). They are applied
# to the upper-cased name. A label only counts when it is not embedded in a
# longer alphabetic word: digits, '_' and other non-letters may touch it
# ("P1", "_C_", "Fl4"), letters may not (the "E" in "SITE" is ignored).
_LABEL_PATTERNS = (
    ("Fl", re.compile(r"(?<![A-Z])(?:FLUSHED|FL)(?![A-Z])")),
    ("P", re.compile(r"(?<![A-Z])P(?![A-Z])")),
    ("C", re.compile(r"(?<![A-Z])C(?![A-Z])")),
    ("E", re.compile(r"(?<![A-Z])E(?![A-Z])")),
)


def find_label(name):
    """Find P, C, E or Fl at any position in the name.

    Matching is case-insensitive. A plain substring test is not used because
    every name containing e.g. "Site" would then yield "E", so a name with no
    label could never be rejected and letters of the site token could win
    over the real label.

    Returns:
        "Fl", "P", "C" or "E" (first hit in that priority order), or None
        when the name carries no stand-alone label.
    """
    n = name.upper()

    for label, pattern in _LABEL_PATTERNS:
        if pattern.search(n):
            return label

    return None


def has_final_SF(name):
    """
    Report whether the last ASCII letter in ``name`` is S or F (any case).

    Digits, underscores and other non-letters after that letter are ignored,
    so both '..._S' and '..._S_4' qualify. A name without letters gives False.
    """
    # A letter followed only by non-letters up to the end is the last letter.
    tail = re.search(r"[A-Za-z](?=[^A-Za-z]*\Z)", name)
    if tail is None:
        return False
    return tail.group(0).upper() in ("S", "F")


def clean_name(name):
    """Collapse every run of underscores to a single '_' and trim whitespace."""
    # One pass over runs of two or more underscores covers the "__" case too.
    collapsed = re.sub(r"_{2,}", "_", name)
    return collapsed.strip()


# Convert every .txt file in input_folder into a CSV of accepted rows.
# A line is accepted when it has at least four comma-separated fields, fields
# 2-4 parse as floats, the name carries a label (find_label) and its last
# letter is S or F (has_final_SF). Everything else is counted as rejected.
for filename in os.listdir(input_folder):
    if not filename.lower().endswith(".txt"):
        continue

    input_path = os.path.join(input_folder, filename)
    # splitext swaps the extension whatever its case; str.replace(".txt", ...)
    # would leave "X.TXT" unchanged although the check above accepts it.
    output_path = os.path.join(output_folder, os.path.splitext(filename)[0] + ".csv")

    saved = 0
    rejected = 0
    rows = []

    # errors="ignore" drops undecodable bytes instead of aborting the file.
    with open(input_path, "r", errors="ignore") as f:
        for line in f:
            parts = [p.strip() for p in line.split(",")]

            if len(parts) < 4:
                rejected += 1
                continue

            name = clean_name(parts[0])

            # Extract coordinates; non-numeric fields (e.g. a header row)
            # reject the line. Only ValueError is caught so that
            # KeyboardInterrupt and genuine bugs are not swallowed.
            try:
                X = float(parts[1])
                Y = float(parts[2])
                Z = float(parts[3])
            except ValueError:
                rejected += 1
                continue

            # Find label anywhere in name
            label = find_label(name)
            if label is None:
                rejected += 1
                continue

            # Last letter of the name must be S or F
            if not has_final_SF(name):
                rejected += 1
                continue

            rows.append([name, X, Y, Z, label])
            saved += 1

    if rows:
        with open(output_path, "w", newline="") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(["Damage_ID", "X", "Y", "Z", "Label"])
            writer.writerows(rows)
        print(f"[OK] {filename} -> {output_path}  (total={saved+rejected}, saved={saved}, rejected={rejected})")
    else:
        print(f"[WARNING] All lines rejected in {filename} (total={saved+rejected})")


[OK] Site1_0.txt -> C:\Users\umair.muhammad\Documents\PhD\Research Work\FedLearn\training\All_Nome\txt_lables\cleancsvs\Site1_0.csv  (total=4, saved=2, rejected=2)
[OK] Site1_1.txt -> C:\Users\umair.muhammad\Documents\PhD\Research Work\FedLearn\training\All_Nome\txt_lables\cleancsvs\Site1_1.csv  (total=42, saved=40, rejected=2)
[OK] Site1_10.txt -> C:\Users\umair.muhammad\Documents\PhD\Research Work\FedLearn\training\All_Nome\txt_lables\cleancsvs\Site1_10.csv  (total=88, saved=76, rejected=12)
[OK] Site1_11.txt -> C:\Users\umair.muhammad\Documents\PhD\Research Work\FedLearn\training\All_Nome\txt_lables\cleancsvs\Site1_11.csv  (total=64, saved=62, rejected=2)
[OK] Site1_12.txt -> C:\Users\umair.muhammad\Documents\PhD\Research Work\FedLearn\training\All_Nome\txt_lables\cleancsvs\Site1_12.csv  (total=72, saved=70, rejected=2)
[OK] Site1_13.txt -> C:\Users\umair.muhammad\Documents\PhD\Research Work\FedLearn\training\All_Nome\txt_lables\cleancsvs\Site1_13.csv  (total=20, saved=18, rejected=