In [8]:
import pandas as pd
import os
import re

# ---------------------------------------------------
# CONFIG
# ---------------------------------------------------
# Folder with the raw CSVs. This is the only path that needs editing; both
# output locations below are derived from it so the three cannot drift apart.
input_folder = r"C:\Users\umair.muhammad\Documents\PhD\Research Work\FedLearn\training\All_Nome\txt_lables\cleancsvs"
# Folder for the per-file bounding boxes (a sub-folder of the input folder).
output_folder = os.path.join(input_folder, "improved_csvs")
# One combined file covering every input CSV.
combined_output = os.path.join(output_folder, "combined_output.csv")

# Create the output folder up front; exist_ok makes the cell safe to re-run.
os.makedirs(output_folder, exist_ok=True)

def get_base_id(dmg_id):
    """Return ``dmg_id`` with one trailing ``_S`` or ``_F`` marker dropped.

    Only a marker at the very end of the string is removed; IDs without such
    a marker come back unchanged.
    """
    suffix_pattern = re.compile(r'_(S|F)$')
    return suffix_pattern.sub('', dmg_id)

def get_damage_type(dmg_id):
    """Return the first damage code found anywhere in ``dmg_id``.

    Codes are tried in the order Fl, P, C, E (case-sensitive substring
    match); if none occurs in the ID the result is "Unknown".
    """
    priority = ("Fl", "P", "C", "E")
    return next((code for code in priority if code in dmg_id), "Unknown")

# Accumulates one row per bounding box across every input file.
all_rows = []

# os.listdir() returns names in arbitrary order; sorting keeps the row order
# of the combined CSV stable from run to run.
csv_files = sorted(f for f in os.listdir(input_folder) if f.lower().endswith(".csv"))

for fname in csv_files:
    in_path = os.path.join(input_folder, fname)
    out_path = os.path.join(output_folder, fname)  # same file name, output folder

    # NOTE(review): read_csv consumes the first line as a header, which is then
    # replaced below -- confirm the raw files really start with a header row.
    df = pd.read_csv(in_path)
    # The header is overwritten positionally, so exactly five columns are
    # required. Fail with the offending file name rather than pandas' generic
    # "Length mismatch" error (still a ValueError).
    if df.shape[1] != 5:
        raise ValueError(
            f"{fname}: expected 5 columns (Damage_ID, X, Y, Z, Label), found {df.shape[1]}"
        )
    df.columns = ["Damage_ID", "X", "Y", "Z", "Label"]

    df["base_id"] = df["Damage_ID"].apply(get_base_id)
    df["damage_type"] = df["Damage_ID"].apply(get_damage_type)

    rows = []

    # One bounding box per base ID: rows whose IDs differ only by a trailing
    # _S/_F share a base ID and are merged into a single min/max extent.
    for base_id, g in df.groupby("base_id"):
        # The type is taken from the first row of the group.
        dmg_type = g["damage_type"].iloc[0]

        xmin, xmax = g["X"].min(), g["X"].max()
        ymin, ymax = g["Y"].min(), g["Y"].max()
        zmin, zmax = g["Z"].min(), g["Z"].max()

        row = [base_id, dmg_type, xmin, xmax, ymin, ymax, zmin, zmax]
        rows.append(row)
        all_rows.append([fname] + row)

    # Save per-file bounding boxes
    out_df = pd.DataFrame(
        rows,
        columns=["Damage_ID", "Damage_Type", "Xmin", "Xmax", "Ymin", "Ymax", "Zmin", "Zmax"]
    )
    out_df.to_csv(out_path, index=False)

# Combined CSV: same columns as the per-file output plus the source file name.
combined_df = pd.DataFrame(
    all_rows,
    columns=["Source_File", "Damage_ID", "Damage_Type",
             "Xmin", "Xmax", "Ymin", "Ymax", "Zmin", "Zmax"]
)
combined_df.to_csv(combined_output, index=False)

print("✔ Done. Individual and combined CSVs generated.")


✔ Done. Individual and combined CSVs generated.


In [15]:
import pandas as pd
import os
import re

# ---------------------------------------------------
# CONFIG
# ---------------------------------------------------
# Folder with the raw CSVs. This is the only path that needs editing; every
# output location below is derived from it.
input_folder = r"C:\Users\umair.muhammad\Documents\PhD\Research Work\FedLearn\training\All_Nome\txt_lables\cleancsvs"
# Per-file bounding-box CSVs go to a sub-folder of the input folder.
output_folder = os.path.join(input_folder, "improved_csvs")
combined_output = os.path.join(output_folder, "combined_output.csv")  # one combined file
summary_output = os.path.join(output_folder, "summary_per_site.csv")  # summary table

# Create the output folder up front; exist_ok makes the cell safe to re-run.
os.makedirs(output_folder, exist_ok=True)

# ---------------------------------------------------
# HELPER FUNCTIONS
# ---------------------------------------------------
def get_base_id(dmg_id):
    """Strip a single ``_S`` / ``_F`` marker from the end of a Damage_ID.

    The marker is removed only when it is the final part of the string;
    any other ID is returned as-is.
    """
    base_id = re.sub(pattern=r'_(S|F)$', repl='', string=dmg_id)
    return base_id

def get_damage_type(dmg_id):
    """Classify a Damage_ID by the damage code it contains.

    The codes Fl, P, C and E are checked in that order as case-sensitive
    substrings; the first one present wins. "Unknown" is returned when the
    ID contains none of them.
    """
    found = [code for code in ("Fl", "P", "C", "E") if code in dmg_id]
    if found:
        return found[0]
    return "Unknown"

# ---------------------------------------------------
# PROCESS FILES
# ---------------------------------------------------
all_rows = []       # one row per bounding box, across all files
summary_list = []   # one dict of per-type bounding-box counts per file

# os.listdir() gives no ordering guarantee; sort so the combined and summary
# CSVs list the files in the same order on every run.
csv_files = sorted(f for f in os.listdir(input_folder) if f.lower().endswith(".csv"))

for fname in csv_files:
    in_path = os.path.join(input_folder, fname)
    out_path = os.path.join(output_folder, fname)  # same file name, output folder

    # NOTE(review): read_csv consumes the first line as a header, which is then
    # replaced below -- confirm the raw files really start with a header row.
    df = pd.read_csv(in_path)
    # The header is overwritten positionally, so exactly five columns are
    # required. Fail with the offending file name rather than pandas' generic
    # "Length mismatch" error (still a ValueError).
    if df.shape[1] != 5:
        raise ValueError(
            f"{fname}: expected 5 columns (Damage_ID, X, Y, Z, Label), found {df.shape[1]}"
        )
    df.columns = ["Damage_ID", "X", "Y", "Z", "Label"]

    df["base_id"] = df["Damage_ID"].apply(get_base_id)
    df["damage_type"] = df["Damage_ID"].apply(get_damage_type)

    rows = []

    # Count per damage type for this file
    type_counts = {"P": 0, "C": 0, "E": 0, "Fl": 0}
    unknown_count = 0  # boxes whose type is none of the four above

    # One bounding box per base ID: rows whose IDs differ only by a trailing
    # _S/_F share a base ID and are merged into a single min/max extent.
    for base_id, g in df.groupby("base_id"):
        # The type is taken from the first row of the group.
        dmg_type = g["damage_type"].iloc[0]

        xmin, xmax = g["X"].min(), g["X"].max()
        ymin, ymax = g["Y"].min(), g["Y"].max()
        zmin, zmax = g["Z"].min(), g["Z"].max()

        row = [base_id, dmg_type, xmin, xmax, ymin, ymax, zmin, zmax]
        rows.append(row)
        all_rows.append([fname] + row)

        # Update counts
        if dmg_type in type_counts:
            type_counts[dmg_type] += 1
        else:
            unknown_count += 1

    # Boxes of unrecognised type are still written to the per-file and combined
    # CSVs but have no summary column; say so instead of dropping them silently.
    if unknown_count:
        print(f"WARNING: {fname}: {unknown_count} bounding box(es) with an "
              f"unrecognised damage type are not counted in the summary.")

    total = sum(type_counts.values())
    summary_list.append({
        # splitext removes only the final extension, whatever its letter case,
        # matching the case-insensitive ".csv" filter above.
        "Site": os.path.splitext(fname)[0],
        "P_BBoxes": type_counts["P"],
        "C_BBoxes": type_counts["C"],
        "E_BBoxes": type_counts["E"],
        "Fl_BBoxes": type_counts["Fl"],
        "Total": total
    })

    # Save per-file bounding boxes
    out_df = pd.DataFrame(
        rows,
        columns=["Damage_ID", "Damage_Type", "Xmin", "Xmax", "Ymin", "Ymax", "Zmin", "Zmax"]
    )
    out_df.to_csv(out_path, index=False)

# ---------------------------------------------------
# COMBINED CSV
# ---------------------------------------------------
combined_df = pd.DataFrame(
    all_rows,
    columns=["Source_File", "Damage_ID", "Damage_Type",
             "Xmin", "Xmax", "Ymin", "Ymax", "Zmin", "Zmax"]
)
combined_df.to_csv(combined_output, index=False)

# ---------------------------------------------------
# SUMMARY TABLE
# ---------------------------------------------------
summary_columns = ["Site", "P_BBoxes", "C_BBoxes", "E_BBoxes", "Fl_BBoxes", "Total"]

# Add total row. The sums are computed from the per-file records and the frame
# is built with explicit columns, so an input folder without any CSV yields a
# zero "Total" row instead of a KeyError on a column-less DataFrame.
totals = {}
for col in summary_columns[1:]:
    totals[col] = sum(rec[col] for rec in summary_list)
totals_row = {"Site": "Total", **totals}

summary_df = pd.DataFrame(summary_list + [totals_row], columns=summary_columns)

summary_df.to_csv(summary_output, index=False)

print(f"✔ Done. Individual CSVs saved in: {output_folder}")
print(f"✔ Combined CSV saved as: {combined_output}")
print(f"✔ Summary CSV saved as: {summary_output}")


✔ Done. Individual CSVs saved in: C:\Users\umair.muhammad\Documents\PhD\Research Work\FedLearn\training\All_Nome\txt_lables\cleancsvs\improved_csvs
✔ Combined CSV saved as: C:\Users\umair.muhammad\Documents\PhD\Research Work\FedLearn\training\All_Nome\txt_lables\cleancsvs\improved_csvs\combined_output.csv
✔ Summary CSV saved as: C:\Users\umair.muhammad\Documents\PhD\Research Work\FedLearn\training\All_Nome\txt_lables\cleancsvs\improved_csvs\summary_per_site.csv
