In [2]:
import os
import re
import pandas as pd

# === USER SETTINGS ===
# Folder that is scanned for the per-mouse annotation workbooks.
folder_path = r"Z:\UriMons\Alex to Uri\Bins_output"  # <- change this to your folder with the Excel files
# The comparison workbook is saved into that same folder under a fixed name.
output_path = os.path.join(folder_path, "Uri_Alex_grooming_comparison.xlsx")


def parse_filename(filename):
    """
    Split an annotation workbook name into its mouse ID and annotator.

    Any directory part and the file extension are ignored. The remaining
    stem is expected to look like <mouse>_<annotator>, for example
    M123_Uri.xlsx or M45_Alex.xlsx; anything after a second underscore is
    discarded.

    Returns (mouse_id, annotator), e.g. ('M123', 'Uri'), or (None, None)
    when the stem contains no underscore at all.
    """
    stem = os.path.splitext(os.path.basename(filename))[0]

    # Text before the first underscore is the mouse ID, e.g. "M123".
    mouse_id, underscore, remainder = stem.partition("_")
    if not underscore:
        return None, None

    # Text between the first and second underscore is the annotator,
    # e.g. "Uri" or "Alex".
    annotator = remainder.partition("_")[0]
    return mouse_id, annotator


# 1. Collect files per mouse and annotator
# Maps each mouse ID to the workbook path found for each annotator:
# {mouse_id: {"Uri": path, "Alex": path}}
mouse_files = {}

for entry in os.listdir(folder_path):
    # Only Excel workbooks whose name starts with "M" are considered.
    is_excel = entry.lower().endswith((".xls", ".xlsx"))
    if not (is_excel and entry.startswith("M")):
        continue

    mouse_id, annotator = parse_filename(entry)
    if mouse_id is None or annotator is None:
        # Name does not follow the <mouse>_<annotator> pattern.
        continue

    # A later file with the same mouse/annotator pair replaces an earlier one.
    mouse_files.setdefault(mouse_id, {})[annotator] = os.path.join(folder_path, entry)

# 2. Process only mice that have BOTH Uri and Alex files
def _load_annotation(path, annotator):
    """Read one annotator's workbook and keep its 'frames' and 'grooming' columns.

    The grooming column is renamed to ``grooming_<annotator>`` so the two
    annotators can sit side by side after merging.

    Returns the two-column DataFrame, or None (after printing a warning)
    when either required column is missing from the workbook.
    """
    df = pd.read_excel(path)
    if "frames" not in df.columns or "grooming" not in df.columns:
        print(f"Warning: {path} is missing 'frames' or 'grooming' column.")
        return None
    return df[["frames", "grooming"]].rename(columns={"grooming": f"grooming_{annotator}"})


def _merge_annotations(df_uri, df_alex):
    """Join both annotators on 'frames' and add the comparison columns.

    Only frames present in both inputs are kept. Frames where either
    annotator has no grooming value are removed, because they cannot be cast
    to 0/1 and would otherwise abort the whole run.

    Returns (merged, n_dropped): the frame-wise comparison table sorted by
    'frames', and the number of frames removed for a missing value.
    """
    label_columns = ["grooming_Uri", "grooming_Alex"]
    merged = pd.merge(df_uri, df_alex, on="frames", how="inner").sort_values("frames")

    has_missing_label = merged[label_columns].isna().any(axis=1)
    n_dropped = int(has_missing_label.sum())
    # .copy() gives an independent frame so the column assignments below are
    # plain writes rather than writes into a filtered selection.
    merged = merged.loc[~has_missing_label].copy()

    # Make sure grooming is integer (0/1)
    for column in label_columns:
        merged[column] = merged[column].astype(int)

    # 3. Create comparison columns
    merged["Uri+Alex"] = merged["grooming_Uri"] + merged["grooming_Alex"]
    merged["Uri-Alex"] = merged["grooming_Uri"] - merged["grooming_Alex"]
    return merged, n_dropped


comparison_sheets = []  # [(sheet_name, frame-wise comparison DataFrame)]
summary_rows = []

# Sorted so the sheet order does not depend on directory listing order.
for mouse_id, annotators in sorted(mouse_files.items()):
    if "Uri" not in annotators or "Alex" not in annotators:
        # Skip mice that don't have both annotators
        continue

    df_uri = _load_annotation(annotators["Uri"], "Uri")
    if df_uri is None:
        continue
    df_alex = _load_annotation(annotators["Alex"], "Alex")
    if df_alex is None:
        continue

    df_merged, n_dropped = _merge_annotations(df_uri, df_alex)
    if n_dropped:
        print(f"Warning: {mouse_id}: skipped {n_dropped} frame(s) with a missing grooming value.")

    # 4. Compute performance metrics (Uri is treated as the reference)
    hits = int((df_merged["Uri+Alex"] == 2).sum())               # both 1
    correct_rejection = int((df_merged["Uri+Alex"] == 0).sum())  # both 0
    miss = int((df_merged["Uri-Alex"] == 1).sum())               # Uri 1, Alex 0
    false_alarm = int((df_merged["Uri-Alex"] == -1).sum())       # Uri 0, Alex 1

    summary_rows.append({
        "Mouse_ID": mouse_id,
        "Hits": hits,
        "Miss": miss,
        "Correct_Rejection": correct_rejection,
        "False_Alarm": false_alarm,
        "Total_Frames_Compared": len(df_merged),
    })

    # Excel sheet name max length is 31 chars
    comparison_sheets.append((mouse_id[:31], df_merged))

# 5. Write the workbook: one sheet per mouse plus a summary sheet.
# The writer is opened only once there is something to write; closing an
# openpyxl-backed writer that holds no sheets fails with an obscure error.
if not summary_rows:
    print("No mouse had usable files from both Uri and Alex; no comparison file was written.")
else:
    summary_df = pd.DataFrame(summary_rows)

    # Add an overall total row
    count_columns = [column for column in summary_df.columns if column != "Mouse_ID"]
    total_row = {"Mouse_ID": "TOTAL", **summary_df[count_columns].sum().to_dict()}
    summary_df = pd.concat([summary_df, pd.DataFrame([total_row])], ignore_index=True)

    with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
        for sheet_name, sheet_df in comparison_sheets:
            sheet_df.to_excel(writer, sheet_name=sheet_name, index=False)
        summary_df.to_excel(writer, sheet_name="Summary", index=False)

    print("Done! Comparison file saved to:")
    print(output_path)


Done! Comparison file saved to:
Z:\UriMons\Alex to Uri\Bins_output\Uri_Alex_grooming_comparison.xlsx
