In [7]:
import pandas as pd
import re
from pathlib import Path

In [8]:
# ---- Paths ----
# NOTE(review): absolute, machine-specific location (mapped Z: drive); anyone
# running this elsewhere has to edit this line.
in_path = Path(r"Z:\UriMons\Alex to Uri\ELIZA\ELIZA_results.xlsx")
# Output workbook is placed in the same folder as the input (only the file
# name is swapped).
out_path = in_path.with_name("ELIZA_results_grouped.xlsx")

# ---- Load ----
# Eager read of the first worksheet into a module-level frame.
# process_sheet_to_file() reads the workbook again on its own, so it does not
# depend on this `df`.
df = pd.read_excel(in_path, sheet_name=0)

In [9]:

def normalize_cols(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` with tidied column labels.

    Every label is converted to ``str``, each run of whitespace inside it is
    collapsed to a single space, and leading/trailing whitespace is removed.
    The frame passed in is not modified.
    """
    tidy_labels = []
    for label in df.columns:
        collapsed = re.sub(r"\s+", " ", str(label))
        tidy_labels.append(collapsed.strip())

    result = df.copy()
    result.columns = tidy_labels
    return result

def find_cort_col(cols) -> str:
    """
    Pick the CORT measurement column out of ``cols``.

    Lookup order:
      1. the exact label "CORT levels";
      2. the exact label "CORT level";
      3. the first label whose text contains "cort", ignoring case.

    Labels are passed through ``str()`` before the case-insensitive search, so
    non-string labels (e.g. integer headers) are simply skipped instead of
    raising ``AttributeError``.

    Raises
    ------
    KeyError
        If no label matches any of the rules above.
    """
    for exact in ("CORT levels", "CORT level"):
        if exact in cols:
            return exact

    for label in cols:
        # str() guards against non-string column labels.
        if "cort" in str(label).lower():
            return label

    raise KeyError(f"Could not find CORT column. Columns are: {list(cols)}")

def order_baseline_then_stress(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a sorted copy of ``df``: "Baseline" rows first, then "Stress" rows,
    each group ordered by "Mouse number".

    The ordering is done through a temporary ordered-categorical helper column
    that is removed again before returning; ``df`` itself is not modified.
    """
    helper = "__cond_order"
    condition_rank = pd.Categorical(
        df["Condition"],
        categories=["Baseline", "Stress"],
        ordered=True,
    )
    ranked = df.assign(**{helper: condition_rank})
    ranked = ranked.sort_values([helper, "Mouse number"])
    return ranked.drop(columns=helper)

def mouse_level_one_row(df: pd.DataFrame, cort_col: str, keep_raw_cort: bool) -> pd.DataFrame:
    """
    Collapse replicate rows to one row per ("Mouse number", "Condition") pair.

    Rows missing either key are discarded, and for every remaining pair only
    the first row is kept.

    Column handling:
      * "Sample" is always removed when present.
      * ``cort_col`` (the raw CORT reading) is removed as well, unless
        ``keep_raw_cort`` is true.

    The input frame is not modified.
    """
    keys = ["Mouse number", "Condition"]
    collapsed = df.dropna(subset=keys).drop_duplicates(subset=keys).copy()

    # Columns that only make sense per replicate, not per mouse.
    unwanted = ("Sample",) if keep_raw_cort else (cort_col, "Sample")
    to_drop = [name for name in unwanted if name in collapsed.columns]

    if not to_drop:
        return collapsed
    return collapsed.drop(columns=to_drop)

def process_sheet_to_file(sheet_index: int, out_path: Path, source_path=None):
    """
    Build the grouped CORT workbook for one worksheet of the input file.

    Steps: read the worksheet, normalise its headers, clean and forward-fill
    the grouping columns, compute the per-mouse/per-condition mean ("Avg") and
    standard deviation ("SD") of the CORT column, collapse to one row per
    mouse per condition, and write the result sheets plus a summary.

    Parameters
    ----------
    sheet_index : int
        Worksheet to read (passed to ``pd.read_excel`` as ``sheet_name``).
    out_path : Path
        Destination .xlsx file, written with ``pd.ExcelWriter`` (openpyxl).
    source_path : path-like, optional
        Workbook to read. When omitted, the module-level ``in_path`` is used.

    Raises
    ------
    KeyError
        If no CORT column can be identified, or if any of the columns
        "Mouse number", "Condition", "genotype", "Sex" is missing.

    Output sheets
    -------------
    Raw_with_Avg_SD : every input row plus the "Avg" and "SD" columns.
    WT, KO, F, M    : one row per mouse per condition for that genotype/sex,
                      Baseline rows first, then Stress.
    Baseline, Stress: one row per mouse for that condition.
    Summary         : per-sheet means of "Avg" and mouse counts.

    A confirmation line is printed once the file has been written.
    """
    src = in_path if source_path is None else source_path

    # ---- Load ----
    df = normalize_cols(pd.read_excel(src, sheet_name=sheet_index))

    # ---- Identify CORT column ----
    cort_col = find_cort_col(df.columns)

    # ---- Fail early, with a readable message, if a needed column is absent ----
    required = ["Mouse number", "Condition", "genotype", "Sex"]
    missing = [name for name in required if name not in df.columns]
    if missing:
        raise KeyError(
            f"Missing required column(s) {missing} in sheet index {sheet_index}. "
            f"Columns are: {list(df.columns)}"
        )

    # ---- Clean / fix merged-cell columns (Excel blanks) ----
    # Strip the text columns and turn blank-like strings back into missing values.
    for col in ("genotype", "Condition", "Sex"):
        df[col] = df[col].astype(str).str.strip()
        df.loc[df[col].isin(["", "nan", "None"]), col] = pd.NA

    # Forward-fill genotype/sex because Excel merged cells read as NaN.
    for col in ("genotype", "Sex"):
        df[col] = df[col].ffill()

    # ---- Numeric CORT ----
    # Non-numeric entries become NaN.
    df[cort_col] = pd.to_numeric(df[cort_col], errors="coerce")

    # ---- Avg + SD per mouse per condition ----
    # Build the grouping once; both statistics are computed before either
    # column is added to the frame.
    per_mouse = df.groupby(["Mouse number", "Condition"])[cort_col]
    avg_values = per_mouse.transform("mean")
    sd_values = per_mouse.transform("std")
    df["Avg"] = avg_values
    df["SD"] = sd_values

    # ---- Build the "collapsed" versions (one row per mouse per condition) ----
    def collapse(rows: pd.DataFrame) -> pd.DataFrame:
        # One row per mouse per condition, without raw CORT and Sample.
        return mouse_level_one_row(rows.copy(), cort_col=cort_col, keep_raw_cort=False)

    # Baseline/Stress sheets: one row per mouse.
    baseline_sheet = collapse(df[df["Condition"] == "Baseline"])
    stress_sheet = collapse(df[df["Condition"] == "Stress"])

    # WT/KO/F/M sheets: include BOTH conditions, baseline then stress.
    def group_sheet(mask):
        return order_baseline_then_stress(collapse(df[mask]))

    wt_sheet = group_sheet(df["genotype"] == "WT")
    ko_sheet = group_sheet(df["genotype"] == "KO")
    f_sheet = group_sheet(df["Sex"] == "F")
    m_sheet = group_sheet(df["Sex"] == "M")

    # ---- Summary based on mouse-level rows (no repeats) ----
    grouped_for_summary = {
        "WT": wt_sheet, "KO": ko_sheet, "F": f_sheet, "M": m_sheet,
        "Baseline": baseline_sheet, "Stress": stress_sheet,
    }

    def summarize(name: str, sub: pd.DataFrame) -> dict:
        # "Condition" is guaranteed to exist here (validated above and never dropped).
        base_rows = sub[sub["Condition"] == "Baseline"]
        stress_rows = sub[sub["Condition"] == "Stress"]
        return {
            "Sheet": name,
            "Overall Avg": sub["Avg"].mean(),
            "Baseline Avg": base_rows["Avg"].mean(),
            "Stress Avg": stress_rows["Avg"].mean(),
            "N mice (Baseline)": base_rows["Mouse number"].nunique(),
            "N mice (Stress)": stress_rows["Mouse number"].nunique(),
        }

    summary_df = pd.DataFrame(
        [summarize(name, sub) for name, sub in grouped_for_summary.items()]
    )

    # ---- Write output ----
    sheets_in_order = [
        ("Raw_with_Avg_SD", df),  # full sheet too (handy for debugging)
        ("WT", wt_sheet),
        ("KO", ko_sheet),
        ("Baseline", baseline_sheet),
        ("Stress", stress_sheet),
        ("F", f_sheet),
        ("M", m_sheet),
        ("Summary", summary_df),
    ]
    with pd.ExcelWriter(out_path, engine="openpyxl") as writer:
        for sheet_name, frame in sheets_in_order:
            frame.to_excel(writer, sheet_name=sheet_name, index=False)

    print(f"✅ Created: {out_path}  (from sheet index {sheet_index})")

# ---- Run for BOTH input sheets ----
# Both output workbooks are written into the same folder as `in_path`
# (only the file name is replaced). `out1` is built from the same expression
# as the earlier `out_path`.
out1 = in_path.with_name("ELIZA_results_grouped.xlsx")
out2 = in_path.with_name("ELIZA_results_without_outliers_grouped.xlsx")

# Worksheets are selected by position, so the input workbook must keep the
# "all results" sheet first and the "without outliers" sheet second.
process_sheet_to_file(sheet_index=0, out_path=out1)  # first sheet (all results)
process_sheet_to_file(sheet_index=1, out_path=out2)  # second sheet (without outliers)


✅ Created: Z:\UriMons\Alex to Uri\ELIZA\ELIZA_results_grouped.xlsx  (from sheet index 0)
✅ Created: Z:\UriMons\Alex to Uri\ELIZA\ELIZA_results_without_outliers_grouped.xlsx  (from sheet index 1)
