<a href="https://colab.research.google.com/github/ykitaguchi77/statistics_for_articles/blob/main/Lacrimal_gland_MRD-1_study.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#**Lacrimal gland MRD-1 study**

In [1]:
# prompt: gdriveをマウント

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# ────────────────────────────────────────────────────────────────
# Google Colab  FULL SCRIPT  —  Lacrimal-tumor (unilateral) analysis
# ────────────────────────────────────────────────────────────────
#
# • Excel :  /content/drive/Shareddrives/岩崎Dr_IgG4 deulk/涙腺腫瘍/涙腺腫瘍unilateral.xlsx
# • Outputs: /content 直下に画像を保存
# • pandas ≥ 2.0 対応（append 未使用）
# ────────────────────────────────────────────────────────────────

# ❶  Google Drive をマウント
from google.colab import drive
drive.mount('/content/drive')

# ❷  ライブラリ
import pandas as pd, numpy as np, scipy.stats as stats
import statsmodels.api as sm, matplotlib.pyplot as plt
from pathlib import Path

# ❸  パス
XLSX  = "/content/drive/Shareddrives/岩崎Dr_IgG4 deulk/涙腺腫瘍/涙腺腫瘍unilateral.xlsx"
SHEET = "涙腺腫瘍 unilateral"
OUT   = Path("/content"); OUT.mkdir(exist_ok=True)

# ❹  データ読込
df = pd.read_excel(XLSX, sheet_name=SHEET)
df.columns = df.columns.str.strip()                      # 列名トリム
GCOL = "前から触れるか(触れる1/触れない0)"
df["Proptosis_delta"] = df["眼球突出 pre"] - df["眼球突出 pre (none)"]

# ─────────────────────────────────────────────
# ❺  ヘルパ関数
# ─────────────────────────────────────────────
def p_cont(x0, x1):
    sw0 = stats.shapiro(x0).pvalue if len(x0) >= 3 else 1
    sw1 = stats.shapiro(x1).pvalue if len(x1) >= 3 else 1
    if (sw0 >= .05) and (sw1 >= .05):
        return stats.ttest_ind(x0, x1, equal_var=False).pvalue, "Welch t"
    return stats.mannwhitneyu(x0, x1).pvalue, "MW-U"

def p_cat(s0, s1):
    ct = pd.concat([s0.value_counts(), s1.value_counts()], axis=1
                   ).fillna(0).astype(int).T.values   # 2×K 行列
    if (ct < 5).any():
        return stats.fisher_exact(ct)[1], "Fisher"
    return stats.chi2_contingency(ct)[1], "χ²"

def cohen_d(x, y):
    nx, ny = len(x), len(y)
    pooled = ((nx-1)*x.var(ddof=1) + (ny-1)*y.var(ddof=1)) / (nx+ny-2)
    return (x.mean()-y.mean())/np.sqrt(pooled)

def rank_biserial(u, n1, n2):
    return 1 - 2*u/(n1*n2)

# ─────────────────────────────────────────────
# ❻  Table 1  Baseline（群間 p 値付き）
# ─────────────────────────────────────────────
base_rows = []

# 連続変数
cont_cols = {
    "Age (yrs)"       : "age",
    "Proptosis Δ (mm)": "Proptosis_delta"
}
for lbl, col in cont_cols.items():
    a0 = df[df[GCOL]==0][col].dropna()
    a1 = df[df[GCOL]==1][col].dropna()
    p, test = p_cont(a0, a1)
    base_rows.append([
        lbl,
        len(a0), f"{a0.mean():.1f} ± {a0.std(ddof=1):.1f}",
        f"{a0.median():.1f}[{a0.quantile(.25):.1f}-{a0.quantile(.75):.1f}]",
        len(a1), f"{a1.mean():.1f} ± {a1.std(ddof=1):.1f}",
        f"{a1.median():.1f}[{a1.quantile(.25):.1f}-{a1.quantile(.75):.1f}]",
        test, f"{p:.3f}"
    ])

# カテゴリ変数
cat_info = {
    "Sex (M/F)" : ("sex(M0/F1)", {0:"M", 1:"F"}),
    "Side (R/L)": ("side(R0/L1)", {0:"R", 1:"L"})
}
for lbl, (col, cmap) in cat_info.items():
    s0 = df[df[GCOL]==0][col].map(cmap).dropna()
    s1 = df[df[GCOL]==1][col].map(cmap).dropna()
    p, test = p_cat(s0, s1)
    fmt = lambda s: " ".join(f"{k}:{(s==k).sum()}/{len(s)}" for k in cmap.values())
    base_rows.append([
        lbl,
        len(s0), fmt(s0), "",
        len(s1), fmt(s1), "",
        test, f"{p:.3f}"
    ])

baseline = pd.DataFrame(base_rows, columns=[
    "Variable",
    "FT0 n","FT0 mean±SD","FT0 median[IQR]",
    "FT1 n","FT1 mean±SD","FT1 median[IQR]",
    "Test","p"
])

# ─────────────────────────────────────────────
# ❼  Table 2  MRD-1 comparisons
# ─────────────────────────────────────────────
vars_int = [
    "MRD-1 pre","MRD-1 post",
    "MRD-1 pre (none)","MRD-1 post (none)",
    "MRD-1 pre 差","MRD-1 post 差","ΔMRD-1"
]

rows=[]
for v in vars_int:
    a0 = df[df[GCOL]==0][v].dropna()
    a1 = df[df[GCOL]==1][v].dropna()
    mean0 = f"{a0.mean():.2f} ± {a0.std(ddof=1):.2f}"
    mean1 = f"{a1.mean():.2f} ± {a1.std(ddof=1):.2f}"
    med0  = f"{a0.median():.2f}[{a0.quantile(.25):.2f}-{a0.quantile(.75):.2f}]"
    med1  = f"{a1.median():.2f}[{a1.quantile(.25):.2f}-{a1.quantile(.75):.2f}]"
    sw_ok = (stats.shapiro(a0).pvalue>=.05) and (stats.shapiro(a1).pvalue>=.05)
    if sw_ok:
        p  = stats.ttest_ind(a0,a1,equal_var=False).pvalue
        eff= cohen_d(a0,a1); test="Welch t";  hl=""
    else:
        U,p = stats.mannwhitneyu(a0,a1)
        eff = rank_biserial(U,len(a0),len(a1))
        hl  = np.median(np.subtract.outer(a1.values, a0.values)).round(3)
        test="MW-U"
    rows.append([
        v, len(a0), mean0, med0,
        len(a1), mean1, med1,
        test, f"{p:.3f}", f"{eff:.3f}", hl
    ])

table2 = pd.DataFrame(rows, columns=[
    "Variable",
    "FT0 n","FT0 mean±SD","FT0 median[IQR]",
    "FT1 n","FT1 mean±SD","FT1 median[IQR]",
    "Test","p","Effect","HL diff"
])

# ─────────────────────────────────────────────
# ❽  Multivariable regression (ΔMRD-1)
# ─────────────────────────────────────────────
ana   = df[[GCOL,"Proptosis_delta","ΔMRD-1"]].dropna().rename(columns={GCOL:"FrontTouch"})
y     = ana["ΔMRD-1"]
X     = sm.add_constant(ana[["FrontTouch","Proptosis_delta"]])
mdl   = sm.OLS(y, X).fit()
regtb = pd.concat([mdl.params, mdl.bse, mdl.conf_int(), mdl.pvalues], axis=1)
regtb.columns = ["Coef","StdErr","CI_low","CI_high","p"]; regtb = regtb.round(3)

# ─────────────────────────────────────────────
# ❾  Figure 2  scatter ΔMRD-1 vs Proptosis Δ
# ─────────────────────────────────────────────
plt.figure(figsize=(5,4))
for g, m in [(0,"o"), (1,"^")]:
    sub = df[df[GCOL]==g]
    plt.scatter(sub["Proptosis_delta"], sub["ΔMRD-1"], marker=m, label=f"FT{g}")
valid = df[["Proptosis_delta","ΔMRD-1"]].dropna()
if len(valid) >= 2:
    slope, intercept = np.polyfit(valid["Proptosis_delta"], valid["ΔMRD-1"], 1)
    x = np.linspace(valid["Proptosis_delta"].min(), valid["Proptosis_delta"].max(), 100)
    plt.plot(x, slope*x + intercept, color="black")
plt.xlabel("Proptosis Δ (mm)")
plt.ylabel("ΔMRD-1 (mm)")
plt.title("ΔMRD-1 vs Tumor-induced Proptosis")
plt.legend(); plt.tight_layout()
plt.savefig(OUT/"Figure2_scatter.png", dpi=300); plt.close()

# ─────────────────────────────────────────────
# ❿  Figure 3  MRD-1 pre/post adjacent boxplots
# ─────────────────────────────────────────────
sets = [
    df[df[GCOL]==0]["MRD-1 pre"].dropna(),
    df[df[GCOL]==0]["MRD-1 post"].dropna(),
    df[df[GCOL]==1]["MRD-1 pre"].dropna(),
    df[df[GCOL]==1]["MRD-1 post"].dropna()
]
pos  = [0.85,1.15,1.85,2.15]
colors = ['#1b9e77','#d95f02']*2
fig, ax = plt.subplots(figsize=(6,4))
bp = ax.boxplot(sets, positions=pos, widths=.25,
                patch_artist=True, medianprops={'color':'black'})
for patch, c in zip(bp['boxes'], colors): patch.set_facecolor(c)
ax.set_xticks([1,2]); ax.set_xticklabels(["FT0","FT1"])
ax.set_ylabel("MRD-1 (mm)")
ax.set_title("MRD-1 Pre/Post by Group")
ax.legend(
    [plt.Line2D([0],[0],color='#1b9e77',lw=4),
     plt.Line2D([0],[0],color='#d95f02',lw=4)],
    ['pre','post'], frameon=False, loc='upper right')
fig.tight_layout()
fig.savefig(OUT/"Figure3_MRD1_PrePost.png", dpi=300); plt.close()

# ─────────────────────────────────────────────
# ⓫  表の表示（任意）
# ─────────────────────────────────────────────
print("\n── Table 1 Baseline ──");  display(baseline)
print("\n── Table 2 MRD-1 comparisons ──"); display(table2)
print("\n── ΔMRD-1 regression ──"); display(regtb)

print("\nImages saved to /content :")
for f in ["Figure2_scatter.png","Figure3_MRD1_PrePost.png"]:
    print(" •", f)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

── Table 1 Baseline ──


Unnamed: 0,Variable,FT0 n,FT0 mean±SD,FT0 median[IQR],FT1 n,FT1 mean±SD,FT1 median[IQR],Test,p
0,Age (yrs),11,62.5 ± 13.0,59.0[52.5-75.5],16,60.8 ± 12.4,54.5[52.8-72.2],MW-U,0.656
1,Proptosis Δ (mm),11,3.5 ± 3.0,4.0[1.0-5.5],16,1.7 ± 1.2,2.0[1.0-2.0],Welch t,0.075
2,Sex (M/F),11,M:3/11 F:8/11,,16,M:3/16 F:13/16,,Fisher,0.662
3,Side (R/L),11,R:7/11 L:4/11,,16,R:8/16 L:8/16,,Fisher,0.696



── Table 2 MRD-1 comparisons ──


Unnamed: 0,Variable,FT0 n,FT0 mean±SD,FT0 median[IQR],FT1 n,FT1 mean±SD,FT1 median[IQR],Test,p,Effect,HL diff
0,MRD-1 pre,11,2.50 ± 1.48,2.50[1.50-3.00],16,2.22 ± 1.25,2.25[1.50-3.00],Welch t,0.612,0.209,
1,MRD-1 post,11,1.68 ± 1.31,1.00[0.75-2.50],16,2.38 ± 1.79,2.50[1.00-3.62],MW-U,0.223,0.284,0.5
2,MRD-1 pre (none),11,2.64 ± 1.58,2.50[1.25-4.00],16,3.62 ± 1.35,3.75[2.50-4.62],Welch t,0.107,-0.684,
3,MRD-1 post (none),11,2.59 ± 1.74,2.00[1.25-3.75],16,3.91 ± 1.14,4.00[3.50-4.50],Welch t,0.043,-0.93,
4,MRD-1 pre 差,11,-0.14 ± 0.92,0.00[-0.25-0.50],16,-1.41 ± 1.28,-1.00[-1.62--0.88],MW-U,0.003,-0.676,-1.5
5,MRD-1 post 差,11,-0.91 ± 1.34,0.00[-1.50-0.00],16,-1.53 ± 1.73,-1.00[-2.12--0.50],MW-U,0.268,-0.256,-0.5
6,ΔMRD-1,11,-0.82 ± 0.93,-0.50[-1.50-0.00],16,0.16 ± 1.08,0.50[-0.50-1.00],MW-U,0.026,0.511,1.0



── ΔMRD-1 regression ──


Unnamed: 0,Coef,StdErr,CI_low,CI_high,p
const,-0.05,0.419,-0.914,0.814,0.906
FrontTouch,0.572,0.399,-0.251,1.395,0.165
Proptosis_delta,-0.217,0.088,-0.398,-0.036,0.021



Images saved to /content :
 • Figure2_scatter.png
 • Figure3_MRD1_PrePost.png
