In [None]:
# Random Forest

import pandas as pd
import numpy as np
from sklearn.model_selection import LeaveOneOut, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
import statsmodels.stats.proportion as smp

# --- 1) Load the data ---
file_path = "ML_Analysis_V3.xlsx"  # replace with the path to your own workbook
df = pd.read_excel(file_path, sheet_name="Sheet1")

# Columns used as prediction targets; every other column of the sheet is
# treated as a feature.
targets = [
    "Cervical Lordosis Risk",
    "Kyphosis Risk",
    "Lumbar Lordosis Risk",
    "Scoliosis Risk",
]
feature_cols = list(filter(lambda c: c not in targets, df.columns))

# --- 2) Helper: train on a single fold ---
def fit_predict_one_fold(X_train, y_train, X_test, rf_params):
    """Fit SMOTE + random forest on one training fold and predict the test fold.

    Args:
        X_train: Training features for the fold.
        y_train: Training labels as a pandas Series (``value_counts`` is
            called on it to size the minority class).
        X_test: Features of the held-out samples to predict.
        rf_params: Keyword arguments forwarded to ``RandomForestClassifier``.
            ``None`` or an empty mapping falls back to
            ``n_estimators=100, random_state=42``.

    Returns:
        The predictions for ``X_test`` produced by the fitted estimator.
    """
    # Honour the caller's forest settings instead of silently ignoring them.
    if rf_params:
        rf_kwargs = dict(rf_params)
    else:
        rf_kwargs = {"n_estimators": 100, "random_state": 42}
    forest = RandomForestClassifier(**rf_kwargs)

    # Size of the smallest class present in this fold (0 if only one class).
    counts = y_train.value_counts()
    minority_n = int(counts.min()) if len(counts) >= 2 else 0

    if minority_n < 2:
        # SMOTE needs at least one neighbour inside the minority class, i.e.
        # two or more minority samples. With fewer it raises, so train the
        # forest on the fold as-is rather than aborting the whole evaluation.
        forest.fit(X_train, y_train)
        return forest.predict(X_test)

    # k_neighbors must stay below the minority size; cap it at 5.
    k = min(5, minority_n - 1)
    pipe = Pipeline([
        ('smote', SMOTE(k_neighbors=k, random_state=42)),
        ('rf', forest),
    ])
    pipe.fit(X_train, y_train)
    return pipe.predict(X_test)

# --- 3) Evaluate under a cross-validation scheme ---
def evaluate_with_scheme(X, y, splitter, scheme_name):
    """Cross-validate the SMOTE + random-forest model and summarise the result.

    Predictions from all folds are pooled, and every metric is computed once
    on the pooled out-of-fold predictions.

    Args:
        X: Feature DataFrame (indexed positionally with ``.iloc``).
        y: Label Series aligned with ``X``; the confusion matrix is built for
            labels 0 and 1.
        splitter: A cross-validation splitter exposing ``split(X, y)``.
        scheme_name: Label stored in the ``"Scheme"`` field of the result row.

    Returns:
        A ``(row, cm)`` tuple: ``row`` is a dict with accuracy, its 95% Wilson
        confidence interval, weighted precision/recall/F1, support and the
        TN/FP/FN/TP counts; ``cm`` is the 2x2 confusion matrix.
    """
    rf_params = dict(n_estimators=100, random_state=42)
    y_true, y_pred = [], []

    # Always hand y to the splitter: stratified splitters require it and
    # non-stratified ones (e.g. LeaveOneOut) simply ignore it, so no
    # type-specific branching is needed.
    for tr, te in splitter.split(X, y):
        pred = fit_predict_one_fold(X.iloc[tr], y.iloc[tr], X.iloc[te], rf_params)
        y_true.extend(y.iloc[te])
        y_pred.extend(pred)

    # Metrics on the pooled predictions
    acc = accuracy_score(y_true, y_pred)
    rep = classification_report(y_true, y_pred, output_dict=True, zero_division=0)
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    # Wilson interval for the accuracy: k correct predictions out of n.
    n = len(y_true)
    k = int((np.array(y_true) == np.array(y_pred)).sum())
    ci_lo, ci_hi = smp.proportion_confint(k, n, alpha=0.05, method='wilson')

    weighted = rep["weighted avg"]
    row = {
        "Scheme": scheme_name,
        "Accuracy": acc,
        "95% CI Low": ci_lo,
        "95% CI High": ci_hi,
        "Weighted Precision": weighted["precision"],
        "Weighted Recall": weighted["recall"],
        "Weighted F1": weighted["f1-score"],
        "Support": int(cm.sum()),
        "TN": int(tn), "FP": int(fp), "FN": int(fn), "TP": int(tp),
    }
    return row, cm

# --- 4) Run every target through each CV scheme ---
results = []
cms = {}
for target in targets:
    X = df[feature_cols].copy()
    y = df[target].copy()

    # Cross-validation schemes ("LOSO" rows are produced with LeaveOneOut)
    cv_loso = LeaveOneOut()
    cv_s5 = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    cv_s10 = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

    # 1) LOSO
    loso_label = f"{target} – LOSO"
    row_loso, cm_loso = evaluate_with_scheme(X, y, cv_loso, loso_label)
    loso_entry = {"Target": target}
    loso_entry.update(row_loso)
    results.append(loso_entry)
    cms[loso_label] = cm_loso

    # 2) Stratified 5-fold
    s5_label = f"{target} – Stratified 5-Fold"
    row_s5, cm_s5 = evaluate_with_scheme(X, y, cv_s5, s5_label)
    s5_entry = {"Target": target}
    s5_entry.update(row_s5)
    results.append(s5_entry)
    cms[s5_label] = cm_s5

    # 3) Stratified 10-fold: a failure here is reported and skipped instead
    #    of stopping the run for the remaining targets.
    s10_label = f"{target} – Stratified 10-Fold"
    try:
        row_s10, cm_s10 = evaluate_with_scheme(X, y, cv_s10, s10_label)
        s10_entry = {"Target": target}
        s10_entry.update(row_s10)
        results.append(s10_entry)
        cms[s10_label] = cm_s10
    except Exception as e:
        print(f"{target}: Stratified 10-Fold uygulanamadı ({e})")

# --- 5) Master table ---
summary_master = pd.DataFrame(results)
print("LOSO vs 5-Fold vs 10-Fold")
print(summary_master)