In [1]:
# ================================================================
# SVM 
# ================================================================

import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold, LeaveOneOut
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, balanced_accuracy_score, f1_score
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE, RandomOverSampler
from imblearn.pipeline import Pipeline
import statsmodels.stats.proportion as smp
from itertools import product
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

# -----------------------
# 1) Load the dataset
# -----------------------
# Workbook that holds both the feature columns and the risk-label columns.
file_path = "ML_Analysis_V3.xlsx"  # <-- adjust to your own path
df = pd.read_excel(file_path, sheet_name="Sheet1")

# Names of the label columns.
targets = [
    "Cervical Lordosis Risk",
    "Kyphosis Risk",
    "Lumbar Lordosis Risk",
    "Scoliosis Risk",
]
# Every column that is not a label is used as a feature.
feature_cols = [column for column in df.columns if column not in targets]

# ---------------------------------------------------------
# 2) Helper: choose an adaptive sampler (SMOTE(k) or ROS)
# ---------------------------------------------------------
def make_adaptive_sampler(y_train):
    """Pick an oversampler based on the size of the smaller class.

    The occurrences of labels ``0`` and ``1`` in *y_train* are counted (a
    label that does not occur counts as zero).  With fewer than three
    samples in the smaller class a ``RandomOverSampler`` is returned;
    otherwise a ``SMOTE`` whose ``k_neighbors`` is one less than the smaller
    class size, capped at 5.

    Parameters
    ----------
    y_train : pandas.Series
        Training labels; must provide ``value_counts()``.

    Returns
    -------
    SMOTE or RandomOverSampler
        An unfitted sampler seeded with ``random_state=42``.
    """
    class_sizes = y_train.value_counts().to_dict()
    smallest = min(class_sizes.get(0, 0), class_sizes.get(1, 0))

    # Guard clause: too few minority samples, fall back to plain duplication.
    if smallest < 3:
        return RandomOverSampler(random_state=42)

    # Keep the neighbour count strictly below the minority size, in 1..5.
    neighbours = max(1, min(5, smallest - 1))
    return SMOTE(k_neighbors=neighbours, random_state=42)

# ---------------------------------------------------------
# 3) Helper: choose an adaptive StratifiedKFold
#    (n_splits is capped at the smallest class count, with a minimum of 2)
# ---------------------------------------------------------
def get_stratified_cv(y, desired_splits):
    """Build a shuffled ``StratifiedKFold`` whose fold count fits the labels.

    The fold count is *desired_splits* capped at the size of the smallest
    class in *y*, and never lower than 2.

    Parameters
    ----------
    y : pandas.Series
        Labels; must provide ``value_counts()``.
    desired_splits : int
        Requested number of folds.

    Returns
    -------
    tuple
        ``(splitter, n_splits)`` -- the ``StratifiedKFold`` instance
        (``shuffle=True``, ``random_state=42``) and the fold count it uses.
    """
    smallest_class = int(y.value_counts().min())
    fold_count = max(2, min(desired_splits, smallest_class))
    splitter = StratifiedKFold(
        n_splits=fold_count,
        shuffle=True,
        random_state=42,
    )
    return splitter, fold_count

# --------------------------------------------------------------------------
# 4) Single-fold training: StandardScaler -> Sampler -> SVM
# --------------------------------------------------------------------------
def fit_predict_one_fold(X_train, y_train, X_test, svm_params):
    """Fit a scaler -> sampler -> SVC pipeline on one fold and predict.

    Parameters
    ----------
    X_train, y_train
        Training features and labels for this fold.  The labels are also
        handed to ``make_adaptive_sampler`` to choose the oversampler.
    X_test
        Features to predict after fitting.
    svm_params : dict
        Keyword arguments forwarded unchanged to ``SVC``.

    Returns
    -------
    The predictions of the fitted pipeline for *X_test*.
    """
    steps = [
        ('scaler', StandardScaler()),
        ('sampler', make_adaptive_sampler(y_train)),
        ('svm', SVC(**svm_params)),
    ]
    fold_model = Pipeline(steps)
    fold_model.fit(X_train, y_train)
    return fold_model.predict(X_test)

# --------------------------------------------------------------------------
# 5) Global tuning (fast): best SVM params via stratified k-fold
#    Score: balanced_accuracy (alternative: f1_macro)
# --------------------------------------------------------------------------
def global_tune_svm(X, y, desired_splits=5, scoring="balanced_accuracy"):
    """Grid-search SVC hyper-parameters with stratified k-fold CV.

    For every candidate a ``StandardScaler -> sampler -> SVC`` pipeline is
    fitted on the training part of each fold and scored on the held-out
    part.  The candidate with the highest mean fold score is kept; on ties
    the first candidate encountered wins.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix; rows are selected positionally via ``.iloc``.
    y : pandas.Series
        Labels aligned with *X*.
    desired_splits : int, default 5
        Requested number of folds; ``get_stratified_cv`` may change it.
    scoring : str, default "balanced_accuracy"
        ``"balanced_accuracy"`` selects balanced accuracy; any other value
        selects macro-averaged F1.

    Returns
    -------
    tuple
        ``(best_params, best_score, used_splits)`` where *best_params* is a
        dict of ``SVC`` keyword arguments (always including
        ``class_weight="balanced"``), *best_score* is the winning mean fold
        score and *used_splits* is the fold count actually used.

    Notes
    -----
    NOTE(review): *best_score* is the maximum over all candidates on the
    same folds, so it is an optimistic estimate; evaluating the returned
    parameters on the same data inherits that bias.
    """
    skf, used_splits = get_stratified_cv(y, desired_splits)

    # The splitter is seeded with a fixed integer, so every split() call
    # yields the same folds.  Slice the data once instead of repeating the
    # split and the .iloc lookups for each candidate.
    folds = [
        (X.iloc[tr], y.iloc[tr], X.iloc[va], y.iloc[va])
        for tr, va in skf.split(X, y)
    ]

    # Parameter grid (kept deliberately small).
    grids = [
        {"kernel": ["rbf"], "C": [0.1, 1, 10, 100], "gamma": ["scale", 0.1, 0.01, 0.001]},
        {"kernel": ["linear"], "C": [0.1, 1, 10, 100]}
    ]

    # Resolve the metric once rather than inside the innermost loop.
    if scoring == "balanced_accuracy":
        score_fn = balanced_accuracy_score
    else:
        def score_fn(y_true, y_pred):
            return f1_score(y_true, y_pred, average="macro")

    best_score = -np.inf
    best_params = {"kernel": "rbf", "C": 1.0, "gamma": "scale"}  # fallback

    for grid in grids:
        names = list(grid)
        for combo in product(*grid.values()):
            candidate = dict(zip(names, combo))
            fold_scores = []
            for X_tr, y_tr, X_va, y_va in folds:
                # Fresh scaler, sampler and SVC per fit so no state is shared.
                pipe = Pipeline([
                    ('scaler', StandardScaler()),
                    ('sampler', make_adaptive_sampler(y_tr)),
                    ('svm', SVC(class_weight="balanced", **candidate))
                ])
                pipe.fit(X_tr, y_tr)
                fold_scores.append(score_fn(y_va, pipe.predict(X_va)))
            mean_score = np.mean(fold_scores)
            # Strict '>' keeps the earliest candidate on ties.
            if mean_score > best_score:
                best_score = mean_score
                best_params = candidate

    # class_weight='balanced' is fixed for the returned parameters.
    best_params = {"class_weight": "balanced", **best_params}
    return best_params, best_score, used_splits

# --------------------------------------------------------------------------
# 6) Evaluation: with the chosen scheme (LOSO / Stratified k)
# --------------------------------------------------------------------------
def evaluate_scheme_with_fixed_params(X, y, splitter, scheme_name, svm_params):
    """Cross-validate a fixed SVM configuration and summarise the result.

    For every split produced by *splitter* a fresh pipeline is fitted via
    ``fit_predict_one_fold``.  The held-out predictions of all splits are
    pooled and the metrics are computed once on the pooled predictions.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix; rows are selected positionally via ``.iloc``.
    y : pandas.Series
        Labels aligned with *X*.  The confusion matrix is built for the
        labels ``0`` and ``1``.
    splitter
        A cross-validation splitter exposing ``split(X, y)``, e.g.
        ``LeaveOneOut()`` or ``StratifiedKFold(...)``.
    scheme_name : str
        Not used inside this function; kept so existing callers keep working.
    svm_params : dict
        Keyword arguments forwarded to ``SVC``.

    Returns
    -------
    dict
        Accuracy, its 95% Wilson confidence interval, weighted
        precision/recall/F1, the total support and the TN/FP/FN/TP counts.
    """
    # Always pass y: scikit-learn splitters accept split(X, y) whether or
    # not they stratify, so label-aware splitters other than
    # StratifiedKFold work as well.
    splits = splitter.split(X, y)

    y_true, y_pred = [], []
    for tr, te in splits:
        X_tr, X_te = X.iloc[tr], X.iloc[te]
        y_tr, y_te = y.iloc[tr], y.iloc[te]
        pred = fit_predict_one_fold(X_tr, y_tr, X_te, svm_params)
        y_true.extend(y_te)
        y_pred.extend(pred)

    acc = accuracy_score(y_true, y_pred)
    rep = classification_report(y_true, y_pred, output_dict=True, zero_division=0)
    # Fixing labels=[0, 1] guarantees a 2x2 matrix even if a class is
    # never predicted, so the four-way unpacking below cannot fail.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    # Wilson interval for the proportion of correctly classified samples.
    n = len(y_true)
    k = int((np.array(y_true) == np.array(y_pred)).sum())
    ci_lo, ci_hi = smp.proportion_confint(k, n, alpha=0.05, method='wilson')

    row = {
        "Accuracy": acc,
        "95% CI Low": ci_lo,
        "95% CI High": ci_hi,
        "Weighted Precision": rep["weighted avg"]["precision"],
        "Weighted Recall": rep["weighted avg"]["recall"],
        "Weighted F1": rep["weighted avg"]["f1-score"],
        "Support": int(cm.sum()),
        "TN": int(tn), "FP": int(fp), "FN": int(fn), "TP": int(tp)
    }
    return row

# -------------------------------------------------------
# 7) Run: tuning + LOSO / 5-Fold / 10-Fold (adaptive)
# -------------------------------------------------------
all_rows = []
for target in targets:
    print(f"\n=== SVM Tuning & Evaluation for: {target} ===")
    X = df[feature_cols].copy()
    y = df[target].copy()

    # Global tuning, scored with balanced accuracy.
    best_params, best_cv, used_splits = global_tune_svm(
        X, y, desired_splits=5, scoring="balanced_accuracy"
    )
    print(f"Best params (Stratified {used_splits}-Fold): {best_params} | Mean CV (balanced_acc): {round(best_cv,3)}")

    # Evaluation schemes; the stratified fold counts adapt to the labels.
    cv_loso = LeaveOneOut()
    cv5, used5 = get_stratified_cv(y, 5)
    cv10, used10 = get_stratified_cv(y, 10)
    name5 = f"Stratified {used5}-Fold"
    name10 = f"Stratified {used10}-Fold"

    # Evaluate the tuned parameters under each scheme.
    row_loso = evaluate_scheme_with_fixed_params(X, y, cv_loso, "LOSO", best_params)
    row_s5 = evaluate_scheme_with_fixed_params(X, y, cv5, name5, best_params)
    row_s10 = evaluate_scheme_with_fixed_params(X, y, cv10, name10, best_params)

    # One summary row per scheme, tagged with the target it belongs to.
    for scheme, row in (("LOSO", row_loso), (name5, row_s5), (name10, row_s10)):
        all_rows.append({"Target": target, "Scheme": scheme, **row})

summary_svm_fixed = pd.DataFrame(all_rows)
print("\n=== SVM (scaled + tuned + balanced + adaptive CV/oversampling) – Master Table ===")
print(summary_svm_fixed)

# To save the table as CSV, uncomment:
# summary_svm_fixed.to_csv("SVM_Scaled_Tuned_Balanced_Adaptive_AllTargets.csv", index=False)



=== SVM Tuning & Evaluation for: Cervical Lordosis Risk ===
Best params (Stratified 5-Fold): {'class_weight': 'balanced', 'kernel': 'rbf', 'C': 1, 'gamma': 0.01} | Mean CV (balanced_acc): 0.81

=== SVM Tuning & Evaluation for: Kyphosis Risk ===
Best params (Stratified 5-Fold): {'class_weight': 'balanced', 'kernel': 'rbf', 'C': 100, 'gamma': 0.001} | Mean CV (balanced_acc): 0.758

=== SVM Tuning & Evaluation for: Lumbar Lordosis Risk ===
Best params (Stratified 4-Fold): {'class_weight': 'balanced', 'kernel': 'linear', 'C': 0.1} | Mean CV (balanced_acc): 0.571

=== SVM Tuning & Evaluation for: Scoliosis Risk ===
Best params (Stratified 5-Fold): {'class_weight': 'balanced', 'kernel': 'rbf', 'C': 10, 'gamma': 'scale'} | Mean CV (balanced_acc): 0.65

=== SVM (scaled + tuned + balanced + adaptive CV/oversampling) – Master Table ===
                    Target              Scheme  Accuracy  95% CI Low  \
0   Cervical Lordosis Risk                LOSO  0.800000    0.626943   
1   Cervical Lord