# Optimierung des Schwellwerts für das Klassifikationsmodell

In diesem Notebook wird untersucht, ob eine Optimierung des Schwellwerts für das Klassifikationsmodell sinnvoll ist.

In [2]:
import numpy as np
from scipy.optimize import minimize_scalar
from sklearn.model_selection import RepeatedStratifiedKFold, train_test_split
from xgboost import XGBClassifier

import fraud_detection as fd
from fraud_detection import data_loader, metrics
from fraud_detection.models.costoptim import bewertung

datapath = "../data/transformed_label_and_damage.parquet"
seed = 42

In [3]:
seed = 42

In [4]:
# lade Daten ohne die nutzlosen Features
X, targets = data_loader.load_data_np(datapath, drop_features=data_loader.useless_features)

X_train, X_test, y_train, y_test = train_test_split(
    X, targets, test_size=0.2, random_state=seed, stratify=targets[:, 1] > 0
)

In [5]:
def bewertung(yhat, y, damage):
    """
    Bewertung der Vorhersagen mittels der Bewertungsfunktion der Wertkauf GmbH.
    """
    res = np.zeros(yhat.shape)
    # Case 1: FRAUD caught
    res += ((y == 1) & (yhat == 1)) * 5
    # Case 2: False positive
    res -= ((y == 0) & (yhat == 1)) * 10
    # Case 3: FRAUD missed
    res -= ((y == 1) & (yhat == 0)) * damage
    return res.sum()

def calc_bewertung_for_given_threshold(probs, theta_threshold, label_true, damage_true):
    """
    Berechnet die Bewertung der Vorhersagen basierend auf den gegebenen Parametern.
    """
    yhat = probs > theta_threshold
    return bewertung(yhat, label_true, damage_true)


In [12]:
def optimize_threshold(clf, X_train, y_train):
    """
    Optimiert die Entscheidungsschwelle für das gegebene Modell und die Trainingsdaten.
    """
    probs_train = clf.predict_proba(X_train)[:, 1]
    def objective(theta):
        values = calc_bewertung_for_given_threshold(probs_train, theta, y_train[:, 0], y_train[:, 1])
        return -np.mean(values)  # negative because we minimize

    res = minimize_scalar(objective, bounds=(0.1, 1.0), method='bounded')
    cost_tp = res.x
    return cost_tp

In [13]:
def evaluate_threshold(clf, X_test, y_test, threshold):
    """
    Bewertet die Vorhersagen des Modells auf den Testdaten unter Verwendung der gegebenen Threshold.
    """
    probs_test = clf.predict_proba(X_test)[:, 1]
    yhat = probs_test > threshold
    return metrics.bewertung(probs_test, yhat, y_test[:, 0], y_test[:, 1])

In [17]:
def optimize_and_evaluate(clf, X_train, y_train, X_test, y_test):
    probs_baseline = clf.predict_proba(X_test)[:, 1]
    preds_baseline = clf.predict(X_test)

    metrics_baseline = metrics.bewertung(probs_baseline, preds_baseline, y_test[:, 0], y_test[:, 1])

    opt_threshold = optimize_threshold(clf, X_train, y_train)
    metrics_opt = evaluate_threshold(clf, X_test, y_test, opt_threshold)

    diff = metrics_baseline["Bewertung"] - metrics_opt["Bewertung"]
    if diff < 0:
        print(f"Optimized threshold ({opt_threshold:.2f}) improved the score by {-diff:.2f} points.")
    else:
        print(f"Optimized threshold ({opt_threshold:.2f}) did not improve the score, difference: {diff:.2f}.")

    return { "baseline": metrics_baseline, "optimized_threshold": metrics_opt,}

In [18]:
def run_experiment(X, targets, n_splits=5, n_repeats=1, random_state=42):

    # Initialize the RepeatedStratifiedKFold
    skf = RepeatedStratifiedKFold(
        n_splits=n_splits, n_repeats=n_repeats, random_state=random_state
    )

    model_metrics = []

    for i, (train_idx, test_idx) in enumerate(skf.split(X, targets[:, 0])):
        clf = XGBClassifier(
            n_estimators=100,
            max_depth=5,
            learning_rate=0.1,
            objective="binary:logistic",
        )
        clf.fit(X[train_idx,:], targets[train_idx, 0])

        mm = optimize_and_evaluate(clf, X[train_idx], targets[train_idx], X[test_idx], targets[test_idx])
        # optimize on test set to check if functions work
        # mm = optimize_and_evaluate(clf, X[test_idx], targets[test_idx], X[test_idx], targets[test_idx])
        model_metrics.append(mm)

    return model_metrics


In [19]:
model_metrics = run_experiment(X, targets, n_splits=5, n_repeats=5, random_state=seed)

Optimized threshold (0.45) did not improve the score, difference: 135.67.
Optimized threshold (0.47) improved the score by 52.23 points.
Optimized threshold (0.39) did not improve the score, difference: 102.49.
Optimized threshold (0.33) did not improve the score, difference: 180.37.
Optimized threshold (0.43) did not improve the score, difference: 113.34.
Optimized threshold (0.44) did not improve the score, difference: 9.80.
Optimized threshold (0.40) did not improve the score, difference: 66.55.
Optimized threshold (0.45) did not improve the score, difference: 135.52.
Optimized threshold (0.45) improved the score by 30.87 points.
Optimized threshold (0.37) did not improve the score, difference: 115.04.
Optimized threshold (0.43) did not improve the score, difference: 6.54.
Optimized threshold (0.39) did not improve the score, difference: 222.65.
Optimized threshold (0.45) improved the score by 43.73 points.
Optimized threshold (0.35) did not improve the score, difference: 123.05.
Op

Die Differenz von Baseline und Resultat mit Optimierung des Entscheidungsschwellwertes ist positiv. Das bedeutet, dass die Optimierung des Entscheidungsschwellwertes generell keine Verbesserung bringt. Die Unterschiede sind jedoch gering, was darauf hindeutet, dass die Optimierung des Entscheidungsschwellwertes in diesem Fall keinen signifikanten Einfluss auf das Ergebnis hat.

Der Schwellwert des Model von 0.5 ist hochstwahrscheinlich schon relativ nah am Optimum, sodass eine weitere Optimierung des Schwellwerte keine Verbesserung mehr bringt.

In [21]:
np.mean([m["baseline"]["Bewertung"] - m["optimized_threshold"]["Bewertung"] for m in model_metrics])

np.float64(62.532799999999995)