In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    roc_auc_score,
    average_precision_score,
    log_loss,
    classification_report
)

# Example training data: each tuple is one sample in the order
# (days_since_transaction, value, label).
data = pd.DataFrame(
    [
        (7, 19.99, 1),
        (90, 49.00, 1),
        (240, 89.99, 0),
        (3, 4.99, 1),
        (365, 150.00, 0),
        (120, 9.99, 1),
        (180, 59.00, 0),
        (30, 29.90, 1),
        (14, 199.99, 0),
        (300, 5.00, 0),
        (45, 25.00, 1),
        (200, 45.00, 0),
        (15, 15.00, 1),
        (90, 300.00, 0),
        (10, 9.99, 1),
        (270, 39.90, 0),
        (1, 1.99, 1),
        (150, 59.99, 0),
        (60, 79.99, 1),
        (21, 10.00, 1),
    ],
    columns=["days_since_transaction", "value", "label"],
)

# Features and target
feature_columns = ["days_since_transaction", "value"]
X = data[feature_columns]
y = data["label"]

# Train/test split: 20% of the rows are held out, with a fixed seed so the
# split is reproducible.
# NOTE(review): the split is not stratified; with this few rows the class
# balance of the test set depends entirely on the seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Model: logistic regression with scikit-learn's default settings, fitted on
# the training split (fit() returns the estimator itself).
model = LogisticRegression().fit(X_train, y_train)

# Probabilities for class 1 (second column of the predict_proba output)
class_probabilities = model.predict_proba(X_test)
probas = class_probabilities[:, 1]

# Evaluation metrics
# ROC AUC is not defined when the test labels contain only one class, which
# can easily happen with a test split this small; report NaN in that case
# instead of passing an input scikit-learn rejects.
if len(set(y_test)) < 2:
    roc_auc = float("nan")
else:
    roc_auc = roc_auc_score(y_test, probas)
ap_score = average_precision_score(y_test, probas)
# Pass both class labels explicitly: otherwise log_loss infers them from
# y_test and fails when only a single label is present there.
logloss = log_loss(y_test, probas, labels=[0, 1])

# Binary classification at a threshold of 0.5
pred_labels = (probas >= 0.5).astype(int)

# Output
print("=== Bewertung ===")
print(f"ROC AUC Score:           {roc_auc:.3f}")
print(f"Average Precision Score: {ap_score:.3f}")
print(f"Log Loss:                {logloss:.3f}")
print("\nClassification Report:")
print(classification_report(y_test, pred_labels))

# Example prediction for a single new sample
new_data = pd.DataFrame({"days_since_transaction": [60], "value": [50.00]})
new_sample_probabilities = model.predict_proba(new_data)
# Row 0 is the only sample; column 1 holds the probability of class 1.
prob = new_sample_probabilities[0, 1]
print(f"\nWahrscheinlichkeit für Kauf in 6 Monaten: {prob:.3f}")


=== Bewertung ===
ROC AUC Score:           1.000
Average Precision Score: 1.000
Log Loss:                0.012

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         2
           1       1.00      1.00      1.00         2

    accuracy                           1.00         4
   macro avg       1.00      1.00      1.00         4
weighted avg       1.00      1.00      1.00         4


Wahrscheinlichkeit für Kauf in 6 Monaten: 1.000
