In [153]:
import pandas as pd
import numpy as np
import joblib
import json

from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import confusion_matrix


In [154]:
# Load the processed feature table used for the cost-benefit evaluation.
df = pd.read_csv("../data/processed/ai4i2020_features.csv")

# Encode the "Type" letter codes as integers. The mapping is applied only
# when the column still holds non-numeric values: mapping an already-encoded
# numeric column through this dict would replace every entry with NaN.
if "Type" in df.columns and not pd.api.types.is_numeric_dtype(df["Type"]):
    df["Type"] = df["Type"].map({"L": 0, "M": 1, "H": 2})


In [155]:
# Restore the persisted artefacts: the fitted estimator, the list of feature
# columns it is given, and the decision threshold stored as plain text.
# NOTE(review): joblib.load unpickles its input, so only load trusted files.
model = joblib.load("../models/best_model.joblib")

with open("../models/feature_list.json", "r") as features_fh:
    feature_names = json.loads(features_fh.read())

with open("../models/threshold.txt", "r") as threshold_fh:
    threshold_text = threshold_fh.read()
optimal_threshold = float(threshold_text)


In [156]:
target = "label"

# Feature matrix limited to the columns in feature_names; labels cast to int.
X = df.loc[:, feature_names]
y = df[target].astype(int)

# Keep only the final (train, validation) index pair produced by the
# 5-split TimeSeriesSplit.
# NOTE(review): assumes the rows of df are in chronological order - confirm.
tscv = TimeSeriesSplit(n_splits=5)
*_, (train_idx, val_idx) = tscv.split(X)

X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]


In [157]:
# Score the validation rows: take column 1 of predict_proba and cap it at
# 0.999 ("clamp probabilities for realism"). The cap can only change the
# predicted labels if optimal_threshold is above 0.999.
probs_val = np.minimum(model.predict_proba(X_val)[:, 1], 0.999)

# A row is predicted positive when its capped score reaches the threshold.
y_pred_val = np.greater_equal(probs_val, optimal_threshold).astype(int)


In [158]:
# Pin the label order so the matrix is always 2x2 (rows = actual 0/1,
# columns = predicted 0/1). Without `labels`, a fold in which only one class
# appears yields a smaller matrix and the four-way unpacking below fails.
cm = confusion_matrix(y_val, y_pred_val, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()

print("CONFUSION MATRIX (Validation Only)")
print("----------------------------------")
print(f"TN = {tn}, FP = {fp}")
print(f"FN = {fn}, TP = {tp}")


CONFUSION MATRIX (Validation Only)
----------------------------------
TN = 1634, FP = 0
FN = 9, TP = 23


In [159]:
# Unit-cost assumptions (USD) for the ROI estimate.
COST_FP = 500      # charged per false positive
COST_FN = 50_000   # charged per false negative

# Cost with the model: only misclassified rows are charged.
# NOTE(review): true positives carry no cost in this formula. If acting on a
# correct alert also incurs a maintenance cost, the savings are overstated -
# confirm the intended cost model.
cost_with_model = int(fp * COST_FP + fn * COST_FN)

# Baseline: every positive label in the validation fold is charged COST_FN.
baseline_failures = int(y_val.sum())
baseline_cost = baseline_failures * COST_FN

savings = baseline_cost - cost_with_model

# The relative reduction is undefined when the baseline cost is zero (no
# failures in the validation fold); report NaN instead of dividing by zero.
if baseline_cost:
    reduction_pct = (savings / baseline_cost) * 100
else:
    reduction_pct = float("nan")

print("\nROI SUMMARY")
print("-----------")
print(f"Cost without model : ${baseline_cost:,}")
print(f"Cost with model    : ${cost_with_model:,}")
print(f"NET SAVINGS        : ${savings:,}")
print(f"COST REDUCTION     : {reduction_pct:.2f}%")


ROI SUMMARY
-----------
Cost without model : $1,600,000
Cost with model    : $450,000
NET SAVINGS        : $1,150,000
COST REDUCTION     : 71.88%


## 💰 Cost–Benefit Analysis Summary

### Objective
Quantify the financial impact of deploying the predictive maintenance model
compared to operating without failure prediction.

### Key Assumptions
- False Positive (Preventive Maintenance): $500
- False Negative (Unplanned Breakdown): $50,000
- Evaluation performed on a hold-out validation set: the last fold of a 5-split `TimeSeriesSplit` (time-aware splitting)

### Results (Validation Set)
- Failures prevented: 23
- Missed failures: 9
- Net savings: $1.15M
- Cost reduction: 71.9%

### Business Impact
The predictive maintenance model significantly reduces unplanned downtime
costs while requiring minimal unnecessary maintenance actions.  
This demonstrates strong financial justification for deployment in
real-world industrial operations.

### Methodology Note
All financial metrics are computed **only on the validation set** using the
same optimized threshold and time-aware split as the final model evaluation,
ensuring no data leakage and realistic deployment estimates.
