**Block 0 – Imports & Helper**

In [None]:
import time
import numpy as np
from scipy.stats import entropy, wilcoxon
from dataclasses import dataclass
from typing import Optional, Dict, List
import torch
import torch.nn.functional as F

**Block 1 – PSI, KL, Stability, Confidence**

In [None]:
def clamp01(x):
    """Coerce ``x`` to float and limit the result to the closed interval [0, 1]."""
    value = float(x)
    # Cap from above first, then floor at zero (same order as min-then-max).
    capped = value if value < 1.0 else 1.0
    return capped if capped > 0.0 else 0.0


def compute_hist(prob_array, n_bins=10):
    """Bin values over the fixed range [0, 1] for PSI/KL.

    Returns a tuple ``(fractions, edges)``: the per-bin share of samples and
    the bin edges reported by ``np.histogram``.
    """
    counts, edges = np.histogram(prob_array, bins=n_bins, range=(0.0, 1.0))
    counts = counts.astype(float)
    # The epsilon keeps the division defined when no sample lands in range.
    fractions = counts / (counts.sum() + 1e-8)
    return fractions, edges


def compute_psi(base_probs, cur_probs, n_bins=10):
    """Population Stability Index between the baseline and current samples.

    Both samples are binned with ``compute_hist``; bin fractions are clipped
    to [1e-6, 1] so empty bins do not produce log(0) or a division by zero.
    """
    baseline_frac = np.clip(compute_hist(base_probs, n_bins)[0], 1e-6, 1)
    current_frac = np.clip(compute_hist(cur_probs, n_bins)[0], 1e-6, 1)
    log_ratio = np.log(baseline_frac / current_frac)
    return float(np.sum((baseline_frac - current_frac) * log_ratio))


def compute_kl(base_probs, cur_probs, n_bins=10):
    """KL divergence KL(baseline || current) between the two histograms.

    Bin fractions come from ``compute_hist`` and are clipped to [1e-6, 1]
    before being handed to ``scipy.stats.entropy``.
    """
    lower, upper = 1e-6, 1
    baseline_frac = np.clip(compute_hist(base_probs, n_bins)[0], lower, upper)
    current_frac = np.clip(compute_hist(cur_probs, n_bins)[0], lower, upper)
    # entropy(pk, qk) with two arguments is the relative entropy KL(pk || qk).
    return float(entropy(baseline_frac, current_frac))


def compute_stability_components(
    base_probs: np.ndarray,
    cur_probs: np.ndarray,
    base_pos_rate: float,
    cur_pos_rate: float,
):
    """
    Compute the three stability components as a tuple ``(psi, kl, diff_rate)``:
    - PSI between ``base_probs`` and ``cur_probs``
    - KL divergence between the same two samples
    - class distribution shift: absolute difference of the positive rates
    """
    return (
        compute_psi(base_probs, cur_probs),
        compute_kl(base_probs, cur_probs),
        abs(cur_pos_rate - base_pos_rate),
    )


def compute_confidence_stats(
    probs: np.ndarray,
    base_conf: float,
):
    """
    Summarize prediction confidence for one run.

    probs: array [N, num_classes] (softmax).
    base_conf: mean of the per-sample max probability on the baseline.

    Returns ``(avg_conf, var_conf, ratio_conf)`` where the confidence of a
    sample is its largest class probability and ``ratio_conf`` is
    ``avg_conf / base_conf`` (with a 1e-8 guard in the denominator).
    """
    per_sample_conf = probs.max(axis=1)
    mean_conf = float(per_sample_conf.mean())
    spread_conf = float(per_sample_conf.var())
    return mean_conf, spread_conf, mean_conf / (base_conf + 1e-8)

**Block 2 – Normalisasi & Composite Score D_prod**

In [None]:
@dataclass
class ProductionMetrics:
    """Per-scenario measurements consumed by the D_prod scoring helpers in this notebook."""
    # PSI versus the baseline; compute_stability_norm scales it against 0.25
    psi: float
    # KL divergence versus the baseline; compute_stability_norm scales it against 0.5
    kl: float
    # Absolute change in positive rate; compute_stability_norm scales it against 0.2
    class_shift: float
    # Mean and variance of the per-sample max probability (stored, not read by the scoring helpers shown here)
    avg_conf: float
    var_conf: float
    # avg_conf divided by the baseline confidence; read by compute_confidence_norm
    ratio_conf: float
    # Current and baseline p95 latency in milliseconds; read by compute_latency_good
    p95_latency_ms: float
    baseline_p95_latency_ms: float
    # Current and baseline error rate; read by compute_error_good
    error_rate: float
    baseline_error_rate: float
    # Optional business signal; compute_biz_norm returns 0.0 when either value is None
    flag_rate: Optional[float] = None
    baseline_flag_rate: Optional[float] = None


def compute_stability_norm(m: ProductionMetrics) -> float:
    """Average "goodness" (1 = no drift, 0 = severe) of PSI, KL and class shift.

    Each raw value is divided by the level treated as severe drift and capped
    with ``clamp01`` so a single extreme value cannot blow up the score.
    """
    healthy_psi = 1.0 - clamp01(m.psi / 0.25)           # PSI > 0.25 = heavy drift
    healthy_kl = 1.0 - clamp01(m.kl / 0.5)              # KL > 0.5 = heavy drift
    healthy_dist = 1.0 - clamp01(m.class_shift / 0.2)   # 20-point shift = severe
    return (healthy_psi + healthy_kl + healthy_dist) / 3.0


def compute_confidence_norm(m: ProductionMetrics) -> float:
    """Score how close the confidence ratio is to 1 (1 = equal, 0 = off by 50% or more)."""
    # The ratio is expected to sit near 1; limit it to the band [0.5, 1.5].
    bounded = max(0.5, min(1.5, float(m.ratio_conf)))
    # Distance from 1 rescaled to 0-1, then flipped so higher is better.
    return 1.0 - abs(bounded - 1.0) / 0.5


def compute_latency_good(m: ProductionMetrics) -> float:
    """Latency score: 1 = at or below baseline p95, 0 = twice the baseline or slower."""
    slowdown = m.p95_latency_ms / (m.baseline_p95_latency_ms + 1e-8)
    # Only slowdowns between 1x and 2x the baseline move the score.
    slowdown = max(1.0, min(2.0, slowdown))
    return 1.0 - (slowdown - 1.0) / 1.0


def compute_error_good(m: ProductionMetrics) -> float:
    """Error score: 1 = healthy (at or below baseline), 0 = three times the baseline or worse."""
    # Floor the baseline at 1e-4 so an error-free baseline does not divide by zero.
    reference = max(m.baseline_error_rate, 1e-4)
    growth = max(1.0, min(3.0, m.error_rate / reference))   # up to 3x baseline
    return 1.0 - (growth - 1.0) / 2.0


def compute_biz_norm(m: ProductionMetrics) -> float:
    """Score how close the flag rate is to its baseline (1 = equal, 0 = off by 50% or more).

    Returns 0.0 when either flag rate is ``None`` (signal not in use).

    A baseline flag rate of (effectively) zero has no meaningful ratio, so it
    is handled explicitly: an equally zero current rate is "unchanged" and
    scores 1.0, any positive current rate scores 0.0. Previously a zero/zero
    comparison produced a ratio of 0 and therefore the worst score.
    """
    if m.flag_rate is None or m.baseline_flag_rate is None:
        return 0.0  # signal not in use

    tiny = 1e-8
    if m.baseline_flag_rate <= tiny:
        return 1.0 if m.flag_rate <= tiny else 0.0

    # No epsilon in the denominator: a run compared with itself scores exactly 1.
    ratio = m.flag_rate / m.baseline_flag_rate
    ratio = max(0.5, min(1.5, ratio))
    deviation = abs(ratio - 1.0) / 0.5
    return 1.0 - deviation


def compute_composite_production(
    m: ProductionMetrics,
    w_stab: float = 0.35,
    w_conf: float = 0.20,
    w_lat:  float = 0.20,
    w_err:  float = 0.15,
    w_biz:  float = 0.10,
) -> float:
    """Compute D_prod (0-1) as a weighted mean of the component scores.

    Components: stability, confidence, latency, error rate and the optional
    business (flag-rate) signal. Weights are renormalized to sum to 1.

    When ``m.flag_rate`` or ``m.baseline_flag_rate`` is ``None`` the business
    weight is dropped before renormalizing; otherwise the unused component
    would contribute 0 and cap the achievable score below 1.

    Raises:
        ValueError: if the active weights do not sum to a positive value.
    """
    if m.flag_rate is None or m.baseline_flag_rate is None:
        w_biz = 0.0

    w_sum = w_stab + w_conf + w_lat + w_err + w_biz
    if w_sum <= 0:
        raise ValueError("Component weights must sum to a positive value")
    w_stab, w_conf, w_lat, w_err, w_biz = [
        w / w_sum for w in (w_stab, w_conf, w_lat, w_err, w_biz)
    ]

    stab_norm = compute_stability_norm(m)
    conf_norm = compute_confidence_norm(m)
    lat_good  = compute_latency_good(m)
    err_good  = compute_error_good(m)
    biz_norm  = compute_biz_norm(m)

    D_prod = (
        w_stab * stab_norm +
        w_conf * conf_norm +
        w_lat  * lat_good  +
        w_err  * err_good  +
        w_biz  * biz_norm
    )
    return float(D_prod)

**Block 3 – Contoh Inference Loop per Skenario**

In [None]:
def run_inference_collect(
    model,
    dataloader,
    device,
):
    """Run the model over ``dataloader`` and collect outputs plus health stats.

    Args:
        model: callable returning logits; put into eval mode here.
        dataloader: iterable of ``(inputs, label)`` pairs; labels are ignored.
        device: device the inputs are moved to before the forward pass.

    Returns:
        ``(probs_concat, latencies_ms, error_rate)`` where
        - ``probs_concat`` stacks the softmax outputs of all successful
          batches (shape ``(0, 2)`` if every batch failed),
        - ``latencies_ms`` holds one entry per *batch*: the time for forward
          pass, softmax and copy to host (the ``.to(device)`` transfer is
          not included),
        - ``error_rate`` is failed samples divided by total samples.

    A failing batch is counted as errors and reported through a
    ``RuntimeWarning`` instead of being dropped silently.
    """
    import warnings

    model.eval()
    all_probs = []
    latencies_ms = []
    error_count = 0
    total_count = 0

    with torch.no_grad():
        for imgs, _ in dataloader:   # labels may be ignored in production
            batch_size = len(imgs)
            imgs = imgs.to(device)
            # perf_counter is monotonic, so the interval cannot be distorted
            # by wall-clock adjustments the way time.time() can.
            start = time.perf_counter()
            try:
                logits = model(imgs)
                probs = F.softmax(logits, dim=1).cpu().numpy()
            except Exception as exc:
                # Best effort: keep going, but make the failure visible.
                error_count += batch_size
                warnings.warn(
                    f"Inference failed for a batch of {batch_size} samples: {exc!r}",
                    RuntimeWarning,
                )
            else:
                all_probs.append(probs)
            elapsed_ms = (time.perf_counter() - start) * 1000.0

            latencies_ms.append(elapsed_ms)
            total_count += batch_size

    if len(all_probs) == 0:
        # No successful batch: empty array with the 2-class width used in this notebook.
        probs_concat = np.zeros((0, 2))
    else:
        probs_concat = np.concatenate(all_probs, axis=0)

    latencies_ms = np.array(latencies_ms)
    error_rate = error_count / max(total_count, 1)

    return probs_concat, latencies_ms, error_rate

**Buat baseline_loader**

In [None]:
from torchvision import transforms

# Baseline preprocessing: fixed 224x224 resize, tensor conversion, then
# per-channel normalization. The scenario transforms in later cells repeat
# these three steps after their own degradation step.
baseline_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],   # ImageNet normalization
        std=[0.229, 0.224, 0.225]
    ),
])

In [None]:
from torchvision import datasets
from torch.utils.data import DataLoader

baseline_dataset_path = "/content/drive/MyDrive/TESIS/dataset1"  # change to match your dataset location

# Clean (undegraded) images, preprocessed with baseline_transform.
baseline_dataset = datasets.ImageFolder(
    root=baseline_dataset_path,
    transform=baseline_transform
)

# shuffle=False keeps the sample order fixed from run to run.
baseline_loader = DataLoader(
    baseline_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=2,   # free choice; can be 0 on a Colab CPU runtime
    pin_memory=True
)

print("✅ baseline_loader created — total images:", len(baseline_dataset))

✅ baseline_loader created — total images: 749


**Definisikan Model** - Load Model Baseline (pth)

In [None]:
import torch
import torch.nn as nn
from torchvision import models

num_classes = 2  # helmet / no_helmet

# 1) Rebuild the same architecture that was used during training
model = models.mobilenet_v3_small(weights=None)
model.classifier[3] = nn.Linear(
    in_features=model.classifier[3].in_features,
    out_features=num_classes
)

# 2) Load the state_dict from the .pth file
ckpt_path = "mobilenet_baseline.pth"  # change the path if the file lives elsewhere

# NOTE(security): torch.load unpickles the file, so only load checkpoints you
# trust. On PyTorch versions that support it, consider weights_only=True.
state_dict = torch.load(ckpt_path, map_location="cpu")

# 3) Checkpoints saved from DataParallel usually prefix every key with 'module.'.
#    Strip it only where it is a leading prefix; str.replace would also rewrite
#    a key that merely contains 'module.' somewhere in the middle.
dp_prefix = "module."
if any(k.startswith(dp_prefix) for k in state_dict.keys()):
    state_dict = {
        (k[len(dp_prefix):] if k.startswith(dp_prefix) else k): v
        for k, v in state_dict.items()
    }

# 4) Load the weights into the model
model.load_state_dict(state_dict)

print("✅ Model & weight baseline loaded")

✅ Model & weight baseline loaded


**Pindahkan ke Device & Jalankan Baseline Run**

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# assumption: baseline_loader is the dataloader for the clean dataset
base_probs, base_latencies_ms, base_error_rate = run_inference_collect(
    model, baseline_loader, device
)

# Reference values every drift scenario is compared against.
base_max_conf = base_probs.max(axis=1)
baseline_conf = float(base_max_conf.mean())
baseline_pos_rate = float((base_probs[:,1] > 0.5).mean())  # example for class 1
baseline_p95_latency = float(np.percentile(base_latencies_ms, 95))

# if a flag rate is wanted (e.g. the proportion of "violation" predictions)
baseline_flag_rate = baseline_pos_rate



**Kode baseline + single metric**

In [None]:
import numpy as np

# --- BASELINE METRICS (multi-criteria) ---
# Recomputed from base_probs / base_latencies_ms so this cell only depends on
# the results of the baseline inference run.
base_max_conf = base_probs.max(axis=1)               # [N]
baseline_conf = float(base_max_conf.mean())
baseline_conf_var = float(base_max_conf.var())
baseline_pos_rate = float((base_probs[:, 1] > 0.5).mean())
baseline_p95_latency = float(np.percentile(base_latencies_ms, 95))
baseline_flag_rate = baseline_pos_rate

# The baseline is compared with itself: drift terms are 0 and the ratio is 1.
metrics_baseline = ProductionMetrics(
    psi=0.0,
    kl=0.0,
    class_shift=0.0,
    avg_conf=baseline_conf,
    var_conf=baseline_conf_var,
    ratio_conf=1.0,                      # ← Single metric baseline = 1.0
    p95_latency_ms=baseline_p95_latency,
    baseline_p95_latency_ms=baseline_p95_latency,
    error_rate=base_error_rate,
    baseline_error_rate=base_error_rate,
    flag_rate=baseline_flag_rate,
    baseline_flag_rate=baseline_flag_rate,
)

D_baseline = compute_composite_production(metrics_baseline)


# --- DEFINISI SINGLE METRIC (CONFIDENCE RATIO) ---
def compute_single_metric(m: ProductionMetrics) -> float:
    """Single-metric comparator for RQ2: the confidence ratio (needs no labels)."""
    confidence_ratio = m.ratio_conf
    return float(confidence_ratio)


single_baseline = compute_single_metric(metrics_baseline)

print("Baseline – single metric (confidence ratio):", single_baseline)
print("Baseline – composite D_prod:", D_baseline)


# optional: start the single-vs-composite comparison list with the baseline row
single_vs_composite = []
single_vs_composite.append({
    "Scenario": "Baseline",
    "SingleMetric": single_baseline,
    "D_prod": D_baseline,
})

Baseline – single metric (confidence ratio): 1.0
Baseline – composite D_prod: 0.9999999837173927


**Satu skenario drift (lighting_loader)**

Buat transform khusus lighting (lebih gelap / lebih terang)

In [None]:
from torchvision import transforms
from PIL import ImageEnhance

# Resize + ToTensor only; unlike baseline_transform there is no Normalize step.
# NOTE(review): not referenced by any cell visible in this notebook — confirm
# whether it is still needed.
base_transform_no_norm = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# brightness factor: <1 = darker, >1 = brighter
BRIGHTNESS_FACTOR = 0.4  # example: 0.4 = fairly dark

class LightingDegradation(object):
    """Transform that rescales image brightness by a fixed factor."""

    def __init__(self, factor=0.4):
        # Multiplier handed to ImageEnhance.Brightness.enhance
        self.factor = factor

    def __call__(self, img):
        # img is still a PIL.Image at this point in the pipeline
        return ImageEnhance.Brightness(img).enhance(self.factor)

# Full transform for the lighting scenario: the brightness change runs first,
# followed by the same resize / tensor / normalize steps as the baseline.
lighting_transform = transforms.Compose([
    LightingDegradation(factor=BRIGHTNESS_FACTOR),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

Buat dataset & loader untuk lighting scenario

In [None]:
from torchvision import datasets
from torch.utils.data import DataLoader

# Same image folder as the baseline; only the transform differs.
lighting_dataset_path = "/content/drive/MyDrive/TESIS/dataset1"

lighting_dataset = datasets.ImageFolder(
    root=lighting_dataset_path,
    transform=lighting_transform,
)

# Loader settings mirror baseline_loader (batch size 32, no shuffling).
lighting_loader = DataLoader(
    lighting_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

print("✅ lighting_loader created — total images:", len(lighting_dataset))
print("Classes:", lighting_dataset.class_to_idx)

✅ lighting_loader created — total images: 749
Classes: {'helmet': 0, 'no_helmet': 1}


Jalankan blok “satu skenario drift (lighting_loader)”

In [None]:
# run inference on the lighting scenario
cur_probs, cur_latencies_ms, cur_error_rate = run_inference_collect(
    model, lighting_loader, device
)

# Per-sample confidence, share of class-1 predictions, and p95 batch latency.
cur_max_conf = cur_probs.max(axis=1)
cur_pos_rate = float((cur_probs[:,1] > 0.5).mean())
p95_latency = float(np.percentile(cur_latencies_ms, 95))

psi, kl, class_shift = compute_stability_components(
    base_probs=base_max_conf,        # the baseline confidence distribution can be used here
    cur_probs=cur_max_conf,          # or any other feature you choose
    base_pos_rate=baseline_pos_rate,
    cur_pos_rate=cur_pos_rate,
)

avg_conf, var_conf, ratio_conf = compute_confidence_stats(
    probs=cur_probs,
    base_conf=baseline_conf,
)

# Bundle current and baseline values for the composite score.
metrics_lighting = ProductionMetrics(
    psi=psi,
    kl=kl,
    class_shift=class_shift,
    avg_conf=avg_conf,
    var_conf=var_conf,
    ratio_conf=ratio_conf,
    p95_latency_ms=p95_latency,
    baseline_p95_latency_ms=baseline_p95_latency,
    error_rate=cur_error_rate,
    baseline_error_rate=base_error_rate,
    flag_rate=cur_pos_rate,
    baseline_flag_rate=baseline_flag_rate,
)

D_lighting = compute_composite_production(metrics_lighting)
print("D_prod (lighting):", D_lighting)

D_prod (lighting): 0.8819244850466733


**Satu skenario drift (contoh: blur_loader)**

Buat transform khusus blur

In [None]:
from PIL import Image
import cv2
import numpy as np
from torchvision import transforms

class GaussianBlurDegradation(object):
    """Transform that applies an OpenCV Gaussian blur with a square kernel."""

    def __init__(self, kernel_size=7):
        # Store an odd kernel size: even values are bumped up by one.
        if kernel_size % 2 == 1:
            self.kernel_size = kernel_size
        else:
            self.kernel_size = kernel_size+1

    def __call__(self, img):
        ksize = (self.kernel_size, self.kernel_size)
        # sigma argument 0 leaves the sigma choice to OpenCV
        blurred = cv2.GaussianBlur(np.array(img), ksize, 0)
        return Image.fromarray(blurred)


# Atau kalau mau motion blur:
# class MotionBlurDegradation(object):
#     def __init__(self, kernel_size=9):
#         self.kernel_size = kernel_size
#         self.kernel = np.zeros((kernel_size, kernel_size))
#         np.fill_diagonal(self.kernel, 1)
#         self.kernel /= kernel_size
#
#     def __call__(self, img):
#         img_cv = np.array(img)
#         img_blur = cv2.filter2D(img_cv, -1, self.kernel)
#         return Image.fromarray(img_blur)


# Full transform for the blur scenario: blur first, then the same
# resize / tensor / normalize steps as the baseline.
blur_transform = transforms.Compose([
    GaussianBlurDegradation(kernel_size=7),
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485,0.456,0.406],
        std=[0.229,0.224,0.225]
    )
])

Dataset and Loader - Blur

In [None]:
from torchvision import datasets
from torch.utils.data import DataLoader

# Same image folder as the baseline; only the transform differs.
blur_dataset_path = "/content/drive/MyDrive/TESIS/dataset1"

blur_dataset = datasets.ImageFolder(
    root=blur_dataset_path,
    transform=blur_transform,
)

print("Classes:", blur_dataset.class_to_idx)
print("Total blur images:", len(blur_dataset))

# Loader settings mirror baseline_loader (batch size 32, no shuffling).
blur_loader = DataLoader(
    blur_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=2,
    pin_memory=True
)

print("✅ blur_loader created")

Classes: {'helmet': 0, 'no_helmet': 1}
Total blur images: 749
✅ blur_loader created


Run Scenario - Blur

In [None]:
# Run inference on the blur scenario.
cur_probs, cur_latencies_ms, cur_error_rate = run_inference_collect(
    model, blur_loader, device
)

# Per-sample confidence, share of class-1 predictions, and p95 batch latency.
cur_max_conf = cur_probs.max(axis=1)
cur_pos_rate = float((cur_probs[:,1] > 0.5).mean())
p95_latency = float(np.percentile(cur_latencies_ms, 95))

# Drift of the confidence distribution and of the positive rate vs. baseline.
psi, kl, class_shift = compute_stability_components(
    base_probs=base_max_conf,
    cur_probs=cur_max_conf,
    base_pos_rate=baseline_pos_rate,
    cur_pos_rate=cur_pos_rate,
)

avg_conf, var_conf, ratio_conf = compute_confidence_stats(
    probs=cur_probs,
    base_conf=baseline_conf,
)

# Bundle current and baseline values for the composite score.
metrics_blur = ProductionMetrics(
    psi=psi,
    kl=kl,
    class_shift=class_shift,
    avg_conf=avg_conf,
    var_conf=var_conf,
    ratio_conf=ratio_conf,
    p95_latency_ms=p95_latency,
    baseline_p95_latency_ms=baseline_p95_latency,
    error_rate=cur_error_rate,
    baseline_error_rate=base_error_rate,
    flag_rate=cur_pos_rate,
    baseline_flag_rate=baseline_flag_rate,
)

D_blur = compute_composite_production(metrics_blur)
print("D_prod (blur):", D_blur)

D_prod (blur): 0.8750822075890523


**Satu skenario drift (contoh: compression_loader)**

Cell 1 — Definisi JPEG Compression Transform
“Di sini saya ingin mensimulasikan kompresi JPEG kualitas rendah (quality=30) untuk meniru kondisi bandwidth sempit / rekaman CCTV yang heavily compressed.”

In [None]:
from PIL import Image
import cv2
import numpy as np
from torchvision import transforms

class JpegCompressionDegradation(object):
    """
    Apply low-quality JPEG compression to simulate compression
    artifacts on CCTV / video streaming.
    """
    def __init__(self, quality=30):
        """
        quality: 0–100 (the smaller the value, the worse the quality).
        Examples:
          - 50  = light
          - 30  = medium
          - 10  = heavy
        """
        self.quality = int(quality)

    def __call__(self, img):
        """Round-trip ``img`` through an in-memory JPEG and return a PIL image.

        If encoding or decoding fails, the input image is returned unchanged.
        """
        # img: PIL.Image → NumPy array (PIL channel order, i.e. RGB / RGBA)
        img_cv = np.array(img)

        # OpenCV codecs treat 3-channel data as BGR. Reorder first so the JPEG
        # colour transform and chroma subsampling operate on the real
        # channels instead of on swapped red/blue planes.
        if img_cv.ndim == 3 and img_cv.shape[2] == 3:
            img_cv = cv2.cvtColor(img_cv, cv2.COLOR_RGB2BGR)
        elif img_cv.ndim == 3 and img_cv.shape[2] == 4:
            # JPEG has no alpha channel; drop it while reordering.
            img_cv = cv2.cvtColor(img_cv, cv2.COLOR_RGBA2BGR)

        # Encode to JPEG at the configured quality
        encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), self.quality]
        success, encimg = cv2.imencode(".jpg", img_cv, encode_param)
        if not success:
            # encoding failed: return the original image
            return img

        # Decode back to a 3-channel BGR array
        decimg = cv2.imdecode(encimg, cv2.IMREAD_COLOR)
        if decimg is None:
            # decoding failed: return the original image
            return img

        # Back to RGB order, then to PIL.Image
        return Image.fromarray(cv2.cvtColor(decimg, cv2.COLOR_BGR2RGB))


# Full transform for the compression scenario: JPEG round-trip first, then
# the same resize / tensor / normalize steps as the baseline.
compression_transform = transforms.Compose([
    JpegCompressionDegradation(quality=30),   # switch to 50/10 for other severity levels
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485,0.456,0.406],
        std=[0.229,0.224,0.225]
    )
])

Cell 2 — Dataset & DataLoader untuk Compression Scenario
Struktur datanya sama dengan baseline (helmet / no_helmet), tetapi sebelum masuk model setiap gambar dikompresi ulang dengan JPEG quality 30

In [None]:
from torchvision import datasets
from torch.utils.data import DataLoader
import os

# Same image folder as the baseline; only the transform differs.
compression_dataset_path = "/content/drive/MyDrive/TESIS/dataset1"

# Sanity check: list the class folders found under the dataset root.
print("Isi folder dataset1:", os.listdir(compression_dataset_path))

compression_dataset = datasets.ImageFolder(
    root=compression_dataset_path,
    transform=compression_transform,
)

print("Kelas & index:", compression_dataset.class_to_idx)
print("Total images (compression):", len(compression_dataset))

# Loader settings mirror baseline_loader (batch size 32, no shuffling).
compression_loader = DataLoader(
    compression_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=2,   # can be 0 if workers cause trouble on Colab
    pin_memory=True
)

print("✅ compression_loader created")

Isi folder dataset1: ['helmet', 'no_helmet']
Kelas & index: {'helmet': 0, 'no_helmet': 1}
Total images (compression): 749
✅ compression_loader created


Cell 3 — Inference Scenario Compression + Hitung D_prod
1. “Untuk skenario compression, saya kompres ulang setiap frame dengan JPEG quality=30.”
2. “Kemudian saya hitung PSI, KL, pergeseran distribusi kelas, confidence drift, latency p95, dan error rate.”
3. “Semua dimensi ini saya gabungkan dalam composite score D_prod. Penurunan D_prod pada skenario compression menunjukkan degradasi yang tidak hanya pada performa prediksi, tetapi juga dari sisi stabilitas dan operasional.”

In [None]:
# 1) Run inference on the compression scenario
cur_probs, cur_latencies_ms, cur_error_rate = run_inference_collect(
    model, compression_loader, device
)

# 2) Basic metrics (confidence, class distribution, latency)
cur_max_conf = cur_probs.max(axis=1)
cur_pos_rate = float((cur_probs[:, 1] > 0.5).mean())   # assumes index 1 = class "no_helmet" / violation
p95_latency = float(np.percentile(cur_latencies_ms, 95))

# 3) Stability components: PSI, KL, class distribution shift
psi, kl, class_shift = compute_stability_components(
    base_probs=base_max_conf,        # baseline confidence distribution
    cur_probs=cur_max_conf,          # confidence distribution under compression
    base_pos_rate=baseline_pos_rate,
    cur_pos_rate=cur_pos_rate,
)

# 4) Confidence stats: mean, variance, and ratio to the baseline
avg_conf, var_conf, ratio_conf = compute_confidence_stats(
    probs=cur_probs,
    base_conf=baseline_conf,
)

# 5) Wrap everything in ProductionMetrics
metrics_compression = ProductionMetrics(
    psi=psi,
    kl=kl,
    class_shift=class_shift,
    avg_conf=avg_conf,
    var_conf=var_conf,
    ratio_conf=ratio_conf,
    p95_latency_ms=p95_latency,
    baseline_p95_latency_ms=baseline_p95_latency,
    error_rate=cur_error_rate,
    baseline_error_rate=base_error_rate,
    flag_rate=cur_pos_rate,
    baseline_flag_rate=baseline_flag_rate,
)

# 6) Composite Production Score (D_prod) for the compression scenario
D_compression = compute_composite_production(metrics_compression)

print("===== Compression Scenario =====")
print("PSI          :", psi)
print("KL           :", kl)
print("Class shift  :", class_shift)
print("Avg conf     :", avg_conf)
print("Conf ratio   :", ratio_conf)
print("p95 latency  :", p95_latency)
print("Error rate   :", cur_error_rate)
print("D_prod (compression):", D_compression)

===== Compression Scenario =====
PSI          : 0.021280441175532808
KL           : 0.010416851693180774
Class shift  : 0.001335113484646197
Avg conf     : 0.8161340951919556
Conf ratio   : 0.9839042212138425
p95 latency  : 1043.9660906791687
Error rate   : 0.0
D_prod (compression): 0.9383694572497616


**Skenario noise (Gaussian noise)**

Cell 1 — Definisi Gaussian Noise Transform

Di skenario noise, saya menambahkan Gaussian noise ke tiap frame dengan standar deviasi 25. Ini mensimulasikan gangguan sensor kamera / interferensi sinyal

In [None]:
from PIL import Image
import numpy as np
from torchvision import transforms

class GaussianNoiseDegradation(object):
    """
    Add Gaussian noise to an image to simulate sensor disturbance /
    interference on CCTV footage.
    """
    def __init__(self, mean=0.0, std=25.0):
        """
        mean: noise mean (0 = neutral)
        std : noise standard deviation (larger = stronger noise)
        """
        self.mean = mean
        self.std = std

    def __call__(self, img):
        # PIL.Image → float32 array so the addition cannot wrap around
        pixels = np.array(img).astype(np.float32)

        # One noise value per array element, drawn from NumPy's global RNG
        noise = np.random.normal(self.mean, self.std, pixels.shape).astype(np.float32)

        # Keep values inside [0, 255] before casting back to 8-bit
        noisy = np.clip(pixels + noise, 0, 255).astype(np.uint8)

        return Image.fromarray(noisy)


# Full transform for the noise scenario: noise first, then the same
# resize / tensor / normalize steps as the baseline.
noise_transform = transforms.Compose([
    GaussianNoiseDegradation(mean=0.0, std=25.0),  # raise std to 40/50 for heavier noise
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485,0.456,0.406],
        std=[0.229,0.224,0.225]
    )
])

Cell 2 — Dataset & DataLoader untuk Noise Scenario

In [None]:
from torchvision import datasets
from torch.utils.data import DataLoader
import os

# Same image folder as the baseline; only the transform differs.
noise_dataset_path = "/content/drive/MyDrive/TESIS/dataset1"

# Sanity check: list the class folders found under the dataset root.
print("Isi folder dataset1 (noise):", os.listdir(noise_dataset_path))

noise_dataset = datasets.ImageFolder(
    root=noise_dataset_path,
    transform=noise_transform,
)

print("Kelas & index:", noise_dataset.class_to_idx)
print("Total images (noise):", len(noise_dataset))

# Loader settings mirror baseline_loader (batch size 32, no shuffling).
noise_loader = DataLoader(
    noise_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=2,
    pin_memory=True
)

print("✅ noise_loader created")

Isi folder dataset1 (noise): ['helmet', 'no_helmet']
Kelas & index: {'helmet': 0, 'no_helmet': 1}
Total images (noise): 749
✅ noise_loader created


Cell 3 — Inference Scenario Noise + Hitung D_prod
“Untuk noise ringan, idealnya D_prod tidak langsung jatuh drastis (model masih robust).”

“Untuk noise berat (bisa diuji dengan std lebih besar), D_prod akan turun karena kombinasi drift, confidence, dan mungkin error rate naik.”

“Ini menunjukkan composite score bisa membedakan gangguan sementara vs degradasi serius.”

In [None]:
# 1) Run inference on the noise scenario
cur_probs, cur_latencies_ms, cur_error_rate = run_inference_collect(
    model, noise_loader, device
)

# 2) Basic metrics (confidence, class distribution, latency)
cur_max_conf = cur_probs.max(axis=1)
cur_pos_rate = float((cur_probs[:, 1] > 0.5).mean())  # assumes index 1 = class "no_helmet" / violation
p95_latency = float(np.percentile(cur_latencies_ms, 95))

# 3) Stability components: PSI, KL, class distribution shift
psi, kl, class_shift = compute_stability_components(
    base_probs=base_max_conf,
    cur_probs=cur_max_conf,
    base_pos_rate=baseline_pos_rate,
    cur_pos_rate=cur_pos_rate,
)

# 4) Confidence stats: mean, variance, and ratio to the baseline
avg_conf, var_conf, ratio_conf = compute_confidence_stats(
    probs=cur_probs,
    base_conf=baseline_conf,
)

# 5) Wrap everything in ProductionMetrics
metrics_noise = ProductionMetrics(
    psi=psi,
    kl=kl,
    class_shift=class_shift,
    avg_conf=avg_conf,
    var_conf=var_conf,
    ratio_conf=ratio_conf,
    p95_latency_ms=p95_latency,
    baseline_p95_latency_ms=baseline_p95_latency,
    error_rate=cur_error_rate,
    baseline_error_rate=base_error_rate,
    flag_rate=cur_pos_rate,
    baseline_flag_rate=baseline_flag_rate,
)

# 6) Composite Production Score (D_prod) for the noise scenario
D_noise = compute_composite_production(metrics_noise)

print("===== Noise Scenario =====")
print("PSI          :", psi)
print("KL           :", kl)
print("Class shift  :", class_shift)
print("Avg conf     :", avg_conf)
print("Conf ratio   :", ratio_conf)
print("Var conf     :", var_conf)
print("p95 latency  :", p95_latency)
print("Error rate   :", cur_error_rate)
print("D_prod (noise):", D_noise)

===== Noise Scenario =====
PSI          : 0.35742514978672846
KL           : 0.1791860601273596
Class shift  : 0.13484646194926567
Avg conf     : 0.7432430982589722
Conf ratio   : 0.8960292506748656
Var conf     : 0.021026989445090294
p95 latency  : 1074.2562532424924
Error rate   : 0.0
D_prod (noise): 0.5744365595686745


**Single Metric**

In [None]:
# Single metric (confidence ratio) for every drift scenario.
single_lighting = compute_single_metric(metrics_lighting)
single_blur = compute_single_metric(metrics_blur)
single_compression = compute_single_metric(metrics_compression)
single_noise = compute_single_metric(metrics_noise)

# Build the comparison table in one step so re-running the cell never
# duplicates rows. The Baseline row is included as the reference point;
# resetting the list to [] here used to drop it.
single_vs_composite = [
    {"Scenario": scenario, "SingleMetric": single_value, "D_prod": composite}
    for scenario, single_value, composite in (
        ("Baseline", single_baseline, D_baseline),
        ("Lighting Degradation", single_lighting, D_lighting),
        ("Blur Degradation", single_blur, D_blur),
        ("Compression (JPEG)", single_compression, D_compression),
        ("Gaussian Noise", single_noise, D_noise),
    )
]

single_vs_composite

[{'Scenario': 'Lighting Degradation',
  'SingleMetric': 1.0003970718518684,
  'D_prod': 0.8819244850466733},
 {'Scenario': 'Blur Degradation',
  'SingleMetric': 0.9852399062795861,
  'D_prod': 0.8750822075890523},
 {'Scenario': 'Compression (JPEG)',
  'SingleMetric': 0.9839042212138425,
  'D_prod': 0.9383694572497616},
 {'Scenario': 'Gaussian Noise',
  'SingleMetric': 0.8960292506748656,
  'D_prod': 0.5744365595686745}]

**Untuk kebutuhan laporan**

1. Baseline

In [None]:
import numpy as np

# --- 1) Baseline inference (if not done yet) ---
# base_probs, base_latencies_ms, base_error_rate = run_inference_collect(
#     model, baseline_loader, device
# )

# --- 2) Basic baseline metrics ---
base_max_conf = base_probs.max(axis=1)               # [N]
baseline_conf = float(base_max_conf.mean())
baseline_conf_var = float(base_max_conf.var())
baseline_pos_rate = float((base_probs[:, 1] > 0.5).mean())  # assumes idx 1 = violation class
baseline_p95_latency = float(np.percentile(base_latencies_ms, 95))
baseline_flag_rate = baseline_pos_rate

print("Baseline basic metrics:")
print("  Avg confidence :", baseline_conf)
print("  Var confidence :", baseline_conf_var)
print("  Pos rate       :", baseline_pos_rate)
print("  p95 latency    :", baseline_p95_latency)
print("  Error rate     :", base_error_rate)


# --- 3) Wrap the baseline in ProductionMetrics ---
# the baseline is compared with itself → drift = 0, ratio_conf = 1
metrics_baseline = ProductionMetrics(
    psi=0.0,
    kl=0.0,
    class_shift=0.0,
    avg_conf=baseline_conf,
    var_conf=baseline_conf_var,
    ratio_conf=1.0,
    p95_latency_ms=baseline_p95_latency,
    baseline_p95_latency_ms=baseline_p95_latency,
    error_rate=base_error_rate,
    baseline_error_rate=base_error_rate,
    flag_rate=baseline_flag_rate,
    baseline_flag_rate=baseline_flag_rate,
)

D_baseline = compute_composite_production(metrics_baseline)

print("\nBaseline Composite Score:")
print("  D_prod (baseline) =", D_baseline)


# --- 4) Start the summary list that collects one row per scenario ---
summary_rows = []

# Status threshold: a D_prod of 0.75 or more is reported as "Healthy".
summary_rows.append({
    "Scenario": "Baseline",
    "PSI": metrics_baseline.psi,
    "KL": metrics_baseline.kl,
    "ClassShift": metrics_baseline.class_shift,
    "ConfidenceRatio": metrics_baseline.ratio_conf,
    "p95Latency_ms": baseline_p95_latency,
    "ErrorRate": base_error_rate,
    "D_prod": D_baseline,
    "Status": "Healthy" if D_baseline >= 0.75 else "Degraded"
})

summary_rows

Baseline basic metrics:
  Avg confidence : 0.829485297203064
  Var confidence : 0.019700925797224045
  Pos rate       : 0.12283044058744993
  p95 latency    : 462.8087520599365
  Error rate     : 0.0

Baseline Composite Score:
  D_prod (baseline) = 0.9999999837173927


[{'Scenario': 'Baseline',
  'PSI': 0.0,
  'KL': 0.0,
  'ClassShift': 0.0,
  'ConfidenceRatio': 1.0,
  'p95Latency_ms': 462.8087520599365,
  'ErrorRate': 0.0,
  'D_prod': 0.9999999837173927,
  'Status': 'Healthy'}]

2. Lighting Degradation

In [None]:
import numpy as np

# --- 1) Run inference on the Lighting Degradation scenario ---
cur_probs_l, cur_latencies_l, cur_error_l = run_inference_collect(
    model, lighting_loader, device
)

# --- 2) Basic metrics: confidence, class distribution, latency ---
# Per-sample max probability across axis 1.
cur_max_conf_l = cur_probs_l.max(axis=1)
# Fraction of samples whose column-1 probability exceeds 0.5.
cur_pos_rate_l = float((cur_probs_l[:, 1] > 0.5).mean())   # assumes index 1 = violation class
p95_latency_l = float(np.percentile(cur_latencies_l, 95))

# --- 3) Stability components: PSI, KL, class shift ---
psi_l, kl_l, class_shift_l = compute_stability_components(
    base_probs=base_max_conf,         # baseline confidence distribution
    cur_probs=cur_max_conf_l,         # lighting confidence distribution
    base_pos_rate=baseline_pos_rate,
    cur_pos_rate=cur_pos_rate_l,
)

# --- 4) Confidence stats: average, variance, ratio vs baseline ---
avg_conf_l, var_conf_l, ratio_conf_l = compute_confidence_stats(
    probs=cur_probs_l,
    base_conf=baseline_conf,
)

print("Lighting basic metrics:")
print("  PSI          :", psi_l)
print("  KL           :", kl_l)
print("  Class shift  :", class_shift_l)
print("  Avg conf     :", avg_conf_l)
print("  Var conf     :", var_conf_l)
print("  Conf ratio   :", ratio_conf_l)
print("  p95 latency  :", p95_latency_l)
print("  Error rate   :", cur_error_l)

# --- 5) Wrap everything into ProductionMetrics ---
metrics_lighting = ProductionMetrics(
    psi=psi_l,
    kl=kl_l,
    class_shift=class_shift_l,
    avg_conf=avg_conf_l,
    var_conf=var_conf_l,
    ratio_conf=ratio_conf_l,
    p95_latency_ms=p95_latency_l,
    baseline_p95_latency_ms=baseline_p95_latency,
    error_rate=cur_error_l,
    baseline_error_rate=base_error_rate,
    flag_rate=cur_pos_rate_l,
    baseline_flag_rate=baseline_flag_rate,
)

D_lighting = compute_composite_production(metrics_lighting)

print("\nLighting Composite Score:")
print("  D_prod (lighting) =", D_lighting)

# --- 6) Add this scenario to summary_rows for the summary table ---
# Drop any row left behind by a previous run of this cell first, so that
# re-running the cell replaces the "Lighting Degradation" row instead of
# appending a duplicate. The slice assignment mutates the existing list object.
summary_rows[:] = [
    row for row in summary_rows if row["Scenario"] != "Lighting Degradation"
]
summary_rows.append({
    "Scenario": "Lighting Degradation",
    "PSI": psi_l,
    "KL": kl_l,
    "ClassShift": class_shift_l,
    "ConfidenceRatio": ratio_conf_l,
    "p95Latency_ms": p95_latency_l,
    "ErrorRate": cur_error_l,
    "D_prod": D_lighting,
    # 0.75 is the Healthy/Degraded cut-off applied to D_prod.
    "Status": "Healthy" if D_lighting >= 0.75 else "Degraded"
})

summary_rows

Lighting basic metrics:
  PSI          : 0.0037639731737403375
  KL           : 0.0018663425954703484
  Class shift  : 0.025367156208277702
  Avg conf     : 0.8298146724700928
  Var conf     : 0.019636105746030807
  Conf ratio   : 1.0003970718518684
  p95 latency  : 1207.3722958564754
  Error rate   : 0.0

Lighting Composite Score:
  D_prod (lighting) = 0.7415473019717536


[{'Scenario': 'Baseline',
  'PSI': 0.0,
  'KL': 0.0,
  'ClassShift': 0.0,
  'ConfidenceRatio': 1.0,
  'p95Latency_ms': 462.8087520599365,
  'ErrorRate': 0.0,
  'D_prod': 0.9999999837173927,
  'Status': 'Healthy'},
 {'Scenario': 'Lighting Degradation',
  'PSI': 0.0037639731737403375,
  'KL': 0.0018663425954703484,
  'ClassShift': 0.025367156208277702,
  'ConfidenceRatio': 1.0003970718518684,
  'p95Latency_ms': 1207.3722958564754,
  'ErrorRate': 0.0,
  'D_prod': 0.7415473019717536,
  'Status': 'Degraded'}]

3. Blur Degradation

In [None]:
import numpy as np

# --- 1) Run inference on the Blur Degradation scenario ---
cur_probs_b, cur_latencies_b, cur_error_b = run_inference_collect(
    model, blur_loader, device
)

# --- 2) Basic metrics: confidence, class distribution, latency ---
# Per-sample max probability across axis 1.
cur_max_conf_b = cur_probs_b.max(axis=1)
# Fraction of samples whose column-1 probability exceeds 0.5.
cur_pos_rate_b = float((cur_probs_b[:, 1] > 0.5).mean())   # assumes index 1 = violation class
p95_latency_b = float(np.percentile(cur_latencies_b, 95))

# --- 3) Stability components: PSI, KL, class shift ---
psi_b, kl_b, class_shift_b = compute_stability_components(
    base_probs=base_max_conf,         # baseline confidence distribution
    cur_probs=cur_max_conf_b,         # blur confidence distribution
    base_pos_rate=baseline_pos_rate,
    cur_pos_rate=cur_pos_rate_b,
)

# --- 4) Confidence stats: average, variance, ratio vs baseline ---
avg_conf_b, var_conf_b, ratio_conf_b = compute_confidence_stats(
    probs=cur_probs_b,
    base_conf=baseline_conf,
)

print("Blur basic metrics:")
print("  PSI          :", psi_b)
print("  KL           :", kl_b)
print("  Class shift  :", class_shift_b)
print("  Avg conf     :", avg_conf_b)
print("  Var conf     :", var_conf_b)
print("  Conf ratio   :", ratio_conf_b)
print("  p95 latency  :", p95_latency_b)
print("  Error rate   :", cur_error_b)

# --- 5) Wrap everything into ProductionMetrics ---
metrics_blur = ProductionMetrics(
    psi=psi_b,
    kl=kl_b,
    class_shift=class_shift_b,
    avg_conf=avg_conf_b,
    var_conf=var_conf_b,
    ratio_conf=ratio_conf_b,
    p95_latency_ms=p95_latency_b,
    baseline_p95_latency_ms=baseline_p95_latency,
    error_rate=cur_error_b,
    baseline_error_rate=base_error_rate,
    flag_rate=cur_pos_rate_b,
    baseline_flag_rate=baseline_flag_rate,
)

D_blur = compute_composite_production(metrics_blur)

print("\nBlur Composite Score:")
print("  D_prod (blur) =", D_blur)

# --- 6) Add this scenario to summary_rows for the summary table ---
# Drop any row left behind by a previous run of this cell first, so that
# re-running the cell replaces the "Blur Degradation" row instead of
# appending a duplicate. The slice assignment mutates the existing list object.
summary_rows[:] = [
    row for row in summary_rows if row["Scenario"] != "Blur Degradation"
]
summary_rows.append({
    "Scenario": "Blur Degradation",
    "PSI": psi_b,
    "KL": kl_b,
    "ClassShift": class_shift_b,
    "ConfidenceRatio": ratio_conf_b,
    "p95Latency_ms": p95_latency_b,
    "ErrorRate": cur_error_b,
    "D_prod": D_blur,
    # 0.75 is the Healthy/Degraded cut-off applied to D_prod.
    "Status": "Healthy" if D_blur >= 0.75 else "Degraded"
})

summary_rows

Blur basic metrics:
  PSI          : 0.023253209991817087
  KL           : 0.011536982554272789
  Class shift  : 0.005340453938584788
  Avg conf     : 0.8172420263290405
  Var conf     : 0.019413800910115242
  Conf ratio   : 0.9852399062795861
  p95 latency  : 1027.0665526390076
  Error rate   : 0.0

Blur Composite Score:
  D_prod (blur) = 0.7687415693735673


[{'Scenario': 'Baseline',
  'PSI': 0.0,
  'KL': 0.0,
  'ClassShift': 0.0,
  'ConfidenceRatio': 1.0,
  'p95Latency_ms': 462.8087520599365,
  'ErrorRate': 0.0,
  'D_prod': 0.9999999837173927,
  'Status': 'Healthy'},
 {'Scenario': 'Lighting Degradation',
  'PSI': 0.0037639731737403375,
  'KL': 0.0018663425954703484,
  'ClassShift': 0.025367156208277702,
  'ConfidenceRatio': 1.0003970718518684,
  'p95Latency_ms': 1207.3722958564754,
  'ErrorRate': 0.0,
  'D_prod': 0.7415473019717536,
  'Status': 'Degraded'},
 {'Scenario': 'Blur Degradation',
  'PSI': 0.023253209991817087,
  'KL': 0.011536982554272789,
  'ClassShift': 0.005340453938584788,
  'ConfidenceRatio': 0.9852399062795861,
  'p95Latency_ms': 1027.0665526390076,
  'ErrorRate': 0.0,
  'D_prod': 0.7687415693735673,
  'Status': 'Healthy'}]

**Fokus ke Single Baseline**