<a href="https://colab.research.google.com/github/weso500/MOSAICRev/blob/main/TranADTest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# --- Colab Setup: Run this cell first ---
!pip install torch numpy pandas scikit-learn
!mkdir -p '/content/drive/MyDrive/Globecom Paper/For Jason/IOT-Anomaly-Detection/Raw_Data'
# # You must manually upload your RFQ.npy and RFQ_labels.npy files
# # to the 'Raw_Data' directory in your Colab environment.




In [2]:
import numpy as np
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler
from google.colab import drive
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import time

# --- Colab Setup: Run this cell first ---
# NOTE: The pip install and mkdir commands are assumed to have been run successfully.
# !pip install torch numpy pandas scikit-learn
# !mkdir -p '/content/drive/MyDrive/Globecom Paper/For Jason/IOT-Anomaly-Detection/Raw_Data'

# --- Step 1: Mount Google Drive (Essential for accessing files) ---
try:
    print("Mounting Google Drive...")
    # Using 'force_remount=True' is often necessary if the notebook has disconnected/reconnected
    # drive.mount('/content/drive', force_remount=True)
    drive.mount('/content/drive')
    print("Google Drive mounted successfully.")
except Exception as e:
    print(f"Error mounting Google Drive: {e}")


# --- 2. CONFIGURATION ---
SYSTEM = 'RFQ'
# IMPORTANT: Full Google Drive path for correct file access
RAW_DATA_DIR = '/content/drive/MyDrive/Globecom Paper/For Jason/IOT-Anomaly-Detection/Raw_Data'
WINDOW_SIZE = 100         # Sequence length (L) for the Transformer
N_FEATURES = 14           # Dimension of the feature vector (D_model)
PREDICTION_LENGTH = 1     # Target sequence length (1 for anomaly detection)
N_EPOCHS = 20             # Number of training epochs
BATCH_SIZE = 32
LEARNING_RATE = 1e-4

# Transformer Hyperparameters
D_MODEL = N_FEATURES      # Embedding dimension
N_HEADS = 2               # Number of attention heads
N_LAYERS = 2              # Number of encoder/decoder layers
DROPOUT = 0.1

# --- 3. DATA UTILITIES ---

def load_and_preprocess_data(system, raw_data_dir):
    """Loads, splits, and preprocesses the accelerator data."""
    print(f"Loading data for system: {system}")

    # Construct full paths based on the configured RAW_DATA_DIR
    x_path = os.path.join(raw_data_dir, f'{system}.npy')
    y_path = os.path.join(raw_data_dir, f'{system}_labels.npy')

    X, Y = None, None

    if not os.path.exists(x_path) or not os.path.exists(y_path):
        print(f"--- WARNING: Data files not found. Creating DUMMY DATA. ---")
        # Creating dummy data (1000 pulses, 200 time steps, 14 features)
        X = np.random.rand(1000, 200, N_FEATURES)
        # Dummy labels matching the expected structure (index, type, subtype)
        Y = np.array([[i, 'Run', 'type'] for i in range(800)] +
                     [[i + 800, 'Fault', 'type'] for i in range(200)], dtype=object)
    else:
        # --- FIX FOR OBJECT ARRAY ERROR ---
        try:
            # Attempt to load data with default settings
            X = np.load(x_path)
            Y = np.load(y_path, allow_pickle=True)
            print("Real data loaded successfully.")
        except ValueError as e:
            # This catch block specifically addresses the error you encountered.
            print(f"\n--- ERROR HANDLING: Caught ValueError during loading: {e} ---")
            print("Attempting to reload data with explicit allow_pickle=True for both files.")
            try:
                # Reload, forcing allow_pickle=True for the data file too, just in case.
                X = np.load(x_path, allow_pickle=True)
                Y = np.load(y_path, allow_pickle=True)
                # Verify the structure to ensure loading was correct
                if X.ndim < 2 or Y.dtype != object:
                    print("WARNING: Data structure seems unusual after pickling. Proceeding...")
                print("Reload successful using allow_pickle=True.")
            except Exception as inner_e:
                print(f"CRITICAL ERROR: Failed to load data even with allow_pickle=True. {inner_e}")
                raise inner_e # Re-raise the error to stop execution

    # Check if data was loaded/created successfully
    if X is None or Y is None:
        raise RuntimeError("Data failed to load or create dummy data.")

    # Identify normal and fault pulses based on the second column of Y (['Run', 'Fault'])
    fault_indices, normal_indices = np.where(Y[:,1] == 'Fault')[0], np.where(Y[:,1] == 'Run')[0]
    Xnormal = X[normal_indices, :, :]
    Xfault = X[fault_indices, :, :]

    print(f"Normal Data Pulses: {len(Xnormal)}, Fault Data Pulses: {len(Xfault)}")

    # 1. CONCATENATE ALL PULSES INTO A SINGLE LONG TIME SERIES (MTS)
    n_pulses, n_times, n_features = Xnormal.shape
    X_mts = Xnormal.reshape(-1, n_features)

    # 2. STANDARDIZATION (Fit only on normal training data)
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X_mts)
    print(f"Concatenated and Scaled MTS shape: {X_scaled.shape}")

    # Process Fault Data (for testing) using the scaler fitted on normal data
    X_fault_mts = Xfault.reshape(-1, n_features)
    X_fault_scaled = scaler.transform(X_fault_mts)

    return X_scaled, X_fault_scaled, scaler


def create_windows(data, window_size, prediction_length):
    """Slices the time series into sequences (windows)."""
    windows = []
    # Data must be at least window_size long
    total_len = len(data) - window_size + 1

    # Using reconstruction setup (Input == Target) for a true TranAD-like model
    for i in range(total_len):
        input_seq = data[i : i + window_size]
        # Target sequence is the input sequence for reconstruction
        target_seq = data[i : i + window_size]
        windows.append((input_seq, target_seq))

    # Stack the lists into numpy arrays
    X_win = np.array([w[0] for w in windows], dtype=np.float32)
    Y_win = np.array([w[1] for w in windows], dtype=np.float32)

    print(f"Created {len(X_win)} windows of size {window_size}.")

    return X_win, Y_win


class TimeSeriesDataset(Dataset):
    """A PyTorch Dataset for the time series windows."""
    def __init__(self, X, Y):
        self.X = torch.from_numpy(X).float()
        self.Y = torch.from_numpy(Y).float()

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        # Returns (input sequence, target sequence)
        return self.X[idx], self.Y[idx]

# --- 4. PYTORCH MODEL IMPLEMENTATION (Simplified TranAD Baseline) ---

class PositionalEncoding(nn.Module):
    """Simple sinusoidal positional encoding."""
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        # Denominator for the scaling factor
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1) # Shape (Max_Len, 1, D_model)
        self.register_buffer('pe', pe)

    def forward(self, x):
        # x shape: (Sequence_Length, Batch_Size, D_model)
        # Add positional encoding
        return x + self.pe[:x.size(0), :]


class TransformerAD(nn.Module):
    """
    A Transformer Encoder model for time series reconstruction.
    This serves as the TranAD baseline.
    """
    def __init__(self, d_model, n_heads, n_layers, dropout, window_size):
        super().__init__()
        self.d_model = d_model

        # 1. Positional Encoding
        self.pos_encoder = PositionalEncoding(d_model, max_len=window_size)

        # 2. Encoder Stack
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=n_heads,
            dropout=dropout,
            batch_first=False # Expects (L, B, D)
        )
        # [Image of Transformer Encoder-Decoder Architecture]
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=n_layers)

        # 3. Decoder Head (Linear layer for reconstruction)
        self.decoder = nn.Linear(d_model, d_model)


    def forward(self, src):
        # src shape: (Batch_Size, Sequence_Length, D_model) -> (B, L, D)

        # Transpose to (Sequence_Length, Batch_Size, D_model) -> (L, B, D)
        src = src.permute(1, 0, 2)

        # Add positional encoding
        src = self.pos_encoder(src)

        # Encode (L, B, D) -> (L, B, D)
        memory = self.transformer_encoder(src)

        # Decode (reconstruction) - apply linear layer element-wise
        output = self.decoder(memory)

        # Reshape back to (Batch_Size, Sequence_Length, D_model) -> (B, L, D)
        output = output.permute(1, 0, 2)

        # Output is the reconstructed input sequence
        return output

# --- 5. TRAINING AND EVALUATION FUNCTIONS ---

def train_model(model, dataloader, criterion, optimizer, device, n_epochs):
    """Trains the model on normal data."""
    model.train()
    print(f"Training on device: {device}")

    for epoch in range(1, n_epochs + 1):
        total_loss = 0
        for batch_x, batch_y in dataloader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)

            optimizer.zero_grad()

            reconstruction = model(batch_x)

            loss = criterion(reconstruction, batch_y)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        avg_loss = total_loss / len(dataloader)
        print(f"Epoch {epoch}/{n_epochs}, Loss: {avg_loss:.6f}")

    print("Training complete.")


def evaluate_missing_modalities(model, X_normal_test, X_fault_test, window_size, missing_modalities_count, device):
    """
    Performs the crucial missing modality test for the paper.
    Calculates reconstruction error on the full 14-D vector.
    """
    model.eval()

    # 1. Create windowed test data
    X_normal_win, Y_normal_win = create_windows(X_normal_test, window_size, PREDICTION_LENGTH)
    X_fault_win, Y_fault_win = create_windows(X_fault_test, window_size, PREDICTION_LENGTH)

    results = {}

    with torch.no_grad():
        for name, X_win, Y_win in [('Normal', X_normal_win, Y_normal_win), ('Fault', X_fault_win, Y_fault_win)]:

            # Convert to PyTorch tensor
            X_tensor = torch.from_numpy(X_win).float().to(device)
            Y_tensor = torch.from_numpy(Y_win).float().to(device) # Original Ground Truth

            # --- Full Modality Test (Baseline) ---
            reconstruction_full = model(X_tensor)
            error_full = torch.mean((reconstruction_full - Y_tensor)**2).item()
            results[f'{name} - Full Modalities (MSE)'] = error_full

            # --- Missing Modality Test (Core Paper Comparison) ---
            X_missing = X_tensor.clone()

            # Zero out the first 'missing_modalities_count' features in the input sequence
            X_missing[:, :, :missing_modalities_count] = 0.0

            reconstruction_missing = model(X_missing)

            # Calculate error against the *original* ground truth (Y_tensor)
            error_missing = torch.mean((reconstruction_missing - Y_tensor)**2).item()
            results[f'{name} - Missing {missing_modalities_count} Modalities (MSE)'] = error_missing

    print("\n--- ANOMALY DETECTION TEST RESULTS (Reconstruction Error) ---")
    for key, val in results.items():
        print(f"{key}: {val:.6f}")


# --- 6. EXECUTION ---
if __name__ == "__main__":
    # Detect device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # 1. Load and prepare data (MTS)
    X_scaled_mts, X_fault_scaled_mts, scaler = None, None, None
    try:
        X_scaled_mts, X_fault_scaled_mts, scaler = load_and_preprocess_data(SYSTEM, RAW_DATA_DIR)
    except RuntimeError as e:
        print(f"Skipping model execution due to data loading failure: {e}")
        exit() # Exit the script if data failed to load/create
    except Exception as e:
        print(f"An unknown error occurred during data loading: {e}")
        exit()

    # Simple split of normal data into training (first 80%) and testing (last 20%)
    split_idx = int(len(X_scaled_mts) * 0.8)
    X_train_mts = X_scaled_mts[:split_idx]
    X_normal_test_mts = X_scaled_mts[split_idx:]
    X_normal_test_mts = X_normal_test_mts[:18999]
    X_fault_scaled_mts = X_fault_scaled_mts[:999]

    # 2. Create windowed datasets
    X_train_win, Y_train_win = create_windows(X_train_mts, WINDOW_SIZE, PREDICTION_LENGTH)

    # Create the PyTorch DataLoader
    train_dataset = TimeSeriesDataset(X_train_win, Y_train_win)
    train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

    # 3. Initialize Model, Loss, and Optimizer
    model = TransformerAD(D_MODEL, N_HEADS, N_LAYERS, DROPOUT, WINDOW_SIZE).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)

    # 4. Train the model
    start_time = time.time()
    train_model(model, train_dataloader, criterion, optimizer, device, N_EPOCHS)
    end_time = time.time()
    print(f"Total training time: {end_time - start_time:.2f} seconds")

    # 5. Run the core experiment: Evaluate Missing Modalities
    evaluate_missing_modalities(
        model,
        X_normal_test_mts,
        X_fault_scaled_mts,
        WINDOW_SIZE,
        missing_modalities_count=1,
        device=device
    )

Mounting Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Google Drive mounted successfully.
Using device: cuda
Loading data for system: RFQ
Real data loaded successfully.
Normal Data Pulses: 690, Fault Data Pulses: 182
Concatenated and Scaled MTS shape: (3105000, 14)
Created 2483901 windows of size 100.




Training on device: cuda
Epoch 1/20, Loss: 0.021878
Epoch 2/20, Loss: 0.007531
Epoch 3/20, Loss: 0.006641
Epoch 4/20, Loss: 0.006204
Epoch 5/20, Loss: 0.005915
Epoch 6/20, Loss: 0.005709
Epoch 7/20, Loss: 0.005549
Epoch 8/20, Loss: 0.005419
Epoch 9/20, Loss: 0.005302
Epoch 10/20, Loss: 0.005204
Epoch 11/20, Loss: 0.005131
Epoch 12/20, Loss: 0.005079
Epoch 13/20, Loss: 0.005042
Epoch 14/20, Loss: 0.005012
Epoch 15/20, Loss: 0.004988
Epoch 16/20, Loss: 0.004969
Epoch 17/20, Loss: 0.004952
Epoch 18/20, Loss: 0.004940
Epoch 19/20, Loss: 0.004927
Epoch 20/20, Loss: 0.004916
Training complete.
Total training time: 11506.68 seconds
Created 18900 windows of size 100.
Created 900 windows of size 100.


OutOfMemoryError: CUDA out of memory. Tried to allocate 14.42 GiB. GPU 0 has a total capacity of 14.74 GiB of which 13.96 GiB is free. Process 23215 has 798.00 MiB memory in use. Of the allocated memory 422.88 MiB is allocated by PyTorch, and 241.12 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [3]:
from sklearn.metrics import roc_auc_score, roc_curve
def evaluate_missing_modalities(model, X_normal_test, X_fault_test, window_size, missing_modalities_count, batch_size, device):
    """
    MODIFIED: Performs the core evaluation and collects the reconstruction error
    (anomaly score) for *each window* to enable AUC calculation.

    Returns: Tuple of (normal_full_scores, fault_full_scores, normal_missing_scores, fault_missing_scores)
    """
    model.eval()

    normal_full_scores = []
    fault_full_scores = []
    normal_missing_scores = []
    fault_missing_scores = []

    print("\nStarting evaluation of missing modalities...")

    with torch.no_grad():
        for name, X_mts in [('Normal', X_normal_test), ('Fault', X_fault_test)]:

            X_win, Y_win = create_windows(X_mts, window_size, PREDICTION_LENGTH)

            # Guard against insufficient data for windows
            if len(X_win) == 0:
                print(f"Warning: Not enough {name} data ({len(X_mts)} steps) to create windows of size {window_size}. Skipping evaluation.")
                continue

            test_dataset = TimeSeriesDataset(X_win, Y_win)
            # Use batch_size for batched evaluation
            test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

            current_scores_full = []
            current_scores_missing = []

            for batch_x, batch_y in test_dataloader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)

                # --- Full Modality Test ---
                reconstruction_full = model(batch_x)
                # Calculate MSE per window (B, L, D) -> (B)
                # Anomaly Score = Mean Squared Error across the window sequence and all features
                errors_batch_full = torch.mean((reconstruction_full - batch_y)**2, dim=(1, 2))
                current_scores_full.extend(errors_batch_full.cpu().numpy())

                # --- Missing Modality Test ---
                X_missing = batch_x.clone()
                # Mask the first 'missing_modalities_count' features
                X_missing[:, :, :missing_modalities_count] = 0.0

                reconstruction_missing = model(X_missing)

                # Calculate MSE per window against the *original* target (Y_tensor)
                errors_batch_missing = torch.mean((reconstruction_missing - batch_y)**2, dim=(1, 2))
                current_scores_missing.extend(errors_batch_missing.cpu().numpy())

            if name == 'Normal':
                normal_full_scores = np.array(current_scores_full)
                normal_missing_scores = np.array(current_scores_missing)
            else: # 'Fault'
                fault_full_scores = np.array(current_scores_full)
                fault_missing_scores = np.array(current_scores_missing)

            print(f"Finished processing {name} data. {len(X_win)} windows evaluated.")

    return normal_full_scores, fault_full_scores, normal_missing_scores, fault_missing_scores


def calculate_youden_auc(normal_scores, fault_scores, description):
    """
    Calculates AUC-ROC and finds the optimal threshold using Youden's J statistic.
    """
    if len(normal_scores) == 0 or len(fault_scores) == 0:
        print(f"\n--- SKIPPING METRICS FOR {description} ---")
        print("Not enough normal or fault windows were created/found to calculate ROC/AUC. Please check your data or WINDOW_SIZE.")
        return

    # 1. Prepare data (scores and true labels)
    all_scores = np.concatenate([normal_scores, fault_scores])
    # True labels: 0 for normal, 1 for fault/anomaly
    true_labels = np.concatenate([np.zeros_like(normal_scores), np.ones_like(fault_scores)])

    # Check for binary classification requirement
    if len(np.unique(true_labels)) < 2:
        print(f"\n--- SKIPPING METRICS FOR {description} ---")
        print("Only one class (all normal or all fault) was found in the test set. Cannot calculate AUC.")
        return

    # 2. Calculate ROC curve metrics
    # fpr = False Positive Rate, tpr = True Positive Rate
    fpr, tpr, thresholds = roc_curve(true_labels, all_scores)

    # 3. Calculate AUC
    auc_score = roc_auc_score(true_labels, all_scores)

    # 4. Calculate Youden's J for optimal threshold
    # J = max(Sensitivity + Specificity - 1) = max(TPR + (1 - FPR) - 1) = max(TPR - FPR)
    youden_j = tpr - fpr
    optimal_idx = np.argmax(youden_j)
    optimal_threshold = thresholds[optimal_idx]

    # 5. Calculate final classification metrics using the optimal threshold
    predicted_labels = (all_scores >= optimal_threshold).astype(int)

    # Confusion Matrix components
    TP = np.sum((true_labels == 1) & (predicted_labels == 1))
    TN = np.sum((true_labels == 0) & (predicted_labels == 0))
    FP = np.sum((true_labels == 0) & (predicted_labels == 1))
    FN = np.sum((true_labels == 1) & (predicted_labels == 0))

    # Calculate key metrics
    sensitivity = TP / (TP + FN) if (TP + FN) > 0 else 0
    specificity = TN / (TN + FP) if (TN + FP) > 0 else 0
    accuracy = (TP + TN) / len(true_labels) if len(true_labels) > 0 else 0

    print(f"\n--- CLASSIFICATION METRICS: {description} ---")
    print(f"AUC-ROC Score: {auc_score:.4f}")
    print(f"Optimal Threshold (Youden's J): {optimal_threshold:.6f}")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Sensitivity (TPR): {sensitivity:.4f}")
    print(f"Specificity (TNR): {specificity:.4f}")
    print(f"Youden's J Max Value: {youden_j[optimal_idx]:.4f}")
    # [Image of ROC Curve and Optimal Threshold]


In [4]:
normal_full_scores, fault_full_scores, normal_missing_scores, fault_missing_scores = evaluate_missing_modalities(
        model,
        X_normal_test_mts,
        X_fault_scaled_mts,
        WINDOW_SIZE,
        missing_modalities_count=1,
        batch_size=BATCH_SIZE,
        device=device
    )
missing_modalities_count = 1
    # 6. Calculate AUC and Youden's J for both scenarios
calculate_youden_auc(normal_full_scores, fault_full_scores, "Full Modalities Test")
calculate_youden_auc(normal_missing_scores, fault_missing_scores, f"Missing {missing_modalities_count} Modalities Test")


Starting evaluation of missing modalities...
Created 18900 windows of size 100.
Finished processing Normal data. 18900 windows evaluated.
Created 900 windows of size 100.
Finished processing Fault data. 900 windows evaluated.

--- CLASSIFICATION METRICS: Full Modalities Test ---
AUC-ROC Score: 1.0000
Optimal Threshold (Youden's J): 0.064035
Accuracy: 1.0000
Sensitivity (TPR): 1.0000
Specificity (TNR): 1.0000
Youden's J Max Value: 1.0000

--- CLASSIFICATION METRICS: Missing 1 Modalities Test ---
AUC-ROC Score: 1.0000
Optimal Threshold (Youden's J): 0.063716
Accuracy: 1.0000
Sensitivity (TPR): 1.0000
Specificity (TNR): 1.0000
Youden's J Max Value: 1.0000


In [5]:
normal_full_scores, fault_full_scores, normal_missing_scores, fault_missing_scores = evaluate_missing_modalities(
        model,
        X_normal_test_mts,
        X_fault_scaled_mts,
        WINDOW_SIZE,
        missing_modalities_count=2,
        batch_size=BATCH_SIZE,
        device=device
    )
missing_modalities_count = 2
    # 6. Calculate AUC and Youden's J for both scenarios
calculate_youden_auc(normal_full_scores, fault_full_scores, "Full Modalities Test")
calculate_youden_auc(normal_missing_scores, fault_missing_scores, f"Missing {missing_modalities_count} Modalities Test")


Starting evaluation of missing modalities...
Created 18900 windows of size 100.
Finished processing Normal data. 18900 windows evaluated.
Created 900 windows of size 100.
Finished processing Fault data. 900 windows evaluated.

--- CLASSIFICATION METRICS: Full Modalities Test ---
AUC-ROC Score: 1.0000
Optimal Threshold (Youden's J): 0.064035
Accuracy: 1.0000
Sensitivity (TPR): 1.0000
Specificity (TNR): 1.0000
Youden's J Max Value: 1.0000

--- CLASSIFICATION METRICS: Missing 2 Modalities Test ---
AUC-ROC Score: 0.9663
Optimal Threshold (Youden's J): 0.068509
Accuracy: 0.9667
Sensitivity (TPR): 1.0000
Specificity (TNR): 0.9651
Youden's J Max Value: 0.9651


In [6]:
normal_full_scores, fault_full_scores, normal_missing_scores, fault_missing_scores = evaluate_missing_modalities(
        model,
        X_normal_test_mts,
        X_fault_scaled_mts,
        WINDOW_SIZE,
        missing_modalities_count=3,
        batch_size=BATCH_SIZE,
        device=device
    )
missing_modalities_count = 3
    # 6. Calculate AUC and Youden's J for both scenarios
calculate_youden_auc(normal_full_scores, fault_full_scores, "Full Modalities Test")
calculate_youden_auc(normal_missing_scores, fault_missing_scores, f"Missing {missing_modalities_count} Modalities Test")


Starting evaluation of missing modalities...
Created 18900 windows of size 100.
Finished processing Normal data. 18900 windows evaluated.
Created 900 windows of size 100.
Finished processing Fault data. 900 windows evaluated.

--- CLASSIFICATION METRICS: Full Modalities Test ---
AUC-ROC Score: 1.0000
Optimal Threshold (Youden's J): 0.064035
Accuracy: 1.0000
Sensitivity (TPR): 1.0000
Specificity (TNR): 1.0000
Youden's J Max Value: 1.0000

--- CLASSIFICATION METRICS: Missing 3 Modalities Test ---
AUC-ROC Score: 0.9204
Optimal Threshold (Youden's J): 0.068483
Accuracy: 0.9222
Sensitivity (TPR): 1.0000
Specificity (TNR): 0.9185
Youden's J Max Value: 0.9185


In [7]:
normal_full_scores, fault_full_scores, normal_missing_scores, fault_missing_scores = evaluate_missing_modalities(
        model,
        X_normal_test_mts,
        X_fault_scaled_mts,
        WINDOW_SIZE,
        missing_modalities_count=4,
        batch_size=BATCH_SIZE,
        device=device
    )
missing_modalities_count = 4
    # 6. Calculate AUC and Youden's J for both scenarios
calculate_youden_auc(normal_full_scores, fault_full_scores, "Full Modalities Test")
calculate_youden_auc(normal_missing_scores, fault_missing_scores, f"Missing {missing_modalities_count} Modalities Test")


Starting evaluation of missing modalities...
Created 18900 windows of size 100.
Finished processing Normal data. 18900 windows evaluated.
Created 900 windows of size 100.
Finished processing Fault data. 900 windows evaluated.

--- CLASSIFICATION METRICS: Full Modalities Test ---
AUC-ROC Score: 1.0000
Optimal Threshold (Youden's J): 0.064035
Accuracy: 1.0000
Sensitivity (TPR): 1.0000
Specificity (TNR): 1.0000
Youden's J Max Value: 1.0000

--- CLASSIFICATION METRICS: Missing 4 Modalities Test ---
AUC-ROC Score: 0.5110
Optimal Threshold (Youden's J): 0.061030
Accuracy: 0.5319
Sensitivity (TPR): 1.0000
Specificity (TNR): 0.5096
Youden's J Max Value: 0.5096


In [8]:
normal_full_scores, fault_full_scores, normal_missing_scores, fault_missing_scores = evaluate_missing_modalities(
        model,
        X_normal_test_mts,
        X_fault_scaled_mts,
        WINDOW_SIZE,
        missing_modalities_count=5,
        batch_size=BATCH_SIZE,
        device=device
    )
missing_modalities_count = 5
    # 6. Calculate AUC and Youden's J for both scenarios
calculate_youden_auc(normal_full_scores, fault_full_scores, "Full Modalities Test")
calculate_youden_auc(normal_missing_scores, fault_missing_scores, f"Missing {missing_modalities_count} Modalities Test")


Starting evaluation of missing modalities...
Created 18900 windows of size 100.
Finished processing Normal data. 18900 windows evaluated.
Created 900 windows of size 100.
Finished processing Fault data. 900 windows evaluated.

--- CLASSIFICATION METRICS: Full Modalities Test ---
AUC-ROC Score: 1.0000
Optimal Threshold (Youden's J): 0.064035
Accuracy: 1.0000
Sensitivity (TPR): 1.0000
Specificity (TNR): 1.0000
Youden's J Max Value: 1.0000

--- CLASSIFICATION METRICS: Missing 5 Modalities Test ---
AUC-ROC Score: 0.3032
Optimal Threshold (Youden's J): 0.060565
Accuracy: 0.3346
Sensitivity (TPR): 1.0000
Specificity (TNR): 0.3030
Youden's J Max Value: 0.3030


In [9]:
normal_full_scores, fault_full_scores, normal_missing_scores, fault_missing_scores = evaluate_missing_modalities(
        model,
        X_normal_test_mts,
        X_fault_scaled_mts,
        WINDOW_SIZE,
        missing_modalities_count=6,
        batch_size=BATCH_SIZE,
        device=device
    )
missing_modalities_count = 6
    # 6. Calculate AUC and Youden's J for both scenarios
calculate_youden_auc(normal_full_scores, fault_full_scores, "Full Modalities Test")
calculate_youden_auc(normal_missing_scores, fault_missing_scores, f"Missing {missing_modalities_count} Modalities Test")


Starting evaluation of missing modalities...
Created 18900 windows of size 100.
Finished processing Normal data. 18900 windows evaluated.
Created 900 windows of size 100.
Finished processing Fault data. 900 windows evaluated.

--- CLASSIFICATION METRICS: Full Modalities Test ---
AUC-ROC Score: 1.0000
Optimal Threshold (Youden's J): 0.064035
Accuracy: 1.0000
Sensitivity (TPR): 1.0000
Specificity (TNR): 1.0000
Youden's J Max Value: 1.0000

--- CLASSIFICATION METRICS: Missing 6 Modalities Test ---
AUC-ROC Score: 0.2891
Optimal Threshold (Youden's J): 0.064937
Accuracy: 0.3211
Sensitivity (TPR): 1.0000
Specificity (TNR): 0.2888
Youden's J Max Value: 0.2888


In [10]:
normal_full_scores, fault_full_scores, normal_missing_scores, fault_missing_scores = evaluate_missing_modalities(
        model,
        X_normal_test_mts,
        X_fault_scaled_mts,
        WINDOW_SIZE,
        missing_modalities_count=7,
        batch_size=BATCH_SIZE,
        device=device
    )
missing_modalities_count = 7
    # 6. Calculate AUC and Youden's J for both scenarios
calculate_youden_auc(normal_full_scores, fault_full_scores, "Full Modalities Test")
calculate_youden_auc(normal_missing_scores, fault_missing_scores, f"Missing {missing_modalities_count} Modalities Test")


Starting evaluation of missing modalities...
Created 18900 windows of size 100.
Finished processing Normal data. 18900 windows evaluated.
Created 900 windows of size 100.
Finished processing Fault data. 900 windows evaluated.

--- CLASSIFICATION METRICS: Full Modalities Test ---
AUC-ROC Score: 1.0000
Optimal Threshold (Youden's J): 0.064035
Accuracy: 1.0000
Sensitivity (TPR): 1.0000
Specificity (TNR): 1.0000
Youden's J Max Value: 1.0000

--- CLASSIFICATION METRICS: Missing 7 Modalities Test ---
AUC-ROC Score: 0.1877
Optimal Threshold (Youden's J): 0.069901
Accuracy: 0.2244
Sensitivity (TPR): 1.0000
Specificity (TNR): 0.1875
Youden's J Max Value: 0.1875


In [11]:
normal_full_scores, fault_full_scores, normal_missing_scores, fault_missing_scores = evaluate_missing_modalities(
        model,
        X_normal_test_mts,
        X_fault_scaled_mts,
        WINDOW_SIZE,
        missing_modalities_count=8,
        batch_size=BATCH_SIZE,
        device=device
    )
missing_modalities_count = 8
    # 6. Calculate AUC and Youden's J for both scenarios
calculate_youden_auc(normal_full_scores, fault_full_scores, "Full Modalities Test")
calculate_youden_auc(normal_missing_scores, fault_missing_scores, f"Missing {missing_modalities_count} Modalities Test")


Starting evaluation of missing modalities...
Created 18900 windows of size 100.
Finished processing Normal data. 18900 windows evaluated.
Created 900 windows of size 100.
Finished processing Fault data. 900 windows evaluated.

--- CLASSIFICATION METRICS: Full Modalities Test ---
AUC-ROC Score: 1.0000
Optimal Threshold (Youden's J): 0.064035
Accuracy: 1.0000
Sensitivity (TPR): 1.0000
Specificity (TNR): 1.0000
Youden's J Max Value: 1.0000

--- CLASSIFICATION METRICS: Missing 8 Modalities Test ---
AUC-ROC Score: 0.1839
Optimal Threshold (Youden's J): 0.070760
Accuracy: 0.2209
Sensitivity (TPR): 1.0000
Specificity (TNR): 0.1838
Youden's J Max Value: 0.1838


In [12]:
normal_full_scores, fault_full_scores, normal_missing_scores, fault_missing_scores = evaluate_missing_modalities(
        model,
        X_normal_test_mts,
        X_fault_scaled_mts,
        WINDOW_SIZE,
        missing_modalities_count=9,
        batch_size=BATCH_SIZE,
        device=device
    )
missing_modalities_count = 9
    # 6. Calculate AUC and Youden's J for both scenarios
calculate_youden_auc(normal_full_scores, fault_full_scores, "Full Modalities Test")
calculate_youden_auc(normal_missing_scores, fault_missing_scores, f"Missing {missing_modalities_count} Modalities Test")


Starting evaluation of missing modalities...
Created 18900 windows of size 100.
Finished processing Normal data. 18900 windows evaluated.
Created 900 windows of size 100.
Finished processing Fault data. 900 windows evaluated.

--- CLASSIFICATION METRICS: Full Modalities Test ---
AUC-ROC Score: 1.0000
Optimal Threshold (Youden's J): 0.064035
Accuracy: 1.0000
Sensitivity (TPR): 1.0000
Specificity (TNR): 1.0000
Youden's J Max Value: 1.0000

--- CLASSIFICATION METRICS: Missing 9 Modalities Test ---
AUC-ROC Score: 0.0917
Optimal Threshold (Youden's J): 0.067269
Accuracy: 0.1326
Sensitivity (TPR): 1.0000
Specificity (TNR): 0.0913
Youden's J Max Value: 0.0913


In [13]:
normal_full_scores, fault_full_scores, normal_missing_scores, fault_missing_scores = evaluate_missing_modalities(
        model,
        X_normal_test_mts,
        X_fault_scaled_mts,
        WINDOW_SIZE,
        missing_modalities_count=10,
        batch_size=BATCH_SIZE,
        device=device
    )
missing_modalities_count = 10
    # 6. Calculate AUC and Youden's J for both scenarios
calculate_youden_auc(normal_full_scores, fault_full_scores, "Full Modalities Test")
calculate_youden_auc(normal_missing_scores, fault_missing_scores, f"Missing {missing_modalities_count} Modalities Test")


Starting evaluation of missing modalities...
Created 18900 windows of size 100.
Finished processing Normal data. 18900 windows evaluated.
Created 900 windows of size 100.
Finished processing Fault data. 900 windows evaluated.

--- CLASSIFICATION METRICS: Full Modalities Test ---
AUC-ROC Score: 1.0000
Optimal Threshold (Youden's J): 0.064035
Accuracy: 1.0000
Sensitivity (TPR): 1.0000
Specificity (TNR): 1.0000
Youden's J Max Value: 1.0000

--- CLASSIFICATION METRICS: Missing 10 Modalities Test ---
AUC-ROC Score: 0.0000
Optimal Threshold (Youden's J): inf
Accuracy: 0.9545
Sensitivity (TPR): 0.0000
Specificity (TNR): 1.0000
Youden's J Max Value: 0.0000


In [14]:
normal_full_scores, fault_full_scores, normal_missing_scores, fault_missing_scores = evaluate_missing_modalities(
        model,
        X_normal_test_mts,
        X_fault_scaled_mts,
        WINDOW_SIZE,
        missing_modalities_count=11,
        batch_size=BATCH_SIZE,
        device=device
    )
missing_modalities_count = 11
    # 6. Calculate AUC and Youden's J for both scenarios
calculate_youden_auc(normal_full_scores, fault_full_scores, "Full Modalities Test")
calculate_youden_auc(normal_missing_scores, fault_missing_scores, f"Missing {missing_modalities_count} Modalities Test")


Starting evaluation of missing modalities...
Created 18900 windows of size 100.
Finished processing Normal data. 18900 windows evaluated.
Created 900 windows of size 100.
Finished processing Fault data. 900 windows evaluated.

--- CLASSIFICATION METRICS: Full Modalities Test ---
AUC-ROC Score: 1.0000
Optimal Threshold (Youden's J): 0.064035
Accuracy: 1.0000
Sensitivity (TPR): 1.0000
Specificity (TNR): 1.0000
Youden's J Max Value: 1.0000

--- CLASSIFICATION METRICS: Missing 11 Modalities Test ---
AUC-ROC Score: 0.0000
Optimal Threshold (Youden's J): inf
Accuracy: 0.9545
Sensitivity (TPR): 0.0000
Specificity (TNR): 1.0000
Youden's J Max Value: 0.0000


In [15]:
normal_full_scores, fault_full_scores, normal_missing_scores, fault_missing_scores = evaluate_missing_modalities(
        model,
        X_normal_test_mts,
        X_fault_scaled_mts,
        WINDOW_SIZE,
        missing_modalities_count=12,
        batch_size=BATCH_SIZE,
        device=device
    )
missing_modalities_count = 12
    # 6. Calculate AUC and Youden's J for both scenarios
calculate_youden_auc(normal_full_scores, fault_full_scores, "Full Modalities Test")
calculate_youden_auc(normal_missing_scores, fault_missing_scores, f"Missing {missing_modalities_count} Modalities Test")


Starting evaluation of missing modalities...
Created 18900 windows of size 100.
Finished processing Normal data. 18900 windows evaluated.
Created 900 windows of size 100.
Finished processing Fault data. 900 windows evaluated.

--- CLASSIFICATION METRICS: Full Modalities Test ---
AUC-ROC Score: 1.0000
Optimal Threshold (Youden's J): 0.064035
Accuracy: 1.0000
Sensitivity (TPR): 1.0000
Specificity (TNR): 1.0000
Youden's J Max Value: 1.0000

--- CLASSIFICATION METRICS: Missing 12 Modalities Test ---
AUC-ROC Score: 0.0000
Optimal Threshold (Youden's J): inf
Accuracy: 0.9545
Sensitivity (TPR): 0.0000
Specificity (TNR): 1.0000
Youden's J Max Value: 0.0000
