# HAI-20.07 Dataset Analysis: Optimized TCN Model

This notebook implements an optimized Temporal Convolutional Network (TCN) model for attack detection in industrial control systems using the HAI-20.07 dataset.

## 1. Import Libraries

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import time
import os
import pickle
import gc
import psutil
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

# PyTorch libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, WeightedRandomSampler

# Fix the NumPy and PyTorch RNG seeds so repeated runs are comparable
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

# Ask for CUDA availability once and reuse the answer below
_cuda_available = torch.cuda.is_available()
print(f"CUDA available: {_cuda_available}")
if _cuda_available:
    print(f"GPU device name: {torch.cuda.get_device_name(0)}")

# Use the first GPU when one is present; otherwise run on the CPU
device = torch.device("cuda:0") if _cuda_available else torch.device("cpu")
print(f"Using device: {device}")

## 2. Load Preprocessed Data

In [None]:
# Read the pickled bundle of windowed sequences produced by the preprocessing step.
# NOTE(review): pickle.load executes arbitrary code embedded in the file, so only
# load files that were generated locally / come from a trusted source.
with open('preprocessed_data/sequence_data.pkl', 'rb') as f:
    sequence_data = pickle.load(f)

# Unpack every entry of the bundle into its own notebook-level variable
(
    X_train_seq,
    y_train_seq,
    X_test_seq,
    y_test_seq,
    X_train_seq_balanced,
    y_train_seq_balanced,
    TIME_STEPS,
    STRIDE,
) = (
    sequence_data[key]
    for key in (
        'X_train_seq',
        'y_train_seq',
        'X_test_seq',
        'y_test_seq',
        'X_train_seq_balanced',
        'y_train_seq_balanced',
        'TIME_STEPS',
        'STRIDE',
    )
)

# Quick sanity check of the arrays used for training and evaluation
print("X_train_seq_balanced shape:", X_train_seq_balanced.shape)
print("y_train_seq_balanced shape:", y_train_seq_balanced.shape)
print("X_test_seq shape:", X_test_seq.shape)
print("y_test_seq shape:", y_test_seq.shape)

## 3. Define Utility Functions

In [None]:
# Report how much resident memory the current Python process occupies
def get_memory_usage():
    """Return the resident set size (RSS) of this process in MB.

    A garbage-collection pass is forced first so that unreachable objects
    do not inflate the reading.
    """
    gc.collect()
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    bytes_per_mb = 1024 * 1024
    return rss_bytes / bytes_per_mb

# Function to calculate model size in MB
def get_model_size(model):
    """Return the size in MB of the model's serialized ``state_dict``.

    The state dict is written with ``torch.save`` into a private temporary
    directory and the resulting file size is read back. Using a temporary
    directory (instead of a fixed ``temp_model.pt`` in the working directory)
    means an existing file with that name is never overwritten or deleted, and
    the scratch file is cleaned up even if saving raises.

    Args:
        model: Object exposing ``state_dict()`` (e.g. a ``torch.nn.Module``).

    Returns:
        float: Size of the serialized state dict in megabytes (bytes / 1024**2).
    """
    import tempfile  # local import: only this helper needs it

    with tempfile.TemporaryDirectory() as scratch_dir:
        # Keep the original file name so the archive layout (and size) is unchanged
        checkpoint_path = os.path.join(scratch_dir, "temp_model.pt")
        torch.save(model.state_dict(), checkpoint_path)
        size_bytes = os.path.getsize(checkpoint_path)
    return size_bytes / (1024 * 1024)  # Convert to MB

## 4. Define Optimized TCN Model

In [None]:
# Define an optimized lightweight TCN model using PyTorch
class OptimizedTCN(nn.Module):
    """Small stack of (dilated) 1-D convolutions followed by a sigmoid classifier.

    Architecture, in order:
      * an input ``Conv1d`` mapping ``input_size`` features to ``num_channels[0]``;
      * one dilated ``Conv1d`` per additional entry of ``num_channels`` with
        dilation ``2 ** i`` (1, 2, 4, ...);
      * each convolution is followed by ReLU, BatchNorm1d and Dropout;
      * global average pooling over time, a linear layer to one unit, sigmoid.

    Note: padding is symmetric, so the convolutions are not causal (each output
    step sees both past and future steps inside the window).

    Args:
        input_size: Number of input features per time step.
        num_channels: Sequence of channel widths, one per convolutional layer.
            Must contain at least one entry.
        kernel_size: Convolution kernel width. Odd values keep the sequence
            length unchanged through every layer.
        dropout: Dropout probability applied after every convolution block.

    Raises:
        ValueError: If ``num_channels`` is empty.
    """

    def __init__(self, input_size, num_channels=(16, 8), kernel_size=3, dropout=0.1):
        super().__init__()
        # Copy into a list: accepts lists/tuples and avoids a shared mutable default
        num_channels = list(num_channels)
        if not num_channels:
            raise ValueError("num_channels must contain at least one entry")

        layers = []
        num_levels = len(num_channels)

        # Input layer with fewer channels
        layers.append(nn.Conv1d(in_channels=input_size, out_channels=num_channels[0],
                                kernel_size=kernel_size, padding=kernel_size // 2))
        layers.append(nn.ReLU())
        layers.append(nn.BatchNorm1d(num_channels[0]))
        layers.append(nn.Dropout(dropout))

        # Hidden layers with dilated convolutions
        for i in range(num_levels - 1):
            dilation_size = 2 ** i
            # "Same" padding for a dilated kernel; equals dilation_size when
            # kernel_size == 3, and keeps the length for other odd kernel sizes too.
            same_padding = dilation_size * (kernel_size - 1) // 2
            layers.append(nn.Conv1d(in_channels=num_channels[i], out_channels=num_channels[i + 1],
                                    kernel_size=kernel_size, padding=same_padding,
                                    dilation=dilation_size))
            layers.append(nn.ReLU())
            layers.append(nn.BatchNorm1d(num_channels[i + 1]))
            layers.append(nn.Dropout(dropout))

        self.network = nn.Sequential(*layers)
        self.linear = nn.Linear(num_channels[-1], 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return one probability per sample.

        Args:
            x: Tensor of shape ``[batch, time_steps, features]``.

        Returns:
            Tensor of shape ``[batch, 1]`` with values in ``[0, 1]``.
        """
        # Conv1d expects channels first: [batch, features, time_steps]
        x = x.permute(0, 2, 1)
        x = self.network(x)
        # Global average pooling over the time axis
        x = torch.mean(x, dim=2)
        x = self.linear(x)
        return self.sigmoid(x)

## 5. Train and Evaluate TCN Model

In [None]:
# Train the optimized TCN model
def _predict_proba_batched(model, X_tensor, batch_size=1024):
    """Run ``model`` over ``X_tensor`` in mini-batches and return a NumPy array.

    Batching keeps peak device memory bounded instead of pushing the whole
    tensor through the network in one forward pass. The model is switched to
    eval mode and gradients are disabled.
    """
    model.eval()
    outputs = []
    with torch.no_grad():
        for start in range(0, len(X_tensor), batch_size):
            batch = X_tensor[start:start + batch_size].to(device)
            outputs.append(model(batch).cpu())
    if not outputs:
        return np.empty((0, 1), dtype=np.float32)
    return torch.cat(outputs, dim=0).numpy()


def train_tcn_model(X_train, y_train, X_test, y_test, num_epochs=10, batch_size=64,
                    learning_rate=0.001, val_fraction=0.1, patience=5, eval_batch_size=1024):
    """Train an ``OptimizedTCN``, evaluate it on the test set and persist the results.

    Steps: hold out the last ``val_fraction`` of the training data for
    validation, train on the remainder with a class-weighted sampler, reduce
    the learning rate on validation plateaus, stop early after ``patience``
    epochs without improvement, restore the best weights, then compute test
    metrics, plot the confusion matrix and write results/model to
    ``model_results/``.

    Args:
        X_train: Array-like indexed as ``[sample, time_step, feature]``.
        y_train: Array-like of labels, one per training sample.
            Assumed to be binary 0/1 — TODO confirm against the preprocessing step.
        X_test: Test inputs with the same layout as ``X_train``.
        y_test: Test labels, one per test sample.
        num_epochs: Maximum number of training epochs.
        batch_size: Mini-batch size used for training.
        learning_rate: Initial Adam learning rate.
        val_fraction: Fraction of the training data held out for validation.
        patience: Epochs without validation improvement before stopping.
        eval_batch_size: Mini-batch size used for validation/test inference.

    Returns:
        tuple: ``(model, y_pred, y_pred_proba, results)`` where ``y_pred`` are
        0/1 predictions at threshold 0.5, ``y_pred_proba`` the flat predicted
        probabilities and ``results`` the metrics dict that is also pickled.

    Raises:
        ValueError: If fewer than two training samples are supplied.
    """
    # Convert data to PyTorch tensors
    X_train_tensor = torch.as_tensor(np.asarray(X_train), dtype=torch.float32)
    y_train_tensor = torch.as_tensor(np.asarray(y_train), dtype=torch.float32).reshape(-1, 1)
    X_test_tensor = torch.as_tensor(np.asarray(X_test), dtype=torch.float32)

    n_total = len(X_train_tensor)
    if n_total < 2:
        raise ValueError("Need at least two training samples to build train/validation splits")

    # Hold out the tail of the training data for validation and keep it OUT of the
    # training loader; otherwise early stopping is driven by samples the model has seen.
    # NOTE(review): the split is positional (last samples). If the balanced set is
    # ordered by class, confirm that both splits still contain both classes.
    val_size = min(max(1, int(val_fraction * n_total)), n_total - 1)
    n_fit = n_total - val_size
    X_fit_tensor, y_fit_tensor = X_train_tensor[:n_fit], y_train_tensor[:n_fit]
    X_val_tensor, y_val_tensor = X_train_tensor[n_fit:], y_train_tensor[n_fit:]

    # Create weighted sampler for imbalanced data (computed on the training split only)
    fit_labels = y_fit_tensor.reshape(-1).long()
    class_counts = torch.bincount(fit_labels, minlength=2)
    # clamp avoids a division by zero when a class is absent from the split
    class_weights = 1.0 / class_counts.clamp(min=1).to(torch.float)
    sample_weights = class_weights[fit_labels]
    sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)

    # Create DataLoader for batch processing
    train_dataset = TensorDataset(X_fit_tensor, y_fit_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler)

    # Initialize model, loss function, and optimizer
    input_size = X_train_tensor.shape[2]  # Number of features
    model = OptimizedTCN(input_size).to(device)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)  # weight decay for regularization
    # `verbose` is deprecated on LR schedulers in recent PyTorch; LR changes are logged manually below
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)

    # Measure memory usage before training
    memory_before = get_memory_usage()

    # Train the model
    start_time = time.time()
    best_val_loss = float('inf')
    best_state = None
    patience_counter = 0

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(train_dataset)

        # Validate on the held-out split (batched to bound memory use)
        val_proba = torch.from_numpy(_predict_proba_batched(model, X_val_tensor, eval_batch_size))
        val_loss = criterion(val_proba, y_val_tensor).item()

        lr_before = optimizer.param_groups[0]['lr']
        scheduler.step(val_loss)
        lr_after = optimizer.param_groups[0]['lr']
        if lr_after < lr_before:
            print(f"Reducing learning rate to {lr_after:.6g}")

        if (epoch + 1) % 2 == 0 or epoch == 0:
            print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Val Loss: {val_loss:.4f}')

        # Early stopping, remembering the best weights seen so far
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_state = {name: value.detach().clone() for name, value in model.state_dict().items()}
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch+1}")
                break

    # Evaluate/save the best checkpoint rather than whatever the last epoch produced
    if best_state is not None:
        model.load_state_dict(best_state)

    training_time = time.time() - start_time
    print(f"Training time: {training_time:.2f} seconds")

    # Measure memory usage after training
    memory_after = get_memory_usage()
    memory_used = memory_after - memory_before
    print(f"Memory used: {memory_used:.2f} MB")

    # Calculate model size
    model_size = get_model_size(model)
    print(f"Model size: {model_size:.2f} MB")

    # Evaluate the model; timing includes host<->device transfers, as before
    inference_start = time.time()
    y_pred_proba = _predict_proba_batched(model, X_test_tensor, eval_batch_size)
    inference_time = (time.time() - inference_start) / max(len(X_test_tensor), 1)
    print(f"Average inference time per sample: {inference_time*1000:.4f} ms")

    y_pred = (y_pred_proba > 0.5).astype(int).reshape(-1)

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    auc_score = roc_auc_score(y_test, y_pred_proba.reshape(-1))

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"AUC: {auc_score:.4f}")

    # Print classification report
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Plot confusion matrix
    plt.figure(figsize=(8, 6))
    cm = confusion_matrix(y_test, y_pred)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title('Confusion Matrix - Optimized TCN')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()

    # Collect results
    results = {
        'model_name': 'Optimized TCN',
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'auc': auc_score,
        'training_time': training_time,
        'inference_time': inference_time,
        'memory_used': memory_used,
        'model_size': model_size,
        'y_pred': y_pred,
        'y_pred_proba': y_pred_proba.reshape(-1)
    }

    # Create directory for results if it doesn't exist (no check-then-create race)
    os.makedirs('model_results', exist_ok=True)

    # Save results
    with open('model_results/tcn_results.pkl', 'wb') as f:
        pickle.dump(results, f)

    # Save model
    torch.save(model.state_dict(), 'model_results/tcn_model.pt')

    return model, y_pred, y_pred_proba.reshape(-1), results

In [None]:
# Fit the TCN on the balanced training windows and score it on the test windows
print("Training Optimized TCN model...")
tcn_model, y_pred_tcn, y_prob_tcn, tcn_results = train_tcn_model(
    X_train=X_train_seq_balanced,
    y_train=y_train_seq_balanced,
    X_test=X_test_seq,
    y_test=y_test_seq,
)

## 6. Analyze Feature Importance

In [None]:
# Analyze feature importance using permutation importance
# sklearn's permutation_importance needs a fitted estimator object (with `score`
# or an explicit scorer) and 2-D input, so it cannot wrap this PyTorch model that
# consumes 3-D windows. The import is retained in case other cells rely on the name;
# the importance itself is computed manually below.
from sklearn.inspection import permutation_importance


# Define a function to get predictions from the PyTorch model
def get_predictions(model, X, batch_size=1024):
    """Return the model's predicted probabilities for ``X`` as a flat NumPy array.

    Args:
        model: Trained network that maps ``[batch, time_steps, features]`` to ``[batch, 1]``.
        X: Array-like of windows indexed as ``[sample, time_step, feature]``.
        batch_size: Number of windows sent through the model per forward pass.

    Returns:
        numpy.ndarray: One probability per sample, shape ``(n_samples,)``.
    """
    model.eval()
    chunks = []
    with torch.no_grad():
        for start in range(0, len(X), batch_size):
            X_tensor = torch.as_tensor(X[start:start + batch_size], dtype=torch.float32).to(device)
            chunks.append(model(X_tensor).cpu().numpy().reshape(-1))
    if not chunks:
        return np.empty(0, dtype=np.float32)
    return np.concatenate(chunks)


def compute_permutation_importance(model, X, y, n_repeats=5, random_state=RANDOM_SEED):
    """Permutation importance per input feature, measured as the drop in ROC AUC.

    For each feature, its whole time window is shuffled across samples (the
    temporal shape inside a window is preserved) and the ROC AUC is recomputed.

    Args:
        model: Trained network accepted by ``get_predictions``.
        X: Array indexed as ``[sample, time_step, feature]``.
        y: Ground-truth labels, one per sample.
        n_repeats: Number of independent shuffles per feature.
        random_state: Seed for the shuffling RNG.

    Returns:
        numpy.ndarray: Shape ``(n_features, n_repeats)``; entry ``[j, r]`` is
        ``baseline_auc - auc_after_shuffling_feature_j`` for repeat ``r``.
    """
    rng = np.random.default_rng(random_state)
    baseline_auc = roc_auc_score(y, get_predictions(model, X))

    X_work = np.array(X, copy=True)  # never modify the caller's array
    n_features = X_work.shape[2]
    auc_drops = np.zeros((n_features, n_repeats))

    for feature_idx in range(n_features):
        original_values = X_work[:, :, feature_idx].copy()
        for repeat in range(n_repeats):
            shuffled_rows = rng.permutation(len(X_work))
            X_work[:, :, feature_idx] = original_values[shuffled_rows]
            permuted_auc = roc_auc_score(y, get_predictions(model, X_work))
            auc_drops[feature_idx, repeat] = baseline_auc - permuted_auc
        # Restore the feature before moving on to the next one
        X_work[:, :, feature_idx] = original_values

    return auc_drops


# Calculate permutation importance
permutation_auc_drops = compute_permutation_importance(
    tcn_model, X_test_seq, y_test_seq, n_repeats=5, random_state=RANDOM_SEED
)

# Get feature importances
importances = permutation_auc_drops.mean(axis=1)

# Plot feature importances
plt.figure(figsize=(12, 8))
plt.barh(range(len(importances)), importances)
plt.yticks(range(len(importances)), [f'Feature {i}' for i in range(len(importances))])
plt.title('Feature Importance (Permutation Importance)')
plt.xlabel('Importance')
plt.tight_layout()
plt.show()

## 7. Visualize Model Performance

In [None]:
# ROC and precision-recall curves for the test-set probabilities
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import precision_recall_curve

# --- ROC curve ---------------------------------------------------------------
fpr, tpr, _ = roc_curve(y_test_seq, y_prob_tcn)
roc_auc = auc(fpr, tpr)

roc_fig, roc_ax = plt.subplots(figsize=(10, 8))
roc_ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.3f})')
# Diagonal = performance of a random classifier
roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
roc_ax.set_xlim([0.0, 1.0])
roc_ax.set_ylim([0.0, 1.05])
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('Receiver Operating Characteristic (ROC) Curve - Optimized TCN')
roc_ax.legend(loc="lower right")
roc_ax.grid(True)
plt.show()

# --- Precision-recall curve --------------------------------------------------
precision, recall, _ = precision_recall_curve(y_test_seq, y_prob_tcn)
pr_auc = auc(recall, precision)

pr_fig, pr_ax = plt.subplots(figsize=(10, 8))
pr_ax.plot(recall, precision, color='blue', lw=2, label=f'PR curve (area = {pr_auc:.3f})')
pr_ax.set_xlabel('Recall')
pr_ax.set_ylabel('Precision')
pr_ax.set_title('Precision-Recall Curve - Optimized TCN')
pr_ax.legend(loc="best")
pr_ax.grid(True)
plt.show()

## 8. Threshold Optimization

In [None]:
# Optimize threshold for better F1 score
# NOTE(review): the threshold is selected on the test set itself, so the
# "optimized" metrics below are optimistically biased. For an unbiased estimate,
# pick the threshold on a validation split and only report it on the test set.

# 0.10, 0.15, ..., 0.85 — linspace gives an exact, predictable number of points
# (float-step np.arange can gain or lose the end point through rounding)
thresholds = np.linspace(0.1, 0.85, 16)
f1_scores = []

for threshold in thresholds:
    y_pred_threshold = (y_prob_tcn > threshold).astype(int)
    # zero_division=0 matches the other metric calls and avoids warnings when a
    # threshold yields no positive predictions
    f1 = f1_score(y_test_seq, y_pred_threshold, zero_division=0)
    f1_scores.append(f1)

# Find the best threshold (first maximum of the F1 sweep)
best_threshold_idx = np.argmax(f1_scores)
best_threshold = thresholds[best_threshold_idx]
best_f1 = f1_scores[best_threshold_idx]

print(f"Best threshold: {best_threshold:.2f} with F1 score: {best_f1:.4f}")

# Plot F1 scores for different thresholds
plt.figure(figsize=(10, 6))
plt.plot(thresholds, f1_scores, marker='o')
plt.axvline(x=best_threshold, color='r', linestyle='--', label=f'Best threshold: {best_threshold:.2f}')
plt.xlabel('Threshold')
plt.ylabel('F1 Score')
plt.title('F1 Score vs. Threshold')
plt.legend()
plt.grid(True)
plt.show()

# Recalculate metrics with the optimized threshold
y_pred_optimized = (y_prob_tcn > best_threshold).astype(int)
accuracy_optimized = accuracy_score(y_test_seq, y_pred_optimized)
precision_optimized = precision_score(y_test_seq, y_pred_optimized, zero_division=0)
recall_optimized = recall_score(y_test_seq, y_pred_optimized, zero_division=0)
f1_optimized = f1_score(y_test_seq, y_pred_optimized, zero_division=0)

print("Optimized Metrics:")
print(f"Accuracy: {accuracy_optimized:.4f}")
print(f"Precision: {precision_optimized:.4f}")
print(f"Recall: {recall_optimized:.4f}")
print(f"F1 Score: {f1_optimized:.4f}")

# Plot confusion matrix with optimized threshold
plt.figure(figsize=(8, 6))
cm_optimized = confusion_matrix(y_test_seq, y_pred_optimized)
sns.heatmap(cm_optimized, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix - Optimized TCN (Optimized Threshold)')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()