# HAI-20.07 Dataset Analysis: Efficient GRU with Attention Model

This notebook implements an efficient Gated Recurrent Unit (GRU) model with an attention mechanism for attack detection in industrial control systems using the HAI-20.07 dataset.

## 1. Import Libraries

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import time
import os
import pickle
import gc
import psutil
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, precision_score, recall_score, roc_auc_score

# TensorFlow libraries
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, GRU, Conv1D, Flatten, Input, Dropout, BatchNormalization, MaxPooling1D, GlobalAveragePooling1D
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import class_weight

# Fix the NumPy and TensorFlow seeds so repeated runs are comparable
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# Report the TensorFlow build and whichever GPUs it can see
print("TensorFlow version:", tf.__version__)
gpu_devices = tf.config.list_physical_devices('GPU')  # query once, reuse below
print("GPU available:", len(gpu_devices) > 0)
if gpu_devices:
    print("GPU devices:", gpu_devices)

## 2. Load Preprocessed Data

In [None]:
# Read the pickled bundle of preprocessed sequences.
# NOTE(review): pickle.load can execute arbitrary code - only open files you trust.
with open('preprocessed_data/sequence_data.pkl', 'rb') as f:
    sequence_data = pickle.load(f)

# Pull every entry of the bundle out into its own notebook-level variable
(
    X_train_seq,
    y_train_seq,
    X_test_seq,
    y_test_seq,
    X_train_seq_balanced,
    y_train_seq_balanced,
    TIME_STEPS,
    STRIDE,
) = (
    sequence_data[key]
    for key in (
        'X_train_seq',
        'y_train_seq',
        'X_test_seq',
        'y_test_seq',
        'X_train_seq_balanced',
        'y_train_seq_balanced',
        'TIME_STEPS',
        'STRIDE',
    )
)

# Quick sanity check on the arrays used for training and evaluation
for label, array in (
    ("X_train_seq_balanced shape:", X_train_seq_balanced),
    ("y_train_seq_balanced shape:", y_train_seq_balanced),
    ("X_test_seq shape:", X_test_seq),
    ("y_test_seq shape:", y_test_seq),
):
    print(label, array.shape)

## 3. Define Utility Functions

In [None]:
# Report how much resident memory this process currently holds
def get_memory_usage():
    """Return the resident set size (RSS) of the current process in MB.

    A garbage-collection pass runs first so the reading is taken right
    after unreachable Python objects have been collected.
    """
    gc.collect()
    bytes_per_mb = 1024 * 1024
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    return rss_bytes / bytes_per_mb

# Function to calculate TensorFlow model size in MB
def get_tf_model_size(model):
    """Return the on-disk size of ``model`` in MB.

    The model is written with ``model.save`` into a throwaway temporary
    directory, the sizes of all files below that directory are added up,
    and the directory is removed again (also when saving raises).

    Args:
        model: Any object exposing ``save(path)``.

    Returns:
        float: Total size of the saved artefacts in MB (bytes / 1024**2).
    """
    import tempfile

    # A private temp dir avoids clobbering (or leaking) a "temp_model" folder
    # in the working directory.
    with tempfile.TemporaryDirectory() as tmp_dir:
        model.save(os.path.join(tmp_dir, "temp_model"))
        # Walk recursively: a SavedModel keeps its weights in a "variables/"
        # sub-directory, which a flat os.listdir() of the top level misses.
        size_bytes = sum(
            os.path.getsize(os.path.join(root, file_name))
            for root, _dirs, file_names in os.walk(tmp_dir)
            for file_name in file_names
        )
    return size_bytes / (1024 * 1024)  # Convert to MB

## 4. Define Efficient GRU with Attention Model

In [None]:
# Build and compile the GRU + attention binary classifier
def create_gru_attention_model(input_shape, gru_units=16):
    """Assemble and compile a small GRU network with attention pooling.

    Args:
        input_shape: Shape passed to the Keras ``Input`` layer.
        gru_units: Width of the GRU layer (default 16).

    Returns:
        A compiled Keras ``Model`` with one sigmoid output, trained with
        binary cross-entropy and tracking accuracy, AUC, precision and recall.
    """
    layers = tf.keras.layers

    inputs = Input(shape=input_shape)

    # The GRU returns its full output sequence so attention can score every step
    gru_out = GRU(gru_units, return_sequences=True)(inputs)

    # One tanh score per step, flattened, softmax-normalised, then given back
    # a trailing axis of size 1 so it can be multiplied with the GRU output
    scores = Dense(1, activation='tanh')(gru_out)
    scores = layers.Reshape((-1,))(scores)
    weights = layers.Activation('softmax')(scores)
    weights = layers.Reshape((-1, 1))(weights)

    # Weight the GRU outputs and sum over axis 1 to get the context vector
    weighted = layers.Multiply()([gru_out, weights])
    context = layers.Lambda(lambda x: tf.keras.backend.sum(x, axis=1))(weighted)

    # Small classification head
    hidden = Dense(8, activation='relu')(context)
    hidden = Dropout(0.1)(hidden)
    outputs = Dense(1, activation='sigmoid')(hidden)

    model = Model(inputs=inputs, outputs=outputs)

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    tracked_metrics = [
        'accuracy',
        tf.keras.metrics.AUC(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ]
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=tracked_metrics)

    return model

## 5. Train and Evaluate GRU Model

In [None]:
# Train the GRU model
def train_gru_model(X_train, y_train, X_test, y_test,
                    epochs=15, batch_size=64, validation_split=0.1, threshold=0.5):
    """Train, evaluate, plot and persist the GRU-with-attention model.

    Args:
        X_train: Training inputs; ``X_train.shape[1:3]`` is used as the model
            input shape.
        y_train: Binary training labels (1 = positive class).
        X_test: Test inputs passed to ``model.predict``.
        y_test: Binary test labels used for all reported metrics.
        epochs: Maximum number of training epochs (default 15).
        batch_size: Mini-batch size for ``model.fit`` (default 64).
        validation_split: Fraction of the training data Keras holds out for
            validation (default 0.1).
        threshold: Probability above which a sample is predicted positive
            (default 0.5).

    Returns:
        tuple: ``(model, y_pred, y_pred_proba, results)`` where ``results`` is
        the dictionary that is also pickled to ``model_results/gru_results.pkl``.

    Side effects:
        Prints progress and metrics, shows two matplotlib figures, and writes
        ``model_results/gru_results.pkl`` and ``model_results/gru_model``.
    """
    # Calculate class weights for imbalanced data.
    # With no positive labels the ratio would divide by zero, so fall back to
    # an unweighted positive class in that case.
    n_positive = float(np.sum(y_train))
    if n_positive > 0:
        positive_weight = len(y_train) / (2 * n_positive)
    else:
        positive_weight = 1.0
        print("Warning: y_train contains no positive samples; using weight 1.0 for class 1.")
    class_weights = {0: 1.0, 1: positive_weight}
    print(f"Class weights: {class_weights}")

    # Define callbacks
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=0.0001)

    # Measure memory usage before training
    memory_before = get_memory_usage()

    # Create model
    input_shape = (X_train.shape[1], X_train.shape[2])
    model = create_gru_attention_model(input_shape)

    # Print model summary
    model.summary()

    # Train model
    # NOTE(review): Keras takes the validation split from the tail of the
    # arrays before shuffling; this assumes X_train/y_train are not ordered by
    # class - TODO confirm against the preprocessing step.
    start_time = time.time()
    history = model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        class_weight=class_weights,
        callbacks=[early_stopping, reduce_lr],
        verbose=1
    )

    training_time = time.time() - start_time
    print(f"Training time: {training_time:.2f} seconds")

    # Measure memory usage after training
    memory_after = get_memory_usage()
    memory_used = memory_after - memory_before
    print(f"Memory used: {memory_used:.2f} MB")

    # Calculate model size
    model_size = get_tf_model_size(model)
    print(f"Model size: {model_size:.2f} MB")

    # Evaluate model (wall-clock time of one predict call, averaged per sample)
    inference_start = time.time()
    y_pred_proba = model.predict(X_test).reshape(-1)
    inference_time = (time.time() - inference_start) / len(X_test)
    print(f"Average inference time per sample: {inference_time*1000:.4f} ms")

    y_pred = (y_pred_proba > threshold).astype(int)

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    # roc_auc_score raises when only one class is present; report NaN instead
    # of losing the trained model to an exception at this late stage.
    if np.unique(y_test).size > 1:
        auc_score = roc_auc_score(y_test, y_pred_proba)
    else:
        auc_score = float('nan')
        print("Warning: y_test contains a single class; ROC AUC is undefined.")

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"AUC: {auc_score:.4f}")

    # Print classification report
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    # Plot confusion matrix
    plt.figure(figsize=(8, 6))
    cm = confusion_matrix(y_test, y_pred)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title('Confusion Matrix - GRU with Attention')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()

    # Plot training history
    plt.figure(figsize=(12, 4))

    plt.subplot(1, 2, 1)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.tight_layout()
    plt.show()

    # Collect everything worth keeping from this run
    results = {
        'model_name': 'GRU with Attention',
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'auc': auc_score,
        'training_time': training_time,
        'inference_time': inference_time,
        'memory_used': memory_used,
        'model_size': model_size,
        'y_pred': y_pred,
        'y_pred_proba': y_pred_proba,
        'history': history.history
    }

    # Create directory for results if it doesn't exist (no check-then-create race)
    os.makedirs('model_results', exist_ok=True)

    # Save results
    with open('model_results/gru_results.pkl', 'wb') as f:
        pickle.dump(results, f)

    # Save model
    model.save('model_results/gru_model')

    return model, y_pred, y_pred_proba, results

In [None]:
# Fit on the balanced training sequences and evaluate on the test sequences
print("Training Efficient GRU with Attention model...")
gru_model, y_pred_gru, y_prob_gru, gru_results = train_gru_model(
    X_train_seq_balanced,
    y_train_seq_balanced,
    X_test_seq,
    y_test_seq,
)

## 6. Visualize Attention Weights

In [None]:
# Create a model to extract attention weights
def create_attention_extraction_model(trained_model, attention_layer_index=5):
    """Build a sub-model that maps the network input to its attention weights.

    Args:
        trained_model: Keras model whose ``layers`` list contains the
            attention layers.
        attention_layer_index: Position in ``trained_model.layers`` of the
            layer whose output is returned. The default (5) is the index this
            notebook has always used; it is expected to be the Reshape layer
            that follows the softmax in ``create_gru_attention_model`` -
            verify with ``model.summary()`` if the architecture changes.

    Returns:
        A Keras ``Model`` sharing ``trained_model``'s input and weights whose
        output is the selected layer's output.
    """
    # Only the selected layer's output is needed; the intermediate layer
    # outputs the original code looked up were never used.
    attention_output = trained_model.layers[attention_layer_index].output

    return Model(inputs=trained_model.input, outputs=attention_output)

# Build the sub-model that exposes the attention weights of the trained network
attention_model = create_attention_extraction_model(gru_model)

# Positions of attack (label 1) and normal (label 0) windows in the test set
attack_indices = np.where(y_test_seq == 1)[0]
normal_indices = np.where(y_test_seq == 0)[0]

# Keep the first few windows of each class
num_samples = 3
attack_samples = X_test_seq[attack_indices[:num_samples]]
normal_samples = X_test_seq[normal_indices[:num_samples]]

# Run both groups through the attention sub-model
attack_attention = attention_model.predict(attack_samples)
normal_attention = attention_model.predict(normal_samples)

# Top row: attack windows, bottom row: normal windows
plt.figure(figsize=(15, 10))

for i in range(num_samples):
    panels = (
        (i + 1, attack_attention, f'Attack Sample {i+1} Attention'),
        (num_samples + i + 1, normal_attention, f'Normal Sample {i+1} Attention'),
    )
    for position, weights, panel_title in panels:
        plt.subplot(2, num_samples, position)
        plt.plot(weights[i].reshape(-1))
        plt.title(panel_title)
        plt.xlabel('Time Step')
        plt.ylabel('Attention Weight')

plt.tight_layout()
plt.show()

## 7. Visualize Model Performance

In [None]:
# ROC curve of the predicted probabilities on the test set
from sklearn.metrics import roc_curve, auc

fpr, tpr, _ = roc_curve(y_test_seq, y_prob_gru)
roc_auc = auc(fpr, tpr)

roc_fig, roc_ax = plt.subplots(figsize=(10, 8))
roc_ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.3f})')
roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')  # diagonal reference line
roc_ax.set_xlim([0.0, 1.0])
roc_ax.set_ylim([0.0, 1.05])
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('Receiver Operating Characteristic (ROC) Curve - GRU with Attention')
roc_ax.legend(loc="lower right")
roc_ax.grid(True)
plt.show()

# Precision-recall curve for the same predictions
from sklearn.metrics import precision_recall_curve

precision, recall, _ = precision_recall_curve(y_test_seq, y_prob_gru)
pr_auc = auc(recall, precision)

pr_fig, pr_ax = plt.subplots(figsize=(10, 8))
pr_ax.plot(recall, precision, color='blue', lw=2, label=f'PR curve (area = {pr_auc:.3f})')
pr_ax.set_xlabel('Recall')
pr_ax.set_ylabel('Precision')
pr_ax.set_title('Precision-Recall Curve - GRU with Attention')
pr_ax.legend(loc="best")
pr_ax.grid(True)
plt.show()

## 8. Threshold Optimization

In [None]:
# Optimize threshold for better F1 score
# NOTE(review): the threshold is tuned on the same test set the metrics below
# are reported on, so those "optimized" numbers are optimistically biased.
# Tune on a validation split if an unbiased estimate is needed.

# 16 evenly spaced candidates from 0.10 to 0.85. linspace is used instead of
# arange because a float step makes arange's length (and whether a value near
# 0.90 sneaks in) depend on rounding.
thresholds = np.linspace(0.10, 0.85, 16)
f1_scores = []

for threshold in thresholds:
    y_pred_threshold = (y_prob_gru > threshold).astype(int)
    # zero_division=0 keeps the sweep quiet when a threshold yields no
    # positive predictions, matching the other metric calls in this notebook
    f1 = f1_score(y_test_seq, y_pred_threshold, zero_division=0)
    f1_scores.append(f1)

# Find the best threshold
best_threshold_idx = np.argmax(f1_scores)
best_threshold = thresholds[best_threshold_idx]
best_f1 = f1_scores[best_threshold_idx]

print(f"Best threshold: {best_threshold:.2f} with F1 score: {best_f1:.4f}")

# Plot F1 scores for different thresholds
plt.figure(figsize=(10, 6))
plt.plot(thresholds, f1_scores, marker='o')
plt.axvline(x=best_threshold, color='r', linestyle='--', label=f'Best threshold: {best_threshold:.2f}')
plt.xlabel('Threshold')
plt.ylabel('F1 Score')
plt.title('F1 Score vs. Threshold')
plt.legend()
plt.grid(True)
plt.show()

# Recalculate metrics with the optimized threshold
y_pred_optimized = (y_prob_gru > best_threshold).astype(int)
accuracy_optimized = accuracy_score(y_test_seq, y_pred_optimized)
precision_optimized = precision_score(y_test_seq, y_pred_optimized, zero_division=0)
recall_optimized = recall_score(y_test_seq, y_pred_optimized, zero_division=0)
f1_optimized = f1_score(y_test_seq, y_pred_optimized, zero_division=0)

print("Optimized Metrics:")
print(f"Accuracy: {accuracy_optimized:.4f}")
print(f"Precision: {precision_optimized:.4f}")
print(f"Recall: {recall_optimized:.4f}")
print(f"F1 Score: {f1_optimized:.4f}")

# Plot confusion matrix with optimized threshold
plt.figure(figsize=(8, 6))
cm_optimized = confusion_matrix(y_test_seq, y_pred_optimized)
sns.heatmap(cm_optimized, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix - GRU with Attention (Optimized Threshold)')
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()