# Adaptive and Multi-Threshold Strategies for HAI-21.03 Dataset

This notebook implements advanced adaptive and multi-threshold strategies for anomaly detection on the HAI-21.03 industrial control system security dataset.

In [None]:
# Import required libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import tensorflow as tf
from tensorflow.keras.models import load_model
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, precision_recall_curve, roc_curve, auc
from tqdm import tqdm
import itertools
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation notices from pandas/sklearn/TF.
warnings.filterwarnings('ignore')

# Set plot style
# NOTE(review): sns.set() runs after the ggplot style and presumably
# overrides most of it -- confirm which look is actually intended.
plt.style.use('ggplot')
sns.set(style="darkgrid")

# Set random seeds for reproducibility (NumPy and TensorFlow generators)
np.random.seed(42)
tf.random.set_seed(42)

## 1. Global Variables Setup

In [None]:
# Artifact locations, all relative to the current working directory
OUTPUT_DIR, FEATURE_DIR, MODEL_DIR, RESULTS_DIR = (
    'hai-security-dataset/processed',
    'hai-security-dataset/features',
    'hai-security-dataset/models',
    'hai-security-dataset/results',
)

# Post-processing parameters:
#   MIN_ANOMALY_LENGTH - minimum length of anomalies to keep
#   GAP_THRESHOLD      - maximum gap between anomalies to merge
MIN_ANOMALY_LENGTH, GAP_THRESHOLD = 30, 3

# Make sure the results directory exists before anything is written to it
os.makedirs(RESULTS_DIR, exist_ok=True)

## 2. Load Model and Detector

In [None]:
# Restore the trained model from its .h5 file
model_path = os.path.join(MODEL_DIR, 'improved_lstm_autoencoder.h5')
model = load_model(model_path)
print("Loaded model")

# Restore the parameters that were pickled alongside the model
params_path = os.path.join(MODEL_DIR, 'improved_model_params.pkl')
with open(params_path, 'rb') as params_file:
    model_params = pickle.load(params_file)

# Unpack the individual settings used by the rest of the notebook
SEQ_LENGTH, STRIDE = model_params['seq_length'], model_params['stride']
ensemble_features, ensemble_indices = (
    model_params['ensemble_features'],
    model_params['ensemble_indices'],
)
thresholds, operating_points = (
    model_params['thresholds'],
    model_params['operating_points'],
)

print(f"Loaded model parameters: SEQ_LENGTH={SEQ_LENGTH}, STRIDE={STRIDE}")
print(f"Number of selected features: {len(ensemble_features)}")

# Restore the pickled multi-threshold detector
detector_path = os.path.join(MODEL_DIR, 'multi_threshold_detector.pkl')
with open(detector_path, 'rb') as detector_file:
    detector = pickle.load(detector_file)

print("Loaded multi-threshold detector")
print("Operating points:")
# One line per operating point with its threshold value
for point_name, point_threshold in detector.operating_points.items():
    print(f"  {point_name}: {point_threshold:.6f}")

## 3. Load Test Data

In [None]:
# Load test data
# os.listdir() returns entries in arbitrary order. Sort the names so that
# "the first test file" used by later cells is the same on every run.
test_files = sorted(
    f for f in os.listdir(OUTPUT_DIR)
    if f.startswith('test') and f.endswith('_processed_enhanced.csv')
)
test_data = {}

for file in test_files:
    file_path = os.path.join(OUTPUT_DIR, file)
    file_name = file.split('_')[0]  # Extract test file name (e.g., 'test1')
    df = pd.read_csv(file_path)
    test_data[file_name] = df
    print(f"Loaded {file_name}: {df.shape[0]} rows, {df.shape[1]} columns")

## 4. Define Evaluation Functions

In [None]:
def post_process_anomalies(anomaly_labels, min_anomaly_length=30, gap_threshold=3):
    """
    Apply post-processing to reduce false positives and false negatives.

    Two passes are made over a copy of the labels:

    1. Every run of consecutive 1s shorter than ``min_anomaly_length`` is
       cleared (likely false positives).
    2. Two surviving runs separated by fewer than ``gap_threshold`` zeros
       are joined by filling the gap with 1s. This applies anywhere in the
       series, including runs that sit close to the end.

    Args:
        anomaly_labels (array-like): Binary anomaly labels (0/1)
        min_anomaly_length (int): Minimum length of anomalies to keep
        gap_threshold (int): Gaps strictly shorter than this are merged

    Returns:
        np.array: Processed binary anomaly labels (the input is not modified)
    """
    # Work on a copy; np.asarray also lets plain lists be passed in
    processed_labels = np.asarray(anomaly_labels).copy()
    n = len(processed_labels)

    # Pass 1: remove short anomalies (likely false positives)
    i = 0
    while i < n:
        if processed_labels[i] != 1:
            i += 1
            continue

        # Find the end (exclusive) of this run of 1s
        j = i
        while j < n and processed_labels[j] == 1:
            j += 1

        if j - i < min_anomaly_length:
            processed_labels[i:j] = 0

        i = j

    # Pass 2: merge nearby anomalies
    i = 0
    while i < n:
        if processed_labels[i] != 1:
            i += 1
            continue

        # Find the end (exclusive) of this run of 1s
        j = i
        while j < n and processed_labels[j] == 1:
            j += 1

        # Scan at most gap_threshold positions after the run. The scan is
        # clamped to the array length, so runs near the end are handled
        # exactly like runs in the middle.
        scan_limit = min(j + gap_threshold, n)
        next_start = j
        while next_start < scan_limit and processed_labels[next_start] == 0:
            next_start += 1

        if next_start < scan_limit and processed_labels[next_start] == 1:
            processed_labels[j:next_start] = 1

        # After a merge position j is now 1, so the next iteration keeps
        # extending the same (merged) run and can chain further merges.
        i = j

    return processed_labels

In [None]:
def evaluate_model(y_true, y_pred):
    """
    Evaluate model performance.

    Computes precision, recall, F1, accuracy, false positive/negative rates
    and the confusion matrix for binary labels, prints the scalar metrics
    and returns everything in a dict.

    Args:
        y_true (np.array): Ground truth labels (0 = normal, 1 = anomaly)
        y_pred (np.array): Predicted labels (0 = normal, 1 = anomaly)

    Returns:
        dict: Evaluation metrics with keys 'precision', 'recall', 'f1_score',
        'accuracy', 'false_positive_rate', 'false_negative_rate',
        'confusion_matrix', 'true_positives', 'false_positives',
        'true_negatives' and 'false_negatives'
    """
    # Calculate metrics
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)

    # Calculate confusion matrix. Passing labels explicitly guarantees a 2x2
    # matrix even when only one class occurs in y_true and y_pred; without it
    # the 4-way unpacking below fails on a 1x1 matrix.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    # Calculate additional metrics (each guarded against a zero denominator)
    total = tp + tn + fp + fn
    accuracy = (tp + tn) / total if total > 0 else 0
    false_positive_rate = fp / (fp + tn) if (fp + tn) > 0 else 0
    false_negative_rate = fn / (fn + tp) if (fn + tp) > 0 else 0

    # Store results
    results = {
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'accuracy': accuracy,
        'false_positive_rate': false_positive_rate,
        'false_negative_rate': false_negative_rate,
        'confusion_matrix': cm,
        'true_positives': tp,
        'false_positives': fp,
        'true_negatives': tn,
        'false_negatives': fn
    }

    # Print metrics
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"False Positive Rate: {false_positive_rate:.4f}")
    print(f"False Negative Rate: {false_negative_rate:.4f}")

    return results

In [None]:
def plot_confusion_matrix(cm, classes, title='Confusion Matrix', cmap=plt.cm.Blues):
    """
    Draw a confusion matrix as an annotated heat map and show it.

    Args:
        cm (np.array): Confusion matrix with integer counts
        classes (list): Class names used as tick labels on both axes
        title (str): Plot title
        cmap: Colormap passed to ``plt.imshow``
    """
    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    # Same tick positions on both axes, one per class
    positions = np.arange(len(classes))
    plt.xticks(positions, classes, rotation=45)
    plt.yticks(positions, classes)

    # Write each count into its cell; cells above half of the maximum get
    # white text, the others black
    half_max = cm.max() / 2.
    n_rows, n_cols = cm.shape[0], cm.shape[1]
    for row in range(n_rows):
        for col in range(n_cols):
            count = cm[row, col]
            text_color = "white" if count > half_max else "black"
            plt.text(col, row, format(count, 'd'),
                     horizontalalignment="center",
                     color=text_color)

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.show()

## 5. Evaluate with Different Operating Points

In [None]:
def evaluate_test_file(test_name, test_df, operating_point='balanced'):
    """
    Run the detector on one test file at a given operating point.

    The detector's labels are post-processed, scored against the 'attack'
    column, and plotted (ground truth, scores with threshold, predictions,
    confusion matrix).

    Args:
        test_name (str): Name of the test file
        test_df (pd.DataFrame): Test dataframe; must contain an 'attack' column
        operating_point (str): Operating point to use

    Returns:
        dict: Threshold, scalar metrics, anomaly scores, post-processed
        labels and ground truth for this run
    """
    print(f"\nEvaluating {test_name} with operating point: {operating_point}")

    # Every column except the timestamp and the label is a feature
    non_feature_cols = ('time', 'attack')
    feature_cols = [name for name in test_df.columns if name not in non_feature_cols]
    y_test = test_df['attack'].values
    X_test = test_df[feature_cols].values

    # Detect, clean up the raw labels, then score against the ground truth
    anomaly_scores, anomaly_labels, threshold = detector.predict(
        X_test, operating_point=operating_point
    )
    processed_labels = post_process_anomalies(
        anomaly_labels,
        min_anomaly_length=MIN_ANOMALY_LENGTH,
        gap_threshold=GAP_THRESHOLD,
    )
    eval_results = evaluate_model(y_test, processed_labels)

    # Suffix shared by the titles that mention the operating point
    run_label = f'{test_name} ({operating_point})'

    plt.figure(figsize=(20, 10))

    # Panel 1: ground truth
    plt.subplot(3, 1, 1)
    plt.plot(y_test, 'b-', label='Ground Truth')
    plt.title(f'Ground Truth - {test_name}')
    plt.ylabel('Anomaly')
    plt.yticks([0, 1])
    plt.grid(True)
    plt.legend()

    # Panel 2: anomaly scores with the applied threshold
    plt.subplot(3, 1, 2)
    plt.plot(anomaly_scores, 'r-', label='Anomaly Scores')
    plt.axhline(y=threshold, color='g', linestyle='--', label=f'Threshold ({threshold:.4f})')
    plt.title(f'Anomaly Scores - {run_label}')
    plt.ylabel('Score')
    plt.grid(True)
    plt.legend()

    # Panel 3: post-processed predictions
    plt.subplot(3, 1, 3)
    plt.plot(processed_labels, 'g-', label='Predictions')
    plt.title(f'Predictions - {run_label}')
    plt.xlabel('Time')
    plt.ylabel('Anomaly')
    plt.yticks([0, 1])
    plt.grid(True)
    plt.legend()

    plt.tight_layout()
    plt.show()

    plot_confusion_matrix(
        eval_results['confusion_matrix'],
        classes=['Normal', 'Anomaly'],
        title=f'Confusion Matrix - {run_label}',
    )

    # Assemble the summary: run identity, scalar metrics, then raw arrays
    metric_keys = (
        'precision',
        'recall',
        'f1_score',
        'accuracy',
        'false_positive_rate',
        'false_negative_rate',
    )
    summary = {
        'test_name': test_name,
        'operating_point': operating_point,
        'threshold': threshold,
    }
    summary.update({key: eval_results[key] for key in metric_keys})
    summary.update(
        anomaly_scores=anomaly_scores,
        anomaly_labels=processed_labels,
        ground_truth=y_test,
    )
    return summary

In [None]:
# Operating points compared in this section
operating_points = ['high_precision', 'balanced', 'high_recall', 'adaptive']

# The comparison is run on the first loaded test file only
test_name = list(test_data)[0]
test_df = test_data[test_name]

# One evaluation (metrics + plots) per operating point, keyed by its name
results = dict()
for point in operating_points:
    point_result = evaluate_test_file(test_name, test_df, operating_point=point)
    results[point] = point_result

In [None]:
# Tabulate the scalar metrics of every operating point side by side.
# Maps each table column to the key it is read from in the results dict.
column_sources = {
    'Threshold': 'threshold',
    'Precision': 'precision',
    'Recall': 'recall',
    'F1 Score': 'f1_score',
    'Accuracy': 'accuracy',
    'False Positive Rate': 'false_positive_rate',
    'False Negative Rate': 'false_negative_rate',
}

comparison = [
    {
        'Operating Point': point,
        **{column: results[point][key] for column, key in column_sources.items()},
    }
    for point in operating_points
]

comparison_df = pd.DataFrame(comparison)
comparison_df

In [None]:
# Bar charts of precision, recall and F1 score, one panel per metric
plt.figure(figsize=(15, 6))

metrics = ['Precision', 'Recall', 'F1 Score']
point_names = comparison_df['Operating Point']
for panel, metric in enumerate(metrics, start=1):
    scores = comparison_df[metric]

    plt.subplot(1, 3, panel)
    plt.bar(point_names, scores)
    plt.title(metric)
    plt.xticks(rotation=45)
    plt.ylim(0, 1)

    # Print each bar's value just above it
    for bar_index, score in enumerate(scores):
        plt.text(bar_index, score + 0.02, f"{score:.4f}", ha='center')

plt.tight_layout()
plt.show()

In [None]:
# Bar charts of the false positive and false negative rates
plt.figure(figsize=(12, 6))

error_metrics = ['False Positive Rate', 'False Negative Rate']
point_names = comparison_df['Operating Point']
for panel, metric in enumerate(error_metrics, start=1):
    rates = comparison_df[metric]

    plt.subplot(1, 2, panel)
    plt.bar(point_names, rates)
    plt.title(metric)
    plt.xticks(rotation=45)
    # 20% headroom above the tallest bar, but never a y-range below 0.1
    plt.ylim(0, max(rates.max() * 1.2, 0.1))

    # Print each bar's value just above it
    for bar_index, rate in enumerate(rates):
        plt.text(bar_index, rate + 0.005, f"{rate:.4f}", ha='center')

plt.tight_layout()
plt.show()

## 6. Evaluate All Test Files with Best Operating Point

In [None]:
# Pick the operating point with the highest F1 score in the comparison table
best_row = comparison_df['F1 Score'].idxmax()
best_point = comparison_df.loc[best_row, 'Operating Point']
print(f"Best operating point based on F1 score: {best_point}")

# Maps each summary column to the key it is read from in the result dict
summary_columns = {
    'Test File': 'test_name',
    'Operating Point': 'operating_point',
    'Threshold': 'threshold',
    'Precision': 'precision',
    'Recall': 'recall',
    'F1 Score': 'f1_score',
    'Accuracy': 'accuracy',
    'False Positive Rate': 'false_positive_rate',
    'False Negative Rate': 'false_negative_rate',
}

# Run every test file at the best operating point, keeping scalar metrics only
all_results = []
for test_name, test_df in test_data.items():
    result = evaluate_test_file(test_name, test_df, operating_point=best_point)
    all_results.append(
        {column: result[key] for column, key in summary_columns.items()}
    )

In [None]:
# Create DataFrame with results: one row per test file, built from the
# per-file metric dicts collected in all_results
results_df = pd.DataFrame(all_results)

# Display results (last expression of the cell, rendered by the notebook)
results_df

In [None]:
# Bar charts of precision, recall and F1 score for every test file
plt.figure(figsize=(15, 6))

metrics = ['Precision', 'Recall', 'F1 Score']
file_names = results_df['Test File']
for panel, metric in enumerate(metrics, start=1):
    scores = results_df[metric]

    plt.subplot(1, 3, panel)
    plt.bar(file_names, scores)
    plt.title(metric)
    plt.xticks(rotation=45)
    plt.ylim(0, 1)

    # Print each bar's value just above it
    for bar_index, score in enumerate(scores):
        plt.text(bar_index, score + 0.02, f"{score:.4f}", ha='center')

plt.tight_layout()
plt.show()

In [None]:
# Bar charts of the false positive and false negative rates per test file
plt.figure(figsize=(12, 6))

error_metrics = ['False Positive Rate', 'False Negative Rate']
file_names = results_df['Test File']
for panel, metric in enumerate(error_metrics, start=1):
    rates = results_df[metric]

    plt.subplot(1, 2, panel)
    plt.bar(file_names, rates)
    plt.title(metric)
    plt.xticks(rotation=45)
    # 20% headroom above the tallest bar, but never a y-range below 0.1
    plt.ylim(0, max(rates.max() * 1.2, 0.1))

    # Print each bar's value just above it
    for bar_index, rate in enumerate(rates):
        plt.text(bar_index, rate + 0.005, f"{rate:.4f}", ha='center')

plt.tight_layout()
plt.show()

In [None]:
# Mean of every scalar metric over all test files
averaged_columns = (
    'Precision',
    'Recall',
    'F1 Score',
    'Accuracy',
    'False Positive Rate',
    'False Negative Rate',
)
avg_metrics = {column: results_df[column].mean() for column in averaged_columns}

# Report the averages, one metric per line
print("Average Metrics Across All Test Files:")
for metric in avg_metrics:
    value = avg_metrics[metric]
    print(f"{metric}: {value:.4f}")

## 7. Implement Dynamic Threshold Adaptation

In [None]:
class DynamicThresholdDetector:
    """
    Dynamic threshold detector that adapts to data distribution changes.

    Wraps a base detector and replaces its fixed threshold with a rolling
    ``mean + 3 * std`` of the most recent per-sequence reconstruction errors.
    The error history is kept on the instance, so it carries over between
    successive ``predict`` calls.
    """
    def __init__(self, detector, window_size=1000, update_interval=100, min_history=None):
        """
        Initialize the detector.

        Args:
            detector: Base detector. ``predict`` reads its ``scaler``,
                ``ensemble_indices``, ``pca``, ``seq_length``, ``stride``,
                ``model`` and ``operating_points['balanced']`` and calls its
                ``post_process_anomalies`` method.
            window_size: Size of the sliding window for threshold adaptation
            update_interval: Interval (in sequences) for updating thresholds
            min_history: Minimum number of errors that must be in the window
                before the threshold is adapted. Defaults to
                ``update_interval``; always capped at ``window_size`` so the
                threshold can still adapt with a small window.
        """
        self.detector = detector
        self.window_size = window_size
        self.update_interval = update_interval
        if min_history is None:
            min_history = update_interval
        self.min_history = min(min_history, window_size)
        self.error_history = []
        self.threshold_history = []

    def _record_error(self, error):
        """Append one error to the history, keeping at most window_size entries."""
        self.error_history.append(error)
        overflow = len(self.error_history) - self.window_size
        if overflow > 0:
            del self.error_history[:overflow]

    def _adapted_threshold(self, current_threshold):
        """
        Return mean + 3*std of the error history, or ``current_threshold``
        unchanged while fewer than ``min_history`` errors have been seen.
        """
        # With too few samples the statistic is meaningless: a single sample
        # has std == 0, which would make the threshold equal to that very
        # error and discard the base detector's initial threshold.
        if len(self.error_history) < self.min_history:
            return current_threshold

        new_threshold = np.mean(self.error_history) + 3 * np.std(self.error_history)
        self.threshold_history.append(new_threshold)
        return new_threshold

    def predict(self, X, y=None):
        """
        Predict anomalies with dynamic threshold adaptation.

        Args:
            X: Input data (one row per sample)
            y: Ground truth labels (optional; currently not used)

        Returns:
            tuple: (anomaly_scores, anomaly_labels, thresholds) where
            anomaly_scores and thresholds hold one value per row of X and
            anomaly_labels are the post-processed binary labels
        """
        base = self.detector
        seq_length = base.seq_length
        stride = base.stride
        n_samples = len(X)

        # Scale features
        X_scaled = base.scaler.transform(X)

        # Select ensemble features
        X_ensemble = X_scaled[:, base.ensemble_indices]

        # Apply PCA
        X_pca = base.pca.transform(X_ensemble)

        # Create sequences
        # NOTE(review): create_sequences is not defined in the visible part of
        # this notebook; it must exist in the global namespace before predict
        # is called. The loop below assumes sequence i starts at row
        # i * stride and spans seq_length rows -- confirm against its source.
        X_seq = create_sequences(X_pca, seq_length, stride)

        # Get predictions
        X_pred = base.model.predict(X_seq)

        # Calculate reconstruction errors (one mean squared error per sequence)
        mse = np.mean(np.square(X_seq - X_pred), axis=(1, 2))

        # Per-sample accumulators
        anomaly_scores = np.zeros(n_samples)
        count = np.zeros(n_samples)
        thresholds = np.zeros(n_samples)

        # Start from the base detector's balanced threshold; it stays in
        # effect until enough history has been collected to adapt it.
        current_threshold = base.operating_points['balanced']

        for i, error in enumerate(mse):
            idx = i * stride
            end = idx + seq_length
            if end > n_samples:
                # Sequence would run past the data; nothing to attribute
                continue

            self._record_error(error)

            # Update threshold periodically
            if i % self.update_interval == 0:
                current_threshold = self._adapted_threshold(current_threshold)

            # Later (overlapping) sequences overwrite the threshold of the
            # rows they share with earlier ones
            thresholds[idx:end] = current_threshold

            # Accumulate the error over every row the sequence covers
            anomaly_scores[idx:end] += error
            count[idx:end] += 1

        # Average the accumulated errors; rows covered by no sequence stay 0
        anomaly_scores = np.divide(anomaly_scores, count, out=np.zeros_like(anomaly_scores), where=count!=0)

        # Apply threshold to get binary labels
        anomaly_labels = (anomaly_scores > thresholds).astype(int)

        # Apply post-processing (delegated to the base detector)
        processed_labels = base.post_process_anomalies(anomaly_labels)

        return anomaly_scores, processed_labels, thresholds

In [None]:
# Wrap the loaded detector: 1000-error window, threshold refresh every 100 sequences
dynamic_detector = DynamicThresholdDetector(detector, 1000, 100)

# Try it on the first loaded test file
test_name = list(test_data)[0]
test_df = test_data[test_name]

# Split the frame into the label column and the feature matrix
y_test = test_df['attack'].values
feature_cols = [name for name in test_df.columns if name not in ('time', 'attack')]
X_test = test_df[feature_cols].values

# Score the file, then compare the returned labels with the ground truth
dynamic_output = dynamic_detector.predict(X_test)
anomaly_scores, anomaly_labels, thresholds = dynamic_output
eval_results = evaluate_model(y_test, anomaly_labels)

In [None]:
# Four stacked panels: ground truth, scores, dynamic thresholds, predictions
plt.figure(figsize=(20, 12))

# (series, line format, legend label, title, y-axis label, binary y-axis?)
panels = [
    (y_test, 'b-', 'Ground Truth',
     f'Ground Truth - {test_name}', 'Anomaly', True),
    (anomaly_scores, 'r-', 'Anomaly Scores',
     f'Anomaly Scores - {test_name} (Dynamic Threshold)', 'Score', False),
    (thresholds, 'g-', 'Dynamic Thresholds',
     f'Dynamic Thresholds - {test_name}', 'Threshold', False),
    (anomaly_labels, 'g-', 'Predictions',
     f'Predictions - {test_name} (Dynamic Threshold)', 'Anomaly', True),
]

for row, (series, line_format, legend_label, panel_title, y_label, is_binary) in enumerate(panels, start=1):
    plt.subplot(4, 1, row)
    plt.plot(series, line_format, label=legend_label)
    plt.title(panel_title)
    # Only the bottom panel carries the shared x-axis label
    if row == len(panels):
        plt.xlabel('Time')
    plt.ylabel(y_label)
    # Binary series get ticks at 0 and 1 only
    if is_binary:
        plt.yticks([0, 1])
    plt.grid(True)
    plt.legend()

plt.tight_layout()
plt.show()

# Confusion matrix of the dynamic-threshold predictions
plot_confusion_matrix(
    eval_results['confusion_matrix'],
    classes=['Normal', 'Anomaly'],
    title=f'Confusion Matrix - {test_name} (Dynamic Threshold)',
)

In [None]:
# Compare dynamic threshold with static thresholds
# The dynamic detector has no single threshold, so the table reports the
# mean of its per-sample thresholds.
dynamic_row = pd.DataFrame([{
    'Operating Point': 'Dynamic',
    'Threshold': np.mean(thresholds),
    'Precision': eval_results['precision'],
    'Recall': eval_results['recall'],
    'F1 Score': eval_results['f1_score'],
    'Accuracy': eval_results['accuracy'],
    'False Positive Rate': eval_results['false_positive_rate'],
    'False Negative Rate': eval_results['false_negative_rate']
}])

# DataFrame.append was removed in pandas 2.0, so use pd.concat instead.
# Any existing 'Dynamic' row is dropped first so that re-running this cell
# replaces the row rather than duplicating it.
comparison_df = pd.concat(
    [comparison_df[comparison_df['Operating Point'] != 'Dynamic'], dynamic_row],
    ignore_index=True,
)

# Display updated comparison
comparison_df

In [None]:
# Bar charts of precision, recall and F1 score for the updated comparison table
plt.figure(figsize=(15, 6))

metrics = ['Precision', 'Recall', 'F1 Score']
point_names = comparison_df['Operating Point']
for panel, metric in enumerate(metrics, start=1):
    scores = comparison_df[metric]

    plt.subplot(1, 3, panel)
    plt.bar(point_names, scores)
    plt.title(metric)
    plt.xticks(rotation=45)
    plt.ylim(0, 1)

    # Print each bar's value just above it
    for bar_index, score in enumerate(scores):
        plt.text(bar_index, score + 0.02, f"{score:.4f}", ha='center')

plt.tight_layout()
plt.show()

## 8. Save Results

In [None]:
# Threshold comparison table -> CSV in the results directory
comparison_path = os.path.join(RESULTS_DIR, 'threshold_comparison.csv')
comparison_df.to_csv(comparison_path, index=False)
print(f"Saved threshold comparison to {comparison_path}")

# Per-file test metrics -> CSV in the results directory
test_results_path = os.path.join(RESULTS_DIR, 'test_results.csv')
results_df.to_csv(test_results_path, index=False)
print(f"Saved test results to {test_results_path}")

# Dynamic threshold detector -> pickle in the model directory
dynamic_detector_path = os.path.join(MODEL_DIR, 'dynamic_threshold_detector.pkl')
with open(dynamic_detector_path, 'wb') as detector_file:
    pickle.dump(dynamic_detector, detector_file)
print(f"Saved dynamic threshold detector to {dynamic_detector_path}")