In [19]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve, confusion_matrix
from sklearn.neural_network import MLPRegressor
from river.drift import ADWIN, PageHinkley, KSWIN
from river.tree import HoeffdingTreeClassifier, HoeffdingAdaptiveTreeClassifier
from river.ensemble import LeveragingBaggingClassifier, AdaBoostClassifier
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from xgboost import XGBClassifier
import copy
import os
from scipy.stats import bootstrap, ttest_rel
import torch
import torch.nn as nn
warnings.filterwarnings('ignore')

# Mock ARF/SRP if not available
ENSEMBLE_AVAILABLE = False
try:
    from river.ensemble import ARFClassifier
    EnsembleClassifier = ARFClassifier
    ENSEMBLE_NAME = 'ARF'
    ENSEMBLE_AVAILABLE = True
except ImportError:
    try:
        from river.ensemble import SRPClassifier
        EnsembleClassifier = SRPClassifier
        ENSEMBLE_NAME = 'SRP'
        ENSEMBLE_AVAILABLE = True
    except ImportError:
        print("Warning: Using RF as ensemble fallback.")
        class EnsembleClassifier(RandomForestClassifier): pass
        ENSEMBLE_NAME = 'RF_Fallback'
        ENSEMBLE_AVAILABLE = True

# Set seeds for reproducibility
np.random.seed(42)
torch.manual_seed(42)

# Custom DDM
class DDM:
    def __init__(self, min_num_instances=30, warning_level=2.0, drift_level=3.0):
        self.min_num_instances = min_num_instances
        self.warning_level = warning_level
        self.drift_level = drift_level
        self.mean = 0.0
        self.std = 0.0
        self.n = 0
        self.drift_detected = False
        self.mean_min = float('inf')
        self.std_min = float('inf')
    def add_element(self, error):
        self.n += 1
        if self.n == 1: self.mean, self.std = error, 0.0
        else:
            old_mean = self.mean
            self.mean += (error - old_mean) / self.n
            self.std = np.sqrt((self.std**2 * (self.n - 1) + (error - self.mean) * (error - old_mean)) / self.n)
        if self.n >= self.min_num_instances:
            if self.mean + self.std * self.warning_level > self.mean_min + self.drift_level * self.std_min:
                self.drift_detected = True
            else: self.drift_detected = False
            if not self.drift_detected:
                self.mean_min = min(self.mean_min, self.mean)
                self.std_min = min(self.std_min, self.std)
        return self.drift_detected

# Custom EDDM
class EDDM:
    def __init__(self, min_num_instances=30, warning_level=0.95, drift_level=0.9):
        self.min_num_instances = min_num_instances
        self.warning_level = warning_level
        self.drift_level = drift_level
        self.mean = 0.0
        self.std = 0.0
        self.n = 0
        self.last_error = 0
        self.distances = []
        self.drift_detected = False
        self.max_mean = 0.0
    def add_element(self, prediction, true_label):
        error = 1 if prediction != true_label else 0
        if self.n > 0 and error == 1:
            self.distances.append(self.n - self.last_error)
        if error == 1:
            self.last_error = self.n
        self.n += 1
        if len(self.distances) > 1:
            self.mean = np.mean(self.distances)
            self.std = np.std(self.distances)
        if self.n >= self.min_num_instances and len(self.distances) > 1:
            m = (self.mean + 2 * self.std) / self.max_mean if self.max_mean > 0 else float('inf')
            if m < self.drift_level:
                self.drift_detected = True
            else:
                self.drift_detected = False
            if not self.drift_detected:
                self.max_mean = max(self.max_mean, self.mean + 2 * self.std)
        return self.drift_detected

# Custom PCDM
class PCDM:
    def __init__(self, window_size=50, n_permutations=100, alpha=0.01):
        self.window_size = window_size
        self.n_permutations = n_permutations
        self.alpha = alpha
        self.reference_window = []
        self.current_window = []
        self.drift_detected = False
    def add_element(self, value):
        self.current_window.append(value)
        if len(self.current_window) > self.window_size:
            self.current_window.pop(0)
        if len(self.reference_window) < self.window_size:
            self.reference_window.append(value)
            return False
        if len(self.current_window) == self.window_size:
            stat, p_value = self._permutation_test()
            self.drift_detected = p_value < self.alpha
            if self.drift_detected:
                self.reference_window = self.current_window.copy()
        return self.drift_detected
    def _permutation_test(self):
        ref = np.array(self.reference_window)
        curr = np.array(self.current_window)
        observed_diff = np.abs(np.mean(ref) - np.mean(curr))
        combined = np.concatenate([ref, curr])
        perm_diffs = []
        for _ in range(self.n_permutations):
            np.random.shuffle(combined)
            perm_ref = combined[:self.window_size]
            perm_curr = combined[self.window_size:]
            perm_diffs.append(np.abs(np.mean(perm_ref) - np.mean(perm_curr)))
        p_value = np.sum(np.array(perm_diffs) >= observed_diff) / self.n_permutations
        return observed_diff, p_value

# Custom RBFSVMDriftDetector
class RBFSVMDriftDetector:
    def __init__(self, window_size=50, threshold=0.1):
        self.window_size = window_size
        self.threshold = threshold
        self.reference_window = []
        self.current_window = []
        self.svm = SVC(kernel='rbf', C=1.0, probability=True)
        self.drift_detected = False
    def add_element(self, x, y):
        self.current_window.append((x, y))
        if len(self.current_window) > self.window_size:
            self.current_window.pop(0)
        if len(self.reference_window) < self.window_size:
            self.reference_window.append((x, y))
            return False
        if len(self.current_window) == self.window_size:
            margin_density = self._compute_margin_density()
            self.drift_detected = margin_density > self.threshold
            if self.drift_detected:
                self.reference_window = self.current_window.copy()
        return self.drift_detected
    def _compute_margin_density(self):
        X_ref, y_ref = zip(*self.reference_window)
        X_curr, y_curr = zip(*self.current_window)
        X_ref, X_curr = np.array(X_ref), np.array(X_curr)
        y_ref, y_curr = np.array(y_ref), np.array(y_curr)
        self.svm.fit(X_ref, y_ref)
        decision_scores = self.svm.decision_function(X_curr)
        margin_density = np.mean(np.abs(decision_scores) < 1.0)
        return margin_density

# Custom HDDM_W
class HDDM_W:
    def __init__(self, window_size=50, delta=0.001):
        self.window_size = window_size
        self.delta = delta
        self.reference_window = []
        self.current_window = []
        self.drift_detected = False
    def _hellinger_distance(self, hist1, hist2):
        return np.sqrt(np.sum((np.sqrt(hist1) - np.sqrt(hist2))**2)) / np.sqrt(2)
    def add_element(self, value):
        self.current_window.append(value)
        if len(self.current_window) > self.window_size:
            self.current_window.pop(0)
        if len(self.reference_window) < self.window_size:
            self.reference_window.append(value)
            return False
        if len(self.current_window) == self.window_size:
            hist_ref, _ = np.histogram(self.reference_window, bins=10, density=True)
            hist_curr, _ = np.histogram(self.current_window, bins=10, density=True)
            distance = self._hellinger_distance(hist_ref, hist_curr)
            self.drift_detected = distance > self.delta
            if self.drift_detected:
                self.reference_window = self.current_window.copy()
        return self.drift_detected

# Custom HDDM_A
class HDDM_A:
    def __init__(self, window_size=50, delta=0.001):
        self.window_size = window_size
        self.delta = delta
        self.reference_window = []
        self.current_window = []
        self.drift_detected = False
    def _hellinger_distance(self, hist1, hist2):
        return np.sqrt(np.sum((np.sqrt(hist1) - np.sqrt(hist2))**2)) / np.sqrt(2)
    def add_element(self, value):
        self.current_window.append(value)
        if len(self.current_window) > self.window_size:
            self.current_window.pop(0)
        if len(self.reference_window) < self.window_size:
            self.reference_window.append(value)
            return False
        if len(self.current_window) == self.window_size:
            hist_ref, _ = np.histogram(self.reference_window, bins=10, density=True)
            hist_curr, _ = np.histogram(self.current_window, bins=10, density=True)
            distance = self._hellinger_distance(hist_ref, hist_curr)
            self.drift_detected = distance > self.delta
            if self.drift_detected:
                self.reference_window = self.current_window.copy()
        return self.drift_detected

# Improved Autoencoder Drift Detector
class AutoencoderDriftDetector:
    def __init__(self, threshold_multiplier=3.0, max_iter=500):
        self.threshold_multiplier = threshold_multiplier
        self.max_iter = max_iter
        self.autoencoder = MLPRegressor(hidden_layer_sizes=(10,), max_iter=max_iter, random_state=42)
        self.reference_mse = None
        self.feature_std = None
    def fit(self, X_ref):
        self.feature_std = np.std(X_ref, axis=0) + 1e-6
        self.autoencoder.fit(X_ref, X_ref)
        recon = self.autoencoder.predict(X_ref)
        mse = np.mean((X_ref - recon) ** 2, axis=1)
        self.reference_mse = np.mean(mse)
        self.threshold = self.threshold_multiplier * self.reference_mse
    def add_element(self, x, ref=False):
        if ref:
            return False
        recon = self.autoencoder.predict([x])
        mse = np.mean(((x - recon) ** 2) / (self.feature_std ** 2))
        drift_detected = mse > self.threshold
        return drift_detected

# Snapshot Ensemble
class SnapshotEnsemble:
    def __init__(self, base_model, n_snapshots=3):
        self.base_model = base_model
        self.n_snapshots = n_snapshots
        self.snapshots = []
    def fit(self, X, y):
        self.snapshots = []
        for _ in range(self.n_snapshots):
            model = copy.deepcopy(self.base_model)
            model.fit(X, y)
            self.snapshots.append(model)
    def predict(self, X):
        predictions = np.array([model.predict(X) for model in self.snapshots])
        return np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=0, arr=predictions)
    def predict_proba(self, X):
        probas = np.array([model.predict_proba(X) for model in self.snapshots])
        return np.mean(probas, axis=0)

# Online Knowledge Distillation
class OnlineKnowledgeDistillation:
    def __init__(self, teacher_model, student_model, alpha=0.5):
        self.teacher = teacher_model
        self.student = student_model
        self.alpha = alpha
    def learn_one(self, x, y):
        teacher_pred = self.teacher.predict_one(x)
        self.student.learn_one(x, y)
        return teacher_pred
    def predict_one(self, x):
        return self.student.predict_one(x)

# Keystroke/Mouse Generator using RNN
class FeatureGenerator(nn.Module):
    def __init__(self, input_size, hidden_size=64):
        super().__init__()
        self.rnn = nn.LSTM(input_size, hidden_size)
        self.linear = nn.Linear(hidden_size, input_size)
    def forward(self, x):
        out, _ = self.rnn(x)
        return self.linear(out)

# Enhanced simulate_attacks with taxonomy-based drifts
def simulate_attacks(data, data_type, attack_type, drift_type, n_attack=200, use_generator=False):
    attack_data = data.copy().sample(n=n_attack, random_state=42)
    if data_type == 'keystroke':
        features = ['dwell_time', 'flight_time', 'up_down_time', 'session_duration', 'rhythm']
    else:  # mouse
        features = ['speed', 'distance', 'delta_x', 'delta_y']
    
    if use_generator:
        input_size = len(features)
        generator = FeatureGenerator(input_size)
        optimizer = torch.optim.Adam(generator.parameters(), lr=0.001)
        criterion = nn.MSELoss()
        benign_tensor = torch.tensor(data[features].values[:n_attack], dtype=torch.float32).unsqueeze(1)
        for epoch in range(20):
            out = generator(benign_tensor)
            loss = criterion(out, benign_tensor)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        with torch.no_grad():
            perturbed = generator(benign_tensor).squeeze(1).numpy()
        attack_data[features] = perturbed + np.random.normal(0, 0.1, perturbed.shape)  # Increased noise
    else:
        # Taxonomy-based noise scales
        noise_scales = {'abrupt': 0.15, 'gradual': 0.05, 'recurring': 0.1, 'incremental': 0.07}
        base_scale = noise_scales.get(drift_type, 0.05)
        for feature in features:
            attack_data[feature] += np.random.normal(0, base_scale * attack_data[feature].std(), n_attack)
        
        if data_type == 'keystroke':
            if attack_type == 'session_hijacking' and drift_type == 'abrupt':
                attack_data['dwell_time'] *= 1.3 + np.random.uniform(-0.1, 0.1, n_attack)  # Jitter
                attack_data['flight_time'] *= 1.4 + np.random.uniform(-0.1, 0.1, n_attack)
            elif attack_type == 'mitm' and drift_type == 'recurring':
                mask = attack_data.index % 2 == 0
                attack_data.loc[mask, 'dwell_time'] *= 1.2 + np.random.uniform(-0.05, 0.05, sum(mask))
                attack_data.loc[~mask, 'dwell_time'] *= 0.8 + np.random.uniform(-0.05, 0.05, sum(~mask))
            elif attack_type == 'card_skimming' and drift_type == 'abrupt':
                attack_data['flight_time'] *= 1.5 + np.random.uniform(-0.1, 0.1, n_attack)
                attack_data['rhythm'] += np.random.uniform(-0.1, 0.1, n_attack)
            elif attack_type == 'phishing' and drift_type == 'gradual':
                attack_data['dwell_time'] += np.linspace(0, 0.2, n_attack) * attack_data['dwell_time'] + np.random.normal(0, 0.07, n_attack)
            elif attack_type == 'identity_theft' and drift_type == 'incremental':
                attack_data['dwell_time'] += np.linspace(0, 0.1, n_attack) * attack_data['dwell_time'] + np.random.normal(0, 0.05, n_attack)
        else:  # mouse
            if attack_type == 'session_hijacking' and drift_type == 'abrupt':
                attack_data['speed'] *= 1.3 + np.random.uniform(-0.1, 0.1, n_attack)
                attack_data['distance'] *= 1.4 + np.random.uniform(-0.1, 0.1, n_attack)
            elif attack_type == 'mitm' and drift_type == 'recurring':
                mask = attack_data.index % 2 == 0
                attack_data.loc[mask, 'speed'] *= 1.2 + np.random.uniform(-0.05, 0.05, sum(mask))
                attack_data.loc[~mask, 'speed'] *= 0.8 + np.random.uniform(-0.05, 0.05, sum(~mask))
            elif attack_type == 'card_skimming' and drift_type == 'abrupt':
                attack_data['speed'] *= 1.5 + np.random.uniform(-0.1, 0.1, n_attack)
                attack_data['distance'] += np.random.uniform(-0.1, 0.1, n_attack)
            elif attack_type == 'phishing' and drift_type == 'gradual':
                attack_data['speed'] += np.linspace(0, 0.2, n_attack) * attack_data['speed'] + np.random.normal(0, 0.07, n_attack)
            elif attack_type == 'identity_theft' and drift_type == 'incremental':
                attack_data['speed'] += np.linspace(0, 0.1, n_attack) * attack_data['speed'] + np.random.normal(0, 0.05, n_attack)
        attack_data['label'] = 1
    return attack_data

# Load and preprocess data
def load_data(data_file, data_type):
    try:
        df = pd.read_csv(data_file)
        print(f"Initial {data_type} Rows: {len(df)}")
        if data_type == 'keystroke':
            features = ['dwell_time', 'flight_time', 'up_down_time', 'session_duration', 'rhythm']
        else:  # mouse
            features = ['speed', 'distance']
            if 'delta' in df.columns:
                try:
                    df['delta_x'] = df['delta'].apply(lambda x: float(x.split(',')[0].strip('() ')) if isinstance(x, str) else x[0] if isinstance(x, (list, tuple)) else np.nan)
                    df['delta_y'] = df['delta'].apply(lambda x: float(x.split(',')[1].strip('() ')) if isinstance(x, str) else x[1] if isinstance(x, (list, tuple)) else np.nan)
                    features += ['delta_x', 'delta_y']
                except Exception as e:
                    print(f"Warning: Failed to parse 'delta' column in {data_type} dataset. Error: {e}")
                    df = df.drop(columns=['delta'])
        for feature in features:
            if feature not in df.columns:
                raise ValueError(f"Feature '{feature}' not found in {data_type} dataset")
        df = df.dropna(subset=features)
        print(f"{data_type} Rows after NaN drop: {len(df)}")
        df['label'] = 0  # Benign
        return df, features
    except Exception as e:
        print(f"Error loading {data_type} dataset: {e}. Using synthetic data.")
        return generate_synthetic_data(data_type)

# Synthetic data generator
def generate_synthetic_data(data_type, n_samples=1000):
    np.random.seed(42)
    if data_type == 'keystroke':
        df = pd.DataFrame({
            'dwell_time': np.random.normal(100, 20, n_samples),
            'flight_time': np.random.normal(150, 30, n_samples),
            'up_down_time': np.random.normal(120, 25, n_samples),
            'session_duration': np.random.normal(500, 100, n_samples),
            'rhythm': np.random.normal(0.8, 0.1, n_samples),
            'label': np.random.choice([0, 1], n_samples, p=[0.8, 0.2])  # 80% benign
        })
        features = ['dwell_time', 'flight_time', 'up_down_time', 'session_duration', 'rhythm']
    else:  # mouse
        times = np.cumsum(np.random.exponential(0.1, n_samples))
        x = np.cumsum(np.random.normal(0, 10, n_samples))
        y = np.cumsum(np.random.normal(0, 10, n_samples))
        delta_x = np.diff(x, prepend=x[0])
        delta_y = np.diff(y, prepend=y[0])
        dt = np.diff(times, prepend=times[0])
        speed = np.sqrt(delta_x**2 + delta_y**2) / (dt + 1e-6)
        distance = np.sqrt(delta_x**2 + delta_y**2)
        df = pd.DataFrame({
            'speed': speed,
            'distance': distance,
            'delta_x': delta_x,
            'delta_y': delta_y,
            'label': np.random.choice([0, 1], n_samples, p=[0.8, 0.2])
        })
        features = ['speed', 'distance', 'delta_x', 'delta_y']
    return df, features

# Combine benign and attack data
def create_combined_dataset(benign_df, data_type, attack_type, drift_type, n_attack=200, use_generator=False):
    attack_df = simulate_attacks(benign_df, data_type, attack_type, drift_type, n_attack, use_generator)
    combined_df = pd.concat([benign_df, attack_df], ignore_index=True)
    output_dir = '/Users/festusedward-n/Documents/Datasets'
    os.makedirs(output_dir, exist_ok=True)
    output_file = f'{output_dir}/{data_type}_{attack_type}_{drift_type}_data.csv'
    combined_df.to_csv(output_file, index=False)
    return combined_df

# Tune detector based on drift type
def get_tuned_detector(detector_class, drift_type):
    if drift_type in ['gradual', 'incremental', 'recurring']:
        if detector_class == ADWIN:
            return ADWIN(delta=0.0001)
        elif detector_class == PageHinkley:
            return PageHinkley(threshold=10)
        elif detector_class == KSWIN:
            return KSWIN(alpha=0.005)
        elif detector_class == PCDM:
            return PCDM(alpha=0.005)
        elif detector_class == RBFSVMDriftDetector:
            return RBFSVMDriftDetector(threshold=0.05)
        elif detector_class == HDDM_W:
            return HDDM_W(delta=0.0001)
        elif detector_class == HDDM_A:
            return HDDM_A(delta=0.0001)
        elif detector_class == DDM:
            return DDM(drift_level=2.0)
        elif detector_class == EDDM:
            return EDDM(drift_level=0.8)
    else:  # Abrupt
        if detector_class == ADWIN:
            return ADWIN(delta=0.001)
        return detector_class()

# Evaluate batch models
def evaluate_batch_model(name, model, X_scaled, y, features, window_size, step_size, true_drifts, drift_points, output_dir, drift_type, adaptive=True):
    window_results = []
    adaptation_latencies = []
    window_accs = []
    base_model = copy.deepcopy(model)
    autoencoder = AutoencoderDriftDetector(threshold_multiplier=3.0, max_iter=500)
    is_first_window = True

    detector_classes = [ADWIN, PageHinkley, KSWIN, DDM, EDDM, PCDM, RBFSVMDriftDetector, HDDM_W, HDDM_A]
    detectors = {cls.__name__: get_tuned_detector(cls, drift_type) for cls in detector_classes}
    detectors['AutoencoderDriftDetector'] = autoencoder

    for start in range(0, len(X_scaled) - window_size + 1, step_size):
        end = start + window_size
        X_window = X_scaled[start:end]
        y_window = y[start:end]
        if len(np.unique(y_window)) < 2:
            continue
        try:
            X_train, X_test, y_train, y_test = train_test_split(X_window, y_window, test_size=0.2, random_state=42, stratify=y_window)
            if len(np.unique(y_train)) < 2 or len(np.unique(y_test)) < 2:
                continue
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            cm = confusion_matrix(y_test, y_pred)
            tn, fp, fn, tp = cm.ravel() if cm.shape == (2, 2) else (0, 0, 0, 0)
            tpr = tp / (tp + fn) if (tp + fn) > 0 else 0
            fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
            tar = recall_score(y_test, y_pred, average='macro', zero_division=0) if len(np.unique(y_test)) > 1 else 0
            frr = 1 - tar
            window_metrics = {
                'Model': name,
                'Window': start,
                'Accuracy': accuracy_score(y_test, y_pred),
                'Precision': precision_score(y_test, y_pred, average='weighted', zero_division=0),
                'Recall': recall_score(y_test, y_pred, average='weighted', zero_division=0),
                'F1-Score': f1_score(y_test, y_pred, average='weighted', zero_division=0),
                'TPR': tpr,
                'FPR': fpr,
                'TAR': tar,
                'FRR': frr
            }
            if hasattr(model, 'predict_proba'):
                y_proba = model.predict_proba(X_test)[:, 1]
                window_metrics['ROC-AUC'] = roc_auc_score(y_test, y_proba) if len(np.unique(y_test)) > 1 else np.nan
            window_results.append(window_metrics)
            window_accs.append(window_metrics['Accuracy'])

            if is_first_window:
                autoencoder.fit(X_train)
                is_first_window = False

            drift_detected = False
            for i in range(len(X_window)):
                x = X_window[i]
                y_true = y_window[i]
                pred = model.predict([x])[0]
                error = 1.0 if pred != y_true else 0.0
                global_idx = start + i
                for det_name, det in detectors.items():
                    if det_name == 'DDM':
                        update = det.add_element(error)
                    elif det_name == 'EDDM':
                        update = det.add_element(pred, y_true)
                    elif det_name == 'PCDM':
                        update = det.add_element(x[0])
                    elif det_name == 'RBFSVMDriftDetector':
                        update = det.add_element(x, y_true)
                    else:
                        update = det.update(x[0])
                    if update:
                        key = f'{det_name.lower()}_{features[0]}'
                        if key in drift_points[name]:
                            drift_points[name][key].append(global_idx)
                        drift_detected = True
                        adaptation_latencies.append(global_idx)

            if adaptive and drift_detected:
                model = copy.deepcopy(base_model)
                model.fit(X_train, y_train)
                y_pred_post = model.predict(X_test)
                window_metrics['Post-Accuracy'] = accuracy_score(y_test, y_pred_post)

            window_results[-1] = window_metrics
        except Exception as e:
            print(f"Error evaluating {name} at window {start}: {e}")
    return window_results, adaptation_latencies, window_accs

# Evaluate streaming models
def evaluate_streaming_model(name, model, X_scaled, y, features, window_size, step_size, true_drifts, drift_points, output_dir, drift_type, adaptive=True):
    window_results = []
    adaptation_latencies = []
    window_accs = []
    base_model = copy.deepcopy(model)
    autoencoder = AutoencoderDriftDetector(threshold_multiplier=3.0, max_iter=500)
    is_first_window = True

    detector_classes = [ADWIN, PageHinkley, KSWIN, DDM, EDDM, PCDM, RBFSVMDriftDetector, HDDM_W, HDDM_A]
    detectors = {cls.__name__: get_tuned_detector(cls, drift_type) for cls in detector_classes}
    detectors['AutoencoderDriftDetector'] = autoencoder

    for start in range(0, len(X_scaled) - window_size + 1, step_size):
        end = start + window_size
        X_window = X_scaled[start:end]
        y_window = y[start:end]
        if len(np.unique(y_window)) < 2:
            continue
        try:
            y_pred = []
            for i in range(1, len(X_window)):
                sample = dict(zip(features, X_window[i]))
                pred = model.predict_one(sample) or 0
                y_pred.append(pred)
                model.learn_one(sample, y_window[i])
            y_true = y_window[1:]
            if len(y_pred) == 0:
                continue
            cm = confusion_matrix(y_true, y_pred)
            tn, fp, fn, tp = cm.ravel() if cm.shape == (2, 2) else (0, 0, 0, 0)
            tpr = tp / (tp + fn) if (tp + fn) > 0 else 0
            fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
            tar = recall_score(y_true, y_pred, average='macro', zero_division=0) if len(np.unique(y_true)) > 1 else 0
            frr = 1 - tar
            window_metrics = {
                'Model': name,
                'Window': start,
                'Accuracy': accuracy_score(y_true, y_pred),
                'Precision': precision_score(y_true, y_pred, average='weighted', zero_division=0),
                'Recall': recall_score(y_true, y_pred, average='weighted', zero_division=0),
                'F1-Score': f1_score(y_true, y_pred, average='weighted', zero_division=0),
                'TPR': tpr,
                'FPR': fpr,
                'TAR': tar,
                'FRR': frr
            }
            window_results.append(window_metrics)
            window_accs.append(window_metrics['Accuracy'])

            if is_first_window:
                autoencoder.fit(X_window[:int(0.8 * len(X_window))])
                is_first_window = False

            drift_detected = False
            for i in range(len(X_window)):
                x = X_window[i]
                y_true = y_window[i]
                pred = model.predict_one(dict(zip(features, x))) or 0
                error = 1.0 if pred != y_true else 0.0
                global_idx = start + i
                for det_name, det in detectors.items():
                    if det_name == 'DDM':
                        update = det.add_element(error)
                    elif det_name == 'EDDM':
                        update = det.add_element(pred, y_true)
                    elif det_name == 'PCDM':
                        update = det.add_element(x[0])
                    elif det_name == 'RBFSVMDriftDetector':
                        update = det.add_element(x, y_true)
                    else:
                        update = det.update(x[0])
                    if update:
                        key = f'{det_name.lower()}_{features[0]}'
                        if key in drift_points[name]:
                            drift_points[name][key].append(global_idx)
                        drift_detected = True
                        adaptation_latencies.append(global_idx)

            if adaptive and drift_detected:
                model = copy.deepcopy(base_model)
                for j in range(len(X_window)):
                    sample = dict(zip(features, X_window[j]))
                    model.learn_one(sample, y_window[j])
                y_pred_post = [model.predict_one(dict(zip(features, x))) or 0 for x in X_window[1:]]
                window_metrics['Post-Accuracy'] = accuracy_score(y_true, y_pred_post)

            window_results[-1] = window_metrics
        except Exception as e:
            print(f"Error evaluating {name} at window {start}: {e}")
    return window_results, adaptation_latencies, window_accs

# Main evaluation function with DDA and AL
def evaluate_models(data_df, data_type, features, attack_type, drift_type, window_size=100, step_size=10):
    try:
        X = data_df[features].values
        y = data_df['label'].values
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)
        models = {
            'Random Forest': RandomForestClassifier(n_estimators=50, max_depth=3, min_samples_split=10, random_state=42),  # Reduced complexity
            'SVM': SVC(kernel='rbf', C=0.5, probability=True, random_state=42),
            'KNN': KNeighborsClassifier(n_neighbors=5),
            'XGBoost': XGBClassifier(n_estimators=50, max_depth=2, learning_rate=0.01, reg_lambda=1.0, random_state=42),
            'Voting': VotingClassifier(estimators=[
                ('rf', RandomForestClassifier(n_estimators=50, max_depth=3, min_samples_split=10, random_state=42)),
                ('svm', SVC(kernel='rbf', C=0.5, probability=True, random_state=42)),
                ('knn', KNeighborsClassifier(n_neighbors=5))
            ], voting='soft'),
            'Snapshot RF': SnapshotEnsemble(RandomForestClassifier(n_estimators=50, max_depth=3, min_samples_split=10, random_state=42), n_snapshots=2),
            'Hoeffding Tree': HoeffdingTreeClassifier(grace_period=50, delta=0.01),  # Corrected parameter
            'HAT': HoeffdingAdaptiveTreeClassifier(grace_period=50, delta=0.01),    # Corrected parameter
            'Leveraging Bagging': LeveragingBaggingClassifier(model=HoeffdingTreeClassifier(grace_period=50, delta=0.01), n_models=5),
            'Online Boosting': AdaBoostClassifier(model=HoeffdingTreeClassifier(grace_period=50, delta=0.01), n_models=5),
            'OKD': OnlineKnowledgeDistillation(
                teacher_model=HoeffdingAdaptiveTreeClassifier(grace_period=50, delta=0.01),
                student_model=HoeffdingTreeClassifier(grace_period=50, delta=0.01),
                alpha=0.5
            )
        }
        if ENSEMBLE_AVAILABLE:
            if ENSEMBLE_NAME == 'ARF':
                models[ENSEMBLE_NAME] = EnsembleClassifier(n_estimators=5)
            else:  # SRP
                models[ENSEMBLE_NAME] = EnsembleClassifier(n_models=5)
        
        results = []
        detectors = ['adwin', 'page_hinkley', 'ddm', 'eddm', 'kswin', 'pcdm', 'rbf_svm', 'hddm_w', 'hddm_a', 'autoencoder']
        drift_points = {name: {f'{detector}_{feature}': [] for detector in detectors for feature in features} for name in models}
        true_drifts = [len(data_df[data_df['label'] == 0]) + 50]  # Drift near attack start
        adaptation_latencies = {name: [] for name in models}
        output_dir = f'/Users/festusedward-n/Documents/Datasets/{data_type}_{attack_type}_{drift_type}'
        os.makedirs(output_dir, exist_ok=True)
        
        streaming_models = ['Hoeffding Tree', 'HAT', ENSEMBLE_NAME, 'Leveraging Bagging', 'Online Boosting', 'OKD']
        
        for name, model in models.items():
            print(f"Training and evaluating {name}...")
            try:
                eval_func = evaluate_streaming_model if name in streaming_models else evaluate_batch_model
                
                # Adaptive run with consensus-based drift
                adaptive_results, adaptive_latencies, adaptive_accs = eval_func(name, model, X_scaled, y, features, window_size, step_size, true_drifts, drift_points, output_dir, drift_type, adaptive=True)
                adaptation_latencies[name] = adaptive_latencies
                
                # Static ablation run
                static_results, _, static_accs = eval_func(name, model, X_scaled, y, features, window_size, step_size, true_drifts, drift_points, output_dir, drift_type, adaptive=False)
                
                # Aggregate adaptive results
                aggregated = {
                    'Model': name,
                    'Accuracy_Mean': np.mean([r['Accuracy'] for r in adaptive_results]),
                    'Accuracy_CI_Low': bootstrap_ci([r['Accuracy'] for r in adaptive_results])[0],
                    'Accuracy_CI_High': bootstrap_ci([r['Accuracy'] for r in adaptive_results])[1],
                    'TPR_Mean': np.mean([r['TPR'] for r in adaptive_results]),
                    'TPR_CI_Low': bootstrap_ci([r['TPR'] for r in adaptive_results])[0],
                    'TPR_CI_High': bootstrap_ci([r['TPR'] for r in adaptive_results])[1],
                    'FRR_Mean': np.mean([r['FRR'] for r in adaptive_results]),
                    'FRR_CI_Low': bootstrap_ci([r['FRR'] for r in adaptive_results])[0],
                    'FRR_CI_High': bootstrap_ci([r['FRR'] for r in adaptive_results])[1],
                    'Precision_Mean': np.mean([r['Precision'] for r in adaptive_results]),
                    'Recall_Mean': np.mean([r['Recall'] for r in adaptive_results]),
                    'F1_Mean': np.mean([r['F1-Score'] for r in adaptive_results])
                }
                
                # t-test vs static
                if len(adaptive_accs) == len(static_accs) and adaptive_accs and static_accs:
                    t_stat, p_val = ttest_rel(adaptive_accs, static_accs)
                    aggregated['Delta_Acc_vs_Static'] = np.mean(adaptive_accs) - np.mean(static_accs)
                    aggregated['p_value_vs_Static'] = p_val
                else:
                    aggregated['Delta_Acc_vs_Static'] = np.nan
                    aggregated['p_value_vs_Static'] = np.nan
                
                # Compute DDA and AL
                total_detections = sum(len(v) for v in drift_points[name].values())
                correct_detections = sum(len([d for d in detected if any(abs(d - t) < 50 for t in true_drifts)]) for detected in drift_points[name].values())
                aggregated['DDA_Mean'] = correct_detections / max(1, total_detections) if total_detections else 0.0
                aggregated['AL_Mean'] = np.mean(adaptation_latencies[name]) if adaptation_latencies[name] else float('inf')
                
                if 'Post-Accuracy' in adaptive_results[0]:
                    aggregated['Post_Accuracy_Mean'] = np.mean([r['Post-Accuracy'] for r in adaptive_results])
                
                if name in ['Random Forest', 'SVM', 'KNN', 'XGBoost', 'Voting', 'Snapshot RF']:
                    roc_vals = [r['ROC-AUC'] for r in adaptive_results if 'ROC-AUC' in r and not np.isnan(r['ROC-AUC'])]
                    aggregated['ROC_AUC_Mean'] = np.mean(roc_vals)
                
                results.append(aggregated)
            except Exception as e:
                print(f"Failed to evaluate {name}: {e}")
        
        results_df = pd.DataFrame(results)
        output_file = f'{output_dir}/results.csv'
        results_df.to_csv(output_file, index=False)
        
        print(f"\nResults for {data_type}_{attack_type}_{drift_type}:")
        print(results_df[['Model', 'Accuracy_Mean', 'TPR_Mean', 'FRR_Mean', 'DDA_Mean', 'AL_Mean', 'Delta_Acc_vs_Static', 'p_value_vs_Static']])
        print("Drift Points summary:", {k: len(v) for k, v in drift_points.items() if v})
        
        # Plotting (simplified)
        plt.figure(figsize=(12, 6))
        plt.savefig(f'{output_dir}/f1.png', dpi=300, bbox_inches='tight')
        plt.close()
        
        return results_df, drift_points
    except Exception as e:
        print(f"Error evaluating {data_type} dataset: {e}")
        return None, None

# Load and preprocess data
def load_data(data_file, data_type):
    try:
        df = pd.read_csv(data_file)
        print(f"Initial {data_type} Rows: {len(df)}")
        if data_type == 'keystroke':
            features = ['dwell_time', 'flight_time', 'up_down_time', 'session_duration', 'rhythm']
        else:  # mouse
            features = ['speed', 'distance']
            if 'delta' in df.columns:
                try:
                    df['delta_x'] = df['delta'].apply(lambda x: float(x.split(',')[0].strip('() ')) if isinstance(x, str) else x[0] if isinstance(x, (list, tuple)) else np.nan)
                    df['delta_y'] = df['delta'].apply(lambda x: float(x.split(',')[1].strip('() ')) if isinstance(x, str) else x[1] if isinstance(x, (list, tuple)) else np.nan)
                    features += ['delta_x', 'delta_y']
                except Exception as e:
                    print(f"Warning: Failed to parse 'delta' column in {data_type} dataset. Error: {e}")
                    df = df.drop(columns=['delta'])
        for feature in features:
            if feature not in df.columns:
                raise ValueError(f"Feature '{feature}' not found in {data_type} dataset")
        df = df.dropna(subset=features)
        print(f"{data_type} Rows after NaN drop: {len(df)}")
        df['label'] = 0  # Benign
        return df, features
    except Exception as e:
        print(f"Error loading {data_type} dataset: {e}. Using synthetic data.")
        return generate_synthetic_data(data_type)

# Synthetic data generator
def generate_synthetic_data(data_type, n_samples=1000):
    np.random.seed(42)
    if data_type == 'keystroke':
        df = pd.DataFrame({
            'dwell_time': np.random.normal(100, 20, n_samples),
            'flight_time': np.random.normal(150, 30, n_samples),
            'up_down_time': np.random.normal(120, 25, n_samples),
            'session_duration': np.random.normal(500, 100, n_samples),
            'rhythm': np.random.normal(0.8, 0.1, n_samples),
            'label': np.random.choice([0, 1], n_samples, p=[0.8, 0.2])  # 80% benign
        })
        features = ['dwell_time', 'flight_time', 'up_down_time', 'session_duration', 'rhythm']
    else:  # mouse
        times = np.cumsum(np.random.exponential(0.1, n_samples))
        x = np.cumsum(np.random.normal(0, 10, n_samples))
        y = np.cumsum(np.random.normal(0, 10, n_samples))
        delta_x = np.diff(x, prepend=x[0])
        delta_y = np.diff(y, prepend=y[0])
        dt = np.diff(times, prepend=times[0])
        speed = np.sqrt(delta_x**2 + delta_y**2) / (dt + 1e-6)
        distance = np.sqrt(delta_x**2 + delta_y**2)
        df = pd.DataFrame({
            'speed': speed,
            'distance': distance,
            'delta_x': delta_x,
            'delta_y': delta_y,
            'label': np.random.choice([0, 1], n_samples, p=[0.8, 0.2])
        })
        features = ['speed', 'distance', 'delta_x', 'delta_y']
    return df, features

# Combine benign and attack data
def create_combined_dataset(benign_df, data_type, attack_type, drift_type, n_attack=200, use_generator=False):
    attack_df = simulate_attacks(benign_df, data_type, attack_type, drift_type, n_attack, use_generator)
    combined_df = pd.concat([benign_df, attack_df], ignore_index=True)
    output_dir = '/Users/festusedward-n/Documents/Datasets'
    os.makedirs(output_dir, exist_ok=True)
    output_file = f'{output_dir}/{data_type}_{attack_type}_{drift_type}_data.csv'
    combined_df.to_csv(output_file, index=False)
    return combined_df

# Tune detector based on drift type
def get_tuned_detector(detector_class, drift_type):
    if drift_type in ['gradual', 'incremental', 'recurring']:
        if detector_class == ADWIN:
            return ADWIN(delta=0.0001)
        elif detector_class == PageHinkley:
            return PageHinkley(threshold=10)
        elif detector_class == KSWIN:
            return KSWIN(alpha=0.005)
        elif detector_class == PCDM:
            return PCDM(alpha=0.005)
        elif detector_class == RBFSVMDriftDetector:
            return RBFSVMDriftDetector(threshold=0.05)
        elif detector_class == HDDM_W:
            return HDDM_W(delta=0.0001)
        elif detector_class == HDDM_A:
            return HDDM_A(delta=0.0001)
        elif detector_class == DDM:
            return DDM(drift_level=2.0)
        elif detector_class == EDDM:
            return EDDM(drift_level=0.8)
    else:  # Abrupt
        if detector_class == ADWIN:
            return ADWIN(delta=0.001)
        return detector_class()

# Evaluate batch models
def evaluate_batch_model(name, model, X_scaled, y, features, window_size, step_size, true_drifts, drift_points, output_dir, drift_type, adaptive=True):
    window_results = []
    adaptation_latencies = []
    window_accs = []
    base_model = copy.deepcopy(model)
    autoencoder = AutoencoderDriftDetector(threshold_multiplier=3.0, max_iter=500)
    is_first_window = True

    detector_classes = [ADWIN, PageHinkley, KSWIN, DDM, EDDM, PCDM, RBFSVMDriftDetector, HDDM_W, HDDM_A]
    detectors = {cls.__name__: get_tuned_detector(cls, drift_type) for cls in detector_classes}
    detectors['AutoencoderDriftDetector'] = autoencoder

    for start in range(0, len(X_scaled) - window_size + 1, step_size):
        end = start + window_size
        X_window = X_scaled[start:end]
        y_window = y[start:end]
        if len(np.unique(y_window)) < 2:
            continue
        try:
            X_train, X_test, y_train, y_test = train_test_split(X_window, y_window, test_size=0.2, random_state=42, stratify=y_window)
            if len(np.unique(y_train)) < 2 or len(np.unique(y_test)) < 2:
                continue
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            cm = confusion_matrix(y_test, y_pred)
            tn, fp, fn, tp = cm.ravel() if cm.shape == (2, 2) else (0, 0, 0, 0)
            tpr = tp / (tp + fn) if (tp + fn) > 0 else 0
            fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
            tar = recall_score(y_test, y_pred, average='macro', zero_division=0) if len(np.unique(y_test)) > 1 else 0
            frr = 1 - tar
            window_metrics = {
                'Model': name,
                'Window': start,
                'Accuracy': accuracy_score(y_test, y_pred),
                'Precision': precision_score(y_test, y_pred, average='weighted', zero_division=0),
                'Recall': recall_score(y_test, y_pred, average='weighted', zero_division=0),
                'F1-Score': f1_score(y_test, y_pred, average='weighted', zero_division=0),
                'TPR': tpr,
                'FPR': fpr,
                'TAR': tar,
                'FRR': frr
            }
            if hasattr(model, 'predict_proba'):
                y_proba = model.predict_proba(X_test)[:, 1]
                window_metrics['ROC-AUC'] = roc_auc_score(y_test, y_proba) if len(np.unique(y_test)) > 1 else np.nan
            window_results.append(window_metrics)
            window_accs.append(window_metrics['Accuracy'])

            if is_first_window:
                autoencoder.fit(X_train)
                is_first_window = False

            drift_detected = False
            for i in range(len(X_window)):
                x = X_window[i]
                y_true = y_window[i]
                pred = model.predict([x])[0]
                error = 1.0 if pred != y_true else 0.0
                global_idx = start + i
                for det_name, det in detectors.items():
                    if det_name == 'DDM':
                        update = det.add_element(error)
                    elif det_name == 'EDDM':
                        update = det.add_element(pred, y_true)
                    elif det_name == 'PCDM':
                        update = det.add_element(x[0])
                    elif det_name == 'RBFSVMDriftDetector':
                        update = det.add_element(x, y_true)
                    else:
                        update = det.update(x[0])
                    if update:
                        key = f'{det_name.lower()}_{features[0]}'
                        if key in drift_points[name]:
                            drift_points[name][key].append(global_idx)
                        drift_detected = True
                        adaptation_latencies.append(global_idx)

            if adaptive and drift_detected:
                model = copy.deepcopy(base_model)
                model.fit(X_train, y_train)
                y_pred_post = model.predict(X_test)
                window_metrics['Post-Accuracy'] = accuracy_score(y_test, y_pred_post)

            window_results[-1] = window_metrics
        except Exception as e:
            print(f"Error evaluating {name} at window {start}: {e}")
    return window_results, adaptation_latencies, window_accs

# Evaluate streaming models
def evaluate_streaming_model(name, model, X_scaled, y, features, window_size, step_size, true_drifts, drift_points, output_dir, drift_type, adaptive=True):
    window_results = []
    adaptation_latencies = []
    window_accs = []
    base_model = copy.deepcopy(model)
    autoencoder = AutoencoderDriftDetector(threshold_multiplier=3.0, max_iter=500)
    is_first_window = True

    detector_classes = [ADWIN, PageHinkley, KSWIN, DDM, EDDM, PCDM, RBFSVMDriftDetector, HDDM_W, HDDM_A]
    detectors = {cls.__name__: get_tuned_detector(cls, drift_type) for cls in detector_classes}
    detectors['AutoencoderDriftDetector'] = autoencoder

    for start in range(0, len(X_scaled) - window_size + 1, step_size):
        end = start + window_size
        X_window = X_scaled[start:end]
        y_window = y[start:end]
        if len(np.unique(y_window)) < 2:
            continue
        try:
            y_pred = []
            for i in range(1, len(X_window)):
                sample = dict(zip(features, X_window[i]))
                pred = model.predict_one(sample) or 0
                y_pred.append(pred)
                model.learn_one(sample, y_window[i])
            y_true = y_window[1:]
            if len(y_pred) == 0:
                continue
            cm = confusion_matrix(y_true, y_pred)
            tn, fp, fn, tp = cm.ravel() if cm.shape == (2, 2) else (0, 0, 0, 0)
            tpr = tp / (tp + fn) if (tp + fn) > 0 else 0
            fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
            tar = recall_score(y_true, y_pred, average='macro', zero_division=0) if len(np.unique(y_true)) > 1 else 0
            frr = 1 - tar
            window_metrics = {
                'Model': name,
                'Window': start,
                'Accuracy': accuracy_score(y_true, y_pred),
                'Precision': precision_score(y_true, y_pred, average='weighted', zero_division=0),
                'Recall': recall_score(y_true, y_pred, average='weighted', zero_division=0),
                'F1-Score': f1_score(y_true, y_pred, average='weighted', zero_division=0),
                'TPR': tpr,
                'FPR': fpr,
                'TAR': tar,
                'FRR': frr
            }
            window_results.append(window_metrics)
            window_accs.append(window_metrics['Accuracy'])

            if is_first_window:
                autoencoder.fit(X_window[:int(0.8 * len(X_window))])
                is_first_window = False

            drift_detected = False
            for i in range(len(X_window)):
                x = X_window[i]
                y_true = y_window[i]
                pred = model.predict_one(dict(zip(features, x))) or 0
                error = 1.0 if pred != y_true else 0.0
                global_idx = start + i
                for det_name, det in detectors.items():
                    if det_name == 'DDM':
                        update = det.add_element(error)
                    elif det_name == 'EDDM':
                        update = det.add_element(pred, y_true)
                    elif det_name == 'PCDM':
                        update = det.add_element(x[0])
                    elif det_name == 'RBFSVMDriftDetector':
                        update = det.add_element(x, y_true)
                    else:
                        update = det.update(x[0])
                    if update:
                        key = f'{det_name.lower()}_{features[0]}'
                        if key in drift_points[name]:
                            drift_points[name][key].append(global_idx)
                        drift_detected = True
                        adaptation_latencies.append(global_idx)

            if adaptive and drift_detected:
                model = copy.deepcopy(base_model)
                for j in range(len(X_window)):
                    sample = dict(zip(features, X_window[j]))
                    model.learn_one(sample, y_window[j])
                y_pred_post = [model.predict_one(dict(zip(features, x))) or 0 for x in X_window[1:]]
                window_metrics['Post-Accuracy'] = accuracy_score(y_true, y_pred_post)

            window_results[-1] = window_metrics
        except Exception as e:
            print(f"Error evaluating {name} at window {start}: {e}")
    return window_results, adaptation_latencies, window_accs

# Main evaluation function with DDA and AL
def evaluate_models(data_df, data_type, features, attack_type, drift_type, window_size=100, step_size=10):
    try:
        X = data_df[features].values
        y = data_df['label'].values
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)
        models = {
            'Random Forest': RandomForestClassifier(n_estimators=50, max_depth=3, min_samples_split=10, random_state=42),  # Reduced complexity
            'SVM': SVC(kernel='rbf', C=0.5, probability=True, random_state=42),
            'KNN': KNeighborsClassifier(n_neighbors=5),
            'XGBoost': XGBClassifier(n_estimators=50, max_depth=2, learning_rate=0.01, reg_lambda=1.0, random_state=42),
            'Voting': VotingClassifier(estimators=[
                ('rf', RandomForestClassifier(n_estimators=50, max_depth=3, min_samples_split=10, random_state=42)),
                ('svm', SVC(kernel='rbf', C=0.5, probability=True, random_state=42)),
                ('knn', KNeighborsClassifier(n_neighbors=5))
            ], voting='soft'),
            'Snapshot RF': SnapshotEnsemble(RandomForestClassifier(n_estimators=50, max_depth=3, min_samples_split=10, random_state=42), n_snapshots=2),
            'Hoeffding Tree': HoeffdingTreeClassifier(grace_period=50, delta=0.01),  # Corrected parameter
            'HAT': HoeffdingAdaptiveTreeClassifier(grace_period=50, delta=0.01),    # Corrected parameter
            'Leveraging Bagging': LeveragingBaggingClassifier(model=HoeffdingTreeClassifier(grace_period=50, delta=0.01), n_models=5),
            'Online Boosting': AdaBoostClassifier(model=HoeffdingTreeClassifier(grace_period=50, delta=0.01), n_models=5),
            'OKD': OnlineKnowledgeDistillation(
                teacher_model=HoeffdingAdaptiveTreeClassifier(grace_period=50, delta=0.01),
                student_model=HoeffdingTreeClassifier(grace_period=50, delta=0.01),
                alpha=0.5
            )
        }
        if ENSEMBLE_AVAILABLE:
            if ENSEMBLE_NAME == 'ARF':
                models[ENSEMBLE_NAME] = EnsembleClassifier(n_estimators=5)
            else:  # SRP
                models[ENSEMBLE_NAME] = EnsembleClassifier(n_models=5)
        
        results = []
        detectors = ['adwin', 'page_hinkley', 'ddm', 'eddm', 'kswin', 'pcdm', 'rbf_svm', 'hddm_w', 'hddm_a', 'autoencoder']
        drift_points = {name: {f'{detector}_{feature}': [] for detector in detectors for feature in features} for name in models}
        true_drifts = [len(data_df[data_df['label'] == 0]) + 50]  # Drift near attack start
        adaptation_latencies = {name: [] for name in models}
        output_dir = f'/Users/festusedward-n/Documents/Datasets/{data_type}_{attack_type}_{drift_type}'
        os.makedirs(output_dir, exist_ok=True)
        
        streaming_models = ['Hoeffding Tree', 'HAT', ENSEMBLE_NAME, 'Leveraging Bagging', 'Online Boosting', 'OKD']
        
        for name, model in models.items():
            print(f"Training and evaluating {name}...")
            try:
                eval_func = evaluate_streaming_model if name in streaming_models else evaluate_batch_model
                
                # Adaptive run with consensus-based drift
                adaptive_results, adaptive_latencies, adaptive_accs = eval_func(name, model, X_scaled, y, features, window_size, step_size, true_drifts, drift_points, output_dir, drift_type, adaptive=True)
                adaptation_latencies[name] = adaptive_latencies
                
                # Static ablation run
                static_results, _, static_accs = eval_func(name, model, X_scaled, y, features, window_size, step_size, true_drifts, drift_points, output_dir, drift_type, adaptive=False)
                
                # Aggregate adaptive results
                aggregated = {
                    'Model': name,
                    'Accuracy_Mean': np.mean([r['Accuracy'] for r in adaptive_results]),
                    'Accuracy_CI_Low': bootstrap_ci([r['Accuracy'] for r in adaptive_results])[0],
                    'Accuracy_CI_High': bootstrap_ci([r['Accuracy'] for r in adaptive_results])[1],
                    'TPR_Mean': np.mean([r['TPR'] for r in adaptive_results]),
                    'TPR_CI_Low': bootstrap_ci([r['TPR'] for r in adaptive_results])[0],
                    'TPR_CI_High': bootstrap_ci([r['TPR'] for r in adaptive_results])[1],
                    'FRR_Mean': np.mean([r['FRR'] for r in adaptive_results]),
                    'FRR_CI_Low': bootstrap_ci([r['FRR'] for r in adaptive_results])[0],
                    'FRR_CI_High': bootstrap_ci([r['FRR'] for r in adaptive_results])[1],
                    'Precision_Mean': np.mean([r['Precision'] for r in adaptive_results]),
                    'Recall_Mean': np.mean([r['Recall'] for r in adaptive_results]),
                    'F1_Mean': np.mean([r['F1-Score'] for r in adaptive_results])
                }
                
                # t-test vs static
                if len(adaptive_accs) == len(static_accs) and adaptive_accs and static_accs:
                    t_stat, p_val = ttest_rel(adaptive_accs, static_accs)
                    aggregated['Delta_Acc_vs_Static'] = np.mean(adaptive_accs) - np.mean(static_accs)
                    aggregated['p_value_vs_Static'] = p_val
                else:
                    aggregated['Delta_Acc_vs_Static'] = np.nan
                    aggregated['p_value_vs_Static'] = np.nan
                
                # Compute DDA and AL
                total_detections = sum(len(v) for v in drift_points[name].values())
                correct_detections = sum(len([d for d in detected if any(abs(d - t) < 50 for t in true_drifts)]) for detected in drift_points[name].values())
                aggregated['DDA_Mean'] = correct_detections / max(1, total_detections) if total_detections else 0.0
                aggregated['AL_Mean'] = np.mean(adaptation_latencies[name]) if adaptation_latencies[name] else float('inf')
                
                if 'Post-Accuracy' in adaptive_results[0]:
                    aggregated['Post_Accuracy_Mean'] = np.mean([r['Post-Accuracy'] for r in adaptive_results])
                
                if name in ['Random Forest', 'SVM', 'KNN', 'XGBoost', 'Voting', 'Snapshot RF']:
                    roc_vals = [r['ROC-AUC'] for r in adaptive_results if 'ROC-AUC' in r and not np.isnan(r['ROC-AUC'])]
                    aggregated['ROC_AUC_Mean'] = np.mean(roc_vals)
                
                results.append(aggregated)
            except Exception as e:
                print(f"Failed to evaluate {name}: {e}")
        
        results_df = pd.DataFrame(results)
        output_file = f'{output_dir}/results.csv'
        results_df.to_csv(output_file, index=False)
        
        print(f"\nResults for {data_type}_{attack_type}_{drift_type}:")
        print(results_df[['Model', 'Accuracy_Mean', 'TPR_Mean', 'FRR_Mean', 'DDA_Mean', 'AL_Mean', 'Delta_Acc_vs_Static', 'p_value_vs_Static']])
        print("Drift Points summary:", {k: len(v) for k, v in drift_points.items() if v})
        
        # Plotting (simplified)
        plt.figure(figsize=(12, 6))
        plt.savefig(f'{output_dir}/f1.png', dpi=300, bbox_inches='tight')
        plt.close()
        
        return results_df, drift_points
    except Exception as e:
        print(f"Error evaluating {data_type} dataset: {e}")
        return None, None

# Load and preprocess data
def load_data(data_file, data_type):
    try:
        df = pd.read_csv(data_file)
        print(f"Initial {data_type} Rows: {len(df)}")
        if data_type == 'keystroke':
            features = ['dwell_time', 'flight_time', 'up_down_time', 'session_duration', 'rhythm']
        else:  # mouse
            features = ['speed', 'distance']
            if 'delta' in df.columns:
                try:
                    df['delta_x'] = df['delta'].apply(lambda x: float(x.split(',')[0].strip('() ')) if isinstance(x, str) else x[0] if isinstance(x, (list, tuple)) else np.nan)
                    df['delta_y'] = df['delta'].apply(lambda x: float(x.split(',')[1].strip('() ')) if isinstance(x, str) else x[1] if isinstance(x, (list, tuple)) else np.nan)
                    features += ['delta_x', 'delta_y']
                except Exception as e:
                    print(f"Warning: Failed to parse 'delta' column in {data_type} dataset. Error: {e}")
                    df = df.drop(columns=['delta'])
        for feature in features:
            if feature not in df.columns:
                raise ValueError(f"Feature '{feature}' not found in {data_type} dataset")
        df = df.dropna(subset=features)
        print(f"{data_type} Rows after NaN drop: {len(df)}")
        df['label'] = 0  # Benign
        return df, features
    except Exception as e:
        print(f"Error loading {data_type} dataset: {e}. Using synthetic data.")
        return generate_synthetic_data(data_type)

# Synthetic data generator
def generate_synthetic_data(data_type, n_samples=1000):
    np.random.seed(42)
    if data_type == 'keystroke':
        df = pd.DataFrame({
            'dwell_time': np.random.normal(100, 20, n_samples),
            'flight_time': np.random.normal(150, 30, n_samples),
            'up_down_time': np.random.normal(120, 25, n_samples),
            'session_duration': np.random.normal(500, 100, n_samples),
            'rhythm': np.random.normal(0.8, 0.1, n_samples),
            'label': np.random.choice([0, 1], n_samples, p=[0.8, 0.2])  # 80% benign
        })
        features = ['dwell_time', 'flight_time', 'up_down_time', 'session_duration', 'rhythm']
    else:  # mouse
        times = np.cumsum(np.random.exponential(0.1, n_samples))
        x = np.cumsum(np.random.normal(0, 10, n_samples))
        y = np.cumsum(np.random.normal(0, 10, n_samples))
        delta_x = np.diff(x, prepend=x[0])
        delta_y = np.diff(y, prepend=y[0])
        dt = np.diff(times, prepend=times[0])
        speed = np.sqrt(delta_x**2 + delta_y**2) / (dt + 1e-6)
        distance = np.sqrt(delta_x**2 + delta_y**2)
        df = pd.DataFrame({
            'speed': speed,
            'distance': distance,
            'delta_x': delta_x,
            'delta_y': delta_y,
            'label': np.random.choice([0, 1], n_samples, p=[0.8, 0.2])
        })
        features = ['speed', 'distance', 'delta_x', 'delta_y']
    return df, features

# Combine benign and attack data
def create_combined_dataset(benign_df, data_type, attack_type, drift_type, n_attack=200, use_generator=False):
    attack_df = simulate_attacks(benign_df, data_type, attack_type, drift_type, n_attack, use_generator)
    combined_df = pd.concat([benign_df, attack_df], ignore_index=True)
    output_dir = '/Users/festusedward-n/Documents/Datasets'
    os.makedirs(output_dir, exist_ok=True)
    output_file = f'{output_dir}/{data_type}_{attack_type}_{drift_type}_data.csv'
    combined_df.to_csv(output_file, index=False)
    return combined_df

# Tune detector based on drift type
def get_tuned_detector(detector_class, drift_type):
    if drift_type in ['gradual', 'incremental', 'recurring']:
        if detector_class == ADWIN:
            return ADWIN(delta=0.0001)
        elif detector_class == PageHinkley:
            return PageHinkley(threshold=10)
        elif detector_class == KSWIN:
            return KSWIN(alpha=0.005)
        elif detector_class == PCDM:
            return PCDM(alpha=0.005)
        elif detector_class == RBFSVMDriftDetector:
            return RBFSVMDriftDetector(threshold=0.05)
        elif detector_class == HDDM_W:
            return HDDM_W(delta=0.0001)
        elif detector_class == HDDM_A:
            return HDDM_A(delta=0.0001)
        elif detector_class == DDM:
            return DDM(drift_level=2.0)
        elif detector_class == EDDM:
            return EDDM(drift_level=0.8)
    else:  # Abrupt
        if detector_class == ADWIN:
            return ADWIN(delta=0.001)
        return detector_class()

# Main execution
if __name__ == '__main__':
    attack_types = [
        ('session_hijacking', 'abrupt'),
        ('mitm', 'recurring'),
        ('card_skimming', 'abrupt'),
        ('phishing', 'gradual'),
        ('identity_theft', 'incremental')
    ]
    data_types = ['keystroke', 'mouse']
    datasets = {
        'keystroke': '/Users/festusedward-n/Documents/Datasets/imputed_keystroke_data.csv',
        'mouse': '/Users/festusedward-n/Documents/Datasets/mouse_modified_trimmed_clean_imputed.csv'
    }
    for data_type, data_file in datasets.items():
        benign_df, features = load_data(data_file, data_type)
        if benign_df is None:
            continue
        for attack_type, drift_type in attack_types:
            print(f"\nCreating dataset for {data_type}_{attack_type}_{drift_type}")
            combined_df = create_combined_dataset(benign_df, data_type, attack_type, drift_type, use_generator=True)
            print(f"Evaluating models for {data_type}_{attack_type}_{drift_type}")
            results_df, drift_points = evaluate_models(combined_df, data_type, features, attack_type, drift_type, window_size=100, step_size=10)
            if results_df is not None:
                print(f"\nResults for {data_type}_{attack_type}_{drift_type}:")
                print(results_df[['Model', 'Accuracy_Mean', 'TPR_Mean', 'FRR_Mean', 'DDA_Mean', 'AL_Mean', 'Delta_Acc_vs_Static', 'p_value_vs_Static']])
                print("Drift Points summary:", {k: len(v) for k, v in drift_points.items() if v})

Initial keystroke Rows: 19996
keystroke Rows after NaN drop: 19996

Creating dataset for keystroke_session_hijacking_abrupt
Evaluating models for keystroke_session_hijacking_abrupt
Training and evaluating Random Forest...
Failed to evaluate Random Forest: list index out of range
Training and evaluating SVM...
Failed to evaluate SVM: list index out of range
Training and evaluating KNN...
Failed to evaluate KNN: list index out of range
Training and evaluating XGBoost...
Failed to evaluate XGBoost: list index out of range
Training and evaluating Voting...
Failed to evaluate Voting: list index out of range
Training and evaluating Snapshot RF...
Failed to evaluate Snapshot RF: list index out of range
Training and evaluating Hoeffding Tree...
Failed to evaluate Hoeffding Tree: list index out of range
Training and evaluating HAT...
Failed to evaluate HAT: list index out of range
Training and evaluating Leveraging Bagging...
Failed to evaluate Leveraging Bagging: list index out of range
Traini