In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve, confusion_matrix
from sklearn.neural_network import MLPRegressor
from river.drift import ADWIN, PageHinkley, KSWIN
from river.tree import HoeffdingTreeClassifier, HoeffdingAdaptiveTreeClassifier
from river.ensemble import LeveragingBaggingClassifier, AdaBoostClassifier
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from xgboost import XGBClassifier
import copy
import os
import torch
import torch.nn as nn
warnings.filterwarnings('ignore')

# Try importing ARFClassifier or SRPClassifier
ENSEMBLE_AVAILABLE = False
try:
    from river.ensemble import ARFClassifier
    EnsembleClassifier = ARFClassifier
    ENSEMBLE_NAME = 'ARF'
    ENSEMBLE_AVAILABLE = True
except ImportError:
    try:
        from river.ensemble import SRPClassifier
        EnsembleClassifier = SRPClassifier
        ENSEMBLE_NAME = 'SRP'
        ENSEMBLE_AVAILABLE = True
    except ImportError:
        print("Warning: Neither ARFClassifier nor SRPClassifier found. Excluding ensemble model.")

# Set random seed
np.random.seed(42)
torch.manual_seed(42)

# Custom DDM implementation
class DDM:
    def __init__(self, min_num_instances=30, warning_level=2.0, drift_level=3.0):
        self.min_num_instances = min_num_instances
        self.warning_level = warning_level
        self.drift_level = drift_level
        self.mean = 0.0
        self.std = 0.0
        self.n = 0
        self.drift_detected = False
        self.mean_min = float('inf')
        self.std_min = float('inf')
    
    def add_element(self, error):
        self.n += 1
        if self.n == 1:
            self.mean = error
            self.std = 0.0
        else:
            old_mean = self.mean
            self.mean += (error - old_mean) / self.n
            self.std = np.sqrt((self.std**2 * (self.n - 1) + (error - self.mean) * (error - old_mean)) / self.n)
        if self.n >= self.min_num_instances:
            if self.mean + self.std > self.mean_min + self.drift_level * self.std_min:
                self.drift_detected = True
            else:
                self.drift_detected = False
            if not self.drift_detected:
                self.mean_min = min(self.mean_min, self.mean)
                self.std_min = min(self.std_min, self.std)
        return self.drift_detected

# Custom EDDM implementation
class EDDM:
    def __init__(self, min_num_instances=30, warning_level=0.95, drift_level=0.9):
        self.min_num_instances = min_num_instances
        self.warning_level = warning_level
        self.drift_level = drift_level
        self.mean = 0.0
        self.std = 0.0
        self.n = 0
        self.last_error = 0
        self.distances = []
        self.drift_detected = False
        self.max_mean = 0.0
    
    def add_element(self, prediction, true_label):
        error = 1 if prediction != true_label else 0
        if self.n > 0 and error == 1:
            self.distances.append(self.n - self.last_error)
        if error == 1:
            self.last_error = self.n
        self.n += 1
        if len(self.distances) > 1:
            self.mean = np.mean(self.distances)
            self.std = np.std(self.distances)
        if self.n >= self.min_num_instances and len(self.distances) > 1:
            m = (self.mean + 2 * self.std) / self.max_mean if self.max_mean > 0 else float('inf')
            if m < self.drift_level:
                self.drift_detected = True
            else:
                self.drift_detected = False
            if not self.drift_detected:
                self.max_mean = max(self.max_mean, self.mean + 2 * self.std)
        return self.drift_detected

# Custom PCDM implementation
class PCDM:
    def __init__(self, window_size=50, n_permutations=100, alpha=0.01):
        self.window_size = window_size
        self.n_permutations = n_permutations
        self.alpha = alpha
        self.reference_window = []
        self.current_window = []
        self.drift_detected = False
    
    def add_element(self, value):
        self.current_window.append(value)
        if len(self.current_window) > self.window_size:
            self.current_window.pop(0)
        if len(self.reference_window) < self.window_size:
            self.reference_window.append(value)
            return False
        if len(self.current_window) == self.window_size:
            stat, p_value = self._permutation_test()
            self.drift_detected = p_value < self.alpha
            if self.drift_detected:
                self.reference_window = self.current_window.copy()
        return self.drift_detected
    
    def _permutation_test(self):
        ref = np.array(self.reference_window)
        curr = np.array(self.current_window)
        observed_diff = np.abs(np.mean(ref) - np.mean(curr))
        combined = np.concatenate([ref, curr])
        perm_diffs = []
        for _ in range(self.n_permutations):
            np.random.shuffle(combined)
            perm_ref = combined[:self.window_size]
            perm_curr = combined[self.window_size:]
            perm_diffs.append(np.abs(np.mean(perm_ref) - np.mean(perm_curr)))
        p_value = np.sum(np.array(perm_diffs) >= observed_diff) / self.n_permutations
        return observed_diff, p_value

# Custom RBFSVMDriftDetector
class RBFSVMDriftDetector:
    def __init__(self, window_size=50, threshold=0.1):
        self.window_size = window_size
        self.threshold = threshold
        self.reference_window = []
        self.current_window = []
        self.svm = SVC(kernel='rbf', C=1.0, probability=True)
        self.drift_detected = False
    
    def add_element(self, x, y):
        self.current_window.append((x, y))
        if len(self.current_window) > self.window_size:
            self.current_window.pop(0)
        if len(self.reference_window) < self.window_size:
            self.reference_window.append((x, y))
            return False
        if len(self.current_window) == self.window_size:
            margin_density = self._compute_margin_density()
            self.drift_detected = margin_density > self.threshold
            if self.drift_detected:
                self.reference_window = self.current_window.copy()
        return self.drift_detected
    
    def _compute_margin_density(self):
        X_ref, y_ref = zip(*self.reference_window)
        X_curr, y_curr = zip(*self.current_window)
        X_ref, X_curr = np.array(X_ref), np.array(X_curr)
        y_ref, y_curr = np.array(y_ref), np.array(y_curr)
        self.svm.fit(X_ref, y_ref)
        decision_scores = self.svm.decision_function(X_curr)
        margin_density = np.mean(np.abs(decision_scores) < 1.0)
        return margin_density

# Custom HDDM_W implementation
class HDDM_W:
    def __init__(self, window_size=50, delta=0.001):
        self.window_size = window_size
        self.delta = delta
        self.reference_window = []
        self.current_window = []
        self.drift_detected = False
    
    def _hellinger_distance(self, hist1, hist2):
        return np.sqrt(np.sum((np.sqrt(hist1) - np.sqrt(hist2))**2)) / np.sqrt(2)
    
    def add_element(self, value):
        self.current_window.append(value)
        if len(self.current_window) > self.window_size:
            self.current_window.pop(0)
        if len(self.reference_window) < self.window_size:
            self.reference_window.append(value)
            return False
        if len(self.current_window) == self.window_size:
            hist_ref, _ = np.histogram(self.reference_window, bins=10, density=True)
            hist_curr, _ = np.histogram(self.current_window, bins=10, density=True)
            distance = self._hellinger_distance(hist_ref, hist_curr)
            self.drift_detected = distance > self.delta
            if self.drift_detected:
                self.reference_window = self.current_window.copy()
        return self.drift_detected

# Custom HDDM_A implementation
class HDDM_A:
    def __init__(self, window_size=50, delta=0.001):
        self.window_size = window_size
        self.delta = delta
        self.reference_window = []
        self.current_window = []
        self.drift_detected = False
    
    def _hellinger_distance(self, hist1, hist2):
        return np.sqrt(np.sum((np.sqrt(hist1) - np.sqrt(hist2))**2)) / np.sqrt(2)
    
    def add_element(self, value):
        self.current_window.append(value)
        if len(self.current_window) > self.window_size:
            self.current_window.pop(0)
        if len(self.reference_window) < self.window_size:
            self.reference_window.append(value)
            return False
        if len(self.current_window) == self.window_size:
            hist_ref, _ = np.histogram(self.reference_window, bins=10, density=True)
            hist_curr, _ = np.histogram(self.current_window, bins=10, density=True)
            distance = self._hellinger_distance(hist_ref, hist_curr)
            self.drift_detected = distance > self.delta
            if self.drift_detected:
                self.reference_window = self.current_window.copy()
        return self.drift_detected

# Improved Autoencoder Drift Detector
class AutoencoderDriftDetector:
    def __init__(self, threshold_multiplier=3.0, max_iter=500):
        self.threshold_multiplier = threshold_multiplier
        self.max_iter = max_iter
        self.autoencoder = MLPRegressor(hidden_layer_sizes=(10,), max_iter=max_iter, random_state=42)
        self.reference_mse = None
        self.feature_std = None
    
    def fit(self, X_ref):
        self.feature_std = np.std(X_ref, axis=0) + 1e-6
        self.autoencoder.fit(X_ref, X_ref)
        recon = self.autoencoder.predict(X_ref)
        mse = np.mean((X_ref - recon) ** 2, axis=1)
        self.reference_mse = np.mean(mse)
        self.threshold = self.threshold_multiplier * self.reference_mse
    
    def add_element(self, x, ref=False):
        if ref:
            return False
        recon = self.autoencoder.predict([x])
        mse = np.mean(((x - recon) ** 2) / (self.feature_std ** 2))
        drift_detected = mse > self.threshold
        return drift_detected

# Snapshot Ensemble
class SnapshotEnsemble:
    def __init__(self, base_model, n_snapshots=3):
        self.base_model = base_model
        self.n_snapshots = n_snapshots
        self.snapshots = []
    
    def fit(self, X, y):
        self.snapshots = []
        for _ in range(self.n_snapshots):
            model = copy.deepcopy(self.base_model)
            model.fit(X, y)
            self.snapshots.append(model)
    
    def predict(self, X):
        predictions = np.array([model.predict(X) for model in self.snapshots])
        return np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=0, arr=predictions)
    
    def predict_proba(self, X):
        probas = np.array([model.predict_proba(X) for model in self.snapshots])
        return np.mean(probas, axis=0)

# Online Knowledge Distillation
class OnlineKnowledgeDistillation:
    def __init__(self, teacher_model, student_model, alpha=0.5):
        self.teacher = teacher_model
        self.student = student_model
        self.alpha = alpha
    
    def learn_one(self, x, y):
        teacher_pred = self.teacher.predict_one(x)
        self.student.learn_one(x, y)
        return teacher_pred
    
    def predict_one(self, x):
        return self.student.predict_one(x)

# Keystroke/Mouse Generator using RNN
class FeatureGenerator(nn.Module):
    def __init__(self, input_size, hidden_size=64):
        super().__init__()
        self.rnn = nn.LSTM(input_size, hidden_size)
        self.linear = nn.Linear(hidden_size, input_size)
    
    def forward(self, x):
        out, _ = self.rnn(x)
        return self.linear(out)

# Load and preprocess data (unchanged from original)
def load_data(data_file, data_type, dataset_name):
    try:
        if data_type == 'keystroke' and dataset_name == 'cmu':
            features = ['dwell_time', 'flight_time', 'up_down_time', 'session_duration', 'rhythm']
            df = pd.read_csv(data_file)
            print(f"Initial {data_type} Rows (CMU): {len(df)}")
            print(f"Columns in CMU dataset: {df.columns.tolist()}")
            
            hold_cols = [col for col in df.columns if col.startswith('H')]
            dd_cols = [col for col in df.columns if col.startswith('DD')]
            ud_cols = [col for col in df.columns if col.startswith('UD')]
            
            if hold_cols and dd_cols and ud_cols:
                df['dwell_time'] = df[hold_cols].mean(axis=1)
                df['flight_time'] = df[dd_cols].mean(axis=1)
                df['up_down_time'] = df[ud_cols].mean(axis=1)
                df['session_duration'] = df[hold_cols].sum(axis=1) + df[dd_cols].sum(axis=1)
                df['rhythm'] = df[hold_cols].std(axis=1)
            else:
                key_cols = [col for col in df.columns if 'press' in col.lower()]
                release_cols = [col for col in df.columns if 'release' in col.lower()]
                
                if len(key_cols) > 0 and len(release_cols) > 0:
                    key_cols = sorted(key_cols)
                    release_cols = sorted(release_cols)
                    if len(key_cols) != len(release_cols):
                        raise ValueError("Mismatched press and release columns")
                    
                    dwell_times = [df[release_cols[i]] - df[key_cols[i]] for i in range(len(key_cols))]
                    if not dwell_times:
                        raise ValueError("No valid dwell times computed")
                    df['dwell_time'] = pd.concat(dwell_times, axis=1).mean(axis=1)
                    
                    flight_times = [df[key_cols[i+1]] - df[release_cols[i]] for i in range(len(key_cols)-1)]
                    if not flight_times:
                        raise ValueError("No valid flight times computed")
                    df['flight_time'] = pd.concat(flight_times, axis=1).mean(axis=1)
                    
                    df['up_down_time'] = df['flight_time']
                    df['session_duration'] = df[release_cols[-1]] - df[key_cols[0]]
                    df['rhythm'] = pd.concat(dwell_times, axis=1).std(axis=1)
                else:
                    raise ValueError("No recognizable press/release or precomputed columns found")
            
            # Increase sample size if possible
            if len(df) > 5000:
                df = df.sample(n=5000, random_state=42)
                print(f"Subsampled {data_type} Rows (CMU): {len(df)}")
            
            df = df.dropna(subset=features)
            print(f"{data_type} Rows after NaN drop (CMU): {len(df)}")
            df['label'] = 0
            return df, features
        elif data_type == 'mouse' and dataset_name == 'ub':
            features = ['speed', 'distance', 'delta_x', 'delta_y']
            dfs = []
            for file_name in os.listdir(data_file):
                if file_name.endswith('.txt'):
                    file_path = os.path.join(data_file, file_name)
                    try:
                        for encoding in ['utf-8', 'latin1', 'iso-8859-1']:
                            try:
                                df_temp = pd.read_csv(file_path, sep='\t', encoding=encoding, on_bad_lines='skip')
                                print(f"Successfully read {file_name} with encoding {encoding}")
                                dfs.append(df_temp)
                                break
                            except UnicodeDecodeError:
                                continue
                        else:
                            print(f"Failed to read {file_name} with any encoding")
                    except Exception as e:
                        print(f"Error reading {file_name}: {e}")
            
            if not dfs:
                raise ValueError("No valid .txt files loaded from UB dataset")
            
            df = pd.concat(dfs, ignore_index=True)
            print(f"Initial {data_type} Rows (UB): {len(df)}")
            print(f"Columns in UB dataset: {df.columns.tolist()}")
            
            if 'event_type' in df.columns:
                df = df[df['event_type'].str.contains('move', case=False, na=False)]
            elif 'EventType' in df.columns:
                df = df[df['EventType'].str.contains('move', case=False, na=False)]
            
            if 'timestamp' in df.columns:
                df = df.sort_values(by='timestamp')
            elif 'Timestamp' in df.columns:
                df = df.sort_values(by='Timestamp')
                df = df.rename(columns={'Timestamp': 'timestamp'})
            
            x_col = 'x' if 'x' in df.columns else 'X'
            y_col = 'y' if 'y' in df.columns else 'Y'
            if x_col not in df.columns or y_col not in df.columns:
                raise ValueError(f"Missing x/y columns in UB dataset; found: {df.columns}")
            
            df['delta_x'] = df[x_col].diff()
            df['delta_y'] = df[y_col].diff()
            df['distance'] = np.sqrt(df['delta_x']**2 + df['delta_y']**2)
            df['time_diff'] = df['timestamp'].diff()
            df['speed'] = df['distance'] / df['time_diff'].replace(0, np.nan)
            
            if len(df) > 5000:
                df = df.sample(n=5000, random_state=42)
                print(f"Subsampled {data_type} Rows (UB): {len(df)}")
            
            df = df.dropna(subset=features)
            print(f"{data_type} Rows after NaN drop (UB): {len(df)}")
            df['label'] = 0
            return df, features
        else:
            raise ValueError(f"Unsupported dataset_name: {dataset_name} for data_type: {data_type}")
    except Exception as e:
        print(f"Error loading {data_type} dataset ({dataset_name}): {e}")
        return None, None

# Updated simulate_attacks
def simulate_attacks(data, data_type, attack_type, drift_type, n_attack=500, use_generator=False):
    attack_data = data.copy().sample(n=n_attack, random_state=42)
    if data_type == 'keystroke':
        features = ['dwell_time', 'flight_time', 'up_down_time', 'session_duration', 'rhythm']
    else:
        features = ['speed', 'distance', 'delta_x', 'delta_y']
    
    if use_generator:
        input_size = len(features)
        generator = FeatureGenerator(input_size)
        optimizer = torch.optim.Adam(generator.parameters(), lr=0.001)
        criterion = nn.MSELoss()
        benign_tensor = torch.tensor(data[features].values[:n_attack], dtype=torch.float32).unsqueeze(1)
        for epoch in range(10):
            out = generator(benign_tensor)
            loss = criterion(out, benign_tensor)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        with torch.no_grad():
            perturbed = generator(benign_tensor).squeeze(1).numpy()
        feature_stds = np.std(perturbed, axis=0) + 1e-6
        noise = np.random.normal(0, 0.02 * feature_stds, perturbed.shape)
        attack_data[features] = perturbed + noise
    else:
        noise_scale = 0.02
        feature_stds = attack_data[features].std() + 1e-6
        if data_type == 'keystroke':
            if attack_type == 'session_hijacking' and drift_type == 'abrupt':
                attack_data['dwell_time'] *= 1.05 + np.random.normal(0, 0.02, n_attack)
                attack_data['flight_time'] *= 1.05 + np.random.normal(0, 0.02, n_attack)
            elif attack_type == 'mitm' and drift_type == 'recurring':
                mask = attack_data.index % 2 == 0
                attack_data.loc[mask, 'dwell_time'] *= 1.05 + np.random.normal(0, 0.02, sum(mask))
                attack_data.loc[~mask, 'dwell_time'] *= 0.95 + np.random.normal(0, 0.02, sum(~mask))
            elif attack_type == 'card_skimming' and drift_type == 'abrupt':
                attack_data['flight_time'] *= 1.05 + np.random.normal(0, 0.02, n_attack)
                attack_data['rhythm'] += np.random.normal(0, 0.02, n_attack)
            elif attack_type == 'phishing' and drift_type == 'gradual':
                attack_data['dwell_time'] += np.linspace(0, 0.05, n_attack) * attack_data['dwell_time'] + np.random.normal(0, noise_scale, n_attack)
            elif attack_type == 'identity_theft' and drift_type == 'incremental':
                attack_data['dwell_time'] += np.linspace(0, 0.03, n_attack) * attack_data['dwell_time'] + np.random.normal(0, noise_scale, n_attack)
        else:
            if attack_type == 'session_hijacking' and drift_type == 'abrupt':
                attack_data['speed'] *= 1.05 + np.random.normal(0, 0.02, n_attack)
                attack_data['distance'] *= 1.05 + np.random.normal(0, 0.02, n_attack)
            elif attack_type == 'mitm' and drift_type == 'recurring':
                mask = attack_data.index % 2 == 0
                attack_data.loc[mask, 'speed'] *= 1.05 + np.random.normal(0, 0.02, sum(mask))
                attack_data.loc[~mask, 'speed'] *= 0.95 + np.random.normal(0, 0.02, sum(~mask))
            elif attack_type == 'card_skimming' and drift_type == 'abrupt':
                attack_data['speed'] *= 1.05 + np.random.normal(0, 0.02, n_attack)
                attack_data['distance'] += np.random.normal(0, 0.02, n_attack)
            elif attack_type == 'phishing' and drift_type == 'gradual':
                attack_data['speed'] += np.linspace(0, 0.05, n_attack) * attack_data['speed'] + np.random.normal(0, noise_scale, n_attack)
            elif attack_type == 'identity_theft' and drift_type == 'incremental':
                attack_data['speed'] += np.linspace(0, 0.03, n_attack) * attack_data['speed'] + np.random.normal(0, noise_scale, n_attack)
        for feature in features:
            attack_data[feature] += np.random.normal(0, noise_scale * feature_stds[feature], n_attack)
    attack_data['label'] = 1
    return attack_data

# Updated create_combined_dataset
def create_combined_dataset(benign_df, data_type, attack_type, drift_type, n_attack=500, use_generator=False):
    attack_df = simulate_attacks(benign_df, data_type, attack_type, drift_type, n_attack, use_generator)
    combined_df = pd.concat([benign_df, attack_df], ignore_index=True).sample(frac=1, random_state=42).reset_index(drop=True)
    output_dir = '/Users/festusedward-n/Documents/Datasets'
    os.makedirs(output_dir, exist_ok=True)
    output_file = f'{output_dir}/{data_type}_{attack_type}_{drift_type}_results.csv'
    combined_df.to_csv(output_file, index=False)
    return combined_df

# Tune detector (unchanged)
def get_tuned_detector(detector_class, drift_type):
    if drift_type in ['gradual', 'incremental', 'recurring']:
        if detector_class == ADWIN:
            return ADWIN(delta=0.0001)
        elif detector_class == PageHinkley:
            return PageHinkley(threshold=10)
        elif detector_class == KSWIN:
            return KSWIN(alpha=0.005)
        elif detector_class == PCDM:
            return PCDM(alpha=0.005)
        elif detector_class == RBFSVMDriftDetector:
            return RBFSVMDriftDetector(threshold=0.05)
        elif detector_class == HDDM_W:
            return HDDM_W(delta=0.0001)
        elif detector_class == HDDM_A:
            return HDDM_A(delta=0.0001)
        elif detector_class == DDM:
            return DDM(drift_level=2.0)
        elif detector_class == EDDM:
            return EDDM(drift_level=0.8)
    else:
        if detector_class == ADWIN:
            return ADWIN(delta=0.001)
        return detector_class()

# Updated evaluate_batch_model
def evaluate_batch_model(name, model, X_scaled, y, features, window_size, step_size, true_drifts, drift_points, output_dir, drift_type):
    window_results = []
    adaptation_latencies = []
    base_model = copy.deepcopy(model)
    autoencoder = AutoencoderDriftDetector(threshold_multiplier=3.0, max_iter=500)
    is_first_window = True
    
    for start in range(0, len(X_scaled) - window_size + 1, step_size):
        end = start + window_size
        X_window = X_scaled[start:end]
        y_window = y[start:end]
        if len(np.unique(y_window)) < 2:
            print(f"Skipping window {start} due to single class")
            continue
        try:
            scoring = {
                'accuracy': 'accuracy',
                'precision': 'precision_weighted',
                'recall': 'recall_weighted',
                'f1': 'f1_weighted',
                'roc_auc': 'roc_auc' if hasattr(model, 'predict_proba') else None
            }
            cv_results = cross_validate(model, X_window, y_window, cv=5, scoring=scoring)
            window_metrics = {
                'Model': name,
                'Window': start,
                'Accuracy': np.mean(cv_results['test_accuracy']),
                'Precision': np.mean(cv_results['test_precision']),
                'Recall': np.mean(cv_results['test_recall']),
                'F1-Score': np.mean(cv_results['test_f1']),
            }
            if 'test_roc_auc' in cv_results:
                window_metrics['ROC-AUC'] = np.mean(cv_results['test_roc_auc'])
            
            model.fit(X_window, y_window)
            y_pred = model.predict(X_window)
            cm = confusion_matrix(y_window, y_pred)
            tn, fp, fn, tp = cm.ravel() if cm.shape == (2, 2) else (0, 0, 0, 0)
            tpr = tp / (tp + fn) if (tp + fn) > 0 else 0
            fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
            tar = recall_score(y_window, y_pred, average='macro', zero_division=0) if len(np.unique(y_window)) > 1 else 0
            frr = 1 - tar
            window_metrics.update({
                'TPR': tpr,
                'FPR': fpr,
                'TAR': tar,
                'FRR': frr
            })
            
            if is_first_window:
                autoencoder.fit(X_window[:int(0.8 * len(X_window))])
                is_first_window = False
            
            drift_detected = False
            for f_idx, feature in enumerate(features):
                adwin = get_tuned_detector(ADWIN, drift_type)
                ph = get_tuned_detector(PageHinkley, drift_type)
                ddm = get_tuned_detector(DDM, drift_type)
                eddm = get_tuned_detector(EDDM, drift_type)
                kswin = get_tuned_detector(KSWIN, drift_type)
                pcdm = get_tuned_detector(PCDM, drift_type)
                rbf_svm = get_tuned_detector(RBFSVMDriftDetector, drift_type)
                hddm_w = get_tuned_detector(HDDM_W, drift_type)
                hddm_a = get_tuned_detector(HDDM_A, drift_type)
                for i, (x, y_true) in enumerate(zip(X_window, y_window)):
                    pred = model.predict([x])[0]
                    error = 1.0 if pred != y_true else 0.0
                    global_idx = start + i
                    updates = [
                        (adwin.update(x[f_idx]), f'adwin_{feature}'),
                        (ph.update(x[f_idx]), f'page_hinkley_{feature}'),
                        (ddm.add_element(error), f'ddm_{feature}'),
                        (eddm.add_element(pred, y_true), f'eddm_{feature}'),
                        (kswin.update(x[f_idx]), f'kswin_{feature}'),
                        (pcdm.add_element(x[f_idx]), f'pcdm_{feature}'),
                        (rbf_svm.add_element(x, y_true), f'rbf_svm_{feature}'),
                        (hddm_w.add_element(x[f_idx]), f'hddm_w_{feature}'),
                        (hddm_a.add_element(x[f_idx]), f'hddm_a_{feature}'),
                        (autoencoder.add_element(x), f'autoencoder_{feature}')
                    ]
                    for update, key in updates:
                        if update:
                            drift_points[name][key].append(global_idx)
                            drift_detected = True
            if drift_detected:
                model = copy.deepcopy(base_model)
                model.fit(X_window, y_window)
                y_pred_post = model.predict(X_window)
                window_metrics['Post-Accuracy'] = accuracy_score(y_window, y_pred_post)
            window_results.append(window_metrics)
        except Exception as e:
            print(f"Error evaluating {name} at window {start}: {e}")
    return window_results, adaptation_latencies

# Evaluate streaming models (unchanged from original)
def evaluate_streaming_model(name, model, X_scaled, y, features, window_size, step_size, true_drifts, drift_points, output_dir, drift_type):
    window_results = []
    adaptation_latencies = []
    base_model = copy.deepcopy(model)
    autoencoder = AutoencoderDriftDetector(threshold_multiplier=3.0, max_iter=500)
    is_first_window = True
    
    for start in range(0, len(X_scaled) - window_size + 1, step_size):
        end = start + window_size
        X_window = X_scaled[start:end]
        y_window = y[start:end]
        if len(np.unique(y_window)) < 2:
            continue
        try:
            y_pred = []
            for i in range(1, len(X_window)):
                sample = dict(zip(features, X_window[i]))
                pred = model.predict_one(sample) or 0
                y_pred.append(pred)
                model.learn_one(sample, y_window[i])
            y_true = y_window[1:]
            if len(y_pred) == 0:
                continue
            cm = confusion_matrix(y_true, y_pred)
            tn, fp, fn, tp = cm.ravel() if cm.shape == (2, 2) else (0, 0, 0, 0)
            tpr = tp / (tp + fn) if (tp + fn) > 0 else 0
            fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
            tar = recall_score(y_true, y_pred, average='macro', zero_division=0) if len(np.unique(y_true)) > 1 else 0
            frr = 1 - tar
            window_metrics = {
                'Model': name,
                'Window': start,
                'Accuracy': accuracy_score(y_true, y_pred),
                'Precision': precision_score(y_true, y_pred, average='weighted', zero_division=0),
                'Recall': recall_score(y_true, y_pred, average='weighted', zero_division=0),
                'F1-Score': f1_score(y_true, y_pred, average='weighted', zero_division=0),
                'TPR': tpr,
                'FPR': fpr,
                'TAR': tar,
                'FRR': frr
            }
            window_results.append(window_metrics)
            if is_first_window:
                autoencoder.fit(X_window[:int(0.8 * len(X_window))])
                is_first_window = False
            drift_detected = False
            for f_idx, feature in enumerate(features):
                adwin = get_tuned_detector(ADWIN, drift_type)
                ph = get_tuned_detector(PageHinkley, drift_type)
                ddm = get_tuned_detector(DDM, drift_type)
                eddm = get_tuned_detector(EDDM, drift_type)
                kswin = get_tuned_detector(KSWIN, drift_type)
                pcdm = get_tuned_detector(PCDM, drift_type)
                rbf_svm = get_tuned_detector(RBFSVMDriftDetector, drift_type)
                hddm_w = get_tuned_detector(HDDM_W, drift_type)
                hddm_a = get_tuned_detector(HDDM_A, drift_type)
                for i, (x, y_true) in enumerate(zip(X_window, y_window)):
                    pred = model.predict_one(dict(zip(features, x)))
                    error = 1.0 if pred != y_true else 0.0
                    global_idx = start + i
                    updates = [
                        (adwin.update(x[f_idx]), f'adwin_{feature}'),
                        (ph.update(x[f_idx]), f'page_hinkley_{feature}'),
                        (ddm.add_element(error), f'ddm_{feature}'),
                        (eddm.add_element(pred, y_true), f'eddm_{feature}'),
                        (kswin.update(x[f_idx]), f'kswin_{feature}'),
                        (pcdm.add_element(x[f_idx]), f'pcdm_{feature}'),
                        (rbf_svm.add_element(x, y_true), f'rbf_svm_{feature}'),
                        (hddm_w.add_element(x[f_idx]), f'hddm_w_{feature}'),
                        (hddm_a.add_element(x[f_idx]), f'hddm_a_{feature}'),
                        (autoencoder.add_element(x), f'autoencoder_{feature}')
                    ]
                    for update, key in updates:
                        if update:
                            drift_points[name][key].append(global_idx)
                            drift_detected = True
                            adaptation_latencies.append(global_idx)
            if drift_detected:
                model = copy.deepcopy(base_model)
                for j in range(len(X_window)):
                    sample = dict(zip(features, X_window[j]))
                    model.learn_one(sample, y_window[j])
                y_pred_post = [model.predict_one(dict(zip(features, x))) or 0 for x in X_window[1:]]
                window_metrics['Post-Accuracy'] = accuracy_score(y_true, y_pred_post)
            window_results.append(window_metrics)
        except Exception as e:
            print(f"Error evaluating {name} at window {start}: {e}")
    return window_results, adaptation_latencies

# Updated evaluate_models
def evaluate_models(data_df, data_type, features, attack_type, drift_type, window_size=500, step_size=50):
    try:
        X = data_df[features].values
        y = data_df['label'].values
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)
        models = {
            'Random Forest': RandomForestClassifier(
                n_estimators=100, max_depth=8, min_samples_split=10, min_samples_leaf=5,
                max_features='sqrt', class_weight='balanced', random_state=42
            ),
            'SVM': SVC(
                kernel='rbf', C=0.1, probability=True, class_weight='balanced', random_state=42
            ),
            'KNN': KNeighborsClassifier(n_neighbors=15),
            'XGBoost': XGBClassifier(
                n_estimators=100, max_depth=4, learning_rate=0.05, reg_lambda=2.0,
                random_state=42
            ),
            'Voting': VotingClassifier(estimators=[
                ('rf', RandomForestClassifier(
                    n_estimators=100, max_depth=8, min_samples_split=10, min_samples_leaf=5,
                    max_features='sqrt', class_weight='balanced', random_state=42
                )),
                ('svm', SVC(
                    kernel='rbf', C=0.1, probability=True, class_weight='balanced', random_state=42
                )),
                ('knn', KNeighborsClassifier(n_neighbors=15))
            ], voting='soft'),
            'Snapshot RF': SnapshotEnsemble(
                RandomForestClassifier(
                    n_estimators=100, max_depth=8, min_samples_split=10, min_samples_leaf=5,
                    max_features='sqrt', class_weight='balanced', random_state=42
                ), n_snapshots=3
            ),
            'Hoeffding Tree': HoeffdingTreeClassifier(),
            'HAT': HoeffdingAdaptiveTreeClassifier(),
            'Leveraging Bagging': LeveragingBaggingClassifier(model=HoeffdingTreeClassifier(), n_models=10),
            'Online Boosting': AdaBoostClassifier(model=HoeffdingTreeClassifier(), n_models=10),
            'OKD': OnlineKnowledgeDistillation(
                teacher_model=HoeffdingAdaptiveTreeClassifier(),
                student_model=HoeffdingTreeClassifier(),
                alpha=0.5
            )
        }
        if ENSEMBLE_AVAILABLE:
            if ENSEMBLE_NAME == 'ARF':
                models[ENSEMBLE_NAME] = EnsembleClassifier(n_estimators=10)
            else:
                models[ENSEMBLE_NAME] = EnsembleClassifier(n_models=10)
        results = []
        detectors = ['adwin', 'page_hinkley', 'ddm', 'eddm trend', 'kswin', 'pcdm', 'rbf_svm', 'hddm_w', 'hddm_a', 'autoencoder']
        drift_points = {name: {f'{detector}_{feature}': [] for detector in detectors for feature in features} for name in models}
        true_drifts = [len(data_df[data_df['label'] == 0])]
        adaptation_latencies = {name: [] for name in models}
        output_dir = f'/Users/festusedward-n/Documents/Datasets/{data_type}_{attack_type}_{drift_type}'
        os.makedirs(output_dir, exist_ok=True)
        window_results_dict = {}
        for name, model in models.items():
            print(f"Training and evaluating {name}...")
            try:
                if name in ['Hoeffding Tree', 'HAT', ENSEMBLE_NAME, 'Leveraging Bagging', 'Online Boosting', 'OKD']:
                    window_results, latencies = evaluate_streaming_model(name, model, X_scaled, y, features, window_size, step_size, true_drifts, drift_points, output_dir, drift_type)
                else:
                    window_results, latencies = evaluate_batch_model(name, model, X_scaled, y, features, window_size, step_size, true_drifts, drift_points, output_dir, drift_type)
                adaptation_latencies[name] = latencies
                window_results_dict[name] = window_results
            except Exception as e:
                print(f"Failed to evaluate {name}: {e}")
        for name in models:
            if name in window_results_dict and window_results_dict[name]:
                aggregated = {
                    'Model': name,
                    'Accuracy': np.mean([r['Accuracy'] for r in window_results_dict[name]]),
                    'Precision': np.mean([r['Precision'] for r in window_results_dict[name]]),
                    'Recall': np.mean([r['Recall'] for r in window_results_dict[name]]),
                    'F1-Score': np.mean([r['F1-Score'] for r in window_results_dict[name]]),
                    'TPR': np.mean([r['TPR'] for r in window_results_dict[name]]),
                    'FPR': np.mean([r['FPR'] for r in window_results_dict[name]]),
                    'TAR': np.mean([r['TAR'] for r in window_results_dict[name]]),
                    'FRR': np.mean([r['FRR'] for r in window_results_dict[name]])
                }
                if 'Post-Accuracy' in window_results_dict[name][0]:
                    aggregated['Post-Accuracy'] = np.mean([r['Post-Accuracy'] for r in window_results_dict[name]])
                if name in ['Random Forest', 'SVM', 'KNN', 'XGBoost', 'Voting', 'Snapshot RF']:
                    aggregated['ROC-AUC'] = np.mean([r['ROC-AUC'] for r in window_results_dict[name] if 'ROC-AUC' in r and not np.isnan(r['ROC-AUC'])])
                for detector in detectors:
                    for feature in features:
                        detected = drift_points[name][f'{detector}_{feature}']
                        correct = len([d for d in detected if any(abs(d - t) < 50 for t in true_drifts)])
                        total = max(1, len(detected))
                        aggregated[f'DDA_{detector}_{feature}'] = correct / total if detected else 0.0
                if name in ['Hoeffding Tree', 'HAT', ENSEMBLE_NAME, 'Leveraging Bagging', 'Online Boosting', 'OKD']:
                    aggregated['AL'] = np.mean(adaptation_latencies[name]) if adaptation_latencies[name] else float('inf')
                results.append(aggregated)
        results_df = pd.DataFrame(results)
        output_file = f'{output_dir}/results.csv'
        results_df.to_csv(output_file, index=False)
        plt.figure(figsize=(12, 6))
        for name in window_results_dict:
            if window_results_dict[name]:
                f1_scores = [r['F1-Score'] for r in window_results_dict[name]]
                windows = [r['Window'] for r in window_results_dict[name]]
                plt.plot(windows, f1_scores, label=name)
        plt.title(f'F1-Score Over Windows for {data_type.capitalize()} {attack_type} ({drift_type})')
        plt.xlabel('Window Start Index')
        plt.ylabel('F1-Score')
        plt.legend()
        plt.savefig(f'{output_dir}/f1.png', dpi=300, bbox_inches='tight')
        plt.close()
        plt.figure(figsize=(12, 6))
        for name in ['Random Forest', 'SVM', 'KNN', 'XGBoost', 'Voting', 'Snapshot RF']:
            if name in window_results_dict and window_results_dict[name] and any('ROC-AUC' in r for r in window_results_dict[name]):
                fpr_avg = []
                tpr_avg = []
                for window in window_results_dict[name]:
                    if 'ROC-AUC' in window and not np.isnan(window['ROC-AUC']):
                        window_start = window['Window']
                        window_end = window_start + window_size
                        y_true_window = y[window_start:window_end]
                        if len(np.unique(y_true_window)) < 2:
                            continue
                        y_proba_window = models[name].predict_proba(X_scaled[window_start:window_end])[:, 1]
                        fpr, tpr, _ = roc_curve(y_true_window, y_proba_window)
                        fpr_avg.append(fpr)
                        tpr_avg.append(tpr)
                if fpr_avg:
                    interp_fpr = np.linspace(0, 1, 100)
                    tpr_interp = [np.interp(interp_fpr, f, t) for f, t in zip(fpr_avg, tpr_avg)]
                    tpr_mean = np.mean(tpr_interp, axis=0)
                    plt.plot(interp_fpr, tpr_mean, label=f'{name} (AUC={aggregated.get("ROC-AUC", np.nan):.2f})')
        plt.title(f'Average ROC Curve for {data_type.capitalize()} {attack_type} ({drift_type})')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.legend()
        plt.savefig(f'{output_dir}/roc.png', dpi=300, bbox_inches='tight')
        plt.close()
        plt.figure(figsize=(12, 6))
        for name in drift_points:
            for detector, points in drift_points[name].items():
                if points:
                    plt.scatter(points, [f'{name}-{detector}'] * len(points), label=f'{name}-{detector}')
        plt.title(f'Drift Points for {data_type.capitalize()} {attack_type} ({drift_type})')
        plt.xlabel('Sample Index')
        plt.ylabel('Detector')
        plt.legend()
        plt.savefig(f'{output_dir}/drifts.png', dpi=300, bbox_inches='tight')
        plt.close()
        return results_df, drift_points
    except Exception as e:
        print(f"Error evaluating {data_type} dataset: {e}")
        return None, None

# Main execution
if __name__ == '__main__':
    attack_types = [
        ('session_hijacking', 'abrupt'),
        ('mitm', 'recurring'),
        ('card_skimming', 'abrupt'),
        ('phishing', 'gradual'),
        ('identity_theft', 'incremental')
    ]
    datasets = {
        'keystroke': {
            'cmu': '/Users/festusedward-n/Documents/Datasets/DSL-StrongPasswordData 2.csv'
        },
        'mouse': {
            'ub': '/Users/festusedward-n/Documents/Datasets/UB_mouse_and_events_dataset/'
        }
    }
    for data_type, dataset_dict in datasets.items():
        for dataset_name, data_file in dataset_dict.items():
            print(f"\nProcessing {data_type} dataset: {dataset_name}")
            benign_df, features = load_data(data_file, data_type, dataset_name=dataset_name)
            if benign_df is None:
                continue
            for attack_type, drift_type in attack_types:
                print(f"\nCreating dataset for {data_type}_{attack_type}_{drift_type} ({dataset_name})")
                combined_df = create_combined_dataset(benign_df, data_type, attack_type, drift_type, use_generator=True)
                print(f"Evaluating models for {data_type}_{attack_type}_{drift_type} ({dataset_name})")
                results_df, drift_points = evaluate_models(combined_df, data_type, features, attack_type, drift_type)
                if results_df is not None:
                    print(f"\nResults for {data_type}_{attack_type}_{drift_type} ({dataset_name}):")
                    print(results_df[['Model', 'Accuracy', 'F1-Score', 'ROC-AUC']].round(3))
                    print("Drift Points:", {k: {dk: len(dv) for dk, dv in dv.items()} for k, dv in drift_points.items()})


Processing keystroke dataset: cmu
Initial keystroke Rows (CMU): 20400
Columns in CMU dataset: ['subject', 'sessionIndex', 'rep', 'H.period', 'DD.period.t', 'UD.period.t', 'H.t', 'DD.t.i', 'UD.t.i', 'H.i', 'DD.i.e', 'UD.i.e', 'H.e', 'DD.e.five', 'UD.e.five', 'H.five', 'DD.five.Shift.r', 'UD.five.Shift.r', 'H.Shift.r', 'DD.Shift.r.o', 'UD.Shift.r.o', 'H.o', 'DD.o.a', 'UD.o.a', 'H.a', 'DD.a.n', 'UD.a.n', 'H.n', 'DD.n.l', 'UD.n.l', 'H.l', 'DD.l.Return', 'UD.l.Return', 'H.Return']
Subsampled keystroke Rows (CMU): 5000
keystroke Rows after NaN drop (CMU): 5000

Creating dataset for keystroke_session_hijacking_abrupt (cmu)
Evaluating models for keystroke_session_hijacking_abrupt (cmu)
Training and evaluating Random Forest...
Error evaluating Random Forest at window 100: The number of classes has to be greater than one; got 1 class
Error evaluating Random Forest at window 150: The number of classes has to be greater than one; got 1 class
Error evaluating Random Forest at window 200: The numbe