# Phase II: Baseline Machine Learning Models
## AI-Driven Multi-Source Telemetry Framework for Cyberattack Detection

**Author:** Prabhu Narayan (Roll No. 60222005)  
**Supervisor:** Dr. Mamta Mittal  
**Institution:** Delhi Skill and Entrepreneurship University (DSEU)

---

## Notebook Objectives:
1. Train baseline ML models (Random Forest, SVM, XGBoost, LightGBM, Gradient Boosting)
2. Perform comprehensive model evaluation (Accuracy, Precision, Recall, F1-Score, AUC-ROC)
3. Compare model performance across datasets
4. Generate confusion matrices and classification reports
5. Save trained models and performance metrics

## Models to Implement:
- **Random Forest Classifier**
- **Support Vector Machine (SVM)**
- **XGBoost Classifier**
- **LightGBM Classifier**
- **Gradient Boosting Classifier**
- **Ensemble Model (Voting Classifier)**

## Expected Outputs:
- Trained model files (.pkl)
- Performance metrics (JSON/CSV)
- Confusion matrices and ROC curves
- Comparative analysis visualizations

---

In [None]:
# ============================================================================
# SECTION 1: ENVIRONMENT SETUP
# ============================================================================

print("="*80)
print("PHASE II: Baseline Machine Learning Models")
print("AI-Driven Multi-Source Telemetry Framework")
print("="*80)

# Install required packages
!pip install -q pandas numpy scikit-learn matplotlib seaborn
!pip install -q xgboost lightgbm catboost
!pip install -q imbalanced-learn
!pip install -q joblib pickle5

print("\n✓ All packages installed successfully!")

In [None]:
# Import libraries
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
import joblib
import pickle
from datetime import datetime
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Scikit-learn imports
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report, roc_auc_score, roc_curve
)
from sklearn.preprocessing import label_binarize

# XGBoost and LightGBM
import xgboost as xgb
import lightgbm as lgb

# Imbalanced learning
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler

# Seed NumPy's global generator once so runs are repeatable
RANDOM_STATE = 42
np.random.seed(seed=RANDOM_STATE)

# Plot appearance: matplotlib style sheet, then the seaborn colour palette
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette(palette="husl")

print("✓ Libraries imported successfully!")

In [None]:
# ============================================================================
# SECTION 2: GOOGLE DRIVE MOUNTING AND DIRECTORY SETUP
# ============================================================================

from google.colab import drive

drive.mount('/content/drive')

# Project layout: every entry of DIRS is a path below BASE_DIR
BASE_DIR = '/content/drive/MyDrive/ai-telemetry-research'
DIRS = {
    key: f'{BASE_DIR}/{relative_path}'
    for key, relative_path in (
        ('datasets_processed', 'datasets/processed'),
        ('models_baseline', 'models/baseline_ml'),
        ('results_phase2', 'results/phase2'),
        ('results_phase2_metrics', 'results/phase2/metrics'),
        ('results_phase2_figures', 'results/phase2/figures'),
        ('results_phase2_models', 'results/phase2/trained_models'),
        ('logs', 'logs'),
    )
}

# Make sure each directory exists (no error when it is already there)
for dir_name, dir_path in DIRS.items():
    os.makedirs(dir_path, exist_ok=True)

print("✓ Directory structure created successfully!")

In [None]:
# ============================================================================
# SECTION 3: UTILITY CLASSES FOR MODEL TRAINING AND EVALUATION
# ============================================================================

class ModelTrainer:
    """Train, store and evaluate the baseline classifiers.

    Fitted estimators are kept in ``self.models`` and evaluation output in
    ``self.results``; both dictionaries are keyed by model name.
    """

    # Default cap on the number of rows used to fit the SVM.
    SVM_MAX_SAMPLES = 10000

    def __init__(self, random_state=42):
        """Store the seed and start with empty model/result registries."""
        self.random_state = random_state
        self.models = {}
        self.results = {}

    def prepare_data(self, df, label_col='binary_label', test_size=0.3):
        """Split ``df`` into stratified train and test sets.

        Args:
            df: DataFrame holding the features and the label column.
            label_col: Name of the target column.
            test_size: Fraction of rows held out for testing.

        Returns:
            ``(X_train, X_test, y_train, y_test)``.

        Raises:
            KeyError: If ``label_col`` is not a column of ``df``.
        """
        print(f"\nPreparing data with label column: {label_col}")

        # Separate features and labels
        X = df.drop([label_col], axis=1, errors='ignore')

        # Keep numeric feature columns only.
        # NOTE(review): any other numeric label-like column stays in X and
        # would leak the target - confirm the preprocessed files contain none.
        X = X.select_dtypes(include=[np.number])
        y = df[label_col]

        # Stratify so both splits keep the class proportions of ``y``
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=self.random_state, stratify=y
        )

        print(f"  Training set: {X_train.shape}")
        print(f"  Test set: {X_test.shape}")
        print(f"  Features: {X_train.shape[1]}")

        return X_train, X_test, y_train, y_test

    def handle_imbalance(self, X_train, y_train, method='smote'):
        """Resample the training data to counter class imbalance.

        Args:
            X_train: Training features.
            y_train: Training labels.
            method: ``'smote'`` to oversample with SMOTE, ``'undersample'`` to
                use random undersampling; any other value returns the inputs
                unchanged.

        Returns:
            ``(X_resampled, y_resampled)``.
        """
        print(f"\nOriginal class distribution: {dict(pd.Series(y_train).value_counts())}")

        if method == 'smote':
            smote = SMOTE(random_state=self.random_state)
            X_resampled, y_resampled = smote.fit_resample(X_train, y_train)
        elif method == 'undersample':
            rus = RandomUnderSampler(random_state=self.random_state)
            X_resampled, y_resampled = rus.fit_resample(X_train, y_train)
        else:
            return X_train, y_train

        print(f"Resampled class distribution: {dict(pd.Series(y_resampled).value_counts())}")
        return X_resampled, y_resampled

    def train_random_forest(self, X_train, y_train, **kwargs):
        """Fit a Random Forest and register it as ``models['random_forest']``.

        Optional keyword arguments: ``n_estimators`` (100), ``max_depth`` (20),
        ``min_samples_split`` (5).
        """
        print("\nTraining Random Forest...")

        params = {
            'n_estimators': kwargs.get('n_estimators', 100),
            'max_depth': kwargs.get('max_depth', 20),
            'min_samples_split': kwargs.get('min_samples_split', 5),
            'random_state': self.random_state,
            'n_jobs': -1
        }

        model = RandomForestClassifier(**params)
        model.fit(X_train, y_train)

        self.models['random_forest'] = model
        print("✓ Random Forest trained successfully")
        return model

    def _subsample(self, X, y, max_samples):
        """Return at most ``max_samples`` aligned rows of ``X`` and ``y``.

        Rows are drawn without replacement from a generator seeded with
        ``self.random_state``, so the selection does not depend on the state of
        NumPy's global generator. Inputs that are already small enough are
        returned unchanged.
        """
        n_rows = len(X)
        if n_rows <= max_samples:
            return X, y

        rng = np.random.RandomState(self.random_state)
        indices = rng.choice(n_rows, max_samples, replace=False)

        # Positional selection for pandas objects, plain indexing otherwise
        X_subset = X.iloc[indices] if hasattr(X, 'iloc') else np.asarray(X)[indices]
        y_subset = y.iloc[indices] if hasattr(y, 'iloc') else np.asarray(y)[indices]
        return X_subset, y_subset

    def train_svm(self, X_train, y_train, **kwargs):
        """Fit an SVM and register it as ``models['svm']``.

        Optional keyword arguments: ``C`` (1.0), ``kernel`` ('rbf'),
        ``gamma`` ('scale') and ``max_samples`` (``SVM_MAX_SAMPLES``), the
        largest number of training rows used for fitting.
        """
        print("\nTraining SVM...")

        params = {
            'C': kwargs.get('C', 1.0),
            'kernel': kwargs.get('kernel', 'rbf'),
            'gamma': kwargs.get('gamma', 'scale'),
            'random_state': self.random_state
        }

        # Use subset for SVM due to computational cost
        max_samples = kwargs.get('max_samples', self.SVM_MAX_SAMPLES)
        if len(X_train) > max_samples:
            print(f"  Using subset of data for SVM training ({max_samples:,} samples)")
        X_train_subset, y_train_subset = self._subsample(X_train, y_train, max_samples)

        # probability=True enables predict_proba, which evaluation relies on
        model = SVC(**params, probability=True)
        model.fit(X_train_subset, y_train_subset)

        self.models['svm'] = model
        print("✓ SVM trained successfully")
        return model

    def train_xgboost(self, X_train, y_train, **kwargs):
        """Fit an XGBoost classifier and register it as ``models['xgboost']``.

        Optional keyword arguments: ``n_estimators`` (100), ``max_depth`` (10),
        ``learning_rate`` (0.1).
        """
        print("\nTraining XGBoost...")

        params = {
            'n_estimators': kwargs.get('n_estimators', 100),
            'max_depth': kwargs.get('max_depth', 10),
            'learning_rate': kwargs.get('learning_rate', 0.1),
            'random_state': self.random_state,
            'n_jobs': -1,
            'eval_metric': 'logloss'
        }

        model = xgb.XGBClassifier(**params)
        model.fit(X_train, y_train)

        self.models['xgboost'] = model
        print("✓ XGBoost trained successfully")
        return model

    def train_gradient_boosting(self, X_train, y_train, **kwargs):
        """Fit a Gradient Boosting classifier (``models['gradient_boosting']``).

        Optional keyword arguments: ``n_estimators`` (100), ``max_depth`` (5),
        ``learning_rate`` (0.1).
        """
        print("\nTraining Gradient Boosting...")

        params = {
            'n_estimators': kwargs.get('n_estimators', 100),
            'max_depth': kwargs.get('max_depth', 5),
            'learning_rate': kwargs.get('learning_rate', 0.1),
            'random_state': self.random_state
        }

        model = GradientBoostingClassifier(**params)
        model.fit(X_train, y_train)

        self.models['gradient_boosting'] = model
        print("✓ Gradient Boosting trained successfully")
        return model

    def train_lightgbm(self, X_train, y_train, **kwargs):
        """Fit a LightGBM classifier and register it as ``models['lightgbm']``.

        Optional keyword arguments: ``n_estimators`` (100), ``max_depth`` (10),
        ``learning_rate`` (0.1).
        """
        print("\nTraining LightGBM...")

        params = {
            'n_estimators': kwargs.get('n_estimators', 100),
            'max_depth': kwargs.get('max_depth', 10),
            'learning_rate': kwargs.get('learning_rate', 0.1),
            'random_state': self.random_state,
            'n_jobs': -1,
            'verbose': -1
        }

        model = lgb.LGBMClassifier(**params)
        model.fit(X_train, y_train)

        self.models['lightgbm'] = model
        print("✓ LightGBM trained successfully")
        return model

    def create_ensemble(self, X_train, y_train):
        """Build a soft-voting ensemble of Random Forest, XGBoost and LightGBM.

        The three members are trained (and registered under their own keys)
        first; the fitted ensemble is stored as ``models['ensemble']`` and
        returned.
        """
        print("\nCreating Ensemble Model...")

        # Train individual models
        self.train_random_forest(X_train, y_train)
        self.train_xgboost(X_train, y_train)
        self.train_lightgbm(X_train, y_train)

        # Create ensemble
        ensemble = VotingClassifier(
            estimators=[
                ('rf', self.models['random_forest']),
                ('xgb', self.models['xgboost']),
                ('lgb', self.models['lightgbm'])
            ],
            voting='soft'
        )

        # NOTE(review): fitting the ensemble trains its members again on the
        # same data - confirm whether the standalone fits above are required.
        ensemble.fit(X_train, y_train)
        self.models['ensemble'] = ensemble

        print("✓ Ensemble model created successfully")
        return ensemble

    def evaluate_model(self, model, X_test, y_test, model_name):
        """Score ``model`` on the test split and record the outcome.

        Args:
            model: Fitted estimator exposing ``predict`` (and, optionally,
                ``predict_proba``).
            X_test: Test features.
            y_test: True test labels (binary).
            model_name: Key under which the outcome is stored in
                ``self.results``.

        Returns:
            ``(metrics, cm, class_report)``: the metrics dictionary, the
            confusion matrix and the classification report dictionary.
        """
        print(f"\nEvaluating {model_name}...")

        # Predictions
        y_pred = model.predict(X_test)
        y_pred_proba = None
        if hasattr(model, 'predict_proba'):
            # Column 1 is used as the positive-class score
            y_pred_proba = model.predict_proba(X_test)[:, 1]

        # ROC-AUC is optional: a failure here must not discard the other metrics
        roc_auc = None
        if y_pred_proba is not None:
            try:
                roc_auc = float(roc_auc_score(y_test, y_pred_proba))
            except ValueError as exc:
                print(f"  ROC-AUC unavailable: {exc}")

        # Metrics
        metrics = {
            'model_name': model_name,
            'accuracy': float(accuracy_score(y_test, y_pred)),
            'precision': float(precision_score(y_test, y_pred, average='binary', zero_division=0)),
            'recall': float(recall_score(y_test, y_pred, average='binary', zero_division=0)),
            'f1_score': float(f1_score(y_test, y_pred, average='binary', zero_division=0)),
            'roc_auc': roc_auc
        }

        # Confusion matrix
        cm = confusion_matrix(y_test, y_pred)

        # Classification report
        class_report = classification_report(y_test, y_pred, output_dict=True)

        print(f"  Accuracy:  {metrics['accuracy']:.4f}")
        print(f"  Precision: {metrics['precision']:.4f}")
        print(f"  Recall:    {metrics['recall']:.4f}")
        print(f"  F1-Score:  {metrics['f1_score']:.4f}")
        # Compare against None so that a legitimate score of 0.0 is still shown
        if metrics['roc_auc'] is not None:
            print(f"  ROC-AUC:   {metrics['roc_auc']:.4f}")

        self.results[model_name] = {
            'metrics': metrics,
            'confusion_matrix': cm.tolist(),
            'classification_report': class_report,
            'predictions': {
                # Keep only the first 100 predictions; slice before converting
                'y_pred': np.asarray(y_pred)[:100].tolist(),
                'y_true': np.asarray(y_test)[:100].tolist()
            }
        }

        return metrics, cm, class_report


class ResultVisualizer:
    """Draw evaluation figures and save them as PNG files in ``output_dir``."""

    def __init__(self, output_dir):
        """Remember the directory that receives every saved figure."""
        self.output_dir = output_dir

    def _save_show_close(self, fig, filename):
        """Save ``fig`` at 300 dpi, display it, then close it.

        Closing explicitly keeps figures from accumulating when many plots are
        produced in one run; closing an already-closed figure is harmless.
        """
        fig.savefig(filename, dpi=300)
        plt.show()
        plt.close(fig)
        return filename

    def plot_confusion_matrix(self, cm, model_name, dataset_name):
        """Plot ``cm`` as an annotated heatmap.

        Args:
            cm: Confusion matrix of integer counts.
            model_name: Model label used in the title and file name.
            dataset_name: Dataset label used in the title and file name.

        Returns:
            Path of the saved PNG file.
        """
        fig = plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=True)
        plt.title(f'Confusion Matrix - {model_name} on {dataset_name}')
        plt.ylabel('True Label')
        plt.xlabel('Predicted Label')
        plt.tight_layout()

        filename = f"{self.output_dir}/cm_{model_name}_{dataset_name}.png"
        return self._save_show_close(fig, filename)

    def plot_roc_curve(self, models, X_test, y_test, dataset_name):
        """Plot one ROC curve per model on shared axes.

        Args:
            models: Mapping of model name to fitted estimator; estimators
                without ``predict_proba`` are skipped.
            X_test: Test features.
            y_test: True test labels.
            dataset_name: Dataset label used in the title and file name.

        Returns:
            Path of the saved PNG file.
        """
        fig = plt.figure(figsize=(10, 8))

        for model_name, model in models.items():
            if not hasattr(model, 'predict_proba'):
                continue
            # Column 1 is used as the positive-class score
            y_pred_proba = model.predict_proba(X_test)[:, 1]
            fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
            auc_score = roc_auc_score(y_test, y_pred_proba)
            plt.plot(fpr, tpr, label=f'{model_name} (AUC = {auc_score:.3f})')

        # Diagonal reference line for a random classifier
        plt.plot([0, 1], [0, 1], 'k--', label='Random Classifier')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title(f'ROC Curves - {dataset_name}')
        plt.legend(loc='lower right')
        plt.grid(alpha=0.3)
        plt.tight_layout()

        filename = f"{self.output_dir}/roc_curves_{dataset_name}.png"
        return self._save_show_close(fig, filename)

    def plot_model_comparison(self, results_dict, dataset_name):
        """Draw a 2x2 grid of bar charts comparing the models.

        Args:
            results_dict: Mapping of model name to a result dictionary whose
                ``'metrics'`` entry holds ``model_name``, ``accuracy``,
                ``precision``, ``recall`` and ``f1_score``.
            dataset_name: Dataset label used in the title and file name.

        Returns:
            Path of the saved PNG file.
        """
        metrics_df = pd.DataFrame([
            results['metrics'] for results in results_dict.values()
        ])

        # Bar plot comparison
        fig, axes = plt.subplots(2, 2, figsize=(15, 12))
        fig.suptitle(f'Model Performance Comparison - {dataset_name}', fontsize=16)

        metrics_to_plot = ['accuracy', 'precision', 'recall', 'f1_score']

        # axes.flat walks the grid row by row, matching the metric order
        for ax, metric in zip(axes.flat, metrics_to_plot):
            metrics_df.plot(x='model_name', y=metric, kind='bar', ax=ax, legend=False)
            ax.set_title(metric.replace('_', ' ').title())
            ax.set_xlabel('')
            ax.set_ylabel('Score')
            ax.set_ylim([0, 1])
            ax.grid(axis='y', alpha=0.3)

        plt.tight_layout()
        filename = f"{self.output_dir}/model_comparison_{dataset_name}.png"
        return self._save_show_close(fig, filename)


# Status message: reaching this line means the class definitions in this cell ran
print("\n✓ Utility classes initialized successfully!")

In [None]:
# ============================================================================
# SECTION 4: LOAD PREPROCESSED DATASETS
# ============================================================================

print("\n" + "="*80)
print("LOADING PREPROCESSED DATASETS")
print("="*80)

datasets = {}
processed_dir = DIRS['datasets_processed']
PREPROCESSED_SUFFIX = '_preprocessed.csv'

# Find all preprocessed CSV files. os.listdir() returns entries in arbitrary
# order, so sort them to load (and later train/report) in the same order on
# every run.
preprocessed_files = sorted(
    f for f in os.listdir(processed_dir) if f.endswith(PREPROCESSED_SUFFIX)
)

print(f"\nFound {len(preprocessed_files)} preprocessed datasets")

for file in preprocessed_files:
    # Strip only the trailing suffix; the name itself may contain the same text
    dataset_name = file[:-len(PREPROCESSED_SUFFIX)]
    filepath = os.path.join(processed_dir, file)

    # Best effort: one unreadable file must not stop the remaining datasets
    try:
        df = pd.read_csv(filepath)
    except Exception as e:
        print(f"\n✗ Error loading {dataset_name}: {e}")
        continue

    datasets[dataset_name] = df
    print(f"\n✓ Loaded {dataset_name}")
    print(f"  Shape: {df.shape}")
    print(f"  Memory: {df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")

    # Check for label columns
    if 'binary_label' in df.columns:
        print(f"  Binary labels: {dict(df['binary_label'].value_counts())}")

print("\n" + "="*80)
print(f"TOTAL DATASETS LOADED: {len(datasets)}")
print("="*80)

In [None]:
# ============================================================================
# SECTION 5: TRAIN AND EVALUATE BASELINE MODELS ON EACH DATASET
# ============================================================================

print("\n" + "#"*80)
print("# STARTING MODEL TRAINING AND EVALUATION")
print("#"*80)

all_results = {}
all_models = {}

# One row per baseline model, in training order:
# (results key, ModelTrainer method name, label used in figure names,
#  label used in error messages, keyword arguments for the training method)
MODEL_SPECS = [
    ('random_forest', 'train_random_forest', 'RandomForest', 'Random Forest', {'n_estimators': 100}),
    ('xgboost', 'train_xgboost', 'XGBoost', 'XGBoost', {'n_estimators': 100}),
    ('lightgbm', 'train_lightgbm', 'LightGBM', 'LightGBM', {'n_estimators': 100}),
    ('gradient_boosting', 'train_gradient_boosting', 'GradientBoosting', 'Gradient Boosting', {'n_estimators': 50}),
    # SVM is trained on a subset inside train_svm because of its computational cost
    ('svm', 'train_svm', 'SVM', 'SVM', {}),
]

# The visualizer only stores the output directory, so one instance serves every dataset
visualizer = ResultVisualizer(DIRS['results_phase2_figures'])

for dataset_name, df in datasets.items():
    print("\n" + "="*80)
    print(f"PROCESSING DATASET: {dataset_name}")
    print("="*80)

    # A fresh trainer per dataset keeps models and results separate
    trainer = ModelTrainer(random_state=RANDOM_STATE)

    # Prepare data
    try:
        X_train, X_test, y_train, y_test = trainer.prepare_data(df, label_col='binary_label')
    except Exception as e:
        print(f"Error preparing data: {e}")
        continue

    # Handle class imbalance. Guarded like every other step so that a
    # resampling failure on one dataset does not abort the remaining ones.
    try:
        X_train_balanced, y_train_balanced = trainer.handle_imbalance(X_train, y_train, method='smote')
    except Exception as e:
        print(f"Error handling class imbalance: {e}")
        continue

    # Train models
    print("\n" + "-"*80)
    print("TRAINING MODELS")
    print("-"*80)

    # Train, evaluate and plot each model; a failure only skips that model
    for result_key, train_method, figure_label, error_label, train_kwargs in MODEL_SPECS:
        try:
            model = getattr(trainer, train_method)(X_train_balanced, y_train_balanced, **train_kwargs)
            _, cm, _ = trainer.evaluate_model(model, X_test, y_test, result_key)
            visualizer.plot_confusion_matrix(cm, figure_label, dataset_name)
        except Exception as e:
            print(f"Error training {error_label}: {e}")

    # Plot ROC curves
    try:
        visualizer.plot_roc_curve(trainer.models, X_test, y_test, dataset_name)
    except Exception as e:
        print(f"Error plotting ROC curves: {e}")

    # Plot model comparison
    try:
        visualizer.plot_model_comparison(trainer.results, dataset_name)
    except Exception as e:
        print(f"Error plotting model comparison: {e}")

    # Save results
    all_results[dataset_name] = trainer.results
    all_models[dataset_name] = trainer.models

    # Save models
    for model_name, model in trainer.models.items():
        model_file = f"{DIRS['results_phase2_models']}/{dataset_name}_{model_name}.pkl"
        joblib.dump(model, model_file)
        print(f"\n✓ Saved model: {model_file}")

    # Save metrics to JSON
    metrics_file = f"{DIRS['results_phase2_metrics']}/{dataset_name}_metrics.json"
    with open(metrics_file, 'w') as f:
        json.dump(trainer.results, f, indent=4)
    print(f"✓ Saved metrics: {metrics_file}")

print("\n" + "#"*80)
print("# MODEL TRAINING COMPLETED")
print("#"*80)

In [None]:
# ============================================================================
# SECTION 6: CROSS-DATASET PERFORMANCE ANALYSIS
# ============================================================================

print("\n" + "="*80)
print("CROSS-DATASET PERFORMANCE ANALYSIS")
print("="*80)

# Flatten {dataset: {model: results}} into one metrics record per pair,
# tagging each record with the dataset it came from
all_metrics = []

for dataset_name, results in all_results.items():
    for model_name, model_results in results.items():
        metrics = dict(model_results['metrics'], dataset=dataset_name)
        all_metrics.append(metrics)

# One row per (dataset, model)
metrics_df = pd.DataFrame(all_metrics)

print("\nOverall Performance Summary:")
summary_columns = ['accuracy', 'precision', 'recall', 'f1_score']
mean_by_model = metrics_df.groupby('model_name')[summary_columns].mean()
print(mean_by_model)

# Persist the full table
metrics_csv_path = f"{DIRS['results_phase2']}/comprehensive_metrics.csv"
metrics_df.to_csv(metrics_csv_path, index=False)
print(f"\n✓ Saved comprehensive metrics CSV")

# Figure 1: accuracy heatmap, models as rows and datasets as columns
plt.figure(figsize=(14, 8))
pivot_accuracy = metrics_df.pivot(index='model_name', columns='dataset', values='accuracy')
sns.heatmap(
    pivot_accuracy,
    annot=True,
    fmt='.3f',
    cmap='YlGnBu',
    cbar_kws={'label': 'Accuracy'},
)
plt.title('Model Accuracy Across Datasets')
plt.tight_layout()
heatmap_path = f"{DIRS['results_phase2_figures']}/accuracy_heatmap_all_datasets.png"
plt.savefig(heatmap_path, dpi=300)
plt.show()

# Figure 2: spread of F1-scores per model across the datasets
plt.figure(figsize=(12, 6))
sns.boxplot(data=metrics_df, x='model_name', y='f1_score')
plt.title('F1-Score Distribution Across Models and Datasets')
plt.xlabel('Model')
plt.ylabel('F1-Score')
plt.xticks(rotation=45)
plt.tight_layout()
boxplot_path = f"{DIRS['results_phase2_figures']}/f1_score_boxplot.png"
plt.savefig(boxplot_path, dpi=300)
plt.show()

print("\n" + "="*80)
print("CROSS-DATASET ANALYSIS COMPLETED")
print("="*80)

In [None]:
# ============================================================================
# SECTION 7: GENERATE PHASE 2 COMPREHENSIVE REPORT
# ============================================================================

print("\n" + "="*80)
print("GENERATING PHASE 2 COMPREHENSIVE REPORT")
print("="*80)

phase2_report = {
    "phase": "Phase II - Baseline Machine Learning Models",
    "researcher": "Prabhu Narayan (60222005)",
    "supervisor": "Dr. Mamta Mittal",
    "institution": "DSEU",
    "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    "datasets_used": list(datasets.keys()),
    "models_trained": ['Random Forest', 'XGBoost', 'LightGBM', 'Gradient Boosting', 'SVM'],
    # Number of fitted models actually kept, summed over all datasets
    "total_models": sum(len(models) for models in all_models.values()),
    "performance_summary": {}
}

# Best performing model per dataset (ranked by F1-score)
for dataset_name, results in all_results.items():
    if not results:
        # A dataset whose models all failed is stored with an empty result
        # dict; max() on an empty sequence would raise ValueError.
        print(f"\n⚠ No evaluated models for {dataset_name}; omitted from performance summary")
        continue

    best_model = max(results.items(), key=lambda x: x[1]['metrics']['f1_score'])
    phase2_report["performance_summary"][dataset_name] = {
        "best_model": best_model[0],
        "best_f1_score": best_model[1]['metrics']['f1_score'],
        "best_accuracy": best_model[1]['metrics']['accuracy'],
        "all_models": {k: v['metrics'] for k, v in results.items()}
    }

# Overall best model: highest F1-score averaged over the datasets
if metrics_df.empty:
    # Nothing was evaluated, so there is no model to rank
    overall_best = None
    phase2_report["overall_best_model"] = None
else:
    avg_performance = metrics_df.groupby('model_name')[['accuracy', 'f1_score']].mean()
    overall_best = avg_performance['f1_score'].idxmax()
    phase2_report["overall_best_model"] = {
        "model_name": overall_best,
        "avg_accuracy": float(avg_performance.loc[overall_best, 'accuracy']),
        "avg_f1_score": float(avg_performance.loc[overall_best, 'f1_score'])
    }

# Save comprehensive report
report_file = f"{DIRS['results_phase2']}/PHASE2_COMPREHENSIVE_REPORT.json"
with open(report_file, 'w') as f:
    json.dump(phase2_report, f, indent=4)

print(f"\n✓ Comprehensive report saved: {report_file}")

# Display summary
print("\n" + "#"*80)
print("# PHASE 2 EXECUTION SUMMARY")
print("#"*80)
print(f"\nTotal Datasets: {len(datasets)}")
print(f"Total Models Trained: {phase2_report['total_models']}")
if overall_best is None:
    print("\nOverall Best Model: none (no model was evaluated)")
else:
    print(f"\nOverall Best Model: {overall_best}")
    print(f"  • Average Accuracy: {phase2_report['overall_best_model']['avg_accuracy']:.4f}")
    print(f"  • Average F1-Score: {phase2_report['overall_best_model']['avg_f1_score']:.4f}")

print("\n" + "-"*80)
print("Best Model per Dataset:")
for dataset, summary in phase2_report["performance_summary"].items():
    print(f"\n  {dataset}:")
    print(f"    • Best Model: {summary['best_model']}")
    print(f"    • F1-Score: {summary['best_f1_score']:.4f}")
    print(f"    • Accuracy: {summary['best_accuracy']:.4f}")

print("\n" + "#"*80)
print("# PHASE 2 COMPLETED SUCCESSFULLY")
print("#"*80)

print("""
NEXT STEPS:
1. Review model performance metrics and visualizations
2. Proceed to Phase III: Advanced Deep Learning Models
   - Notebook: 03_Advanced_DL_Models.ipynb
   - Models: CNN, LSTM, Transformer
3. Integrate Explainable AI (SHAP/LIME)
""")