# IoT Device Identification with Adversarial Training

This notebook runs the adversarial training pipeline on Google Colab with GPU support.

**Features:**
- Automatic GPU detection
- Google Drive integration for saving results
- Clone from GitHub repository
- Run adversarial training experiments

## 1. Setup Environment

In [None]:
# Attach the user's Google Drive to the Colab VM at /content/drive; the data
# and results paths used further down in this notebook live under that mount.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

In [None]:
!git clone https://github.com/yacinemkk/pfe.git /content/pfe
%cd /content/pfe

In [None]:
!pip install torch scikit-learn pandas numpy tqdm -q

In [None]:
import sys

# Make the cloned repository importable. The membership test keeps the cell
# idempotent: re-running it no longer stacks duplicate entries in sys.path.
if '/content/pfe' not in sys.path:
    sys.path.insert(0, '/content/pfe')

import torch

# Report the PyTorch build and whether a CUDA device is visible to it.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Index 0 is the first visible CUDA device.
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")

## 2. Configure Paths for Google Drive

Results will be saved to your Google Drive under `/content/drive/MyDrive/pfe_results/`

In [None]:
import os
from pathlib import Path
import config.config as config

# Fail fast when Google Drive is not mounted. Without this check the mkdir
# calls below would happily create the directory tree on the Colab VM's local
# disk, and the training results would never reach Drive.
if not os.path.ismount('/content/drive'):
    raise RuntimeError(
        "Google Drive is not mounted at /content/drive. "
        "Run the drive.mount() cell first so results are saved to Drive."
    )

# Root folder on Drive for everything this notebook produces.
GDRIVE_BASE = Path('/content/drive/MyDrive/pfe_results')
GDRIVE_BASE.mkdir(parents=True, exist_ok=True)

# Override the project's results directory so it points into the Drive folder.
config.RESULTS_DIR = GDRIVE_BASE / 'models'
config.RESULTS_DIR.mkdir(parents=True, exist_ok=True)

print(f"Results will be saved to: {config.RESULTS_DIR}")

## 3. Data Setup

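Upload your data to Google Drive or use the data in the repository.

- **Option A:** Data already in the repo under the `data/` folder.
- **Option B:** Upload data to Google Drive and set the path below.

The cell below is configured for Option B and expects the data in `MyDrive/PFE/IPFIX_ML_Instances` on your Google Drive; edit the paths if your data lives elsewhere.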

In [None]:
import os
from pathlib import Path

# Point the project configuration at the dataset stored on Google Drive
# (folder PFE/, with the raw files in PFE/IPFIX_ML_Instances).
GDRIVE_DATA = Path('/content/drive/MyDrive/PFE')
config.RAW_DATA_DIR = GDRIVE_DATA.joinpath('IPFIX_ML_Instances')
config.PROCESSED_DATA_DIR = GDRIVE_DATA.joinpath('processed')

print(f"Using data from: {config.RAW_DATA_DIR}")

# Sanity check: report how many home*.csv files are visible, or warn when the
# raw data directory is missing altogether.
if not config.RAW_DATA_DIR.exists():
    print("WARNING: Data directory not found!")
else:
    matching_files = list(config.RAW_DATA_DIR.glob('home*.csv'))
    print(f"Found {len(matching_files)} data files")

## 4. Run Adversarial Training

Configure and run the training experiment.

In [None]:
# Entry points of the training pipeline: run_experiment is called with the
# settings below; compare_models is used by the optional comparison section.
from train_adversarial import run_experiment, compare_models

# User-tunable settings for the single training run. Each constant is passed
# to run_experiment() under the matching lower-case keyword argument.
MODEL_TYPE = 'lstm'          # Options: 'lstm', 'transformer', 'cnn_lstm', 'cnn'
SEQ_LENGTH = 10              # Sequence length (try 10, 25, or 50)
ADV_METHOD = 'hybrid'        # Options: 'none', 'feature', 'pgd', 'fgsm', 'hybrid'
ADV_RATIO = 0.2              # Ratio of adversarial samples (0.0 - 1.0)
EPOCHS = 30                  # Number of training epochs
BATCH_SIZE = 64              # Batch size
MAX_FILES = None             # Limit data files (None for all)

In [None]:
# Collect the settings from the configuration cell and launch a single
# training run; the returned mapping is kept in `results` for later cells.
experiment_kwargs = dict(
    model_type=MODEL_TYPE,
    seq_length=SEQ_LENGTH,
    adv_method=ADV_METHOD,
    adv_ratio=ADV_RATIO,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    max_files=MAX_FILES,
    save_results=True,
)
results = run_experiment(**experiment_kwargs)

# Summary banner followed by the clean-data test accuracy.
banner = "=" * 60
print("\n" + banner)
print("TRAINING COMPLETE")
print(banner)
print(f"Test Accuracy (Clean): {results['test_accuracy_clean']:.4f}")

# Per-attack accuracy is printed only when the run reported adversarial results.
if 'adversarial_results' in results:
    print("\nAdversarial Robustness:")
    for attack_name, attack_metrics in results['adversarial_results'].items():
        print(f"  {attack_name}: {attack_metrics['accuracy']:.4f}")

## 5. Run Full Comparison (Optional)

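Compare several model architectures, sequence lengths, and adversarial training methods. The comparison is skipped unless `RUN_COMPARISON` is set to `True` in the cell below.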

In [None]:
RUN_COMPARISON = False  # Set to True to run full comparison

# Grid handed to compare_models(): every listed sequence length, model and
# adversarial method, with a fixed epoch budget and no limit on data files.
comparison_grid = {
    'seq_lengths': [10, 25],
    'models': ['lstm', 'transformer'],
    'adv_methods': ['none', 'pgd', 'hybrid'],
    'epochs': 20,
    'max_files': None,
}

# The sweep is opt-in; nothing runs while the flag is False.
if RUN_COMPARISON:
    comparison_results = compare_models(**comparison_grid)

## 6. Save Final Results to Google Drive

In [None]:
import json
from datetime import datetime

# Take a single timestamp and reuse it, so the value stored inside the JSON and
# the one embedded in the file name always refer to the same instant.
saved_at = datetime.now()

# Wrap the experiment output together with the time it was saved.
final_results = {
    'timestamp': saved_at.isoformat(),
    'experiment': results
}

results_file = GDRIVE_BASE / f"final_results_{saved_at.strftime('%Y%m%d_%H%M%S')}.json"
with open(results_file, 'w') as f:
    # default=str: values json cannot encode natively are written as str(value)
    # instead of raising TypeError.
    json.dump(final_results, f, indent=2, default=str)

print(f"Results saved to: {results_file}")
print("\nAll done! Check your Google Drive for saved models and results.")