## 📚 Step 1: Import Libraries and Check GPU Availability

In [None]:
# Import essential libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve, accuracy_score
from sklearn.utils.class_weight import compute_class_weight
import warnings
import os

# Global notebook setup: silence library warnings and pick the seaborn theme.
# NOTE(review): filtering *all* warnings also hides deprecation notices.
warnings.filterwarnings('ignore')
sns.set_style('whitegrid')

print("‚úÖ Libraries imported successfully!")
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {keras.__version__}")

# Check GPU availability
print("\n" + "="*70)
print("üîç GPU Detection")
print("="*70)
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print(f"‚úÖ Found {len(gpus)} GPU(s):")
    for i, gpu in enumerate(gpus):
        print(f"   GPU {i}: {gpu.name}")

    # Enable memory growth to prevent TensorFlow from allocating all GPU
    # memory at once. TensorFlow raises RuntimeError if the GPUs were already
    # initialized (typically when this cell is re-run in a live session), so
    # report that instead of aborting the whole cell.
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        print(f"\n   WARNING: Could not enable memory growth: {err}")
    else:
        print("\n‚úÖ Memory growth enabled for all GPUs")
else:
    print("‚ùå No GPU found! Training will be slow on CPU.")
    print("   Make sure to enable GPU accelerator in Kaggle (Settings > Accelerator > GPU T4 x2)")

## 🎯 Step 2: Configure Multi-GPU Training Strategy

In [None]:
# Set up synchronous data-parallel training and derive the batch sizes from
# the number of replicas the strategy reports.
print("="*70, "‚ö° Configuring Multi-GPU Training Strategy", "="*70, sep="\n")

# MirroredStrategy replicates the model on every visible device.
strategy = tf.distribute.MirroredStrategy()
replica_count = strategy.num_replicas_in_sync

print(f"\n‚úÖ MirroredStrategy initialized")
print(f"   Number of devices: {replica_count}")
print(f"   Device names: {strategy.extended.worker_devices}")

if replica_count < 2:
    print(f"\n‚ö†Ô∏è Only {replica_count} GPU detected. Enable 2x T4 for faster training.")
else:
    print(f"\nüî• Multi-GPU training enabled with {replica_count} GPUs!")
    print(f"   Effective batch size will be: BATCH_SIZE √ó {replica_count}")

# The global batch is what gets passed to fit(); it is split across replicas.
BATCH_SIZE_PER_REPLICA = 64
GLOBAL_BATCH_SIZE = replica_count * BATCH_SIZE_PER_REPLICA

print(
    f"\nüìä Batch size configuration:",
    f"   Per GPU: {BATCH_SIZE_PER_REPLICA}",
    f"   Global (effective): {GLOBAL_BATCH_SIZE}",
    sep="\n",
)

## 📂 Step 3: Load and Explore Dataset

In [None]:
# Load the training CSV from the first candidate location that works, then
# print a quick structural overview (shape, missing values, columns).
print("="*70)
print("üìÇ Loading Dataset")
print("="*70)

# Candidate locations, tried in order (Kaggle input path first, then local).
dataset_paths = [
    '/kaggle/input/final-lstm-traning-dataset/final_lstm_training_dataset.csv',  # Kaggle path
    'final_lstm_training_dataset.csv',                                             # Local path
    r'd:\Phishing LSTM Model\final_lstm_training_dataset.csv'                     # Absolute local path
]

df = None
for path in dataset_paths:
    try:
        df = pd.read_csv(path)
    except FileNotFoundError as e:
        # Expected for every environment except the one the path targets.
        print(f"   ‚è≠Ô∏è Skipping {path}: {type(e).__name__}")
    except Exception as e:
        # The file exists but could not be read (permissions, parse error,
        # encoding...). Stay best-effort and try the next path, but show the
        # message so a corrupt file is not mistaken for a missing one.
        print(f"   ‚è≠Ô∏è Skipping {path}: {type(e).__name__}: {e}")
    else:
        print(f"‚úÖ Dataset loaded successfully from: {path}")
        break

if df is None:
    raise FileNotFoundError("‚ùå Could not find dataset! Please upload 'final_lstm_training_dataset.csv' to Kaggle.")

# Display basic information
print(f"\nüìä Dataset Shape: {df.shape}")
print(f"   Rows: {df.shape[0]:,}")
print(f"   Columns: {df.shape[1]}")

# IMPORTANT: Check if data was loaded correctly
print(f"\nüîç Data Loading Check:")
print(f"   Total rows loaded: {len(df):,}")
print(f"   Expected: ~24,680 rows")
if len(df) < 20000:
    print(f"   ‚ö†Ô∏è WARNING: Only {len(df):,} rows loaded! Expected ~24,680 rows.")
    print(f"   This may indicate a problem with the dataset file.")

# Check for missing values
missing_values = df.isnull().sum().sum()
print(f"\nüîç Missing values: {missing_values}")

# Display first few rows. `display` only exists inside IPython/Jupyter, so
# fall back to a plain print when the code runs as a regular script.
print("\nüìã First 3 rows:")
try:
    display(df.head(3))
except NameError:
    print(df.head(3))

# Display column names
print(f"\nüìù Column names ({len(df.columns)} total):")
for i, col in enumerate(df.columns, 1):
    print(f"   {i:2d}. {col}")

## 🔍 Step 4: Data Analysis and Visualization

In [None]:
# Summarise how the two classes are distributed, both as text and as a
# side-by-side bar/pie figure, then count fully duplicated rows.
print("="*70)
print("üìä Label Distribution Analysis")
print("="*70)

label_counts = df['label'].value_counts().sort_index()
total_rows = len(df)

print("\n‚öñÔ∏è Class distribution:")
for label, count in label_counts.items():
    label_name = "Phishing" if label != 0 else "Legitimate"
    percentage = (count / total_rows) * 100
    print(f"   {label_name} ({label}): {count:,} samples ({percentage:.2f}%)")

# Ratio of the largest class to the smallest one
imbalance_ratio = label_counts.max() / label_counts.min()
print(f"\nüìà Imbalance ratio: {imbalance_ratio:.2f}:1")

# Shared labels/colours for both charts (green = legitimate, red = phishing)
tick_labels = ['Legitimate (0)', 'Phishing (1)']
colors = ['#2ecc71', '#e74c3c']

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax1, ax2 = axes

# Left panel: absolute counts as bars
bars = ax1.bar(tick_labels, label_counts.values, color=colors, alpha=0.7, edgecolor='black')
ax1.set_ylabel('Count', fontsize=12, fontweight='bold')
ax1.set_title('Class Distribution', fontsize=14, fontweight='bold')
ax1.grid(axis='y', alpha=0.3)

# Write each bar's count just above it
for bar in bars:
    height = bar.get_height()
    x_center = bar.get_x() + bar.get_width()/2.
    ax1.text(
        x_center,
        height,
        f'{int(height):,}',
        ha='center',
        va='bottom',
        fontsize=11,
        fontweight='bold',
    )

# Right panel: class proportions as a pie
ax2.pie(
    label_counts.values,
    labels=tick_labels,
    colors=colors,
    autopct='%1.1f%%',
    startangle=90,
    textprops={'fontsize': 11, 'fontweight': 'bold'},
)
ax2.set_title('Class Proportion', fontsize=14, fontweight='bold')

plt.tight_layout()
plt.show()

# Rows that are identical across every column
duplicates = df.duplicated().sum()
print(f"\nüîÑ Duplicate rows: {duplicates}")

## 🧹 Step 5: Data Preprocessing

In [None]:
# Clean the raw frame into a purely numeric feature matrix `X` and a label
# vector `y`: drop the URL text, convert string columns to numbers, fill
# missing values with 0, and report (but keep) duplicate rows.
print("="*70)
print("üßπ Data Preprocessing")
print("="*70)

# Work on a copy so the raw `df` stays untouched
df_clean = df.copy()
print(f"Starting with: {len(df_clean):,} rows")

# Remove URL column (not needed for training)
if 'url' in df_clean.columns:
    df_clean = df_clean.drop('url', axis=1)
    print(f"‚úÖ Removed 'url' column, rows: {len(df_clean):,}")

# Convert ALL boolean string columns to numeric automatically
print(f"\nüîÑ Converting data types...")

# String spellings accepted as booleans and their numeric value
boolean_string_map = {
    'True': 1, 'False': 0,
    'true': 1, 'false': 0,
    '1': 1, '0': 0,
}

for col in df_clean.columns:
    if col == 'label':  # Skip the target column
        continue

    # Only object (string) columns need conversion
    if df_clean[col].dtype != 'object':
        continue

    unique_values = set(df_clean[col].dropna().unique())
    if unique_values.issubset(boolean_string_map.keys()):
        # Every non-null value is a boolean string: map it to 0/1
        df_clean[col] = df_clean[col].map(boolean_string_map)
        print(f"   Converted '{col}' from string to numeric")
        continue

    # Generic numeric conversion. `errors='coerce'` never raises for
    # unparseable text - it silently yields NaN (later filled with 0) - so
    # count the newly created NaNs and report them explicitly.
    missing_before = int(df_clean[col].isna().sum())
    try:
        df_clean[col] = pd.to_numeric(df_clean[col], errors='coerce')
    except (TypeError, ValueError):
        print(f"   ‚ö†Ô∏è Warning: Column '{col}' contains non-numeric values")
        continue

    coerced = int(df_clean[col].isna().sum()) - missing_before
    print(f"   Converted '{col}' to numeric (with coercion)")
    if coerced > 0:
        print(f"   ‚ö†Ô∏è Warning: Column '{col}' contains non-numeric values "
              f"({coerced:,} value(s) coerced to NaN)")

# Handle missing values
print(f"\nüîç Handling missing values...")
before_missing = df_clean.isnull().sum().sum()
print(f"   Before: {before_missing} missing values")

# Check which columns have missing values
if before_missing > 0:
    print(f"\n   Columns with missing values:")
    missing_cols = df_clean.isnull().sum()
    for col, count in missing_cols[missing_cols > 0].items():
        print(f"      {col}: {count} missing")

# NOTE(review): this also fills missing *labels* with 0 (legitimate) - confirm
# the dataset never has a missing label.
df_clean = df_clean.fillna(0)
after_missing = df_clean.isnull().sum().sum()
print(f"   After: {after_missing} missing values")
print(f"   Rows after filling: {len(df_clean):,}")

# Remove duplicates - DISABLED to keep all training samples
# Many URLs have similar features, so we want to keep them all for training
print(f"\nüóëÔ∏è Checking for duplicates...")
before_rows = len(df_clean)
duplicates_count = df_clean.duplicated().sum()
print(f"   Found {duplicates_count} rows with duplicate feature values")
print(f"   ‚ö†Ô∏è KEEPING all rows (including duplicates) for training")
print(f"   Reason: Different URLs can have similar features - we want to learn from all samples")

# Uncomment the line below if you want to remove duplicates
# df_clean = df_clean.drop_duplicates()

after_rows = len(df_clean)
print(f"   Final row count: {after_rows:,} rows retained")

# Separate features and labels
X = df_clean.drop('label', axis=1)
y = df_clean['label']

print(f"\n‚úÖ Preprocessing complete!")
print(f"   Features shape: {X.shape}")
print(f"   Labels shape: {y.shape}")
print(f"   Number of features: {X.shape[1]}")
print(f"   Final samples for training: {len(X):,}")

# Verify all columns are numeric
non_numeric = X.select_dtypes(include=['object']).columns.tolist()
if non_numeric:
    print(f"\n‚ö†Ô∏è WARNING: Non-numeric columns detected: {non_numeric}")
    print(f"   Attempting to display unique values:")
    for col in non_numeric:
        unique_vals = X[col].unique()[:10]  # Show first 10 unique values
        print(f"      {col}: {unique_vals}")
else:
    print(f"\n‚úÖ All feature columns are numeric")

# Display feature names
print(f"\nüìù Feature columns ({len(X.columns)} total):")
for i, col in enumerate(X.columns, 1):
    dtype = X[col].dtype
    print(f"   {i:2d}. {col:30s} (dtype: {dtype})")

## ✂️ Step 6: Split Data into Train/Validation/Test Sets

In [None]:
# Stratified two-stage split: hold out the test set first, then carve the
# validation set out of the remainder (roughly 70% / 15% / 15% overall).
print("="*70, "‚úÇÔ∏è Splitting Data", "="*70, sep="\n")

# Stage 1: 15% of all rows become the test set
X_temp, X_test, y_temp, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=42,
    stratify=y,
)

# Stage 2: 0.176 of the remaining 85% is approximately 15% of the total
X_train, X_val, y_train, y_val = train_test_split(
    X_temp,
    y_temp,
    test_size=0.176,
    random_state=42,
    stratify=y_temp,
)

total_samples = len(X)
print("\nüìä Dataset split sizes:")
for heading, part in (
    ("Training set:", X_train),
    ("Validation set:", X_val),
    ("Test set:", X_test),
):
    print(f"   {heading:<15s} {len(part):,} samples ({len(part)/total_samples*100:.1f}%)")

# Confirm stratification kept the class balance in every subset
print("\n‚öñÔ∏è Class distribution in each set:")
label_sets = {'Train': y_train, 'Validation': y_val, 'Test': y_test}
for name, labels in label_sets.items():
    counts = labels.value_counts().sort_index()
    subset_size = len(labels)
    print(f"\n   {name}:")
    for label, count in counts.items():
        label_name = "Phishing" if label != 0 else "Legitimate"
        print(f"      {label_name} ({label}): {count:,} ({count/subset_size*100:.1f}%)")

## 🔧 Step 7: Feature Scaling and Sequence Preparation

In [None]:
# Standardise the features (statistics fitted on the training split only) and
# add a length-1 timestep axis so the arrays match the LSTM input layout.
print("="*70, "üîß Feature Scaling and LSTM Preparation", "="*70, sep="\n")

# Fit on train, then apply the same transform to validation and test
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled, X_test_scaled = scaler.transform(X_val), scaler.transform(X_test)

print("‚úÖ Features scaled using StandardScaler")

# (samples, features) -> (samples, 1, features): each feature vector is
# treated as a single timestep
X_train_lstm, X_val_lstm, X_test_lstm = (
    np.expand_dims(split, axis=1)
    for split in (X_train_scaled, X_val_scaled, X_test_scaled)
)

print(
    f"\nüìê LSTM input shapes:",
    f"   Training:   {X_train_lstm.shape} (samples, timesteps, features)",
    f"   Validation: {X_val_lstm.shape}",
    f"   Test:       {X_test_lstm.shape}",
    sep="\n",
)

# Labels as plain arrays for Keras / scikit-learn
y_train_arr, y_val_arr, y_test_arr = y_train.values, y_val.values, y_test.values

feature_count = X_train_lstm.shape[2]
print(f"\n‚úÖ Data preparation complete!")
print(f"   Ready for LSTM training with {feature_count} features")

## ⚖️ Step 8: Calculate Class Weights for Imbalanced Data

In [None]:
# Compute 'balanced' per-class loss weights from the training labels and
# store them as {class_label: weight}, the format passed to fit(class_weight=).
print("="*70)
print("‚öñÔ∏è Computing Class Weights")
print("="*70)

# Classes actually present in the training labels (sorted)
weight_classes = np.unique(y_train_arr)

# Calculate class weights to handle imbalanced dataset
class_weights_array = compute_class_weight(
    class_weight='balanced',
    classes=weight_classes,
    y=y_train_arr
)

# Key each weight by its real class label rather than by its position in the
# array: the two only coincide when the labels are exactly 0..n-1.
class_weights = {
    int(class_label): float(weight)
    for class_label, weight in zip(weight_classes, class_weights_array)
}

print("\nüìä Class weights (to balance training):")
for class_idx, weight in class_weights.items():
    class_name = "Legitimate" if class_idx == 0 else "Phishing"
    print(f"   {class_name} ({class_idx}): {weight:.4f}")

print(f"\nüí° Higher weight ({max(class_weights.values()):.4f}) will be applied to minority class")
print("   This helps the model learn from underrepresented samples")

## 🏗️ Step 9: Build LSTM Model Architecture (Multi-GPU)

In [None]:
# Build and compile the bidirectional-LSTM binary classifier inside the
# distribution strategy scope so its variables are created on every replica.
print("="*70)
print("üèóÔ∏è Building LSTM Model Architecture")
print("="*70)

# Build model inside strategy scope for multi-GPU training
with strategy.scope():

    # Define the model
    model = Sequential([
        # First Bidirectional LSTM layer
        Bidirectional(LSTM(128, return_sequences=True),
                     input_shape=(X_train_lstm.shape[1], X_train_lstm.shape[2])),
        BatchNormalization(),
        Dropout(0.3),

        # Second Bidirectional LSTM layer
        Bidirectional(LSTM(64, return_sequences=False)),
        BatchNormalization(),
        Dropout(0.3),

        # Dense layers
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),

        Dense(32, activation='relu'),
        Dropout(0.2),

        # Output layer: single sigmoid unit giving a probability
        Dense(1, activation='sigmoid')
    ])

    # Compile the model. Metrics get explicit names: otherwise Keras may
    # auto-number them ('precision_1', 'auc_2', ...) when this cell is run
    # more than once in a session, which breaks the fixed
    # history.history['precision'] / ['recall'] / ['auc'] lookups used when
    # plotting the training history.
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='binary_crossentropy',
        metrics=[
            'accuracy',
            tf.keras.metrics.Precision(name='precision'),
            tf.keras.metrics.Recall(name='recall'),
            tf.keras.metrics.AUC(name='auc'),
        ]
    )

print("\n‚úÖ Model built successfully inside MirroredStrategy scope!")
print(f"   Model will train across {strategy.num_replicas_in_sync} GPU(s)\n")

# Display model architecture
model.summary()

# Count parameters
trainable_params = sum(np.prod(v.shape) for v in model.trainable_variables)
print(f"\nüìä Total trainable parameters: {trainable_params:,}")

## 🎯 Step 10: Configure Training Callbacks

In [None]:
# Assemble the Keras callbacks used during fit() and set the epoch budget.
print("="*70, "üéØ Configuring Training Callbacks", "="*70, sep="\n")

# Stop when validation loss has not improved for 15 epochs and roll back to
# the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=15,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate after 5 stagnant epochs, never going below 1e-5.
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-5,
    verbose=1,
)

# Keep only the checkpoint with the highest validation accuracy.
checkpoint = ModelCheckpoint(
    'best_phishing_lstm_model.h5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Write graph and per-epoch histograms for TensorBoard.
tensorboard = TensorBoard(
    log_dir='./logs',
    histogram_freq=1,
    write_graph=True,
)

callbacks = [early_stopping, lr_scheduler, checkpoint, tensorboard]

print(
    "\n‚úÖ Configured callbacks:",
    "   1. Early Stopping (patience=15)",
    "   2. Reduce Learning Rate on Plateau (patience=5, factor=0.5)",
    "   3. Model Checkpoint (save best model)",
    "   4. TensorBoard Logging",
    sep="\n",
)

# Upper bound on epochs; early stopping normally ends training sooner
EPOCHS = 100
print(
    f"\n‚öôÔ∏è Training configuration:",
    f"   Max epochs: {EPOCHS}",
    f"   Batch size per GPU: {BATCH_SIZE_PER_REPLICA}",
    f"   Global batch size: {GLOBAL_BATCH_SIZE}",
    f"   Learning rate: 0.001 (will reduce on plateau)",
    sep="\n",
)

## 🔥 Step 11: Train the Model (Multi-GPU)

In [None]:
# Fit the model on the training split (validated each epoch) and report the
# wall-clock duration together with the last-epoch accuracies.
import time

print("="*70, "üî• Starting Multi-GPU Training", "="*70, sep="\n")
print(f"\n‚ö° Training across {strategy.num_replicas_in_sync} GPU(s)...\n")

fit_options = {
    'validation_data': (X_val_lstm, y_val_arr),
    'epochs': EPOCHS,
    'batch_size': GLOBAL_BATCH_SIZE,  # global batch, distributed across GPUs
    'class_weight': class_weights,
    'callbacks': callbacks,
    'verbose': 1,
}

start_time = time.time()
history = model.fit(X_train_lstm, y_train_arr, **fit_options)
end_time = time.time()
training_time = end_time - start_time

# Per-epoch metric lists recorded by Keras
epoch_log = history.history

print("\n" + "="*70, "‚úÖ Training Complete!", "="*70, sep="\n")
print(f"\n‚è±Ô∏è Total training time: {training_time/60:.2f} minutes ({training_time:.0f} seconds)")
print(f"   Epochs completed: {len(epoch_log['loss'])}")
print(f"   Final training accuracy: {epoch_log['accuracy'][-1]:.4f}")
print(f"   Final validation accuracy: {epoch_log['val_accuracy'][-1]:.4f}")

## 📊 Step 12: Visualize Training History

In [None]:
# Plot the per-epoch training/validation curves (loss, accuracy, precision,
# recall, AUC) plus the learning-rate schedule in a 2x3 grid and save the
# figure as 'training_history.png'.
print("="*70)
print("üìä Visualizing Training History")
print("="*70)


def _find_history_key(history_log, base_name):
    """Return the key under which `base_name` was logged, or None.

    Keras can append a numeric suffix to auto-named metrics (for example
    'precision_1'), so accept `base_name` itself or `base_name_<digits>`.
    """
    if base_name in history_log:
        return base_name
    for key in history_log:
        prefix, _, suffix = key.rpartition('_')
        if prefix == base_name and suffix.isdigit():
            return key
    return None


history_log = history.history

# Create comprehensive training visualization
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
fig.suptitle('LSTM Model Training History - Multi-GPU', fontsize=16, fontweight='bold')

# Panels 1-5: (history key, display name), laid out row by row
metric_panels = [
    ('loss', 'Loss'),
    ('accuracy', 'Accuracy'),
    ('precision', 'Precision'),
    ('recall', 'Recall'),
    ('auc', 'AUC'),
]

for ax, (metric, title) in zip(axes.flat, metric_panels):
    train_key = _find_history_key(history_log, metric)
    if train_key is None:
        # Metric was not recorded: show a placeholder instead of crashing
        ax.text(0.5, 0.5, f'{title}\nNot Tracked',
                ha='center', va='center', fontsize=12)
        ax.axis('off')
        continue

    ax.plot(history_log[train_key], label=f'Training {title}', linewidth=2)
    val_key = f'val_{train_key}'
    if val_key in history_log:
        ax.plot(history_log[val_key], label=f'Validation {title}', linewidth=2)
    ax.set_xlabel('Epoch', fontweight='bold')
    ax.set_ylabel(title, fontweight='bold')
    ax.set_title(f'Model {title}', fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3)

# Panel 6: learning rate (if available). The log key is 'lr' in older Keras
# releases and 'learning_rate' in newer ones.
lr_ax = axes[1, 2]
lr_key = next((key for key in ('lr', 'learning_rate') if key in history_log), None)
if lr_key is not None:
    lr_ax.plot(history_log[lr_key], linewidth=2, color='orange')
    lr_ax.set_xlabel('Epoch', fontweight='bold')
    lr_ax.set_ylabel('Learning Rate', fontweight='bold')
    lr_ax.set_title('Learning Rate Schedule', fontweight='bold')
    lr_ax.set_yscale('log')
    lr_ax.grid(True, alpha=0.3)
else:
    lr_ax.text(0.5, 0.5, 'Learning Rate\nNot Tracked',
               ha='center', va='center', fontsize=12)
    lr_ax.axis('off')

plt.tight_layout()
plt.savefig('training_history.png', dpi=300, bbox_inches='tight')
plt.show()

print("\n‚úÖ Training visualization saved as 'training_history.png'")

## 🎯 Step 13: Evaluate Model on Test Set

In [None]:
# Score the trained model on the held-out test split, derive hard 0/1
# predictions at a 0.5 threshold, and report F1 and MCC on top of the
# compiled Keras metrics.
from sklearn.metrics import f1_score, matthews_corrcoef

print("="*70, "üéØ Evaluating Model on Test Set", "="*70, sep="\n")

# evaluate() returns the loss followed by the compiled metrics, in order
test_results = model.evaluate(X_test_lstm, y_test_arr, verbose=1)

print("\nüìä Test Set Performance:")
metric_names = ['Loss', 'Accuracy', 'Precision', 'Recall', 'AUC']
for name, value in dict(zip(metric_names, test_results)).items():
    print(f"   {name:12s}: {value:.4f}")

# Predicted probabilities, then thresholded class labels as a flat array
y_pred_prob = model.predict(X_test_lstm)
above_threshold = y_pred_prob > 0.5
y_pred = above_threshold.astype(int).flatten()

# Extra summary scores computed from the hard predictions
f1 = f1_score(y_test_arr, y_pred)
mcc = matthews_corrcoef(y_test_arr, y_pred)

print(
    f"\nüìà Additional Metrics:",
    f"   F1-Score: {f1:.4f}",
    f"   Matthews Correlation Coefficient: {mcc:.4f}",
    sep="\n",
)

## 📋 Step 14: Classification Report and Confusion Matrix

In [None]:
# Print the per-class classification report and draw the confusion matrix
# (raw counts and row-normalised), saving the figure as 'confusion_matrix.png'.
print("="*70)
print("üìã Detailed Classification Report")
print("="*70)

# Fix the label set explicitly. Without it scikit-learn infers the labels
# from the data, so a test split or prediction vector containing a single
# class would produce a 1x1 matrix and break both `target_names` and the
# four-way unpacking of `cm.ravel()` below.
cm_labels = [0, 1]
class_names = ['Legitimate (0)', 'Phishing (1)']

# Classification report
print("\n" + classification_report(
    y_test_arr, y_pred,
    labels=cm_labels, target_names=class_names, digits=4, zero_division=0,
))

# Confusion matrix (rows = true label, columns = predicted label)
cm = confusion_matrix(y_test_arr, y_pred, labels=cm_labels)

# Visualize confusion matrix
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Raw counts
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=class_names,
            yticklabels=class_names, ax=axes[0], cbar_kws={'label': 'Count'})
axes[0].set_xlabel('Predicted Label', fontweight='bold', fontsize=12)
axes[0].set_ylabel('True Label', fontweight='bold', fontsize=12)
axes[0].set_title('Confusion Matrix (Counts)', fontweight='bold', fontsize=14)

# Normalized per true class. A class with no true samples keeps an all-zero
# row instead of triggering a division by zero (NaN cells).
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = np.divide(
    cm.astype('float'), row_totals,
    out=np.zeros(cm.shape, dtype=float), where=row_totals != 0,
)
sns.heatmap(cm_normalized, annot=True, fmt='.2%', cmap='Greens', xticklabels=class_names,
            yticklabels=class_names, ax=axes[1], cbar_kws={'label': 'Percentage'})
axes[1].set_xlabel('Predicted Label', fontweight='bold', fontsize=12)
axes[1].set_ylabel('True Label', fontweight='bold', fontsize=12)
axes[1].set_title('Confusion Matrix (Normalized)', fontweight='bold', fontsize=14)

plt.tight_layout()
plt.savefig('confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()

print("\n‚úÖ Confusion matrix saved as 'confusion_matrix.png'")

# Print confusion matrix interpretation (2x2 is guaranteed by `cm_labels`)
tn, fp, fn, tp = cm.ravel()
print("\nüîç Confusion Matrix Breakdown:")
print(f"   True Negatives (Legitimate correctly classified):  {tn:,}")
print(f"   False Positives (Legitimate wrongly as Phishing):  {fp:,}")
print(f"   False Negatives (Phishing wrongly as Legitimate):  {fn:,}")
print(f"   True Positives (Phishing correctly classified):    {tp:,}")

## 📈 Step 15: ROC Curve and AUC Score

In [None]:
# Compute and plot the ROC curve for the test predictions, save it as
# 'roc_curve.png', and report the threshold that maximises TPR - FPR.
print("="*70, "üìà ROC Curve Analysis", "="*70, sep="\n")

# ROC points and the area under the curve
fpr, tpr, thresholds = roc_curve(y_test_arr, y_pred_prob)
roc_auc = roc_auc_score(y_test_arr, y_pred_prob)

print(f"\nüéØ ROC AUC Score: {roc_auc:.4f}")

# Model curve against the diagonal of a random classifier
roc_fig, roc_ax = plt.subplots(figsize=(10, 8))
roc_ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.4f})')
roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Random Classifier (AUC = 0.5)')
roc_ax.set_xlim([0.0, 1.0])
roc_ax.set_ylim([0.0, 1.05])
roc_ax.set_xlabel('False Positive Rate', fontweight='bold', fontsize=12)
roc_ax.set_ylabel('True Positive Rate', fontweight='bold', fontsize=12)
roc_ax.set_title('Receiver Operating Characteristic (ROC) Curve', fontweight='bold', fontsize=14)
roc_ax.legend(loc='lower right', fontsize=11)
roc_ax.grid(True, alpha=0.3)
plt.savefig('roc_curve.png', dpi=300, bbox_inches='tight')
plt.show()

print("\n‚úÖ ROC curve saved as 'roc_curve.png'")

# Pick the point where the gap between TPR and FPR is largest
tpr_fpr_gap = tpr - fpr
optimal_idx = np.argmax(tpr_fpr_gap)
optimal_threshold = thresholds[optimal_idx]
print(
    f"\nüéØ Optimal threshold: {optimal_threshold:.4f}",
    f"   At this threshold:",
    f"   - True Positive Rate: {tpr[optimal_idx]:.4f}",
    f"   - False Positive Rate: {fpr[optimal_idx]:.4f}",
    sep="\n",
)

## 💾 Step 16: Save Model and Scaler

In [None]:
# Persist everything needed to reuse the model later: the final weights, the
# fitted scaler, the feature order, and the training history.
import json

import joblib

print("="*70, "üíæ Saving Model and Preprocessing Components", "="*70, sep="\n")

# Final model (state at the end of training)
model.save('phishing_lstm_model_final.h5')
print("\n‚úÖ Model saved as 'phishing_lstm_model_final.h5'")

# Fitted StandardScaler
joblib.dump(scaler, 'scaler.pkl')
print("‚úÖ Scaler saved as 'scaler.pkl'")

# Feature names, one per line, in the column order of X
feature_names = X.columns.tolist()
with open('feature_names.txt', 'w') as f:
    f.write('\n'.join(feature_names))
print("‚úÖ Feature names saved as 'feature_names.txt'")

# Training history, with every value converted to a plain float so it is
# JSON-serialisable
history_dict = {}
for key, values in history.history.items():
    history_dict[key] = [float(val) for val in values]
with open('training_history.json', 'w') as f:
    f.write(json.dumps(history_dict, indent=2))
print("‚úÖ Training history saved as 'training_history.json'")

# Overview of every artifact produced by the notebook
output_files = [
    ('phishing_lstm_model_final.h5', 'Trained LSTM model'),
    ('best_phishing_lstm_model.h5', 'Best checkpoint during training'),
    ('scaler.pkl', 'Feature scaler for preprocessing'),
    ('feature_names.txt', 'List of feature names'),
    ('training_history.json', 'Training metrics history'),
    ('training_history.png', 'Training visualization'),
    ('confusion_matrix.png', 'Confusion matrix visualization'),
    ('roc_curve.png', 'ROC curve visualization'),
]
print("\nüì¶ All files ready for download:")
for position, (filename, description) in enumerate(output_files, start=1):
    print(f"   {position}. {filename} - {description}")

## 📊 Step 17: Final Performance Summary

In [None]:
# Recap of the whole run: training setup, dataset sizes, test metrics and the
# confusion-matrix counts computed in the earlier cells.
print("="*70, "üéâ TRAINING COMPLETE - FINAL SUMMARY", "="*70, sep="\n")

epochs_run = len(history.history['loss'])
print(
    "\n‚öôÔ∏è Training Configuration:",
    f"   GPUs used: {strategy.num_replicas_in_sync}",
    f"   Total training time: {training_time/60:.2f} minutes",
    f"   Epochs completed: {epochs_run}",
    f"   Global batch size: {GLOBAL_BATCH_SIZE}",
    sep="\n",
)

print(
    "\nüìä Dataset Information:",
    f"   Total samples: {len(df_clean):,}",
    f"   Features: {X_train_lstm.shape[2]}",
    f"   Training samples: {len(X_train):,}",
    f"   Validation samples: {len(X_val):,}",
    f"   Test samples: {len(X_test):,}",
    sep="\n",
)

# test_results is ordered as [loss, accuracy, precision, recall, auc]
test_accuracy = test_results[1]
test_precision = test_results[2]
test_recall = test_results[3]
test_auc = test_results[4]
print(
    "\nüéØ Final Test Performance:",
    f"   Accuracy:  {test_accuracy:.4f} ({test_accuracy*100:.2f}%)",
    f"   Precision: {test_precision:.4f}",
    f"   Recall:    {test_recall:.4f}",
    f"   AUC:       {test_auc:.4f}",
    f"   F1-Score:  {f1:.4f}",
    sep="\n",
)

print(
    "\nüîç Classification Breakdown:",
    f"   True Negatives:  {tn:,}",
    f"   False Positives: {fp:,}",
    f"   False Negatives: {fn:,}",
    f"   True Positives:  {tp:,}",
    sep="\n",
)

print("\n‚úÖ Model files saved and ready for deployment!")
print("\n" + "="*70, "Thank you for using this notebook! üöÄ", "="*70, sep="\n")

## 🧪 Step 18: Test Model with Sample Predictions (Optional)

In [None]:
# Optional: spot-check the model on a few randomly chosen test rows, printing
# the true label, the predicted label and the predicted probability.
print("="*70)
print("üß™ Sample Predictions")
print("="*70)

# Draw up to 10 distinct test rows. Sampling without replacement raises
# ValueError when more rows are requested than exist, so cap the request at
# the size of the test set.
np.random.seed(42)
sample_count = min(10, len(X_test_lstm))
sample_indices = np.random.choice(len(X_test_lstm), size=sample_count, replace=False)

print("\nüìã Sample predictions from test set:\n")
for idx in sample_indices:
    true_label = y_test_arr[idx]
    pred_prob = y_pred_prob[idx][0]
    # Same 0.5 decision threshold as used for the test-set evaluation
    pred_label = 1 if pred_prob > 0.5 else 0

    true_name = "Phishing" if true_label == 1 else "Legitimate"
    pred_name = "Phishing" if pred_label == 1 else "Legitimate"
    correct = "‚úÖ" if true_label == pred_label else "‚ùå"

    print(f"Sample {idx:4d}: True={true_name:10s} | Pred={pred_name:10s} ({pred_prob:.4f}) {correct}")

print("\n" + "="*70)