# Blockchain Fraud Detection: Model Development

This notebook focuses on developing Graph Neural Network (GNN) models for blockchain fraud detection. We'll use the processed features and graph structure from the previous notebooks to train and compare different GNN architectures.

We'll implement:
1. Graph Convolutional Networks (GCN)
2. GraphSAGE
3. Graph Attention Networks (GAT)

We'll also explore various hyperparameters and training strategies.

In [None]:
# Import libraries
import copy
import json
import os
import shutil
import time
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import roc_auc_score, f1_score, precision_recall_curve, auc
from sklearn.model_selection import train_test_split
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv, SAGEConv, GATConv

# Set plotting style
sns.set(style="whitegrid")
plt.style.use('seaborn-v0_8-whitegrid')
# NOTE: this silences *every* warning for the rest of the notebook,
# including deprecation notices raised by the libraries imported above.
warnings.filterwarnings('ignore')

# Seed the random number generators used by this notebook so that weight
# initialisation and dropout are repeatable across "Restart & Run All".
# (Results on GPU may still vary slightly between runs.)
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# Check for GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Create directories for saving models and results
os.makedirs('../models', exist_ok=True)
os.makedirs('../reports', exist_ok=True)

## 1. Load Processed Data

In [None]:
# Load data from processed directory
# NOTE(review): torch.load unpickles arbitrary Python objects, so only point it
# at files produced by this project. Recent PyTorch releases default to
# weights_only=True, which may reject a pickled Data object -- confirm against
# the installed version.
try:
    # Try loading the complete Data object. map_location lets a file that was
    # saved from a GPU session be opened on a CPU-only machine.
    data = torch.load('../data/processed/data.pt', map_location=device)
    print(f"Loaded PyTorch Geometric Data object: {data}")
except FileNotFoundError:
    # If not found, load individual components
    features = np.load('../data/processed/features.npy')
    labels = np.load('../data/processed/labels.npy')
    edge_index = torch.load('../data/processed/edge_index.pt', map_location=device)

    # Convert to PyTorch tensors (float32 features, int64 labels)
    x = torch.from_numpy(features).float()
    y = torch.from_numpy(labels).long()

    # Create Data object
    data = Data(x=x, edge_index=edge_index, y=y)
    print("Created PyTorch Geometric Data object from components")

# Move data to device
data = data.to(device)

In [None]:
# Verify data dimensions
dimension_report = (
    ("Number of nodes", data.num_nodes),
    ("Number of edges", data.num_edges),
    ("Number of features", data.num_features),
    ("Number of classes", len(torch.unique(data.y))),
)
for caption, value in dimension_report:
    print(f"{caption}: {value}")

# Check class distribution (index 1 of the bincount is reported as the fraud class)
class_counts = torch.bincount(data.y)
print(f"Class distribution: {class_counts.cpu().numpy()}")
fraud_ratio = class_counts[1].item() / class_counts.sum().item()
print(f"Fraud ratio: {fraud_ratio:.4f}")

## 2. Create Train/Validation/Test Splits

In [None]:
# Create train/val/test splits
def create_data_splits(data, train_size=0.7, val_size=0.15, test_size=0.15, random_state=42):
    """Partition node indices into stratified train / validation / test sets.

    The nodes are first divided into a training part (``train_size`` of all
    nodes) and a hold-out part (everything else). The hold-out part is then
    divided between validation and test in the ratio ``val_size : test_size``.
    Both divisions are stratified on ``data.y`` and share ``random_state``.

    Note that ``test_size`` only enters through that ratio: the test set is
    whatever remains of the hold-out part after validation nodes are taken.

    Args:
        data: Object exposing ``num_nodes`` and a label tensor ``y``.
        train_size: Fraction of all nodes assigned to training.
        val_size: Relative weight of the validation set within the hold-out part.
        test_size: Relative weight of the test set within the hold-out part.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        dict with keys ``'train'``, ``'val'`` and ``'test'`` mapping to the
        index arrays returned by ``train_test_split``.
    """
    node_labels = data.y.cpu().numpy()
    all_nodes = np.arange(data.num_nodes)

    # Stage 1: carve the training nodes out of the full node set.
    train_idx, holdout_idx = train_test_split(
        all_nodes,
        train_size=train_size,
        random_state=random_state,
        stratify=node_labels,
    )

    # Stage 2: share the remaining nodes between validation and test.
    val_share = val_size / (val_size + test_size)
    val_idx, test_idx = train_test_split(
        holdout_idx,
        train_size=val_share,
        random_state=random_state,
        stratify=node_labels[holdout_idx],
    )

    return dict(train=train_idx, val=val_idx, test=test_idx)

# Create splits
split_idx = create_data_splits(data)

# Report how many nodes landed in each partition
for caption, key in (("Train", 'train'), ("Validation", 'val'), ("Test", 'test')):
    print(f"{caption} set size: {len(split_idx[key])}")

# Check class distribution in each split
for split in ('train', 'val', 'test'):
    classes, frequencies = np.unique(data.y[split_idx[split]].cpu().numpy(), return_counts=True)
    print(f"{split.capitalize()} set class distribution: {dict(zip(classes, frequencies))}")

## 3. Define GNN Model Architectures

In [None]:
# Define GCN model
class GCNModel(nn.Module):
    """Stack of ``GCNConv`` layers that outputs per-node log-probabilities.

    Layout: one input convolution (``input_dim -> hidden_dim``),
    ``num_layers - 2`` hidden convolutions (``hidden_dim -> hidden_dim``) and
    one output convolution (``hidden_dim -> output_dim``).

    Only the hidden convolutions are followed by batch normalisation and a
    residual connection; the input convolution gets ReLU + dropout only.
    ``num_layers - 1`` BatchNorm modules are allocated, but ``forward`` uses
    just the first ``num_layers - 2`` of them, so the last one is never applied.

    Args:
        input_dim: Size of the node feature vectors.
        hidden_dim: Width of every hidden representation.
        output_dim: Number of classes.
        num_layers: Nominal number of convolutions (see layout above).
        dropout: Dropout probability applied after the input and hidden layers.
        batch_norm: Whether to create and apply BatchNorm1d on hidden layers.
        residual: Whether hidden layers add their input back to their output.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=3, 
                 dropout=0.5, batch_norm=True, residual=True):
        super().__init__()

        self.num_layers, self.dropout = num_layers, dropout
        self.batch_norm, self.residual = batch_norm, residual

        # (in, out) sizes for the input, hidden and output convolutions, in order.
        layer_shapes = (
            [(input_dim, hidden_dim)]
            + [(hidden_dim, hidden_dim)] * (num_layers - 2)
            + [(hidden_dim, output_dim)]
        )
        self.convs = nn.ModuleList([GCNConv(fan_in, fan_out) for fan_in, fan_out in layer_shapes])

        # `bns` exists only when batch normalisation is enabled.
        if batch_norm:
            self.bns = nn.ModuleList()
            for _ in range(num_layers - 1):
                self.bns.append(nn.BatchNorm1d(hidden_dim))

    def reset_parameters(self):
        """Re-initialise every convolution and, if present, every BatchNorm."""
        resettable = list(self.convs)
        if self.batch_norm:
            resettable.extend(self.bns)
        for module in resettable:
            module.reset_parameters()

    def _regularise(self, h):
        """Apply dropout at the configured rate (a no-op outside training mode)."""
        return F.dropout(h, p=self.dropout, training=self.training)

    def forward(self, x, edge_index):
        """Return log-softmax class scores for every node."""
        h = self._regularise(F.relu(self.convs[0](x, edge_index)))

        # Hidden layers: conv -> (batch norm) -> ReLU -> (+ skip) -> dropout
        for layer in range(1, self.num_layers - 1):
            skip = h
            h = self.convs[layer](h, edge_index)
            if self.batch_norm:
                h = self.bns[layer - 1](h)
            h = F.relu(h)
            h = self._regularise(h + skip if self.residual else h)

        logits = self.convs[-1](h, edge_index)
        return F.log_softmax(logits, dim=1)

In [None]:
# Define GraphSAGE model
class SAGEModel(nn.Module):
    """Stack of ``SAGEConv`` layers that outputs per-node log-probabilities.

    Layout: one input convolution (``input_dim -> hidden_dim``),
    ``num_layers - 2`` hidden convolutions (``hidden_dim -> hidden_dim``) and
    one output convolution (``hidden_dim -> output_dim``). Every convolution is
    built with the same ``aggr`` setting.

    Only the hidden convolutions are followed by batch normalisation and a
    residual connection; the input convolution gets ReLU + dropout only.
    ``num_layers - 1`` BatchNorm modules are allocated, but ``forward`` uses
    just the first ``num_layers - 2`` of them, so the last one is never applied.

    Args:
        input_dim: Size of the node feature vectors.
        hidden_dim: Width of every hidden representation.
        output_dim: Number of classes.
        num_layers: Nominal number of convolutions (see layout above).
        dropout: Dropout probability applied after the input and hidden layers.
        batch_norm: Whether to create and apply BatchNorm1d on hidden layers.
        residual: Whether hidden layers add their input back to their output.
        aggr: Aggregation scheme forwarded to each ``SAGEConv``.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=3, 
                 dropout=0.5, batch_norm=True, residual=True, aggr='mean'):
        super().__init__()

        self.num_layers, self.dropout = num_layers, dropout
        self.batch_norm, self.residual = batch_norm, residual

        # (in, out) sizes for the input, hidden and output convolutions, in order.
        layer_shapes = (
            [(input_dim, hidden_dim)]
            + [(hidden_dim, hidden_dim)] * (num_layers - 2)
            + [(hidden_dim, output_dim)]
        )
        self.convs = nn.ModuleList(
            [SAGEConv(fan_in, fan_out, aggr=aggr) for fan_in, fan_out in layer_shapes]
        )

        # `bns` exists only when batch normalisation is enabled.
        if batch_norm:
            self.bns = nn.ModuleList()
            for _ in range(num_layers - 1):
                self.bns.append(nn.BatchNorm1d(hidden_dim))

    def reset_parameters(self):
        """Re-initialise every convolution and, if present, every BatchNorm."""
        resettable = list(self.convs)
        if self.batch_norm:
            resettable.extend(self.bns)
        for module in resettable:
            module.reset_parameters()

    def _regularise(self, h):
        """Apply dropout at the configured rate (a no-op outside training mode)."""
        return F.dropout(h, p=self.dropout, training=self.training)

    def forward(self, x, edge_index):
        """Return log-softmax class scores for every node."""
        h = self._regularise(F.relu(self.convs[0](x, edge_index)))

        # Hidden layers: conv -> (batch norm) -> ReLU -> (+ skip) -> dropout
        for layer in range(1, self.num_layers - 1):
            skip = h
            h = self.convs[layer](h, edge_index)
            if self.batch_norm:
                h = self.bns[layer - 1](h)
            h = F.relu(h)
            h = self._regularise(h + skip if self.residual else h)

        logits = self.convs[-1](h, edge_index)
        return F.log_softmax(logits, dim=1)

In [None]:
# Define GAT model
class GATModel(nn.Module):
    """Stack of ``GATConv`` layers that outputs per-node log-probabilities.

    Layout: one input convolution and ``num_layers - 2`` hidden convolutions,
    each built with ``heads`` attention heads of ``hidden_dim // heads``
    channels, followed by a single-head output convolution
    (``hidden_dim -> output_dim``). The following layers and the BatchNorm
    modules are sized for ``hidden_dim`` inputs, so ``hidden_dim`` must be an
    exact multiple of ``heads``.

    Only the hidden convolutions are followed by batch normalisation and a
    residual connection; the input convolution gets ReLU + dropout only.
    ``num_layers - 1`` BatchNorm modules are allocated, but ``forward`` uses
    just the first ``num_layers - 2`` of them, so the last one is never applied.

    Args:
        input_dim: Size of the node feature vectors.
        hidden_dim: Width of every hidden representation (multiple of ``heads``).
        output_dim: Number of classes.
        num_layers: Nominal number of convolutions (see layout above).
        dropout: Dropout probability applied after the input and hidden layers.
        batch_norm: Whether to create and apply BatchNorm1d on hidden layers.
        residual: Whether hidden layers add their input back to their output.
        heads: Number of attention heads in the input and hidden layers.

    Raises:
        ValueError: If ``heads`` is not positive or does not divide ``hidden_dim``.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=3, 
                 dropout=0.5, batch_norm=True, residual=True, heads=8):
        # Fail early: with a non-divisible width, `hidden_dim // heads` truncates
        # and the layer sizes no longer line up, which otherwise only shows up
        # as a shape error during the first forward pass.
        if heads < 1 or hidden_dim % heads != 0:
            raise ValueError(
                f"hidden_dim ({hidden_dim}) must be a positive multiple of heads ({heads})"
            )

        super(GATModel, self).__init__()

        self.num_layers = num_layers
        self.dropout = dropout
        self.batch_norm = batch_norm
        self.residual = residual

        head_dim = hidden_dim // heads

        # Input layer with multiple attention heads
        self.convs = nn.ModuleList([GATConv(input_dim, head_dim, heads=heads)])

        # Hidden layers
        for _ in range(num_layers - 2):
            self.convs.append(GATConv(hidden_dim, head_dim, heads=heads))

        # Output layer (with 1 attention head)
        self.convs.append(GATConv(hidden_dim, output_dim, heads=1))

        # Batch normalization layers (attribute exists only when enabled)
        if batch_norm:
            self.bns = nn.ModuleList([nn.BatchNorm1d(hidden_dim) for _ in range(num_layers - 1)])

    def reset_parameters(self):
        """Re-initialise every convolution and, if present, every BatchNorm."""
        for conv in self.convs:
            conv.reset_parameters()
        if self.batch_norm:
            for bn in self.bns:
                bn.reset_parameters()

    def forward(self, x, edge_index):
        """Return log-softmax class scores for every node."""
        # Input layer
        h = self.convs[0](x, edge_index)
        h = F.relu(h)
        h = F.dropout(h, p=self.dropout, training=self.training)

        # Hidden layers: conv -> (batch norm) -> ReLU -> (+ skip) -> dropout
        for i in range(1, self.num_layers - 1):
            h_prev = h
            h = self.convs[i](h, edge_index)

            if self.batch_norm:
                h = self.bns[i-1](h)

            h = F.relu(h)

            if self.residual:
                h = h + h_prev

            h = F.dropout(h, p=self.dropout, training=self.training)

        # Output layer
        h = self.convs[-1](h, edge_index)

        return F.log_softmax(h, dim=1)

## 4. Training Function with Early Stopping

In [None]:
def train_model(model, data, split_idx, optimizer, criterion, 
               scheduler=None, epochs=200, patience=20, 
               verbose=True, model_name="gnn"):
    """Train a GNN full-batch with early stopping on validation ROC AUC.

    Every epoch performs one optimisation step on the training nodes, then
    re-runs the model in eval mode to record loss, accuracy and ROC AUC. The
    weights from the epoch with the highest validation AUC are restored into
    ``model`` before returning. Training stops early once ``patience``
    consecutive epochs bring no improvement.

    Side effects: writes ``../models/{model_name}_best.pt`` (best state dict)
    and ``../models/{model_name}_history.json`` (metric history).

    Args:
        model: Module called as ``model(data.x, data.edge_index)`` that returns
            per-node log-probabilities; column 1 is used as the positive class.
        data: Object exposing ``x``, ``edge_index`` and a label tensor ``y``.
        split_idx: Mapping with ``'train'`` and ``'val'`` integer node indices.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Loss applied to ``(log_probs, labels)``.
        scheduler: Optional LR scheduler. A ``ReduceLROnPlateau`` instance is
            stepped with the validation AUC; any other scheduler is stepped
            without arguments.
        epochs: Maximum number of epochs.
        patience: Number of non-improving epochs tolerated before stopping.
        verbose: Whether to print progress and a final summary.
        model_name: Stem used for the two output files.

    Returns:
        ``(model, history)`` where ``history`` maps ``train_loss``,
        ``val_loss``, ``train_acc``, ``val_acc``, ``train_auc`` and ``val_auc``
        to one float per completed epoch.
    """
    metric_names = ('train_loss', 'val_loss', 'train_acc', 'val_acc', 'train_auc', 'val_auc')
    history = {name: [] for name in metric_names}

    # Resolve the split indices and their labels once instead of on every epoch.
    index_device = data.y.device
    train_idx = torch.as_tensor(split_idx['train'], dtype=torch.long, device=index_device)
    val_idx = torch.as_tensor(split_idx['val'], dtype=torch.long, device=index_device)
    y_train, y_val = data.y[train_idx], data.y[val_idx]
    y_train_np, y_val_np = y_train.cpu().numpy(), y_val.cpu().numpy()
    n_train, n_val = train_idx.numel(), val_idx.numel()

    # Only ReduceLROnPlateau takes the monitored metric in step().
    plateau_scheduler = isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau)

    # Best model tracking. Starting from -inf guarantees that the first epoch
    # with a comparable AUC is recorded, even if that AUC is 0.0.
    best_val_auc = float('-inf')
    best_model_state = None
    best_epoch = 0
    patience_counter = 0

    start_time = time.time()

    for epoch in range(epochs):
        epoch_start = time.time()

        # Train phase: one full-batch gradient step on the training nodes
        model.train()
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)
        train_loss = criterion(out[train_idx], y_train)
        train_loss.backward()
        optimizer.step()

        # Evaluation phase: metrics come from a fresh eval-mode forward pass
        model.eval()
        with torch.no_grad():
            out = model(data.x, data.edge_index)

            val_loss = criterion(out[val_idx], y_val)

            pred = out.argmax(dim=1)
            train_acc = (pred[train_idx] == y_train).sum().item() / n_train
            val_acc = (pred[val_idx] == y_val).sum().item() / n_val

            # exp() turns the log-probability of class 1 back into a probability
            train_probs = torch.exp(out[train_idx, 1]).cpu().numpy()
            val_probs = torch.exp(out[val_idx, 1]).cpu().numpy()
            train_auc = float(roc_auc_score(y_train_np, train_probs))
            val_auc = float(roc_auc_score(y_val_np, val_probs))

        # Update learning rate with scheduler if provided
        if scheduler is not None:
            if plateau_scheduler:
                scheduler.step(val_auc)
            else:
                scheduler.step()

        # Update history (plain floats so that the dict is JSON-serialisable)
        train_loss_value = train_loss.item()
        val_loss_value = val_loss.item()
        history['train_loss'].append(train_loss_value)
        history['val_loss'].append(val_loss_value)
        history['train_acc'].append(train_acc)
        history['val_acc'].append(val_acc)
        history['train_auc'].append(train_auc)
        history['val_auc'].append(val_auc)

        # Print progress
        epoch_time = time.time() - epoch_start
        if verbose and (epoch % 10 == 0 or epoch == epochs - 1):
            print(f"Epoch {epoch+1:3d}/{epochs} | "
                  f"Train Loss: {train_loss_value:.4f} | Val Loss: {val_loss_value:.4f} | "
                  f"Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f} | "
                  f"Train AUC: {train_auc:.4f} | Val AUC: {val_auc:.4f} | "
                  f"Time: {epoch_time:.2f}s")

        # Check for improvement
        if val_auc > best_val_auc:
            best_val_auc = val_auc
            best_model_state = copy.deepcopy(model.state_dict())
            best_epoch = epoch
            patience_counter = 0
            if verbose:
                print(f"    > New best model with validation AUC: {val_auc:.4f}")
        else:
            patience_counter += 1

        # Early stopping check
        if patience_counter >= patience:
            if verbose:
                print(f"Early stopping triggered after {epoch+1} epochs")
            break

    # Training summary
    total_time = time.time() - start_time
    if verbose:
        print(f"\nTraining completed in {total_time:.2f} seconds")

    if best_model_state is None:
        # No epoch produced a comparable validation AUC (epochs == 0 or the AUC
        # was NaN throughout): keep the current weights rather than crashing.
        best_model_state = copy.deepcopy(model.state_dict())
        if verbose:
            print("No validation AUC improvement was recorded; keeping the current weights")
    elif verbose:
        print(f"Best model at epoch {best_epoch+1} with validation AUC: {best_val_auc:.4f}")

    # Load best model
    model.load_state_dict(best_model_state)

    # Save best model
    torch.save(best_model_state, f"../models/{model_name}_best.pt")

    # Save training history
    with open(f"../models/{model_name}_history.json", 'w') as f:
        json.dump(history, f)

    return model, history

## 5. Train GCN Model

In [None]:
# Define GCN model hyperparameters
gcn_params = dict(
    input_dim=data.num_features,
    hidden_dim=256,
    output_dim=2,  # Binary classification
    num_layers=3,
    dropout=0.5,
    batch_norm=True,
    residual=True,
)

# Build the model on the target device and start from freshly initialised weights
gcn_model = GCNModel(**gcn_params)
gcn_model = gcn_model.to(device)
gcn_model.reset_parameters()

# Loss function and optimizer
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(gcn_model.parameters(), lr=0.01, weight_decay=5e-4)

# Halve the learning rate when the monitored metric stops increasing.
# NOTE(review): `verbose` is deprecated in recent PyTorch releases -- confirm
# against the installed version.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=0.5, patience=10, min_lr=1e-5, verbose=True
)

# Train GCN model
print("Training GCN model...")
gcn_model, gcn_history = train_model(
    gcn_model, data, split_idx, optimizer, criterion,
    scheduler=scheduler, epochs=200, patience=20, model_name="gcn",
)

In [None]:
# Plot GCN training history: one panel per metric, train vs. validation curves
gcn_panels = (
    ('train_loss', 'val_loss', 'Loss', 'Loss'),
    ('train_acc', 'val_acc', 'Accuracy', 'Accuracy'),
    ('train_auc', 'val_auc', 'ROC AUC', 'AUC'),
)

fig, axes = plt.subplots(1, 3, figsize=(18, 5))

for panel_ax, (train_key, val_key, panel_title, y_label) in zip(axes, gcn_panels):
    panel_ax.plot(gcn_history[train_key], label='Train')
    panel_ax.plot(gcn_history[val_key], label='Validation')
    panel_ax.set_title(panel_title, fontsize=15)
    panel_ax.set_xlabel('Epoch', fontsize=12)
    panel_ax.set_ylabel(y_label, fontsize=12)
    panel_ax.legend()
    panel_ax.grid(True)

plt.tight_layout()
plt.savefig('../reports/gcn_training_history.png', dpi=300, bbox_inches='tight')
plt.show()

## 6. Train GraphSAGE Model

In [None]:
# Define GraphSAGE model hyperparameters
sage_params = dict(
    input_dim=data.num_features,
    hidden_dim=256,
    output_dim=2,  # Binary classification
    num_layers=3,
    dropout=0.5,
    batch_norm=True,
    residual=True,
    aggr='mean',  # Aggregation method: 'mean', 'max', or 'sum'
)

# Build the model on the target device and start from freshly initialised weights
sage_model = SAGEModel(**sage_params)
sage_model = sage_model.to(device)
sage_model.reset_parameters()

# Loss function and optimizer
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(sage_model.parameters(), lr=0.01, weight_decay=5e-4)

# Halve the learning rate when the monitored metric stops increasing.
# NOTE(review): `verbose` is deprecated in recent PyTorch releases -- confirm
# against the installed version.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=0.5, patience=10, min_lr=1e-5, verbose=True
)

# Train GraphSAGE model
print("Training GraphSAGE model...")
sage_model, sage_history = train_model(
    sage_model, data, split_idx, optimizer, criterion,
    scheduler=scheduler, epochs=200, patience=20, model_name="sage",
)

In [None]:
# Plot GraphSAGE training history: one panel per metric, train vs. validation curves
sage_panels = (
    ('train_loss', 'val_loss', 'Loss', 'Loss'),
    ('train_acc', 'val_acc', 'Accuracy', 'Accuracy'),
    ('train_auc', 'val_auc', 'ROC AUC', 'AUC'),
)

fig, axes = plt.subplots(1, 3, figsize=(18, 5))

for panel_ax, (train_key, val_key, panel_title, y_label) in zip(axes, sage_panels):
    panel_ax.plot(sage_history[train_key], label='Train')
    panel_ax.plot(sage_history[val_key], label='Validation')
    panel_ax.set_title(panel_title, fontsize=15)
    panel_ax.set_xlabel('Epoch', fontsize=12)
    panel_ax.set_ylabel(y_label, fontsize=12)
    panel_ax.legend()
    panel_ax.grid(True)

plt.tight_layout()
plt.savefig('../reports/sage_training_history.png', dpi=300, bbox_inches='tight')
plt.show()

## 7. Train GAT Model (Optional, requires more memory)

In [None]:
# Check if we have enough memory for GAT (more memory-intensive)
try:
    # Define GAT model hyperparameters
    gat_params = dict(
        input_dim=data.num_features,
        hidden_dim=256,
        output_dim=2,  # Binary classification
        num_layers=3,
        dropout=0.5,
        batch_norm=True,
        residual=True,
        heads=8,  # Number of attention heads
    )

    # Build the model on the target device and start from freshly initialised weights
    gat_model = GATModel(**gat_params)
    gat_model = gat_model.to(device)
    gat_model.reset_parameters()

    # Loss function and optimizer
    criterion = nn.NLLLoss()
    optimizer = torch.optim.Adam(gat_model.parameters(), lr=0.01, weight_decay=5e-4)

    # Halve the learning rate when the monitored metric stops increasing.
    # NOTE(review): `verbose` is deprecated in recent PyTorch releases -- confirm
    # against the installed version.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', factor=0.5, patience=10, min_lr=1e-5, verbose=True
    )

    # Train GAT model
    print("Training GAT model...")
    gat_model, gat_history = train_model(
        gat_model, data, split_idx, optimizer, criterion,
        scheduler=scheduler, epochs=200, patience=20, model_name="gat",
    )

    # Plot GAT training history: one panel per metric, train vs. validation curves
    gat_panels = (
        ('train_loss', 'val_loss', 'Loss', 'Loss'),
        ('train_acc', 'val_acc', 'Accuracy', 'Accuracy'),
        ('train_auc', 'val_auc', 'ROC AUC', 'AUC'),
    )

    fig, axes = plt.subplots(1, 3, figsize=(18, 5))

    for panel_ax, (train_key, val_key, panel_title, y_label) in zip(axes, gat_panels):
        panel_ax.plot(gat_history[train_key], label='Train')
        panel_ax.plot(gat_history[val_key], label='Validation')
        panel_ax.set_title(panel_title, fontsize=15)
        panel_ax.set_xlabel('Epoch', fontsize=12)
        panel_ax.set_ylabel(y_label, fontsize=12)
        panel_ax.legend()
        panel_ax.grid(True)

    plt.tight_layout()
    plt.savefig('../reports/gat_training_history.png', dpi=300, bbox_inches='tight')
    plt.show()

except RuntimeError as e:
    # Only an out-of-memory failure is treated as "skip GAT"; anything else propagates.
    ran_out_of_memory = 'out of memory' in str(e).lower()
    if not ran_out_of_memory:
        raise e
    print("Not enough GPU memory for GAT model. Skipping GAT training.")
    print("You can try reducing the hidden dimension or number of attention heads.")

## 8. Hyperparameter Tuning (Optional)

In [None]:
# Uncomment and run this cell if you want to explore hyperparameter tuning

# def hyperparameter_tuning(model_class, param_grid, data, split_idx, epochs=100, patience=10):
#     """Perform hyperparameter tuning on a model class."""
#     results = []
#     
#     # Generate parameter combinations
#     import itertools
#     param_names = param_grid.keys()
#     param_values = list(param_grid.values())
#     param_combinations = list(itertools.product(*param_values))
#     
#     print(f"Testing {len(param_combinations)} parameter combinations")
#     
#     for i, params in enumerate(param_combinations):
#         param_dict = {name: value for name, value in zip(param_names, params)}
#         print(f"\nCombination {i+1}/{len(param_combinations)}: {param_dict}")
#         
#         # Initialize model with parameters
#         model = model_class(**param_dict).to(device)
#         model.reset_parameters()
#         
#         # Set optimizer and loss function
#         optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
#         criterion = torch.nn.NLLLoss()
#         
#         # Train model
#         model, history = train_model(
#             model=model,
#             data=data,
#             split_idx=split_idx,
#             optimizer=optimizer,
#             criterion=criterion,
#             epochs=epochs,
#             patience=patience,
#             verbose=False
#         )
#         
#         # Get best validation performance
#         best_val_auc = max(history['val_auc'])
#         best_epoch = history['val_auc'].index(best_val_auc)
#         
#         # Record results
#         results.append({
#             'params': param_dict,
#             'val_auc': best_val_auc,
#             'best_epoch': best_epoch
#         })
#         
#         print(f"Best validation AUC: {best_val_auc:.4f} at epoch {best_epoch+1}")
#     
#     # Sort results by validation AUC
#     results.sort(key=lambda x: x['val_auc'], reverse=True)
#     
#     # Return best parameters
#     return results

# # GCN hyperparameter grid
# gcn_param_grid = {
#     'input_dim': [data.num_features],
#     'hidden_dim': [128, 256],
#     'output_dim': [2],
#     'num_layers': [2, 3],
#     'dropout': [0.3, 0.5],
#     'batch_norm': [True, False],
#     'residual': [True, False]
# }

# # Run hyperparameter tuning
# gcn_tuning_results = hyperparameter_tuning(GCNModel, gcn_param_grid, data, split_idx)

# # Print best results
# print("\nTop 3 parameter combinations:")
# for i, result in enumerate(gcn_tuning_results[:3]):
#     print(f"{i+1}. Val AUC: {result['val_auc']:.4f}, Params: {result['params']}")

## 9. Model Comparison

In [None]:
# Load the training histories that train_model() wrote to ../models
def _read_history(path):
    """Return the metric history stored as JSON at `path`."""
    with open(path, 'r') as f:
        return json.load(f)


def _best_validation(history):
    """Return the highest validation AUC in `history` and its 1-based epoch."""
    val_aucs = history['val_auc']
    top_auc = max(val_aucs)
    return {'val_auc': top_auc, 'epoch': val_aucs.index(top_auc) + 1}


gcn_history = _read_history('../models/gcn_history.json')
sage_history = _read_history('../models/sage_history.json')

# GAT is optional: its history file only exists if a GAT model was trained.
# NOTE(review): a file left over from an earlier run is picked up here as well.
try:
    gat_history = _read_history('../models/gat_history.json')
    has_gat = True
except FileNotFoundError:
    has_gat = False

# Collect best validation metrics
best_metrics = {
    'GCN': _best_validation(gcn_history),
    'GraphSAGE': _best_validation(sage_history),
}
if has_gat:
    best_metrics['GAT'] = _best_validation(gat_history)

# Print comparison
print("Model Performance Comparison (Validation Set):")
for model_name, metrics in best_metrics.items():
    print(f"{model_name}: AUC = {metrics['val_auc']:.4f} (at epoch {metrics['epoch']})")

# Identify best model (the display name of the entry with the highest validation AUC)
best_model = max(best_metrics.items(), key=lambda x: x[1]['val_auc'])[0]
print(f"\nBest model based on validation AUC: {best_model}")

In [None]:
# Plot validation AUC comparison
plt.figure(figsize=(10, 6))

# One curve per trained model; GAT is included only when its history was found
auc_curves = [('GCN', gcn_history), ('GraphSAGE', sage_history)]
if has_gat:
    auc_curves.append(('GAT', gat_history))

for curve_label, curve_history in auc_curves:
    plt.plot(curve_history['val_auc'], label=curve_label)

plt.title('Validation AUC Comparison', fontsize=15)
plt.xlabel('Epoch', fontsize=12)
plt.ylabel('AUC', fontsize=12)
plt.legend(fontsize=12)
plt.grid(True)
plt.tight_layout()
plt.savefig('../reports/model_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

## 10. Results Summary

In [None]:
# Create summary table: one row per model, best validation AUC first
summary_data = [
    {
        'Model': model_name,
        'Best Val AUC': metrics['val_auc'],
        'Best Epoch': metrics['epoch'],
    }
    for model_name, metrics in best_metrics.items()
]

summary_df = pd.DataFrame(summary_data)
summary_df = summary_df.sort_values('Best Val AUC', ascending=False)
summary_df

In [None]:
# Save summary to CSV (row index omitted)
summary_csv_path = '../reports/model_comparison_summary.csv'
summary_df.to_csv(summary_csv_path, index=False)
print(f"Saved model comparison summary to {summary_csv_path}")

## 11. Save the Best Model for Evaluation

In the next notebook, we'll load the best model for a detailed evaluation on the test set.

In [None]:
# Get the best model file path.
# The display names used in `best_metrics` differ from the file stems that the
# training cells passed as `model_name` ("GraphSAGE" was saved as "sage"), so
# translate explicitly instead of lower-casing the display name.
MODEL_FILE_STEMS = {'GCN': 'gcn', 'GraphSAGE': 'sage', 'GAT': 'gat'}
best_model_stem = MODEL_FILE_STEMS.get(best_model, best_model.lower())
best_model_path = f"../models/{best_model_stem}_best.pt"

# Copy the best model to a common name for easier loading in the evaluation notebook
shutil.copy(best_model_path, "../models/best_model.pt")
print(f"Copied {best_model_path} to ../models/best_model.pt for evaluation")

# Also save the model name
with open("../models/best_model_name.txt", "w") as f:
    f.write(best_model)
print(f"Saved best model name ({best_model}) to ../models/best_model_name.txt")

## 12. Conclusion

In this notebook, we developed and trained several Graph Neural Network models for blockchain fraud detection:

1. We implemented GCN, GraphSAGE, and GAT models with advanced features like residual connections, batch normalization, and dropout.

2. We trained these models using early stopping based on validation AUC to prevent overfitting.

3. We compared model performances and identified the best model based on validation AUC.

The best-performing model and its validation AUC are reported in the comparison summary above and saved to `../reports/model_comparison_summary.csv`. In the next notebook, we'll perform a detailed evaluation of this model on the test set to assess its generalization capabilities.