# Tabular Deep Learning Models for Medical Diagnosis

## Experiment Overview

This notebook trains two state-of-the-art tabular deep learning algorithms:
1. **TabNet** - Interpretable deep learning model using sequential attention mechanism
2. **FT-Transformer** - Feature Tokenization + Transformer architecture for tabular data

### Dataset
- Using pre-filtered datasets (evidence features already encoded, raw columns removed)
- Training: preprocessed_filtered/train_filtered.csv
- Validation: preprocessed_filtered/validation_filtered.csv
- Test: preprocessed_filtered/test_filtered.csv

### Evaluation Metrics
- Accuracy, Macro F1-score, Weighted F1-score, Top-3 Accuracy


In [12]:
import pandas as pd
import numpy as np
import pickle
import time
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# PyTorch imports
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import math

# Model libraries
try:
    from pytorch_tabnet.tab_model import TabNetClassifier
except ImportError:
    print("⚠️  pytorch-tabnet not installed. Install with: pip install pytorch-tabnet")
    TabNetClassifier = None

# Custom FT-Transformer implementation (PyTorch 2.x compatible)
class FTTransformer(nn.Module):
    """
    FT-Transformer: Feature Tokenization + Transformer for tabular data
    Based on: "Revisiting Deep Learning Models for Tabular Data" (Gorishniy et al., 2021)
    """
    def __init__(
        self,
        n_num_features,
        cat_cardinalities=None,
        d_token=192,
        n_blocks=3,
        attention_dropout=0.2,
        ffn_dropout=0.1,
        residual_dropout=0.0,
        d_out=1,
        d_ffn_factor=4/3,
    ):
        super().__init__()
        self.n_num_features = n_num_features
        self.cat_cardinalities = cat_cardinalities or []
        self.d_token = d_token
        self.n_blocks = n_blocks
        self.d_out = d_out
        
        # Feature tokenization: each numerical feature gets its own token
        # Linear layer to map each feature value to d_token dimensions
        self.num_embedding = nn.Linear(1, d_token)
        
        # CLS token (learnable token for aggregation)
        self.cls_token = nn.Parameter(torch.randn(1, 1, d_token))
        
        # Positional embeddings
        max_num_tokens = n_num_features + len(self.cat_cardinalities) + 1  # +1 for CLS
        self.pos_embedding = nn.Parameter(torch.randn(1, max_num_tokens, d_token))
        
        # Transformer blocks
        self.blocks = nn.ModuleList([
            TransformerBlock(
                d_token=d_token,
                attention_dropout=attention_dropout,
                ffn_dropout=ffn_dropout,
                residual_dropout=residual_dropout,
                d_ffn_factor=d_ffn_factor,
            )
            for _ in range(n_blocks)
        ])
        
        # Classification head
        self.head = nn.Linear(d_token, d_out)
        
    def forward(self, x_num, x_cat=None):
        """
        Args:
            x_num: numerical features [batch_size, n_num_features]
            x_cat: categorical features [batch_size, n_cat_features] (optional)
        """
        batch_size = x_num.shape[0]
        
        # Tokenize numerical features: each feature becomes a token
        # Reshape to [batch_size * n_num_features, 1] then embed
        num_features_reshaped = x_num.unsqueeze(-1)  # [batch_size, n_num_features, 1]
        num_tokens = self.num_embedding(num_features_reshaped)  # [batch_size, n_num_features, d_token]
        
        # Combine tokens
        tokens = [num_tokens]
        
        # Add CLS token
        cls_tokens = self.cls_token.expand(batch_size, -1, -1)  # [batch_size, 1, d_token]
        tokens = [cls_tokens] + tokens
        
        # Concatenate all tokens
        x = torch.cat(tokens, dim=1)  # [batch_size, n_tokens, d_token]
        
        # Add positional embeddings
        x = x + self.pos_embedding[:, :x.shape[1], :]
        
        # Apply transformer blocks
        for block in self.blocks:
            x = block(x)
        
        # Use CLS token for prediction
        cls_output = x[:, 0, :]  # [batch_size, d_token]
        
        # Classification head
        output = self.head(cls_output)  # [batch_size, d_out]
        
        return output


class TransformerBlock(nn.Module):
    """Transformer block with multi-head attention and FFN"""
    def __init__(
        self,
        d_token,
        attention_dropout=0.2,
        ffn_dropout=0.1,
        residual_dropout=0.0,
        d_ffn_factor=4/3,
        n_heads=8,
    ):
        super().__init__()
        self.attention = nn.MultiheadAttention(
            embed_dim=d_token,
            num_heads=n_heads,
            dropout=attention_dropout,
            batch_first=True
        )
        self.ffn = FeedForward(d_token, int(d_token * d_ffn_factor), ffn_dropout)
        self.ln1 = nn.LayerNorm(d_token)
        self.ln2 = nn.LayerNorm(d_token)
        self.residual_dropout = residual_dropout
        
    def forward(self, x):
        # Self-attention with residual connection
        attn_out, _ = self.attention(x, x, x)
        if self.residual_dropout > 0:
            attn_out = F.dropout(attn_out, p=self.residual_dropout, training=self.training)
        x = self.ln1(x + attn_out)
        
        # FFN with residual connection
        ffn_out = self.ffn(x)
        if self.residual_dropout > 0:
            ffn_out = F.dropout(ffn_out, p=self.residual_dropout, training=self.training)
        x = self.ln2(x + ffn_out)
        
        return x


class FeedForward(nn.Module):
    """Feed-forward network"""
    def __init__(self, d_in, d_hidden, dropout=0.1):
        super().__init__()
        self.linear1 = nn.Linear(d_in, d_hidden)
        self.linear2 = nn.Linear(d_hidden, d_in)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x):
        x = self.linear1(x)
        x = F.gelu(x)
        x = self.dropout(x)
        x = self.linear2(x)
        x = self.dropout(x)
        return x

print("✓ Custom FT-Transformer implementation loaded (PyTorch 2.x compatible)")

# Evaluation metrics
from sklearn.metrics import (
    accuracy_score, classification_report, confusion_matrix,
    f1_score, precision_score, recall_score
)
from sklearn.preprocessing import StandardScaler

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 6)

print("✓ Libraries imported successfully!")


✓ Custom FT-Transformer implementation loaded (PyTorch 2.x compatible)
✓ Libraries imported successfully!


## Load Datasets

Load the pre-filtered datasets that already have evidence features encoded.


In [13]:
# Load filtered datasets
base_path = "/Users/zeynep_yilmaz/Desktop/sdp_gregDDx/DDxPlus Dataset/preprocessed_filtered/"
train_df = pd.read_csv(f"{base_path}train_filtered.csv")
val_df = pd.read_csv(f"{base_path}validation_filtered.csv")
test_df = pd.read_csv(f"{base_path}test_filtered.csv")

# Drop raw 'SEX' and 'DIFFERENTIAL_DIAGNOSIS' columns to prevent leakage
train_df = train_df.drop(columns=['SEX', 'DIFFERENTIAL_DIAGNOSIS'], errors='ignore')
val_df = val_df.drop(columns=['SEX', 'DIFFERENTIAL_DIAGNOSIS'], errors='ignore')
test_df = test_df.drop(columns=['SEX', 'DIFFERENTIAL_DIAGNOSIS'], errors='ignore')

# Load label encoder
with open("/Users/zeynep_yilmaz/Desktop/sdp_gregDDx/DDxPlus Dataset/pkl files/label_encoder.pkl", "rb") as f:
    label_encoder = pickle.load(f)

print("✓ Datasets loaded successfully!")
print(f"  Training: {train_df.shape}")
print(f"  Validation: {val_df.shape}")
print(f"  Test: {test_df.shape}")
print(f"  Total pathologies: {len(label_encoder.classes_)}")


✓ Datasets loaded successfully!
  Training: (936888, 592)
  Validation: (129258, 592)
  Test: (142184, 592)
  Total pathologies: 49


## Feature Selection

Select features for training. Exclude non-feature columns and prepare X and y.


In [14]:
# Feature selection + breakdown + data prep (uses filtered datasets)
non_feature_cols = ['PATHOLOGY', 'PATHOLOGY_ENCODED'] # SEX and DIFFERENTIAL_DIAGNOSIS already dropped
feature_cols = [col for col in train_df.columns if col not in non_feature_cols]

print(f'Total columns: {train_df.shape[1]}')
print(f'Selected features: {len(feature_cols)}')

demo_features = [c for c in feature_cols if c in ['AGE', 'SEX_ENCODED']]
evidence_features = [c for c in feature_cols if c.startswith('evidence_')]
initial_features = [c for c in feature_cols if c.startswith('initial_')]

print('Feature breakdown:')
print(f'  - Demographics: {len(demo_features)} ({demo_features})')
print(f'  - Evidence features: {len(evidence_features)}')
print(f'  - Initial evidence features: {len(initial_features)}')
print(f'Total features: {len(feature_cols)}')

# Prepare X and y
X_train = train_df[feature_cols].values
y_train = train_df['PATHOLOGY_ENCODED'].values
X_val = val_df[feature_cols].values
y_val = val_df['PATHOLOGY_ENCODED'].values
X_test = test_df[feature_cols].values
y_test = test_df['PATHOLOGY_ENCODED'].values

print('✓ Data prepared:')
print(f'  X_train: {X_train.shape} | X_val: {X_val.shape} | X_test: {X_test.shape}')

# Standardize features for deep learning models
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

print('✓ Features standardized for deep learning models')


Total columns: 592
Selected features: 590
Feature breakdown:
  - Demographics: 2 (['AGE', 'SEX_ENCODED'])
  - Evidence features: 492
  - Initial evidence features: 96
Total features: 590
✓ Data prepared:
  X_train: (936888, 590) | X_val: (129258, 590) | X_test: (142184, 590)
✓ Features standardized for deep learning models


## Model 1: TabNet

TabNet uses sequential attention to learn interpretable representations and can achieve state-of-the-art performance on tabular data.


In [None]:
if TabNetClassifier is None:
    print("❌ TabNet not available. Please install pytorch-tabnet: pip install pytorch-tabnet")
else:
    # TabNet parameters
    tabnet_params = {
        'n_d': 64,              # Dimension of the decision layer
        'n_a': 64,              # Dimension of the attention layer
        'n_steps': 5,            # Number of steps in the encoder
        'gamma': 1.5,            # Coefficient for feature reusage
        'lambda_sparse': 1e-3,   # Sparsity regularization
        'optimizer_fn': torch.optim.Adam,
        'optimizer_params': dict(lr=2e-2),
        'mask_type': 'entmax',   # How to use attention
        'n_shared': 2,           # Number of shared GLU layers
        'n_independent': 2,      # Number of independent GLU layers per step
        'clip_value': 1.0,       # Gradient clipping
        'verbose': 1,
        'seed': 42,
        'device_name': 'auto'    # 'cpu' or 'cuda' or 'auto'
    }
    
    print("Training TabNet...")
    start_time = time.time()
    
    tabnet_model = TabNetClassifier(**tabnet_params)
    
    tabnet_model.fit(
        X_train=X_train_scaled,
        y_train=y_train,
        eval_set=[(X_train_scaled, y_train), (X_val_scaled, y_val)],
        eval_name=['train', 'val'],
        eval_metric=['accuracy'],
        max_epochs=100,
        patience=15,
        batch_size=1024,
        virtual_batch_size=128,
        num_workers=0,
        drop_last=False
    )
    
    training_time = time.time() - start_time
    print(f"✓ TabNet trained in {training_time:.2f} seconds")
    
    # Predictions
    y_val_pred_proba = tabnet_model.predict_proba(X_val_scaled)
    y_val_pred = tabnet_model.predict(X_val_scaled)
    
    # Evaluate
    tabnet_accuracy = accuracy_score(y_val, y_val_pred)
    tabnet_f1_macro = f1_score(y_val, y_val_pred, average='macro')
    tabnet_f1_weighted = f1_score(y_val, y_val_pred, average='weighted')
    top3_pred = np.argsort(y_val_pred_proba, axis=1)[:, -3:]
    top3_accuracy = np.mean([y_val[i] in top3_pred[i] for i in range(len(y_val))])
    
    print("\n" + "="*80)
    print("TABNET VALIDATION RESULTS")
    print("="*80)
    print(f"Accuracy: {tabnet_accuracy:.4f}")
    print(f"Macro F1-score: {tabnet_f1_macro:.4f}")
    print(f"Weighted F1-score: {tabnet_f1_weighted:.4f}")
    print(f"Top-3 Accuracy: {top3_accuracy:.4f}")
    print(f"Training time: {training_time:.2f} seconds")
    
    tabnet_results = {
        'accuracy': tabnet_accuracy,
        'f1_macro': tabnet_f1_macro,
        'f1_weighted': tabnet_f1_weighted,
        'top3_accuracy': top3_accuracy,
        'training_time': training_time
    }


Training TabNet...
epoch 0  | loss: 0.20061 | train_accuracy: 0.9948  | val_accuracy: 0.99524 |  0:12:21s
epoch 1  | loss: 0.01551 | train_accuracy: 0.99615 | val_accuracy: 0.99627 |  0:25:09s
epoch 2  | loss: 0.01788 | train_accuracy: 0.99672 | val_accuracy: 0.99685 |  0:37:14s
epoch 3  | loss: 0.01173 | train_accuracy: 0.99671 | val_accuracy: 0.99677 |  0:50:05s
epoch 4  | loss: 0.01551 | train_accuracy: 0.99672 | val_accuracy: 0.99688 |  1:02:57s
epoch 5  | loss: 0.01312 | train_accuracy: 0.99669 | val_accuracy: 0.99672 |  1:11:44s
epoch 6  | loss: 0.0106  | train_accuracy: 0.99695 | val_accuracy: 0.99695 |  1:15:52s
epoch 7  | loss: 0.01023 | train_accuracy: 0.997   | val_accuracy: 0.99703 |  1:20:02s
epoch 8  | loss: 0.01053 | train_accuracy: 0.99697 | val_accuracy: 0.99704 |  1:24:25s
epoch 9  | loss: 0.01099 | train_accuracy: 0.9971  | val_accuracy: 0.99712 |  1:28:25s
epoch 10 | loss: 0.00983 | train_accuracy: 0.9969  | val_accuracy: 0.99698 |  1:32:31s
epoch 11 | loss: 0.00978

## Model 2: FT-Transformer

FT-Transformer applies the Transformer architecture to tabular data by tokenizing features and using self-attention mechanisms.


In [1]:
# Convert to PyTorch tensors
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
    
    # FT-Transformer configuration
    n_features = X_train_scaled.shape[1]
    n_classes = len(label_encoder.classes_)
    
    model_config = {
        'n_num_features': n_features,  # All features are numerical (after encoding)
        'cat_cardinalities': [],  # No categorical features (already encoded)
        'd_token': 192,           # Dimension of feature tokens
        'n_blocks': 3,            # Number of transformer blocks
        'attention_dropout': 0.2,
        'ffn_dropout': 0.1,
        'residual_dropout': 0.0,
        'd_out': n_classes        # Output dimension (number of classes)
    }
    
    print("Training FT-Transformer...")
    start_time = time.time()
    
    # Create model
    ft_model = FTTransformer(**model_config).to(device)
    
    # Training parameters
    learning_rate = 1e-4
    batch_size = 512
    n_epochs = 50
    patience = 10
    
    # Optimizer and loss
    optimizer = torch.optim.AdamW(ft_model.parameters(), lr=learning_rate, weight_decay=1e-5)
    criterion = nn.CrossEntropyLoss()
    
    # Convert to tensors
    X_train_tensor = torch.FloatTensor(X_train_scaled).to(device)
    y_train_tensor = torch.LongTensor(y_train).to(device)
    X_val_tensor = torch.FloatTensor(X_val_scaled).to(device)
    y_val_tensor = torch.LongTensor(y_val).to(device)
    
    # Create DataLoader for batching
    train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    
    # Training loop
    best_val_loss = float('inf')
    patience_counter = 0
    train_losses = []
    val_losses = []
    
    for epoch in range(n_epochs):
        # Training
        ft_model.train()
        epoch_train_loss = 0
        num_batches = 0
        
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            
            # Forward pass - FT-Transformer expects X_num and optionally X_cat
            # Since all features are numerical, pass X_num and None for X_cat
            output = ft_model(batch_X)
            loss = criterion(output, batch_y)
            
            # Backward pass
            loss.backward()
            optimizer.step()
            
            epoch_train_loss += loss.item()
            num_batches += 1
        
        avg_train_loss = epoch_train_loss / num_batches
        train_losses.append(avg_train_loss)
        
        # Validation
        ft_model.eval()
        with torch.no_grad():
            # For validation, we process in batches to avoid memory issues
            val_outputs = []
            val_batch_size = 1024
            for i in range(0, len(X_val_tensor), val_batch_size):
                batch_X_val = X_val_tensor[i:i+val_batch_size]
                batch_output = ft_model(batch_X_val)
                val_outputs.append(batch_output)
            
            val_output = torch.cat(val_outputs, dim=0)
            val_loss = criterion(val_output, y_val_tensor)
            val_losses.append(val_loss.item())
            
            # Calculate accuracy
            val_pred = torch.argmax(val_output, dim=1)
            val_acc = (val_pred == y_val_tensor).float().mean().item()
        
        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            # Save best model
            best_model_state = ft_model.state_dict().copy()
        else:
            patience_counter += 1
        
        if (epoch + 1) % 5 == 0:
            print(f"Epoch {epoch+1}/{n_epochs} - Train Loss: {avg_train_loss:.4f}, "
                  f"Val Loss: {val_loss.item():.4f}, Val Acc: {val_acc:.4f}")
        
        if patience_counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break
    
    # Load best model
    ft_model.load_state_dict(best_model_state)
    ft_model.eval()
    
    training_time = time.time() - start_time
    print(f"✓ FT-Transformer trained in {training_time:.2f} seconds")
    
    # Predictions on validation set
    with torch.no_grad():
        val_outputs = []
        val_batch_size = 1024
        for i in range(0, len(X_val_tensor), val_batch_size):
            batch_X_val = X_val_tensor[i:i+val_batch_size]
            batch_output = ft_model(batch_X_val)
            val_outputs.append(batch_output)
        
        val_output = torch.cat(val_outputs, dim=0)
        y_val_pred_proba = torch.softmax(val_output, dim=1).cpu().numpy()
        y_val_pred = torch.argmax(val_output, dim=1).cpu().numpy()
    
    # Evaluate
    ft_accuracy = accuracy_score(y_val, y_val_pred)
    ft_f1_macro = f1_score(y_val, y_val_pred, average='macro')
    ft_f1_weighted = f1_score(y_val, y_val_pred, average='weighted')
    top3_pred = np.argsort(y_val_pred_proba, axis=1)[:, -3:]
    top3_accuracy = np.mean([y_val[i] in top3_pred[i] for i in range(len(y_val))])
    
    print("\n" + "="*80)
    print("FT-TRANSFORMER VALIDATION RESULTS")
    print("="*80)
    print(f"Accuracy: {ft_accuracy:.4f}")
    print(f"Macro F1-score: {ft_f1_macro:.4f}")
    print(f"Weighted F1-score: {ft_f1_weighted:.4f}")
    print(f"Top-3 Accuracy: {top3_accuracy:.4f}")
    print(f"Training time: {training_time:.2f} seconds")
    
    ft_results = {
        'accuracy': ft_accuracy,
        'f1_macro': ft_f1_macro,
        'f1_weighted': ft_f1_weighted,
        'top3_accuracy': top3_accuracy,
        'training_time': training_time
    }


IndentationError: unexpected indent (496945516.py, line 6)

## Test Set Evaluation

Now let's evaluate all models on the true test set to get final results.


In [None]:
# Evaluate on test set
print("="*80)
print("TEST SET EVALUATION")
print("="*80)

test_results = {}

# TabNet test predictions
if TabNetClassifier is not None and 'tabnet_model' in locals():
    print("\nEvaluating TabNet on test set...")
    y_test_pred_proba_tabnet = tabnet_model.predict_proba(X_test_scaled)
    y_test_pred_tabnet = tabnet_model.predict(X_test_scaled)
    
    test_acc_tabnet = accuracy_score(y_test, y_test_pred_tabnet)
    test_f1_macro_tabnet = f1_score(y_test, y_test_pred_tabnet, average='macro')
    test_f1_weighted_tabnet = f1_score(y_test, y_test_pred_tabnet, average='weighted')
    top3_pred_tabnet = np.argsort(y_test_pred_proba_tabnet, axis=1)[:, -3:]
    top3_acc_tabnet = np.mean([y_test[i] in top3_pred_tabnet[i] for i in range(len(y_test))])
    
    test_results['TabNet'] = {
        'accuracy': test_acc_tabnet,
        'f1_macro': test_f1_macro_tabnet,
        'f1_weighted': test_f1_weighted_tabnet,
        'top3_accuracy': top3_acc_tabnet
    }
    
    print(f"TabNet Test Results:")
    print(f"  Accuracy: {test_acc_tabnet:.4f}")
    print(f"  Macro F1: {test_f1_macro_tabnet:.4f}")
    print(f"  Weighted F1: {test_f1_weighted_tabnet:.4f}")
    print(f"  Top-3 Accuracy: {top3_acc_tabnet:.4f}")

# FT-Transformer test predictions
if 'ft_model' in locals():
    print("\nEvaluating FT-Transformer on test set...")
    X_test_tensor = torch.FloatTensor(X_test_scaled).to(device)
    
    ft_model.eval()
    with torch.no_grad():
        # Process test set in batches
        test_outputs = []
        test_batch_size = 1024
        for i in range(0, len(X_test_tensor), test_batch_size):
            batch_X_test = X_test_tensor[i:i+test_batch_size]
            batch_output = ft_model(batch_X_test)
            test_outputs.append(batch_output)
        
        test_output = torch.cat(test_outputs, dim=0)
        y_test_pred_proba_ft = torch.softmax(test_output, dim=1).cpu().numpy()
        y_test_pred_ft = torch.argmax(test_output, dim=1).cpu().numpy()
    
    test_acc_ft = accuracy_score(y_test, y_test_pred_ft)
    test_f1_macro_ft = f1_score(y_test, y_test_pred_ft, average='macro')
    test_f1_weighted_ft = f1_score(y_test, y_test_pred_ft, average='weighted')
    top3_pred_ft = np.argsort(y_test_pred_proba_ft, axis=1)[:, -3:]
    top3_acc_ft = np.mean([y_test[i] in top3_pred_ft[i] for i in range(len(y_test))])
    
    test_results['FT-Transformer'] = {
        'accuracy': test_acc_ft,
        'f1_macro': test_f1_macro_ft,
        'f1_weighted': test_f1_weighted_ft,
        'top3_accuracy': top3_acc_ft
    }
    
    print(f"FT-Transformer Test Results:")
    print(f"  Accuracy: {test_acc_ft:.4f}")
    print(f"  Macro F1: {test_f1_macro_ft:.4f}")
    print(f"  Weighted F1: {test_f1_weighted_ft:.4f}")
    print(f"  Top-3 Accuracy: {top3_acc_ft:.4f}")

# Create test comparison
if test_results:
    test_comparison_data = {
        'Model': list(test_results.keys()),
        'Accuracy': [test_results[m]['accuracy'] for m in test_results.keys()],
        'Macro F1': [test_results[m]['f1_macro'] for m in test_results.keys()],
        'Weighted F1': [test_results[m]['f1_weighted'] for m in test_results.keys()],
        'Top-3 Accuracy': [test_results[m]['top3_accuracy'] for m in test_results.keys()]
    }
    
    test_comparison = pd.DataFrame(test_comparison_data)
    
    print("\n" + "="*80)
    print("TEST SET COMPARISON")
    print("="*80)
    print(test_comparison.to_string(index=False))
else:
    print("\n⚠️  No models available for test evaluation")


TEST SET EVALUATION

⚠️  No models available for test evaluation


## Model Comparison

Compare validation set performance across models.


In [None]:
# Create comparison
comparison_data = []
if TabNetClassifier is not None and 'tabnet_results' in locals():
    comparison_data.append({
        'Model': 'TabNet',
        'Accuracy': tabnet_results['accuracy'],
        'Macro F1': tabnet_results['f1_macro'],
        'Weighted F1': tabnet_results['f1_weighted'],
        'Top-3 Accuracy': tabnet_results['top3_accuracy'],
        'Training Time (s)': tabnet_results['training_time']
    })

if 'ft_results' in locals():
    comparison_data.append({
        'Model': 'FT-Transformer',
        'Accuracy': ft_results['accuracy'],
        'Macro F1': ft_results['f1_macro'],
        'Weighted F1': ft_results['f1_weighted'],
        'Top-3 Accuracy': ft_results['top3_accuracy'],
        'Training Time (s)': ft_results['training_time']
    })

if comparison_data:
    comparison = pd.DataFrame(comparison_data)
    
    print("="*80)
    print("MODEL COMPARISON (VALIDATION SET)")
    print("="*80)
    print(comparison.to_string(index=False))
    
    # Visualize
    if len(comparison) > 0:
        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
        fig.suptitle('Tabular Deep Learning Models Comparison', fontsize=16, fontweight='bold')
        
        metrics = ['Accuracy', 'Macro F1', 'Weighted F1', 'Top-3 Accuracy']
        colors = ['#3498db', '#e74c3c', '#2ecc71', '#f39c12']
        
        for i, metric in enumerate(metrics):
            row = i // 2
            col = i % 2
            ax = axes[row, col]
            
            bars = ax.bar(comparison['Model'], comparison[metric], 
                         color=colors[:len(comparison)])
            ax.set_title(f'{metric} Comparison', fontsize=12, fontweight='bold')
            ax.set_ylabel(metric)
            ax.set_ylim([0, 1])
            ax.grid(True, alpha=0.3, axis='y')
            
            for j, v in enumerate(comparison[metric]):
                ax.text(j, v, f'{v:.4f}', ha='center', va='bottom', fontweight='bold')
        
        plt.tight_layout()
        
        # Save plot
        results_dir = Path('../results')
        results_dir.mkdir(exist_ok=True)
        plt.savefig(results_dir / 'tabular_dl_comparison.png', dpi=300, bbox_inches='tight')
        plt.show()
        
        print(f"\n✓ Comparison plot saved to {results_dir / 'tabular_dl_comparison.png'}")
else:
    print("⚠️  No models available for comparison")


⚠️  No models available for comparison


## Model Saving

Save trained models for future use.


In [None]:
# Save models
saved_models_dir = Path('../saved_models')
saved_models_dir.mkdir(exist_ok=True)

# Save TabNet
if TabNetClassifier is not None and 'tabnet_model' in locals():
    tabnet_save_path = saved_models_dir / 'tabnet_model'
    tabnet_model.save_model(str(tabnet_save_path))
    print(f"✓ TabNet model saved to {tabnet_save_path}")

# Save FT-Transformer
if 'ft_model' in locals():
    ft_save_path = saved_models_dir / 'ft_transformer_model.pth'
    torch.save({
        'model_state_dict': ft_model.state_dict(),
        'model_config': model_config,
        'scaler': scaler,
        'label_encoder': label_encoder
    }, ft_save_path)
    print(f"✓ FT-Transformer model saved to {ft_save_path}")

# Save scaler and results
results_dict = {
    'scaler': scaler,
    'label_encoder': label_encoder,
    'feature_cols': feature_cols,
    'validation_results': {}
}

if 'tabnet_results' in locals():
    results_dict['validation_results']['TabNet'] = tabnet_results
if 'ft_results' in locals():
    results_dict['validation_results']['FT-Transformer'] = ft_results
if test_results:
    results_dict['test_results'] = test_results

results_path = saved_models_dir / 'tabular_dl_results.pkl'
with open(results_path, 'wb') as f:
    pickle.dump(results_dict, f)
print(f"✓ Results and preprocessing objects saved to {results_path}")


✓ Results and preprocessing objects saved to ../saved_models/tabular_dl_results.pkl
