# This notebook reproduces the ablation studies in the paper

In [1]:
import numpy as np
from tqdm import trange
import torch
import random

from evaluation_utils import prepare_dataloader, train_loop, get_test_results, calc_metrics
from CAMELOT import CamelotModel
from variants_paper import CAMELOT_FF

# Labels for the four columns of each per-seed `results` table, in the order
# the experiment cells store them (column 0 = AUC ... column 3 = NMI).
metrics = [
    'AUC',
    'F1 score',
    'Recall',
    'NMI',
]

# Every experiment below is repeated once per seed in this list.
seeds = [
    1001, 1012, 1134, 2475, 6138,
    7415, 1663, 7205, 9253, 1782,
]

## 1. The Original CAMELOT

In [None]:
# Baseline experiment: train and evaluate an unmodified CamelotModel once per
# seed. `results` holds one row per seed and one column per reported score
# (0 = mean AUC, 1 = mean F1, 2 = mean recall, 3 = NMI).
n_runs = len(seeds)
results = np.zeros((n_runs, 4))
for run, SEED in enumerate(seeds):
    # Seed the torch, NumPy and stdlib generators with the same value.
    for seed_fn in (torch.random.manual_seed, np.random.seed, random.seed):
        seed_fn(SEED)

    # NOTE: `device` is not read again inside this cell.
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device_name)

    (train_dataset, val_dataset, test_dataset,
     train_loader, val_loader, test_loader) = prepare_dataloader(SEED)

    # The model's input shape is taken from axes 1 and 2 of the training inputs.
    x_shape = train_dataset.x.shape
    model = CamelotModel(
        input_shape=(x_shape[1], x_shape[2]),
        seed=SEED,
        num_clusters=10,
        latent_dim=64,
    )
    model = train_loop(
        model, train_dataset, val_dataset, train_loader, val_loader, SEED=SEED
    )

    real, preds = get_test_results(model, test_loader)
    auc, f1, rec, nmi = calc_metrics(real, preds)

    # Per-seed report: the mean first, then the raw value returned by calc_metrics.
    print(f'AUCROC: \t{auc.mean():.5f}, \t{auc}')
    print(f'F1-score: \t{f1.mean():.5f}, \t{f1}')
    print(f'Recall: \t{rec.mean():.5f}, \t{rec}')
    print(f'NMI: \t\t{nmi:.5f}')

    # Store this seed's scores in its row, one column at a time.
    for col, score in enumerate((auc.mean(), f1.mean(), rec.mean(), nmi)):
        results[run, col] = score

In [14]:
# Summarise the runs above: per-metric mean and standard deviation across seeds
# (NumPy's default `std`, i.e. computed down the rows of `results`).
col_means = results.mean(axis=0)
col_stds = results.std(axis=0)
for m, u, std in zip(metrics, col_means, col_stds):
    print(f'{m}: {u:.3f} ({std:.3f})')

AUC: 0.771 (0.023)
F1 score: 0.318 (0.021)
Recall: 0.353 (0.006)
NMI: 0.109 (0.010)


## 2. Without Distance Loss

In [None]:
# Ablation "without distance loss": identical to the baseline cell except that
# CamelotModel is built with alpha=0 (presumably the distance-loss weight --
# confirm against CamelotModel). `results` holds one row per seed and one
# column per reported score (0 = mean AUC, 1 = mean F1, 2 = mean recall, 3 = NMI).
n_runs = len(seeds)
results = np.zeros((n_runs, 4))
for run, SEED in enumerate(seeds):
    # Seed the torch, NumPy and stdlib generators with the same value.
    for seed_fn in (torch.random.manual_seed, np.random.seed, random.seed):
        seed_fn(SEED)

    # NOTE: `device` is not read again inside this cell.
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device_name)

    (train_dataset, val_dataset, test_dataset,
     train_loader, val_loader, test_loader) = prepare_dataloader(SEED)

    # The model's input shape is taken from axes 1 and 2 of the training inputs.
    x_shape = train_dataset.x.shape
    model = CamelotModel(
        input_shape=(x_shape[1], x_shape[2]),
        seed=SEED,
        num_clusters=10,
        latent_dim=64,
        alpha=0,
    )
    model = train_loop(
        model, train_dataset, val_dataset, train_loader, val_loader, SEED=SEED
    )

    real, preds = get_test_results(model, test_loader)
    auc, f1, rec, nmi = calc_metrics(real, preds)

    # Per-seed report: the mean first, then the raw value returned by calc_metrics.
    print(f'AUCROC: \t{auc.mean():.5f}, \t{auc}')
    print(f'F1-score: \t{f1.mean():.5f}, \t{f1}')
    print(f'Recall: \t{rec.mean():.5f}, \t{rec}')
    print(f'NMI: \t\t{nmi:.5f}')

    # Store this seed's scores in its row, one column at a time.
    for col, score in enumerate((auc.mean(), f1.mean(), rec.mean(), nmi)):
        results[run, col] = score

In [16]:
# Summarise the runs above: per-metric mean and standard deviation across seeds
# (NumPy's default `std`, i.e. computed down the rows of `results`).
col_means = results.mean(axis=0)
col_stds = results.std(axis=0)
for m, u, std in zip(metrics, col_means, col_stds):
    print(f'{m}: {u:.3f} ({std:.3f})')

AUC: 0.765 (0.017)
F1 score: 0.317 (0.027)
Recall: 0.347 (0.009)
NMI: 0.104 (0.017)


## 3. Without Cluster Loss

In [None]:
# Ablation "without cluster loss": identical to the baseline cell except that
# CamelotModel is built with beta=0 (presumably the cluster-loss weight --
# confirm against CamelotModel). `results` holds one row per seed and one
# column per reported score (0 = mean AUC, 1 = mean F1, 2 = mean recall, 3 = NMI).
n_runs = len(seeds)
results = np.zeros((n_runs, 4))
for run, SEED in enumerate(seeds):
    # Seed the torch, NumPy and stdlib generators with the same value.
    for seed_fn in (torch.random.manual_seed, np.random.seed, random.seed):
        seed_fn(SEED)

    # NOTE: `device` is not read again inside this cell.
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device_name)

    (train_dataset, val_dataset, test_dataset,
     train_loader, val_loader, test_loader) = prepare_dataloader(SEED)

    # The model's input shape is taken from axes 1 and 2 of the training inputs.
    x_shape = train_dataset.x.shape
    model = CamelotModel(
        input_shape=(x_shape[1], x_shape[2]),
        seed=SEED,
        num_clusters=10,
        latent_dim=64,
        beta=0,
    )
    model = train_loop(
        model, train_dataset, val_dataset, train_loader, val_loader, SEED=SEED
    )

    real, preds = get_test_results(model, test_loader)
    auc, f1, rec, nmi = calc_metrics(real, preds)

    # Per-seed report: the mean first, then the raw value returned by calc_metrics.
    print(f'AUCROC: \t{auc.mean():.5f}, \t{auc}')
    print(f'F1-score: \t{f1.mean():.5f}, \t{f1}')
    print(f'Recall: \t{rec.mean():.5f}, \t{rec}')
    print(f'NMI: \t\t{nmi:.5f}')

    # Store this seed's scores in its row, one column at a time.
    for col, score in enumerate((auc.mean(), f1.mean(), rec.mean(), nmi)):
        results[run, col] = score

In [18]:
# Summarise the runs above: per-metric mean and standard deviation across seeds
# (NumPy's default `std`, i.e. computed down the rows of `results`).
col_means = results.mean(axis=0)
col_stds = results.std(axis=0)
for m, u, std in zip(metrics, col_means, col_stds):
    print(f'{m}: {u:.3f} ({std:.3f})')

AUC: 0.768 (0.013)
F1 score: 0.323 (0.014)
Recall: 0.355 (0.010)
NMI: 0.107 (0.012)


## 4. Without Distance and Cluster Loss

In [None]:
# Ablation "without distance and cluster loss": identical to the baseline cell
# except that CamelotModel is built with both alpha=0 and beta=0 (presumably
# the two loss weights -- confirm against CamelotModel). `results` holds one
# row per seed and one column per reported score
# (0 = mean AUC, 1 = mean F1, 2 = mean recall, 3 = NMI).
n_runs = len(seeds)
results = np.zeros((n_runs, 4))
for run, SEED in enumerate(seeds):
    # Seed the torch, NumPy and stdlib generators with the same value.
    for seed_fn in (torch.random.manual_seed, np.random.seed, random.seed):
        seed_fn(SEED)

    # NOTE: `device` is not read again inside this cell.
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device_name)

    (train_dataset, val_dataset, test_dataset,
     train_loader, val_loader, test_loader) = prepare_dataloader(SEED)

    # The model's input shape is taken from axes 1 and 2 of the training inputs.
    x_shape = train_dataset.x.shape
    model = CamelotModel(
        input_shape=(x_shape[1], x_shape[2]),
        seed=SEED,
        num_clusters=10,
        latent_dim=64,
        alpha=0,
        beta=0,
    )
    model = train_loop(
        model, train_dataset, val_dataset, train_loader, val_loader, SEED=SEED
    )

    real, preds = get_test_results(model, test_loader)
    auc, f1, rec, nmi = calc_metrics(real, preds)

    # Per-seed report: the mean first, then the raw value returned by calc_metrics.
    print(f'AUCROC: \t{auc.mean():.5f}, \t{auc}')
    print(f'F1-score: \t{f1.mean():.5f}, \t{f1}')
    print(f'Recall: \t{rec.mean():.5f}, \t{rec}')
    print(f'NMI: \t\t{nmi:.5f}')

    # Store this seed's scores in its row, one column at a time.
    for col, score in enumerate((auc.mean(), f1.mean(), rec.mean(), nmi)):
        results[run, col] = score

In [None]:
# Summarise the runs above: per-metric mean and standard deviation across seeds
# (NumPy's default `std`, i.e. computed down the rows of `results`).
col_means = results.mean(axis=0)
col_stds = results.std(axis=0)
for m, u, std in zip(metrics, col_means, col_stds):
    print(f'{m}: {u:.3f} ({std:.3f})')

## 5. Without Attention Layer

In [None]:
# Ablation "without attention layer": same pipeline as the other cells, but the
# model class is CAMELOT_FF (from variants_paper) instead of CamelotModel.
# `results` holds one row per seed and one column per reported score
# (0 = mean AUC, 1 = mean F1, 2 = mean recall, 3 = NMI).
n_runs = len(seeds)
results = np.zeros((n_runs, 4))
for run, SEED in enumerate(seeds):
    # Seed the torch, NumPy and stdlib generators with the same value.
    for seed_fn in (torch.random.manual_seed, np.random.seed, random.seed):
        seed_fn(SEED)

    # NOTE: `device` is not read again inside this cell.
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(device_name)

    (train_dataset, val_dataset, test_dataset,
     train_loader, val_loader, test_loader) = prepare_dataloader(SEED)

    # The model's input shape is taken from axes 1 and 2 of the training inputs.
    x_shape = train_dataset.x.shape
    # NOTE(review): alpha=0 and beta=0 are passed here exactly as in the
    # "without distance and cluster loss" cell, so this run changes the model
    # class AND keeps those two settings at zero. Confirm this is the intended
    # ablation rather than a copy-paste leftover.
    model = CAMELOT_FF(
        input_shape=(x_shape[1], x_shape[2]),
        seed=SEED,
        num_clusters=10,
        latent_dim=64,
        alpha=0,
        beta=0,
    )
    model = train_loop(
        model, train_dataset, val_dataset, train_loader, val_loader, SEED=SEED
    )

    real, preds = get_test_results(model, test_loader)
    auc, f1, rec, nmi = calc_metrics(real, preds)

    # Per-seed report: the mean first, then the raw value returned by calc_metrics.
    print(f'AUCROC: \t{auc.mean():.5f}, \t{auc}')
    print(f'F1-score: \t{f1.mean():.5f}, \t{f1}')
    print(f'Recall: \t{rec.mean():.5f}, \t{rec}')
    print(f'NMI: \t\t{nmi:.5f}')

    # Store this seed's scores in its row, one column at a time.
    for col, score in enumerate((auc.mean(), f1.mean(), rec.mean(), nmi)):
        results[run, col] = score

In [None]:
# Summarise the runs above: per-metric mean and standard deviation across seeds
# (NumPy's default `std`, i.e. computed down the rows of `results`).
col_means = results.mean(axis=0)
col_stds = results.std(axis=0)
for m, u, std in zip(metrics, col_means, col_stds):
    print(f'{m}: {u:.3f} ({std:.3f})')