## Dependencies


In [None]:
import sys
from pathlib import Path
from torchvision import datasets

## Utils


In [None]:
# Use the directory three levels above the current working directory as the
# project root and make it importable.
project_root = Path.cwd().resolve().parents[2]
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Datasets are stored under <project_root>/data; create the directory if needed.
data_root = project_root / "data"
data_root.mkdir(parents=True, exist_ok=True)

# Imported only after sys.path has been extended above; presumably the module
# lives under the project root — TODO confirm.
from notebook_setup import setup_notebook

# Project-specific notebook initialisation (its effects are not visible here).
setup_notebook()

In [None]:
# Device constant and seeding helper from the project utilities
from utils.utils import DEVICE, set_seed

print(f"Device used: {DEVICE}")

# Seed the random number generators (helper defaults) for reproducibility
set_seed()

# Dataloader construction
from methods.naive.naive_utils import init_dataloaders

# Training loop, loss-history plotting, evaluation and metrics reporting
from utils.train_test_metrics import (
    train_model,
    plot_training_history,
    test_model,
    show_metrics,
)

# Model factory
from models.simple_cnn import init_model_cnn

## Parameters (arbitrarily chosen)


In [None]:
BATCH_SIZE = 64  # handed to the dataloader helpers as batch_size
LEARNING_RATE = 1e-3  # forwarded to init_model_cnn(learning_rate=...)
EPOCHS = 10  # passed to train_model as num_epochs

# Simple base training


## Init model


In [None]:
# Build the CNN via the project factory. Besides the model it returns the
# criterion, the optimizer, the transform applied to the MNIST datasets below,
# and the model name from which the output file names are derived later on.
model, model_name, criterion, optimizer, transform = init_model_cnn(
    learning_rate=LEARNING_RATE
)

## Init dataloaders


In [None]:
# MNIST train/test splits rooted at data_root, using the transform returned by
# the model factory (download=True lets torchvision fetch the files when they
# are not already present).
train_dataset = datasets.MNIST(
    root=data_root, train=True, transform=transform, download=True
)
test_dataset = datasets.MNIST(
    root=data_root, train=False, transform=transform, download=True
)
# File name handed to init_dataloaders as info_file_path; presumably the
# train/validation split is recorded there — TODO confirm.
data_split_path = "mnist_data_splits.json"

# val_ratio=0.2 is forwarded to the helper (presumably the share of the
# training set held out for validation — TODO confirm). Besides the three
# loaders, the helper returns the `classes` value later given to show_metrics.
train_loader, val_loader, test_loader, classes = init_dataloaders(
    datasets=(train_dataset, test_dataset),
    val_ratio=0.2,
    batch_size=BATCH_SIZE,
    info_file_path=data_split_path,
)

## Call train


In [None]:
import time

# Time the whole training run with the high-resolution performance counter.
start_time = time.perf_counter()
train_model(
    model, model_name, train_loader, val_loader, criterion, optimizer, num_epochs=EPOCHS
)
end_time = time.perf_counter()  # End timer
elapsed_time = end_time - start_time

# Report the elapsed time in seconds with microsecond precision.
print(f"Execution time: {elapsed_time:.6f} seconds")

## Plot history losses


In [None]:
# History file name derived from the model name; presumably written by
# train_model — TODO confirm.
history_path = f"{model_name}_history.json"
plot_training_history(history_path)

## Call test


In [None]:
# Weights file name derived from the model name; presumably saved during
# training and loaded by test_model — TODO confirm.
model_path = f"{model_name}_model.pth"
test_model(model, model_name, model_path, test_loader)

## Show metrics


In [None]:
# Predictions file name derived from the model name; presumably produced by
# test_model — TODO confirm.
predictions_path = f"{model_name}_predictions.json"
# `classes` is the value returned together with the dataloaders; the commented
# line below is kept as a manual alternative for MNIST.
# classes = ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four', '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']
show_metrics(predictions_path, classes, model_name)

## Utils naive


In [None]:
# Sampling of the examples to unlearn (the ratio is chosen at the call site)
from utils.utils import select_samples_to_unlearn

# Data-split update after unlearning, and dataloader re-creation from the
# JSON split files
from methods.naive.naive_utils import (
    update_splits_after_unlearning,
    recreate_dataloaders,
)

# Naive unlearning


### Init new model


In [None]:
# Same file name as `data_split_path` used when the base dataloaders were created.
data_splits_file = "mnist_data_splits.json"
# NOTE(review): the "5per" suffix does not match unlearn_ratio=0.2 passed to
# select_samples_to_unlearn further down — confirm which one is intended.
unlearn_samples_file = "mnist_samples_to_unlearn_5per.json"
# Passed to update_splits_after_unlearning and then to recreate_dataloaders as
# its data_splits_file.
updated_data_splits_path = "updated_mnist_data_splits.json"

In [None]:
# Call the model factory again with the same learning rate, rebinding the
# model, criterion, optimizer and transform used by the cells below.
model, model_name, criterion, optimizer, transform = init_model_cnn(
    learning_rate=LEARNING_RATE
)
# Prefix the name so that the file names derived from it below differ from
# those of the base run.
model_name = "naive_unlearning_" + model_name

### Init data


In [None]:
# Rebuild both MNIST splits with the transform returned for the new model.
# The training split is bound to `train_dataset` (not `dataset`) because that
# is the name handed to `recreate_dataloaders` further down; otherwise that
# call would silently reuse the object created in the base-training section,
# or raise NameError when that section has not been run.
train_dataset = datasets.MNIST(
    root=data_root, train=True, transform=transform, download=True
)
test_dataset = datasets.MNIST(
    root=data_root, train=False, transform=transform, download=True
)

## Select samples to unlearn


In [None]:
# ONLY ONCE
# NOTE(review): unlearn_ratio=0.2 here, while the unlearn-samples file name
# defined above ends in "5per" — confirm which one is intended.
select_samples_to_unlearn(data_splits_file, unlearn_samples_file, unlearn_ratio=0.2)

## Update data splits -> delete samples


In [None]:
# Called with the original splits file, the unlearn-samples file and the path
# for the updated splits; presumably writes the splits without the selected
# samples to that last path — TODO confirm.
update_splits_after_unlearning(
    data_splits_file, unlearn_samples_file, updated_data_splits_path
)

## Recreate DataLoaders from json files


In [None]:
# Rebind the loaders and `classes` using the updated splits file, with the
# same batch size as the base run.
train_loader, val_loader, test_loader, classes = recreate_dataloaders(
    data_splits_file=updated_data_splits_path,
    datasets=(train_dataset, test_dataset),
    batch_size=BATCH_SIZE,
)

## Re-train


In [None]:
import time

# Time the retraining run on the recreated loaders with the high-resolution
# performance counter.
start_time = time.perf_counter()
train_model(
    model, model_name, train_loader, val_loader, criterion, optimizer, num_epochs=EPOCHS
)
end_time = time.perf_counter()  # End timer
elapsed_time = end_time - start_time

# Report the elapsed time in seconds with microsecond precision.
print(f"Execution time: {elapsed_time:.6f} seconds")

## Plot history losses


In [None]:
# History file name derived from the prefixed model name; presumably written
# by train_model — TODO confirm.
history_path = f"{model_name}_history.json"
plot_training_history(history_path)

## Re-test


In [None]:
# Weights file name derived from the prefixed model name; presumably saved
# during training and loaded by test_model — TODO confirm.
model_path = f"{model_name}_model.pth"
test_model(model, model_name, model_path, test_loader)

## Show metrics


In [None]:
# Predictions file name derived from the prefixed model name; presumably
# produced by test_model — TODO confirm.
predictions_path = f"{model_name}_predictions.json"
# `classes` is the value returned together with the dataloaders; the commented
# line below is kept as a manual alternative for MNIST.
# classes = ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four', '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']
show_metrics(predictions_path, classes, model_name)