# Continual Learning on Split CIFAR-10

## Prepare the Dataset

In [1]:
from pathlib import Path

from avalanche.benchmarks.classic import SplitCIFAR10


# Datasets live in a `data` folder under the resolved current directory.
example_dir_path = Path().resolve()
data_dir_path = example_dir_path / "data"

# Build the Split CIFAR-10 benchmark with 5 experiences. Shuffling is off,
# task ids are requested, and class ids are requested to start from zero in
# each experience.
split_cifar10 = SplitCIFAR10(
    n_experiences=5,
    return_task_id=True,
    class_ids_from_zero_in_each_exp=True,
    shuffle=False,
    dataset_root=data_dir_path / "cifar10",
)

Files already downloaded and verified
Files already downloaded and verified


## Define the Lightning Module

In [2]:
import pytorch_lightning as pl
import timm
import torch
import torch.nn as nn

# Must import `hat.networks` to register the models
# noinspection PyUnresolvedReferences
import hat.networks
from hat import HATConfig, HATPayload
from hat.utils import get_hat_reg_term


class ContinualClassifier(pl.LightningModule):
    """Multi-head classifier on a HAT ResNet backbone for continual learning.

    The backbone is the timm-registered ``hat_resnet18s`` model created with
    ``num_classes=0``; classification is done by one linear head per task.

    Args:
        num_classes_per_exp: Sequence with the number of classes of each
            task/experience. Its length is used as the number of HAT tasks
            and as the number of heads.
        max_mask_scale: Upper bound of the mask scale passed to the backbone.
            Training ramps the scale up to this value within each epoch;
            testing always uses this value.
    """

    def __init__(self, num_classes_per_exp, max_mask_scale=100.0):
        super().__init__()
        self.num_classes_per_exp = num_classes_per_exp
        self.max_mask_scale = max_mask_scale
        _hat_config = HATConfig(
            num_tasks=len(num_classes_per_exp),
        )
        self.backbone = timm.create_model(
            "hat_resnet18s",
            num_classes=0,
            hat_config=_hat_config,
        )
        # NOTE(review): 512 is presumably the feature width produced by
        # `hat_resnet18s` -- confirm before swapping the backbone.
        # Plain local names are used here on purpose: double-underscore
        # names inside a class body are subject to name mangling.
        self.heads = nn.ModuleList(
            [nn.Linear(512, num_classes) for num_classes in num_classes_per_exp]
        )
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, images, task_id, mask_scale=None):
        """Return the logits of the head belonging to ``task_id``.

        Args:
            images: Batch of input images fed to the backbone.
            task_id: Index of the task; selects both the payload's task and
                the classification head.
            mask_scale: Mask scale forwarded to the HAT payload (optional).
        """
        pld = HATPayload(images, task_id=task_id, mask_scale=mask_scale)
        return self.heads[pld.task_id](self.backbone(pld).data)

    def training_step(self, batch, batch_idx):
        """Compute cross-entropy plus the HAT regularization term.

        ``batch`` is an ``(images, targets, task_id)`` triple.
        """
        images, targets, task_id = batch
        # Progress is the fraction of the current epoch's batches processed
        # so far, so the mask scale ramps up to `max_mask_scale` every epoch.
        _progress = (batch_idx + 1) / self.trainer.num_training_batches
        _mask_scale = _progress * self.max_mask_scale
        logits = self.forward(images, task_id, _mask_scale)
        loss = self.criterion(logits, targets)
        reg = get_hat_reg_term(
            module=self.backbone,
            reg_strategy="uniform",
            task_id=task_id,
            mask_scale=_mask_scale,
        )
        return loss + reg

    def test_step(self, batch, batch_idx, dataloader_idx=0):
        """Log class-incremental and task-incremental accuracy for a batch.

        ``batch`` is an ``(images, targets, task_id)`` triple.
        ``dataloader_idx`` defaults to 0 so the step also works when a single
        test dataloader is used.
        """
        images, targets, task_id = batch
        # Class-incremental learning
        # Iterate through all the tasks and compute the logits
        logits = []
        for head_task_id in range(len(self.heads)):
            logits.append(
                self.forward(images, head_task_id, self.max_mask_scale)
            )
        # Class-incremental testing: concatenate all heads' logits and shift
        # the targets by the number of classes of the preceding tasks.
        cil_logits = torch.cat(logits, dim=1)
        cil_targets = targets + sum(self.num_classes_per_exp[:task_id])
        cil_acc = cil_logits.argmax(dim=1) == cil_targets
        # Task-incremental testing: only the batch's own head is considered.
        til_logits = logits[task_id]
        til_acc = til_logits.argmax(dim=1) == targets
        self.log_dict(
            {
                "cil_acc": cil_acc.float().mean(),
                "til_acc": til_acc.float().mean(),
            },
            batch_size=images.shape[0],
        )

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters (lr=1e-3)."""
        return torch.optim.Adam(self.parameters(), lr=1e-3)

## Train the Model for Each Task

In [3]:
from torch.utils.data import DataLoader


# Accelerator and distributed strategy used by the trainers below.
strategy = "ddp_notebook_find_unused_parameters_true"
device = "cuda"

# A single classifier instance, sized from the benchmark's per-experience
# class counts.
clf = ContinualClassifier(split_cifar10.n_classes_per_exp)


def collate_fn(batch):
    """Collate ``(image, target, task_id)`` samples into one batch.

    Returns a triple of the stacked images, a tensor of the targets, and the
    task id of the first sample as a plain ``int`` (the remaining samples'
    task ids are not inspected).
    """
    image_seq, target_seq, task_id_seq = map(list, zip(*batch))
    stacked_images = torch.stack(image_seq)
    target_tensor = torch.tensor(target_seq)
    batch_task_id = int(task_id_seq[0])
    return (stacked_images, target_tensor, batch_task_id)


# Loader options shared by every training experience.
trn_loader_kwargs = dict(
    batch_size=128,
    shuffle=True,
    num_workers=8,
    pin_memory=(device == "cuda"),
    collate_fn=collate_fn,
)

# Fit the same classifier on each experience of the training stream in turn.
for __task_id, __trn_exp in enumerate(split_cifar10.train_stream):
    print(f"Training on task/experience {__task_id}")
    # A new trainer is created for every experience, capped at 10 epochs.
    trainer = pl.Trainer(
        strategy=strategy,
        accelerator=device,
        max_epochs=10,
    )
    dataloader = DataLoader(__trn_exp.dataset, **trn_loader_kwargs)
    trainer.fit(clf, dataloader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Training on task/experience 0


Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/4
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/4
Initializing distributed: GLOBAL_RANK: 2, MEMBER: 3/4
Initializing distributed: GLOBAL_RANK: 3, MEMBER: 4/4
----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 4 processes
----------------------------------------------------------------------------------------------------

LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]

  | Name      | Type             | Params
-----------------------------------------------
0 | backbone  | HATResNet        | 11.2 M
1 | heads     | ModuleList       | 5.1 K 
2 | criterion | CrossEntropyLoss | 0     
-----------------------------------------------
11.2 M    Trainable params
100     

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Training on task/experience 1


Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/4
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/4
Initializing distributed: GLOBAL_RANK: 2, MEMBER: 3/4
Initializing distributed: GLOBAL_RANK: 3, MEMBER: 4/4
----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 4 processes
----------------------------------------------------------------------------------------------------

LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3]

  | Name      | Type             | Params
-----------------------------------------------
0 | backbone  | HATResNet        | 11.2 M
1 | heads     | ModuleList       | 5.1 K 
2 | criterion | CrossEntropyLoss | 0     
-----------------------------------------------
11.2 M    Trainable params
100     

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Training on task/experience 2


Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/4
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/4
Initializing distributed: GLOBAL_RANK: 2, MEMBER: 3/4
Initializing distributed: GLOBAL_RANK: 3, MEMBER: 4/4
----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 4 processes
----------------------------------------------------------------------------------------------------

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3]

  | Name      | Type             | Params
-----------------------------------------------
0 | backbone  | HATResNet        | 11.2 M
1 | heads     | ModuleList       | 5.1 K 
2 | criterion | CrossEntropyLoss | 0     
-----------------------------------------------
11.2 M    Trainable params
100     

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Training on task/experience 3


Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/4
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/4
Initializing distributed: GLOBAL_RANK: 2, MEMBER: 3/4
Initializing distributed: GLOBAL_RANK: 3, MEMBER: 4/4
----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 4 processes
----------------------------------------------------------------------------------------------------

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3]

  | Name      | Type             | Params
-----------------------------------------------
0 | backbone  | HATResNet        | 11.2 M
1 | heads     | ModuleList       | 5.1 K 
2 | criterion | CrossEntropyLoss | 0     
-----------------------------------------------
11.2 M    Trainable params
100     

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Training on task/experience 4


Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/4
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/4
Initializing distributed: GLOBAL_RANK: 2, MEMBER: 3/4
Initializing distributed: GLOBAL_RANK: 3, MEMBER: 4/4
----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 4 processes
----------------------------------------------------------------------------------------------------

LOCAL_RANK: 1 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
LOCAL_RANK: 3 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3]

  | Name      | Type             | Params
-----------------------------------------------
0 | backbone  | HATResNet        | 11.2 M
1 | heads     | ModuleList       | 5.1 K 
2 | criterion | CrossEntropyLoss | 0     
-----------------------------------------------
11.2 M    Trainable params
100     

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.


## Test the Model

In [4]:
# Freeze the trained classifier before evaluation.
clf.freeze()
# Clear the `track_running_stats` flag on every `nn.BatchNorm2d` submodule.
for __m in clf.modules():
    if not isinstance(__m, nn.BatchNorm2d):
        continue
    __m.track_running_stats = False

# Evaluation uses a single device.
trainer = pl.Trainer(devices=1, accelerator=device)

# One dataloader per experience of the test stream, all with the same options.
tst_loader_kwargs = dict(
    batch_size=128,
    num_workers=8,
    pin_memory=(device == "cuda"),
    collate_fn=collate_fn,
)
tst_dataloaders = [
    DataLoader(__exp.dataset, **tst_loader_kwargs)
    for __exp in split_cifar10.test_stream
]
tst_results = trainer.test(clf, tst_dataloaders)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]