In [None]:
#|default_exp callback.progresize

# Progressive Resizing
> A callback to progressively resize images during training

`ProgressiveResize` is inspired by MosaicML's [Progressive Resizing algorithm for Composer](https://docs.mosaicml.com/en/stable/method_cards/progressive_resizing.html) which in turn was inspired by [fastai](https://github.com/fastai/fastbook/blob/780b76bef3127ce5b64f8230fce60e915a7e0735/07_sizing_and_tta.ipynb). 

In [None]:
#|export
from __future__ import annotations

from pathlib import Path
from tempfile import TemporaryDirectory

from fastcore.basics import detuplify
from fastcore.transform import Pipeline, Transform

from fastai.callback.core import Callback
from fastai.callback.fp16 import MixedPrecision
from fastai.learner import _cast_tensor
from fastai.vision.augment import AffineCoordTfm, RandomResizedCropGPU

from fastxtend.callback.cutmixup import CutMixUpAugment
from fastxtend.imports import *

In [None]:
#|hide
from nbdev.showdoc import *

In [None]:
#|exporti
_resize_augs = (AffineCoordTfm, RandomResizedCropGPU)

In [None]:
#|exporti
def _to_size(t:Tensor):
    "Convert Tensor to size compatible values"
    if sum(t.shape)==2: return tuple(t.tolist())
    else:               return tuple(t.item(),t.item())

In [None]:
#|exporti
def _num_steps(input_size, current_size, min_increase):
    "Convert Tensor to size compatible values"
    steps = (input_size - current_size) / min_increase
    if sum(steps.shape)==2: 
        steps = steps[0].item()
    return steps

In [None]:
#|exporti
def _evenly_divisible(input_size, current_size, min_increase, steps):
    min_increase = tensor(min_increase)
    return (((input_size-current_size) % min_increase).sum() == 0) and (((input_size-current_size) - (min_increase*steps)).sum() == 0)

In [None]:
#|export
class ProgSizeMode(Enum):
    "Resize schedule modes for `ProgressiveResize`, passed as `size_mode`"
    # `ProgressiveResize.before_fit` raises `min_increase` in steps of 2 until the schedule fits between `start` and `finish`
    Auto = 'auto'
    # `min_increase` is used as passed; `ProgressiveResize.before_fit` raises `ValueError` if the schedule does not fit
    Strict = 'strict'
In [None]:
#|export
class ProgressiveResize(Callback):
    "Progressively increase the size of input images during training. Final image size is the valid image size or `input_size`."
    order = MixedPrecision.order+1 # Needs to run after MixedPrecision
    def __init__(self,
        initial_size:float|tuple[int,int]=0.5, # Starting size to increase from. Image shape must be square
        start:Number=0.5, # Earliest upsizing epoch in percent of training time or epoch (index 0)
        finish:Number=0.75, # Last upsizing epoch in percent of training time or epoch (index 0)
        min_increase:int=4, # Minimum increase per upsizing epoch
        size_mode:ProgSizeMode=ProgSizeMode.Auto, # Automatically determine the resizing schedule or manually set `start` and `finish`
        resize_mode:str='bilinear', # PyTorch interpolate mode string for progressive resizing
        add_resize:bool=False, # Add a separate resize step. Use for non-fastai DataLoaders or DataLoaders without batch transforms
        resize_valid:bool=True, # Apply progressive resizing to valid dataset
        input_size:tuple[int,int]|None=None, # Final image size. Set if using a non-fastai DataLoaders.
        empty_cache:bool=False, # Call `torch.cuda.empty_cache()` before a resizing epoch. May prevent cuda & magma errors. Don't use with multiple GPUs.
        verbose:bool=True, # Print a summary of the progressive resizing schedule
        logger_callback:str='wandb', # Log report and samples/second to `logger_callback` using `Callback.name` if available
    ):
        store_attr()
        self.run_valid = resize_valid

    def before_fit(self):
        "Sets up Progressive Resizing"
        if hasattr(self.learn, 'lr_finder') and not hasattr(self, "gather_preds"):
            self.run = False
            return

        self._resize, self.remove_resize, self.null_resize, self.remove_cutmix = [], True, True, False
        # Logger hooks are looked up by name so other loggers can be patched in; `noop` means no hook exists
        self._log_after_resize = getattr(self, f'_{self.logger_callback}_log_after_resize', noop)
        self.has_logger = hasattr(self.learn, self.logger_callback) and self._log_after_resize != noop
        self.min_increase = tensor(self.min_increase)

        # Dry run at full resolution to pre-allocate memory
        # See https://pytorch.org/tutorials/recipes/recipes/tuning_guide.html#pre-allocate-memory-in-case-of-variable-input-length
        # Everything the cleanup needs is bound before entering `try`, so a failure
        # cannot be masked by a `NameError` raised from a `finally` clause.
        states = get_random_states()
        path = self.path/self.model_dir
        path.mkdir(parents=True, exist_ok=True)
        tmp_d = TemporaryDirectory(dir=path)
        try:
            tmp_p = Path(tmp_d.name).stem
            self.learn.save(f'{tmp_p}/_tmp')
            try:
                b = self.dls.valid.one_batch()
                i = getattr(self.dls, 'n_inp', 1 if len(b)==1 else len(b)-1)
                self.learn.xb, self.learn.yb = b[:i],b[i:]

                autocast = self.learn.mixed_precision.autocast if hasattr(self.learn, 'mixed_precision') else None
                if autocast is not None: autocast.__enter__()
                try:
                    self.learn.pred = self.learn.model(*_cast_tensor(self.learn.xb))
                    self.learn.loss = self.learn.loss_func(self.learn.pred, *_cast_tensor(self.learn.yb))
                finally:
                    # Always leave the autocast context, even if the forward pass raised
                    if autocast is not None: autocast.__exit__(None, None, None)

                self.learn.loss.backward()
                self.learn.opt.zero_grad()
            finally:
                # Only restore once the checkpoint is known to have been written
                self.learn.load(f'{tmp_p}/_tmp', with_opt=True)
        finally:
            tmp_d.cleanup()
            set_random_states(**states)

        # Try to automatically determine the input size
        try:
            for n in range(i):
                x = detuplify(self.learn.xb[n])
                if isinstance(x, TensorImageBase):
                    self.input_size = x.shape[-2:]
        finally:
            if self.input_size is None: 
                raise ValueError(f'Could not determine input size. Set `input_size`: {self.input_size}')
            self.input_size = tensor(self.input_size)
            if self.input_size[0] != self.input_size[1]:
                raise ValueError(f'`ProgressiveResize` does not support non-square images: `input_size` = {self.input_size.tolist()}')
            if self.input_size[0] % 2 != 0:
                 raise ValueError(f"Input shape must be even: {self.input_size}")
            assert self.min_increase.item() % 2 == 0, f"Minimum increase must be even: {self.min_increase}"

        # Set the initial size: a float is a fraction of `input_size`, a tuple/list is an absolute size
        if isinstance(self.initial_size, float): 
            self.current_size = (tensor(self.initial_size) * self.input_size).int()
        elif isinstance(self.initial_size, (tuple, list)): 
            self.current_size = tensor(self.initial_size)
        else:
            raise TypeError(f'`initial_size` must be a float or a tuple of ints, got {type(self.initial_size).__name__}')

        # Without room to grow there are zero (or negative) steps, which breaks the schedule math below
        if (self.current_size >= self.input_size).any():
            raise ValueError(f'`initial_size` {self.current_size.tolist()} must be smaller than `input_size` {self.input_size.tolist()}')

        # Values below 1 are a fraction of `n_epoch`, otherwise they are epoch indices
        start_epoch  = int(self.n_epoch*self.start)  if self.start < 1  else self.start
        finish_epoch = int(self.n_epoch*self.finish) if self.finish < 1 else self.finish
        max_steps = finish_epoch - start_epoch 

        # No schedule can fit into an empty window; fail now instead of exhausting the search loop below
        if max_steps <= 0:
            raise ValueError(f'`finish` epoch {finish_epoch} must be after `start` epoch {start_epoch}')

        # Automatically determine the number of steps, increasing `min_increase` as needed
        if self.size_mode == ProgSizeMode.Auto:
            count = 10000 # prevent infinite loop
            steps = _num_steps(self.input_size, self.current_size, self.min_increase)
            while ((steps > max_steps) or not _evenly_divisible(self.input_size, self.current_size, self.min_increase, steps)) and count > 0:
                self.min_increase += 2
                steps = _num_steps(self.input_size, self.current_size, self.min_increase)
                count -= 1
        n_steps = _num_steps(self.input_size, self.current_size, self.min_increase)

        # Double check that the number of resize steps works
        if (n_steps > max_steps) or ((max_steps % n_steps != 0) and self.size_mode != ProgSizeMode.Auto):
            raise ValueError(f'invalid number of steps {n_steps}')

        # Double check that the step size works
        if not _evenly_divisible(self.input_size, self.current_size, self.min_increase, n_steps):
            raise ValueError(f'Resize amount {self.input_size-self.current_size} not evenly divisible by `min_increase` {self.min_increase}')

        # Set when progressive resizing steps are applied
        # The schedule is anchored to `finish_epoch`: the last resize lands there and earlier ones count back from it
        step_size = int(max_steps / n_steps)
        start_epoch = finish_epoch - ((self.input_size-self.current_size) / self.min_increase)*step_size
        if isinstance(start_epoch, torch.Tensor):
            if sum(start_epoch.shape)==2: start_epoch = int(start_epoch[0].item())
            else:                         start_epoch = int(start_epoch.item())
        self.step_epochs = list(range(start_epoch+step_size, finish_epoch+step_size, step_size))

        if self.verbose:
            msg = f'Progressively increase the initial image size of {self.current_size.tolist()} by {self.min_increase} '\
                  f'pixels every {step_size} epoch{"s" if step_size > 1 else ""} for {len(self.step_epochs)} resizes.\nStarting '\
                  f'at epoch {start_epoch+step_size} and finishing at epoch {finish_epoch} for a final training size of '\
                  f'{(self.current_size+(len(self.step_epochs))*self.min_increase).tolist()}.'
            print(msg)

        # If `add_resize`, add a separate resize
        if self.add_resize:
            self._resize_pipe = Pipeline(AffineCoordTfm(size=_to_size(self.current_size), mode=self.resize_mode))
            self._resize.append(self._resize_pipe[0])
            self.remove_resize = True
        else:
            if hasattr(self.learn, 'cutmixupaugment'):
                # Modify the `CutMixUpAugment` augmentation pipeline 
                self._process_pipeline(self.learn.cutmixupaugment._orig_pipe, False)

                # If `CutMixUpAugment` has an Affine Transform for Augmentations then use it
                if len(self._resize) > 0:
                    # Check for pre-mixup augment pipeline and modify it
                    if self.learn.cutmixupaugment._docutmixaug:
                        self._process_pipeline(self.learn.cutmixupaugment._cutmixaugs_pipe, False)
                    else:
                        # There isn't one, so add a pre-mixup augment pipeline for resizing
                        self.learn.cutmixupaugment._cutmixaugs_pipe = Pipeline(AffineCoordTfm(size=_to_size(self.current_size)))
                        self.learn.cutmixupaugment._docutmixaug = True
                        self._resize.append(self.learn.cutmixupaugment._cutmixaugs_pipe[0])
                        self.remove_cutmix, self.remove_resize = True, True

            else:
                # If no `CutMixUpAugment` check the train dataloader pipeline for Affine Transforms
                self._process_pipeline(self.dls.train.after_batch.fs, False)

            # If `resize_valid` check the valid dataloader pipeline for Affine Transforms
            if self.resize_valid:
                self._process_pipeline(self.dls.valid.after_batch.fs, False)

            # If there are no detected resizes add a resize transform pipeline
            if len(self._resize) == 0:
                self.add_resize = True
                self._resize_pipe = Pipeline(AffineCoordTfm(size=_to_size(self.current_size)))
                self._resize.append(self._resize_pipe[0])
                self.remove_resize = True

        # Set created or detected resize to the first size and store original interpolation
        self._orig_modes = []
        for resize in self._resize:
            resize.size = _to_size(self.current_size)
            self._orig_modes.append(resize.mode)
            resize.mode = self.resize_mode

    def before_batch(self):
        "Applies optional additional resize"
        if self.add_resize:
            self.learn.xb = self._resize_pipe(self.xb)
            # self.learn.yb = self._resize_pipe(self.yb) TODO this wasn't working
        
    def before_train(self):
        "Increases the image size before the training epoch"
        if len(self.step_epochs)> 0 and self.epoch >= self.step_epochs[0]:
            self.step_epochs.pop(0)
            self.current_size += self.min_increase
        
            for i, resize in enumerate(self._resize):
                if (self.current_size < self.input_size).all():
                    resize.size = _to_size(self.current_size)
                else:
                    # Reset everything after progressive resizing is done
                    if self.null_resize: 
                        resize.size = None
                    elif self.remove_resize:
                        if self.remove_cutmix:
                            self.learn.cutmixupaugment._cutmixaugs_pipe = Pipeline([])
                            self.learn.cutmixupaugment._docutmixaug = False
                        else:
                            self._resize_pipe = Pipeline([])
                            self.add_resize = False
                    else:
                        resize.size = _to_size(self.current_size)
                        resize.mode = self._orig_modes[i]
        if self.has_logger: self._log_after_resize()

    def after_epoch(self):
        'Calls `torch.cuda.empty_cache()` if `empty_cache=True` before a resizing epoch. May slightly increase single GPU training speed. Do not use with multiple GPUs.'
        if self.empty_cache and len(self.step_epochs) > 0 and self.epoch+1 >= self.step_epochs[0]:
            # Drop references to the last batch so its memory can be released by `empty_cache`
            del self.learn.xb
            del self.learn.yb
            del self.learn.pred
            torch.cuda.empty_cache()
        if self.has_logger: self._log_after_resize(step=0)

    def _process_pipeline(self, pipe, remove_resize=False, null_resize=None):
        "Helper method for processing augmentation pipelines"
        for p in pipe:
            if isinstance(p, _resize_augs):
                self._resize.append(p)
                if null_resize is None:
                    # Only null the size after training if every detected resize started without one
                    self.null_resize = self.null_resize and p.size is None
                else:
                    self.null_resize = null_resize
        self.remove_resize = remove_resize 

Progressive resizing initially trains on downsampled images then gradually increases the image size over multiple epochs to the full size for the rest of training. This can significantly reduce training time at the expense of lower model performance. The model must be capable of handling variable image sizes.

> Important: <code>ProgressiveResize</code> should increase GPU throughput, which may cause other parts of the training pipeline to become a bottleneck. An easy way to increase fastai dataloader throughput is by [replacing pillow with pillow-simd](https://docs.fast.ai/dev/performance.html#pillow-simd).

When testing Composer's [Progressive Resizing](https://docs.mosaicml.com/en/stable/method_cards/progressive_resizing.html) callback, MosaicML [found](https://docs.mosaicml.com/en/stable/method_cards/progressive_resizing.html#technical-details):

> In our experiments, Progressive Resizing improves the attainable tradeoffs between training speed and the final quality of the trained model. In some cases, it leads to slightly lower quality than the original model for the same number of training steps. However, Progressive Resizing increases training speed so much (via improved throughput during the early part of training) that it is possible to train for more steps, recover accuracy, and still complete training in less time.

`ProgressiveResize` modifies the fastai batch augmentations pipeline to change the batch resize size. If there isn't a batch augmentation, `ProgressiveResize` temporarily adds one. This occurs at the beginning of an epoch, as resizing in the middle of an epoch exhibited stability issues. It is also compatible with `CutMixUpAugment`.

> Note: If training with `ProgressiveResize` results in CUDA or Magma errors, try setting `empty_cache=True` which will call `torch.cuda.empty_cache()` before a resizing epoch. This may interfere with training multiple models on multi-GPU systems.

Progressive resizing appears to work best when the resize steps are small (16 pixels or fewer) and spread out over multiple epochs.

## Tests -

In [None]:
from fastxtend.test_utils import *

In [None]:
#|hide
# modified from https://github.com/thomasbrandon/mish-cuda/blob/master/test/perftest.py

import numpy as np
def scale_time(val:float, spec:str="#0.4G"):
    "Scale fractional second `time` values and return formatted to `spec`"
    if val == 0: return '-'
    # SI prefixes from yocto (1e-24) to yotta (1e24); the middle entry is a space for plain seconds
    PREFIXES = u"yzafpnµm kMGTPEZY"
    mid = len(PREFIXES)//2
    # Power of 1000 of the magnitude. The sign of `val` is ignored so negative values scale like positive ones
    exp = int(np.log10(np.abs(val)) // 3)
    # Clamp to the available prefixes so tiny values cannot wrap around via negative indexing
    # and huge values cannot index past the end
    exp = max(-mid, min(mid, exp))
    val /= 10.**(3*exp)
    return f"{val:{spec}}{PREFIXES[exp + mid]}s"

class SyncthProgResizeTest(Callback):
    "Assert the schedule `ProgressiveResize` computed in `before_fit` matches the expected values"
    order = ProgressiveResize.order+1 # Run after `ProgressiveResize` so its schedule already exists
    def __init__(self, input_size, start_size, increase, step_size, first_epoch, last_epoch, total_resizes):
        store_attr()

    def before_fit(self):
        prog = self.learn.progressive_resize
        # A scalar `start_size` means a square image, so compare against both dimensions
        expected_start = self.start_size if isinstance(self.start_size, tuple) else [self.start_size,self.start_size]
        assert torch.equal(prog.current_size, tensor(expected_start).int())
        assert prog.min_increase==self.increase
        epochs = prog.step_epochs
        assert epochs[1]-epochs[0]==self.step_size
        assert epochs[0]==self.first_epoch
        assert epochs[-1]==self.last_epoch
        assert len(epochs)==self.total_resizes
        # Applying every scheduled increase must land exactly on the final size
        assert self.start_size + self.increase*len(epochs)==self.input_size

In [None]:
#|hide
# Default settings over 20 epochs: expect 128 -> 256 in four 32 pixel steps on epochs 12 through 15
test = SyncthProgResizeTest(input_size=256, start_size=128, increase=32, step_size=1, first_epoch=12, last_epoch=15, total_resizes=4)
learn = synth_learner(cbs=[ProgressiveResize(input_size=[256,256]), test])
learn('after_create')
learn.create_opt()
learn.n_epoch=20
# Fires `before_fit` on the callbacks; `test` has the higher `order` so it checks the finished schedule
learn('before_fit')

Progressively increase the initial image size of [128, 128] by 32 pixels every 1 epoch for 4 resizes.
Starting at epoch 12 and finishing at epoch 15 for a final training size of [256, 256].


In [None]:
#|hide
# Default settings over 66 epochs: expect 128 -> 256 in sixteen 8 pixel steps on epochs 34 through 49
test = SyncthProgResizeTest(input_size=256, start_size=128, increase=8, step_size=1, first_epoch=34, last_epoch=49, total_resizes=16)
learn = synth_learner(cbs=[ProgressiveResize(input_size=[256,256]), test])
learn('after_create')
learn.create_opt()
learn.n_epoch=66
# Fires `before_fit` on the callbacks; `test` has the higher `order` so it checks the finished schedule
learn('before_fit')

Progressively increase the initial image size of [128, 128] by 8 pixels every 1 epoch for 16 resizes.
Starting at epoch 34 and finishing at epoch 49 for a final training size of [256, 256].


In [None]:
#|hide
# `min_increase=6` over 300 epochs: expect 192 -> 384 in 32 steps, one every 2 epochs, on epochs 163 through 225
test = SyncthProgResizeTest(input_size=384, start_size=192, increase=6, step_size=2, first_epoch=163, last_epoch=225, total_resizes=32)
learn = synth_learner(cbs=[ProgressiveResize(input_size=[384,384], min_increase=6), test])
learn('after_create')
learn.create_opt()
learn.n_epoch=300
# Fires `before_fit` on the callbacks; `test` has the higher `order` so it checks the finished schedule
learn('before_fit')

Progressively increase the initial image size of [192, 192] by 6 pixels every 2 epochs for 32 resizes.
Starting at epoch 163 and finishing at epoch 225 for a final training size of [384, 384].


In [None]:
#|hide
# `min_increase=8` over 300 epochs: expect 192 -> 384 in 24 steps, one every 3 epochs, on epochs 156 through 225
test = SyncthProgResizeTest(input_size=384, start_size=192, increase=8, step_size=3, first_epoch=156, last_epoch=225, total_resizes=24)
learn = synth_learner(cbs=[ProgressiveResize(input_size=[384,384], min_increase=8), test])
learn('after_create')
learn.create_opt()
learn.n_epoch=300
# Fires `before_fit` on the callbacks; `test` has the higher `order` so it checks the finished schedule
learn('before_fit')

Progressively increase the initial image size of [192, 192] by 8 pixels every 3 epochs for 24 resizes.
Starting at epoch 156 and finishing at epoch 225 for a final training size of [384, 384].


## Example

In this example, I train a ResNet50 on a SageMaker Studio Lab Tesla T4 instance for 20 epochs on Imagenette at an image size of 224 pixels. Even in this 20 epoch example, `ProgressiveResize` yields training time savings compared to a full sized run: training time drops from ~14 minutes to ~10.5 minutes, with an accuracy dropoff from ~86.2% to ~85.6%.

In [None]:
#|hide
#|slow
import time

from fastcore.basics import num_cpus

from fastai.data.external import URLs, untar_data
from fastai.data.block import DataBlock, CategoryBlock
from fastai.data.transforms import GrandparentSplitter, get_image_files, parent_label, Normalize
from fastai.learner import Learner
from fastai.vision.augment import Resize, aug_transforms
from fastai.vision.core import imagenet_stats
from fastai.vision.data import ImageBlock
from fastai.vision.models import resnet50
from fastxtend.callback.channelslast import *
from fastxtend.metrics import *
from fastxtend.utils import *

In [None]:
#|hide
# Drop the synthetic test learner before the GPU example
# NOTE(review): `free_gpu_memory` is defined outside this notebook; presumably it releases the learner's GPU memory — confirm
free_gpu_memory(learn)

In [None]:
#|hide
#|slow
class ProgressiveResizeTest(Callback):
    "Assert each batch's last two dimensions match the size recorded from `ProgressiveResize`"
    run_valid = True
    order = ProgressiveResize.order+1

    def _record_size(self):
        "Store `ProgressiveResize.current_size` for the next batch check"
        self.progsize = self.learn.progressive_resize.current_size

    def before_fit(self):
        self._record_size()

    def before_batch(self):
        batch_hw = L(self.x.shape[-2:])
        expected_hw = L(self.progsize.tolist())
        assert batch_hw == expected_hw

    def after_batch(self):
        self._record_size()

In [None]:
#|slow
#|cuda
# Timed 20 epoch Imagenette run with `ProgressiveResize`, for comparison with the full size run in the later cell
imagenette = untar_data(URLs.IMAGENETTE_320)

# NOTE(review): `less_random` is defined outside this notebook; presumably it makes the two runs comparable — confirm
with less_random():
    dblock = DataBlock(blocks=(ImageBlock, CategoryBlock),
                        splitter=GrandparentSplitter(valid_name='val'),
                        get_items=get_image_files, get_y=parent_label,
                        item_tfms=Resize(224),
                        batch_tfms=[*aug_transforms(), Normalize.from_stats(*imagenet_stats)])
    dls =  dblock.dataloaders(imagenette, bs=128, num_workers=num_cpus(), pin_memory=True)

    # Upsize between 20% and 80% of training; `ProgressiveResizeTest` asserts the image size of every batch
    cbs = [ProgressiveResize(start=0.2, finish=0.8), ProgressiveResizeTest]
    learn = Learner(dls, resnet50(num_classes=dls.c), metrics=Accuracy(), cbs=cbs).to_channelslast()

    # Wall-clock time of the whole fit, formatted by `scale_time`
    start = time.perf_counter()
    learn.fit_one_cycle(20, 3e-3)
    total = time.perf_counter() - start
    print(f'Total training time: {scale_time(total)}')

Progressively increase the initial image size of [112, 112] by 14 pixels every 1 epoch for 8 resizes.
Starting at epoch 9 and finishing at epoch 16 for a final training size of [224, 224].


epoch,train_loss,valid_loss,accuracy,time
0,2.122014,2.182029,0.227516,00:25
1,1.905108,2.192691,0.363057,00:25
2,1.690208,1.83963,0.487389,00:25
3,1.43391,1.606834,0.480764,00:25
4,1.286285,3.116283,0.494522,00:25
5,1.200042,1.25451,0.626242,00:25
6,1.105527,1.396334,0.572484,00:25
7,1.106385,1.339236,0.564586,00:25
8,0.982895,0.885877,0.723822,00:25
9,0.953809,1.116866,0.637197,00:26


Total training time: 629.7 s


In [None]:
#|hide
#|slow
#|cuda
# Drop the progressive resizing learner and dataloaders before the baseline run
# NOTE(review): `free_gpu_memory` is defined outside this notebook; presumably it releases their GPU memory — confirm
free_gpu_memory(learn, dls)

In [None]:
#|slow
#|cuda
# Baseline: the same timed 20 epoch Imagenette run without `ProgressiveResize`
imagenette = untar_data(URLs.IMAGENETTE_320)

# NOTE(review): `less_random` is defined outside this notebook; presumably it makes the two runs comparable — confirm
with less_random():
    dblock = DataBlock(blocks=(ImageBlock, CategoryBlock),
                        splitter=GrandparentSplitter(valid_name='val'),
                        get_items=get_image_files, get_y=parent_label,
                        item_tfms=Resize(224),
                        batch_tfms=[*aug_transforms(),Normalize.from_stats(*imagenet_stats)])
    dls =  dblock.dataloaders(imagenette, bs=128, num_workers=num_cpus(), pin_memory=True)

    # No callbacks are passed here, so every epoch trains at the `Resize(224)` item size
    learn = Learner(dls, resnet50(num_classes=dls.c), metrics=Accuracy()).to_channelslast()

    # Wall-clock time of the whole fit, formatted by `scale_time`
    start = time.perf_counter()
    learn.fit_one_cycle(20, 3e-3)
    total = time.perf_counter() - start
    print(f'Total training time: {scale_time(total)}')

epoch,train_loss,valid_loss,accuracy,time
0,2.016391,2.256088,0.241274,00:43
1,1.769153,3.686334,0.311083,00:41
2,1.529073,1.638564,0.471847,00:41
3,1.348534,1.439297,0.55465,00:41
4,1.181351,1.534368,0.530446,00:42
5,1.127666,1.91433,0.532994,00:41
6,1.025626,3.243782,0.461911,00:41
7,0.951557,1.24778,0.625987,00:42
8,0.887196,0.973012,0.694777,00:42
9,0.855779,1.0638,0.663694,00:41


Total training time: 840.6 s


## Progressive Resizing Wandb Logging

In [None]:
#|exports
# Weights & Biases is optional: the logging hook is only patched in when `wandb` can be imported
try:
    import wandb

    @patch
    def _wandb_log_after_resize(self:ProgressiveResize, step:int=1):
        "Log the current image size to Weights & Biases at `step` past the wandb callback's current step"
        size = _to_size(self.current_size)
        # `ProgressiveResize.after_epoch` passes `step=0`; `before_train` uses the default of 1
        wandb.log({'progressive_resize_size': size[0]}, self.learn.wandb._wandb_step+step)
except ImportError:
    # Only a missing `wandb` is ignored, so errors in the patch itself are no longer hidden
    pass

## Extend to other Loggers

To extend to new loggers, follow the Weights & Biases code above and create patches for `ProgressiveResize` to add a `_{Callback.name}_log_after_resize`, where `Callback.name` is the [name of the logger callback](https://docs.fast.ai/callback.core.html#Callback.name).

Then to use, pass `logger_callback='{Callback.name}'` to `ProgressiveResize`.

`ProgressiveResize` sets its `_log_after_resize` method to `f'_{self.logger_callback}_log_after_resize'`, which should match the patched method.

```python
self._log_after_resize = getattr(self, f'_{self.logger_callback}_log_after_resize', noop)
```