In [1]:
from __future__ import annotations
from fastai.vision.all import *

# DataBlock

fastai provides the `DataBlock` api, which is a high (or mid) level api to easily create DataSets and DataLoaders out of composable pieces.

Here is an example of creating a train and valid DataLoader from the DataBlock api on Imagenette with both CPU and GPU augmentations.

In [None]:
# (mean, std) pair unpacked into `Normalize.from_stats`; three values each (presumably one per RGB channel)
imagenette_stats = ([0.465,0.458,0.429],[0.285,0.28,0.301])

def get_dls(size, bs, augs=None, workers=None):
    "Build Imagenette `DataLoaders`: crop/flip per item, normalization (plus optional `augs`) per batch"
    source = untar_data(URLs.IMAGENETTE_320)

    # Default worker count: at most 8, fewer when `num_cpus()` reports less
    if workers is None:
        workers = min(8, num_cpus())

    # Normalization is always present; caller-supplied batch transforms are appended after it
    batch_tfms = [Normalize.from_stats(*imagenette_stats)]
    if augs:
        batch_tfms.extend(augs)

    item_tfms = [RandomResizedCrop(size, min_scale=0.35), FlipItem(0.5)]
    dblock = DataBlock(
        blocks=(ImageBlock, CategoryBlock),
        get_items=get_image_files,
        get_y=parent_label,
        splitter=GrandparentSplitter(valid_name='val'),
        item_tfms=item_tfms,
        batch_tfms=batch_tfms,
    )

    return dblock.dataloaders(source, path=source, bs=bs, num_workers=workers)

The DataBlock api has many items which should be familiar from the PyTorch DataLoaders example in the lesson:

- bs: the batch size
- num_workers: create a multiprocessing DataLoader

It also has items which may be unfamiliar; we will go over each of them below.

## Blocks

First is the `blocks` argument of `DataBlock`. Blocks are pre-built, mix-and-match components that ingest data into the format the PyTorch model expects.

`ImageBlock` says our training data is images and `CategoryBlock` says our labels are categories.

If we had more than one input, say both RGB and Greyscale images, we would pass in three blocks and set `n_inp=2`.

In [None]:
# Three blocks with `n_inp=2`: the first two (a default `ImageBlock` and a `PILImageBW` one)
# are inputs, the trailing `CategoryBlock` is the target.
dblock = DataBlock(
    blocks=(ImageBlock, ImageBlock(PILImageBW), CategoryBlock),
    n_inp=2,
    get_items=get_image_files,
    get_y=parent_label,
    splitter=GrandparentSplitter(valid_name='val'),
)

`ImageBlock` tells the `DataBlock` api to read in `PILImage`s and converts the uint8 format to float tensors on the GPU thanks to passing `IntToFloatTensor` to `TransformBlock`.

In [None]:
def ImageBlock(cls:PILBase=PILImage):
    "Return a `TransformBlock` that builds items with `cls.create` and applies `IntToFloatTensor` per batch"
    make_item = cls.create
    return TransformBlock(batch_tfms=IntToFloatTensor, type_tfms=make_item)

`PILImage` can read in images from multiple sources, and knows how to plot itself thanks to `show` and `show_image`

In [None]:
class PILBase(Image.Image, metaclass=BypassNewMeta):
    "Base Pillow image type that can be created from several kinds of source and can show itself"
    _bypass_type=Image.Image
    _open_args = {'mode': 'RGB'}     # defaults merged with `kwargs` and passed to `load_image`
    _show_args = {'cmap':'viridis'}  # defaults merged with `kwargs` and passed to `show_image`

    @classmethod
    def create(cls, fn:Path|str|Tensor|ndarray|bytes, **kwargs)->None:
        "Create a `cls` image from `fn`, which may be a path, a tensor, an array or raw bytes"
        # NOTE(review): the `->None` annotation is kept verbatim even though an instance is
        # returned; presumably fastcore's `Transform` reads it - confirm before changing.
        # The `TensorImage`/`TensorMask` conversions sit ahead of the generic `Tensor` branch so
        # that their uint8 result can fall through to `.numpy()` (assuming both subclass `Tensor`).
        if isinstance(fn, TensorImage):
            # move the first axis last (presumably channels-first -> channels-last)
            fn = fn.permute(1,2,0).type(torch.uint8)
        if isinstance(fn, TensorMask):
            fn = fn.type(torch.uint8)
        if isinstance(fn, Tensor):
            fn = fn.numpy()
        if isinstance(fn, ndarray):
            # arrays are wrapped directly; `_open_args` and `kwargs` are not used on this path
            return cls(Image.fromarray(fn))
        if isinstance(fn, bytes):
            # wrap raw bytes in a file-like object before handing them to `load_image`
            fn = io.BytesIO(fn)
        open_kwargs = merge(cls._open_args, kwargs)
        return cls(load_image(fn, **open_kwargs))

    def show(self, ctx=None, **kwargs):
        "Display the image via `show_image`, passing `merge(self._show_args, kwargs)` as options"
        show_kwargs = merge(self._show_args, kwargs)
        return show_image(self, ctx=ctx, **show_kwargs)

class PILImage(PILBase):
    "`PILBase` subclass with no overrides: it keeps the inherited `_open_args` and `_show_args`"

`PILImageBW` tells pillow to read images in greyscale and passes the greyscale `cmap` arg to `show_image`

In [None]:
class PILImageBW(PILImage):
    "Image type opened with mode 'L' and shown with the 'Greys' colormap"
    _open_args = {'mode': 'L'}
    _show_args = {'cmap':'Greys'}

Almost all transforms in fastai use TypeDispatch to apply to the correct types and ignore types which they are not supposed to modify.

Here we are monkey patching `ToTensor` to use TypeDispatch to convert a `PILBase` image to a tensor, specifically a `TensorImage`.

In [None]:
def image2tensor(img):
    "Convert `img` to a tensor whose last axis has been moved to the front (`c*h*w` order)."
    arr = tensor(img)
    if arr.dim() == 2:
        # a 2-d input has no third axis yet: append one so the permute below is valid
        arr = arr.unsqueeze(-1)
    return arr.permute(2, 0, 1)

# Tensor type that `PILImage` (and any subclass not overriding it) is converted to by `encodes` below
PILImage._tensor_cls = TensorImage

@ToTensor
def encodes(self, o:PILBase):
    "Convert image `o` with `image2tensor` and wrap the result in `o._tensor_cls`"
    chw = image2tensor(o)
    return o._tensor_cls(chw)

We can see that `IntToFloatTensor` knows how to correctly encode and decode a `TensorImage` from uint8 to float for passing to the model and plotting, respectively.

In [None]:
class IntToFloatTensor(DisplayedTransform):
    "Transform image to float tensor, optionally dividing by 255 (e.g. for images)."
    order = 10 # Need to run after PIL transforms on the GPU
    def __init__(self, div=255., div_mask=1):
        # `div` is read by `encodes`/`decodes` below; `div_mask` is stored but not used in this class body
        store_attr()

    def encodes(self, o:TensorImage):
        "Cast `o` to float and, when `div` is truthy, divide it in place by `div`"
        res = o.float()
        # Mirror `decodes`: a falsy `div` (None or 0) means "no scaling", so skip the division
        # instead of raising on None or producing inf/nan on 0.
        return res.div_(self.div) if self.div else res

    def decodes(self, o:TensorImage):
        "Clamp `o` to [0, 1], multiply by `div` and cast to long; return `o` untouched when `div` is falsy"
        if not self.div:
            return o
        return (o.clamp(0., 1.) * self.div).long()

Likewise, `CategoryBlock` can read in labels from multiple sources

In [None]:
def CategoryBlock(
    vocab:list|pd.Series=None, # List of unique class names
    sort:bool=True, # Sort the classes alphabetically
    add_na:bool=False, # Add `#na#` to `vocab`
):
    "Return a `TransformBlock` whose type transform is a `Categorize` built from these arguments"
    categorize = Categorize(vocab=vocab, sort=sort, add_na=add_na)
    return TransformBlock(type_tfms=categorize)

By passing `Categorize` to `TransformBlock`, `CategoryBlock` automatically generates a vocab (if needed). `Categorize` has an `encodes` method which converts the text labels to integers and a `decodes` method which converts them back to text when we plot or display results.

In [None]:
class Categorize(DisplayedTransform):
    "Reversible mapping between category labels and their index in `vocab`"
    loss_func = CrossEntropyLossFlat()
    order = 1

    def __init__(self, vocab=None, sort=True, add_na=False):
        # Wrap a caller-supplied vocab in a `CategoryMap`; leave `None` so `setups` can build one
        vocab = None if vocab is None else CategoryMap(vocab, sort=sort, add_na=add_na)
        store_attr()

    def setups(self, dsets):
        "Build `vocab` from `dsets` when none was supplied, then record the number of classes in `c`"
        vocab_missing = self.vocab is None
        if vocab_missing and dsets is not None:
            self.vocab = CategoryMap(dsets, sort=self.sort, add_na=self.add_na)
        self.c = len(self.vocab)

    def encodes(self, o):
        "Look label `o` up in `vocab.o2i` and wrap the index in a `TensorCategory`"
        try:
            idx = self.vocab.o2i[o]
            return TensorCategory(idx)
        except KeyError as err:
            # re-raise with a message naming the offending label, keeping the original as the cause
            raise KeyError(f"Label '{o}' was not included in the training dataset") from err

    def decodes(self, o):
        "Map index `o` back to its label and wrap it in a `Category`"
        label = self.vocab[o]
        return Category(label)

There are multiple built-in blocks in the DataBlock api, such as `MultiCategoryBlock` and `RegressionBlock` for labels, along with more task-specific blocks, which you should look up in the documentation.

## Splitters

Next in the datablock is the splitter, which splits the data into the train and validation set.

Here we are using a `GrandparentSplitter`.

In [None]:
def get_dls(size, bs, augs=None, workers=None):
    "Build Imagenette `DataLoaders`; the train/valid split comes from `GrandparentSplitter(valid_name='val')`"
    source = untar_data(URLs.IMAGENETTE_320)

    if workers is None:
        workers = min(8, num_cpus())

    # `batch_tfms` must be defined locally: normalization always runs, and any
    # caller-supplied `augs` are appended after it.
    batch_tfms = [Normalize.from_stats(*imagenette_stats)]
    if augs:
        batch_tfms += augs

    dblock = DataBlock(blocks=(ImageBlock, CategoryBlock),
                       # the splitter under discussion: splits by folder name, with 'val' as the validation folder
                       splitter=GrandparentSplitter(valid_name='val'),
                       get_items=get_image_files,
                       get_y=parent_label,
                       item_tfms=[RandomResizedCrop(size, min_scale=0.35), FlipItem(0.5)],
                       batch_tfms=batch_tfms)

    # Return the loaders so that calling this definition yields a usable result
    return dblock.dataloaders(source, path=source, bs=bs, num_workers=workers)

The `GrandparentSplitter` splits the data based on a folder name.

There are a bunch of splitters defined in fastai. For example, `ColSplitter` for splitting via a pandas `DataFrame`, `IndexSplitter` by index, etc. See [https://docs.fast.ai/data.transforms.html#split](https://docs.fast.ai/data.transforms.html#split) for more details.

## Get Items

We need to let the datablock know where the items are, which we accomplish via `get_items`, `get_x`, and `get_y`.

In [None]:
def get_dls(size, bs, source, batch_tfms=None, workers=None):
    "Build `DataLoaders` from the images under `source`, using `get_image_files` and `parent_label` as getters"
    dblock = DataBlock(blocks=(ImageBlock, CategoryBlock),
                       splitter=GrandparentSplitter(valid_name='val'),
                       # `get_items` collects the items from `source`; `get_y` derives each label from an item
                       get_items=get_image_files,
                       get_y=parent_label,
                       item_tfms=[RandomResizedCrop(size, min_scale=0.35), FlipItem(0.5)],
                       batch_tfms=batch_tfms)

    # Return the loaders: without `return` the result was discarded and callers received `None`.
    # `workers` is forwarded as-is, so `None` leaves the choice to `dataloaders`.
    return dblock.dataloaders(source, path=source, bs=bs, num_workers=workers)

`get_image_files` knows to read images from the path we pass into the dataloader. `parent_label` reads the folder name as a label. See [https://docs.fast.ai/data.transforms.html](https://docs.fast.ai/data.transforms.html) for more item getters.

## Transforms

fastai has a large collection of built in transformations for Images.

They have five primary features.

1. Transforms have an `order` to correctly sort themselves
2. Transforms use TypeDispatch to apply on the correct data types
3. Transforms know how to undo themselves for plotting 
4. Transforms have both random train and non-random valid implementations in one definition
5. GPU Transformations smartly combine Affine transforms and Lighting transforms into one step to save computation

We can see in `RandomCrop` that it uses `split_idx` to apply a constant center crop if this is the validation set, or a random crop if the training set.

In [None]:
class RandomCrop(RandTransform):
    "Crop an image to `size`: random position when `split_idx` is falsy, centered otherwise"
    order = 1
    split_idx = None

    def __init__(self, 
        size:int|tuple, # Size to crop to, duplicated if one value is specified
        **kwargs
    ):
        # Normalize `size` first so that `store_attr` saves the processed value
        size = _process_sz(size)
        store_attr()
        super().__init__(**kwargs)

    def before_call(self, 
        b, 
        split_idx:int # Index of the train/valid dataset
    ):
        "Choose the crop's top-left corner `tl`: centered when `split_idx` is truthy, random otherwise"
        self.orig_sz = _get_sz(b)
        if split_idx:
            # truthy `split_idx`: deterministic center crop
            self.tl = (self.orig_sz-self.size)//2
            return

        # Difference between original and target size along each axis;
        # a negative value means the target is larger than the original on that axis.
        w_gap = self.orig_sz[0] - self.size[0]
        h_gap = self.orig_sz[1] - self.size[1]
        w_bounds = (0, w_gap) if w_gap >= 0 else (w_gap, -1)
        h_bounds = (0, h_gap) if h_gap >= 0 else (h_gap, -1)
        # Draw the first coordinate before the second to keep the random stream order unchanged
        first = random.randint(*w_bounds)
        second = random.randint(*h_bounds)
        self.tl = fastuple(first, second)

    def encodes(self, x:Image.Image|TensorBBox|TensorPoint):
        "Apply the crop chosen in `before_call` through `x.crop_pad`"
        return x.crop_pad(self.size, self.tl, orig_sz=self.orig_sz)

And `Normalize` has both `encodes` and `decodes` methods which use TypeDispatch to normalize images before passing to the model or unnormalize them when we plot or display results

In [None]:
class Normalize(DisplayedTransform):
    "Normalize a batch of `TensorImage` on encode and undo the normalization on decode"
    order = 99
    parameters = L('mean', 'std')

    def __init__(self, mean=None, std=None, axes=(0,2,3)):
        store_attr()

    @classmethod
    def from_stats(cls, mean, std, dim=1, ndim=4, cuda=True):
        "Alternate constructor: pass `mean`/`std` through `broadcast_vec` and build the transform from the result"
        stats = broadcast_vec(dim, ndim, mean, std, cuda=cuda)
        return cls(*stats)

    def setups(self, dl:DataLoader):
        "Estimate `mean` and `std` over `axes` from one batch of `dl` when either is missing"
        if self.mean is not None and self.std is not None:
            return
        xb, *_ = dl.one_batch()
        batch_mean = xb.mean(self.axes, keepdim=True)
        # small offset added to the std (presumably to keep the division in `encodes` away from zero)
        batch_std = xb.std(self.axes, keepdim=True) + 1e-7
        self.mean, self.std = batch_mean, batch_std

    def encodes(self, x:TensorImage):
        "Subtract `mean` and divide by `std`"
        centered = x - self.mean
        return centered / self.std

    def decodes(self, x:TensorImage):
        "Invert `encodes`; the stats are passed through `to_cpu` first when `x` is on the CPU"
        if x.device.type == 'cpu':
            move = to_cpu
        else:
            move = noop
        std = move(self.std)
        mean = move(self.mean)
        return x*std + mean