In [None]:
#default_exp data.core

In [None]:
#export
from local.torch_basics import *
from local.test import *
from local.transform import *
from local.data.load import *
from local.notebook.showdoc import show_doc

# Data core

> Core functionality for gathering data

The classes here provide functionality for applying a list of transforms to a set of items (`TfmdList`, `DataSource`) or a `DataLoader` (`TfmdDL`) as well as the base class used to gather the data for model training: `DataBunch`.

## TfmdDL -

In [None]:
#export
# Names of the `DataLoader` callbacks that `TfmdDL` wraps in a `Pipeline`
_dl_tfms = ('after_item','before_batch','after_batch')

In [None]:
#export
@delegates()
class TfmdDL(DataLoader):
    "Transformed `DataLoader`"
    def __init__(self, dataset, bs=16, shuffle=False, num_workers=None, **kwargs):
        # When no worker count is given, use `defaults.cpus` capped at 16
        if num_workers is None: num_workers = min(16, defaults.cpus)
        # Wrap every callback named in `_dl_tfms` (or `None` when it was not passed) in a `Pipeline`;
        # only `before_batch` is built with `as_item=True`
        for nm in _dl_tfms:
            kwargs[nm] = Pipeline(kwargs.get(nm,None), as_item=(nm=='before_batch'))
            # Each pipeline is set up against this loader before the parent initializer runs
            kwargs[nm].setup(self)
        super().__init__(dataset, bs=bs, shuffle=shuffle, num_workers=num_workers, **kwargs)

    def _one_pass(self):
        # Push item 0 through `do_item`, `do_batch` and `after_batch`, then cache what the result reveals
        its = self.after_batch(self.do_batch([self.do_item(0)]))
        self._device = find_device(its)
        # Shadow the `_retain_dl` method with an instance attribute bound to the observed types
        self._retain_dl = partial(retain_types, typs=mapped(type,its))
        
    def _retain_dl(self,b):
        # Lazy bootstrap: the first call runs `_one_pass`, which replaces this method on the instance
        self._one_pass()
        # we just replaced ourselves, so this is *not* recursive! :)
        return self._retain_dl(b)

    def before_iter(self):
        super().before_iter()
        # Copy the dataset's `filt` (`None` if it has none) onto every callback that is a `Pipeline`
        filt = getattr(self.dataset, 'filt', None)
        for nm in _dl_tfms:
            f = getattr(self,nm)
            if isinstance(f,Pipeline): f.filt=filt

    # Restore types with `_retain_dl`, then apply `after_batch.decode` followed by `before_batch.decode`
    def decode(self, b): return self.before_batch.decode(self.after_batch.decode(self._retain_dl(b)))
    # Batch-level decode first, then the per-sample decode done by `_decode_batch`
    def decode_batch(self, b, max_n=10, ds_decode=True): return self._decode_batch(self.decode(b), max_n, ds_decode)

    def _decode_batch(self, b, max_n=10, ds_decode=True):
        # Per-sample decoder: `after_item.decode`, composed with the dataset's `decode`
        # (or `noop` if the dataset has none) when `ds_decode` is set
        f = self.after_item.decode
        if ds_decode: f = compose(f, getattr(self.dataset,'decode',noop))
        # Split the batch into samples (`max_n` is forwarded to `batch_to_samples`) and decode each one
        return L(batch_to_samples(b, max_n=max_n)).mapped(f)

    def show_batch(self, b=None, max_n=10, ctxs=None, **kwargs):
        "Show `b` (defaults to `one_batch`), a list of lists of pipeline outputs (i.e. output of a `DataLoader`)"
        if b is None: b = self.one_batch()
        b = self.decode(b)
        # A decoded batch that knows how to show itself takes over entirely
        if hasattr(b, 'show'): return b.show(max_n=max_n, **kwargs)
        if ctxs is None:
            # Ask the first element for contexts if it can provide them, otherwise use `None` placeholders
            if hasattr(b[0], 'get_ctxs'): ctxs = b[0].get_ctxs(max_n=max_n, **kwargs)
            else: ctxs = [None] * len(b[0] if is_iter(b[0]) else b)
        # Only the `after_item` decode is applied here (`ds_decode=False`); showing is left to `dataset.show`
        db = self._decode_batch(b, max_n, False)
        ctxs = [self.dataset.show(o, ctx=ctx, **kwargs) for o,ctx in zip(db, ctxs)]
        if hasattr(b[0], 'display'): b[0].display(ctxs)
            
    @property
    def device(self):
        # `_device` is only set by `_one_pass`, so run it on first access
        if not hasattr(self, '_device'): _ = self._one_pass()
        return self._device

A `TfmdDL` is a `DataLoader` that creates a `Pipeline` from a list of `Transform`s for each of the callbacks `after_item`, `before_batch` and `after_batch`. As a result, it can decode or show a processed `batch`.

In [None]:
# Attach the docstrings that `show_doc` renders for the `TfmdDL` methods below
add_docs(TfmdDL,
         decode="Decode `b` using `tfms`",
         decode_batch="Decode `b` entirely",
         show_batch="Show each item of `b`",
         before_iter="override")

In [None]:
# An `int` subclass that also inherits from `ShowTitle`
class _Category(int, ShowTitle): pass

In [None]:
#Test retain type
class NegTfm(Transform):
    # Negation is its own inverse, so `encodes` and `decodes` perform the same operation
    def encodes(self, x): return torch.neg(x)
    def decodes(self, x): return torch.neg(x)
    
# A batch built from `TensorImage` items is still a `TensorImage` after `after_batch`
tdl = TfmdDL([(TensorImage([1]),)] * 4, after_batch=NegTfm(), bs=4, num_workers=4)
b = tdl.one_batch()
test_eq(type(b[0]), TensorImage)
# Decoding a plain tensor batch also yields `TensorImage` samples
b = (tensor([1.,1.,1.,1.]),)
test_eq(type(tdl.decode_batch(b)[0][0]), TensorImage)

In [None]:
# Identity on encode; wraps the value in `Int` on decode
class A(Transform): 
    def encodes(self, x): return x 
    def decodes(self, x): return Int(x) 

# Duplicates its input into a `Tuple`
@Transform
def f(x)->None: return Tuple((x,x))

# Shared fixture reused by several cells below
start = torch.arange(50)
test_eq_type(f(2), Tuple((2,2)))

In [None]:
a = A()
# Each item becomes the pair (a(x), f(x)); the second element is a `Tuple`
tdl = TfmdDL(start, after_item=lambda x: (a(x), f(x)), bs=4)
x,y = tdl.one_batch()
test_eq(type(y), Tuple)

# The `Tuple` type is still present on the decoded samples
s = tdl.decode_batch((x,y))
test_eq(type(s[0][1]), Tuple)

### Methods

In [None]:
# Render the documentation entry for `one_batch` (shown below under `DataLoader.one_batch`)
show_doc(TfmdDL.one_batch)

<h4 id="DataLoader.one_batch" class="doc_header"><code>DataLoader.one_batch</code><a href="https://github.com/fastai/fastai_dev/tree/master/dev/local/data/load.py#L99" class="source_link" style="float:right">[source]</a></h4>

> <code>DataLoader.one_batch</code>()



In [None]:
# Loader over `start` that negates every batch; reused by the following cells
tfm = NegTfm()
tdl = TfmdDL(start, after_batch=tfm, bs=4)

In [None]:
# The batch is expected to hold the negated values 0,-1,-2,-3
b = tdl.one_batch()
test_eq(tensor([0,-1,-2,-3]), b)

In [None]:
# Render the documentation entry for `TfmdDL.decode`
show_doc(TfmdDL.decode)

<h4 id="TfmdDL.decode" class="doc_header"><code>TfmdDL.decode</code><a href="https://github.com/fastai/fastai_dev/tree/master/dev/__main__.py#L29" class="source_link" style="float:right">[source]</a></h4>

> <code>TfmdDL.decode</code>(**`b`**)

Decode `b` using `tfms`

In [None]:
# Decoding the negated batch `b` gives back 0,1,2,3
test_eq(tdl.decode(b), tensor(0,1,2,3))

In [None]:
# Render the documentation entry for `TfmdDL.decode_batch`
show_doc(TfmdDL.decode_batch)

<h4 id="TfmdDL.decode_batch" class="doc_header"><code>TfmdDL.decode_batch</code><a href="https://github.com/fastai/fastai_dev/tree/master/dev/__main__.py#L30" class="source_link" style="float:right">[source]</a></h4>

> <code>TfmdDL.decode_batch</code>(**`b`**, **`max_n`**=*`10`*, **`ds_decode`**=*`True`*)

Decode `b` entirely

In [None]:
# `decode_batch` returns the decoded samples one by one
test_eq(tdl.decode_batch(b), [0,1,2,3])

In [None]:
# Render the documentation entry for `TfmdDL.show_batch`
show_doc(TfmdDL.show_batch)

<h4 id="TfmdDL.show_batch" class="doc_header"><code>TfmdDL.show_batch</code><a href="https://github.com/fastai/fastai_dev/tree/master/dev/__main__.py#L37" class="source_link" style="float:right">[source]</a></h4>

> <code>TfmdDL.show_batch</code>(**`b`**=*`None`*, **`max_n`**=*`10`*, **`ctxs`**=*`None`*, **\*\*`kwargs`**)

Show each item of `b`

## DataBunch -

In [None]:
# export
@docs
class DataBunch(GetAttr):
    "Basic wrapper around several `DataLoader`s."
    _xtra = ['one_batch', 'show_batch', 'dataset', 'device']

    def __init__(self, *dls):
        # The first loader doubles as `default`
        first = dls[0]
        self.dls = dls
        self.default = first

    def __getitem__(self, i):
        return self.dls[i]

    # Properties generated by `add_props`; per `_docs`, the first of each pair is training, the second validation
    train_dl,valid_dl = add_props(lambda i,x: x[i])
    train_ds,valid_ds = add_props(lambda i,x: x[i].dataset)

    _docs = {
        '__getitem__': "Retrieve `DataLoader` at `i` (`0` is training, `1` is validation)",
        'train_dl': "Training `DataLoader`",
        'valid_dl': "Validation `DataLoader`",
        'train_ds': "Training `Dataset`",
        'valid_ds': "Validation `Dataset`",
    }

In [None]:
# The same loader is used for both positions of the bunch
dbch = DataBunch(tdl,tdl)
x = dbch.train_dl.one_batch()
x2 = next(iter(tdl))
test_eq(x,x2)
# `one_batch` called on the bunch itself gives the same batch as the training loader
x2 = dbch.one_batch()
test_eq(x,x2)

### Methods

In [None]:
# Render the documentation entry for `DataBunch.__getitem__`
show_doc(DataBunch.__getitem__)

<h4 id="DataBunch.__getitem__" class="doc_header"><code>DataBunch.__getitem__</code><a href="https://github.com/fastai/fastai_dev/tree/master/dev/__main__.py#L8" class="source_link" style="float:right">[source]</a></h4>

> <code>DataBunch.__getitem__</code>(**`i`**)

Retrieve [`DataLoader`](/dataloader.html#DataLoader) at `i` (`0` is training, `1` is validation)

In [None]:
# Indexing the bunch with 0 returns the training loader
x2 = dbch[0].one_batch()
test_eq(x,x2)

In [None]:
# Render the documentation entry for the `train_dl` property
show_doc(DataBunch.train_dl, name="train_dl")

<h4 id="train_dl" class="doc_header"><code>train_dl</code><a href="https://github.com/fastai/fastai_dev/tree/master/dev/__main__.py#L10" class="source_link" style="float:right">[source]</a></h4>

Training [`DataLoader`](/dataloader.html#DataLoader)

In [None]:
# Render the documentation entry for the `valid_dl` property
show_doc(DataBunch.valid_dl, name="valid_dl")

<h4 id="valid_dl" class="doc_header"><code>valid_dl</code><a href="https://github.com/fastai/fastai_dev/tree/master/dev/__main__.py#L10" class="source_link" style="float:right">[source]</a></h4>

Validation [`DataLoader`](/dataloader.html#DataLoader)

In [None]:
# Render the documentation entry for the `train_ds` property
show_doc(DataBunch.train_ds, name="train_ds")

<h4 id="train_ds" class="doc_header"><code>train_ds</code><a href="https://github.com/fastai/fastai_dev/tree/master/dev/__main__.py#L11" class="source_link" style="float:right">[source]</a></h4>

Training `Dataset`

In [None]:
# Render the documentation entry for the `valid_ds` property
show_doc(DataBunch.valid_ds, name="valid_ds")

<h4 id="valid_ds" class="doc_header"><code>valid_ds</code><a href="https://github.com/fastai/fastai_dev/tree/master/dev/__main__.py#L11" class="source_link" style="float:right">[source]</a></h4>

Validation `Dataset`

## TfmdList -

In [None]:
#export
class FilteredBase:
    "Base class for lists with subsets"
    # Loader class instantiated by `databunch` for every subset
    _dl_type = TfmdDL
    def __init__(self, *args, **kwargs):
        # Re-wrap the bound `databunch` with `delegates` targeting `_dl_type.__init__`
        self.databunch = delegates(self._dl_type.__init__)(self.databunch)
        super().__init__(*args, **kwargs)

    def _new(self, items, **kwargs): return super()._new(items, filts=self.filts, **kwargs)
    def subset(self, i):
        "Return subset `i`; has to be implemented by subclasses"
        # `NotImplemented` is a comparison sentinel, not an exception: raising it fails with a `TypeError`
        raise NotImplementedError
    @property
    def n_subsets(self): return len(self.filts)

    def databunch(self, bs=16, val_bs=None, shuffle_train=True, **kwargs):
        "Create a `DataBunch` holding one `_dl_type` loader per subset; `kwargs` are passed to each loader"
        # Number of subsets after the first one
        n = self.n_subsets-1
        # The first subset uses `bs`; the others use `val_bs`, or twice `bs` when `val_bs` is not given
        bss = [bs] + [2*bs]*n if val_bs is None else [bs] + [val_bs]*n
        # Only the first subset may be shuffled; `drop_last` follows the shuffle flag
        shuffles = [shuffle_train] + [False]*n
        return DataBunch(*[self._dl_type(self.subset(i), bs=b, shuffle=s, drop_last=s, **kwargs)
                               for i,(b,s) in enumerate(zip(bss, shuffles))])

# `train` and `valid` are aliases for `subset(0)` and `subset(1)`
FilteredBase.train,FilteredBase.valid = add_props(lambda i,x: x.subset(i), 2)

In [None]:
#export
class TfmdList(FilteredBase, L):
    "A `Pipeline` of `tfms` applied to a collection of `items`"
    def __init__(self, items, tfms, use_list=None, do_setup=True, as_item=True, filt=None, train_setup=True, filts=None):
        super().__init__(items, use_list=use_list)
        # Without explicit `filts`, a single subset spanning every item is used
        if filts is None: filts = [slice(None)]
        self.filts = L(filts).mapped(mask2idxs)
        # Reuse the pipeline of another `TfmdList`; an existing `Pipeline` is not set up again
        if isinstance(tfms,TfmdList): tfms = tfms.tfms
        if isinstance(tfms,Pipeline): do_setup = False
        self.tfms = Pipeline(tfms, as_item=as_item, filt=filt)
        if do_setup: self.setup(train_setup=train_setup)

    def _new(self, items, **kwargs):
        # Copies share this list's pipeline and skip setup
        return super()._new(items, tfms=self.tfms, do_setup=False, **kwargs)

    def subset(self, i):
        picked = self._get(self.filts[i])
        return self._new(picked, filt=i)

    def _after_item(self, o):
        return self.tfms(o)

    def __repr__(self):
        return f"{self.__class__.__name__}: {self.items}\ntfms - {self.tfms.fs}"

    def __iter__(self):
        # Go through `__getitem__` so that iterating yields transformed items
        return (self[i] for i in range(len(self)))

    def show(self, o, **kwargs):
        return self.tfms.show(o, **kwargs)

    def decode(self, x, **kwargs):
        return self.tfms.decode(x, **kwargs)

    def __call__(self, x, **kwargs):
        return self.tfms.__call__(x, **kwargs)

    def setup(self, train_setup=True):
        # With `train_setup`, set up on the `train` subset when that attribute is available
        target = getattr(self,'train',self) if train_setup else self
        self.tfms.setup(target)

    @property
    def default(self):
        return self.tfms

    def __getitem__(self, idx):
        raw = super().__getitem__(idx)
        if self._after_item is None: return raw
        # A single index transforms one item; anything else transforms every retrieved item
        if is_indexer(idx): return self._after_item(raw)
        return raw.mapped(self._after_item)

In [None]:
# Attach docstrings to the `TfmdList` methods (the `Pipeline` references now have their closing backtick)
add_docs(TfmdList,
         setup="Transform setup with self",
         decode="From `Pipeline`",
         show="From `Pipeline`",
         subset="New `TfmdList` that only includes subset `i`")

In [None]:
#exports
def decode_at(o, idx):
    "Decoded item at `idx`"
    # Look the decoder up first, then fetch the item it is applied to
    decoder = o.decode
    return decoder(o[idx])

In [None]:
#exports
def show_at(o, idx, **kwargs):
    "Show item at `idx`"
    # `kwargs` are forwarded untouched to `o.show`, whose result is returned
    return o.show(o[idx], **kwargs)

A `TfmdList` combines a collection of objects with a `Pipeline`. `tfms` can either be a `Pipeline` or a list of transforms, in which case it will be wrapped in a `Pipeline`. `use_list` is passed along to `L` with the `items`, while `as_item` and `filt` are passed to each transform of the `Pipeline`. `do_setup` indicates if the `Pipeline.setup` method should be called during initialization.

In [None]:
# Encodes to `Int` and decodes to `Float`
class IntFloatTfm(Transform):
    def encodes(self, x):  return Int(x)
    def decodes(self, x):  return Float(x)
    # NOTE(review): `foo` is not used in any cell visible here; confirm it is still needed
    foo=1

int_tfm=IntFloatTfm()

# Negation is used both as the encoder and as the decoder
def neg(x): return -x
neg_tfm = Transform(neg, neg)

# Adds one on encode and subtracts one on decode
class B(Transform):
    def encodes(self, x): return x+1
    def decodes(self, x): return x-1
add1 = B()
# Restrict `add1` to subset 1
add1.filt = 1

In [None]:
# Indexing applies the pipeline: 2.0 is negated and converted to `Int`
tl = TfmdList([1.,2.,3.], [neg_tfm, int_tfm], filts=[[0,2],[1]])
t = tl[1]
test_eq_type(t, Int(-2))
# Decoding undoes both transforms and yields a `Float`
test_eq(decode_at(tl, 1), 2)
test_eq_type(tl.decode(t), Float(2.0))
test_stdout(lambda: show_at(tl, 2), '-3')
tl

TfmdList: [1.0, 2.0, 3.0]
tfms - (#2) [Transform: True {'object': 'neg'} {'object': 'neg'},IntFloatTfm: True {'object': 'encodes'} {'object': 'decodes'}]

In [None]:
# Indexing the full list does not apply `add1`, since it is restricted to subset 1
tl = TfmdList([1.,2.,3.], [neg_tfm, int_tfm, add1], filts=[[0,2],[1]])
test_eq(tl[0], -1)
test_eq(tl[1], -2)
test_eq(tl.valid[0], -1) #add1 is only applied on the validation set

In [None]:
# Subset 0 holds the items at indices 0 and 2, still transformed to `Int`
p2 = tl.subset(0)
test_eq(p2, [-1,-3])
test_eq(map(type, p2), (Int,Int))
# A 0-dim tensor index behaves like the equivalent int index
test_eq(tl[tensor(1)], tl[1])

In [None]:
# Items can come from a `DataFrame`; the transform reads column `a` of each item
df = pd.DataFrame(dict(a=[1,2,3],b=[2,3,4]))
tl = TfmdList(df, lambda o: o.a, filts=[[0],[1,2]])
test_eq(tl[1,2], [2,3])
p2 = tl.subset(1)
test_eq(p2, [2,3])

In [None]:
# Redefines `B`: the offset `a` starts at 2 and is replaced by the mean of the items in `setups`
class B(Transform):
    def __init__(self):   self.a = 2
    def encodes(self, x): return x+self.a
    def decodes(self, x): return x-self.a
    def setups(self, items): self.a = tensor(items).float().mean().item()

# Building the list runs setup, so `a` becomes the mean of 1,2,3,4
tl1 = TfmdList([1,2,3,4], B())
test_eq(tl1.tfms[0].a, 2.5)

In [None]:
# Two subsets given as indices: one as a tensor, one as a list
tfilts = [tensor([0,2]), [1,3,4]]

In [None]:
# With `tfms=[None]` the items come back unchanged, so the subsets can be checked directly
tl = TfmdList(range(5), tfms=[None], filts=tfilts)
test_eq(len(tl.filts), 2)
test_eq(tl.subset(0), [0,2])
test_eq(tl.train, [0,2])       # Subset 0 is aliased to `train`
test_eq(tl.subset(1), [1,3,4])
test_eq(tl.valid, [1,3,4])     # Subset 1 is aliased to `valid`
test_eq(tl.valid[2], 4)

Here's how we can use `TfmdList.setup` to implement a simple category list, getting labels from a mock file list:

In [None]:
# Maps labels to integer codes; the vocab and reverse mapping are built from the items in `setups`
class _Cat(Transform):
    order = 1
    def encodes(self, o):    return int(self.o2i[o])
    def decodes(self, o):    return Str(self.vocab[o])
    def setups(self, items): self.vocab,self.o2i = uniqueify(L(items), sort=True, bidir=True)

# The label is the part of the file name before the first underscore
def _lbl(o):  return Str(o.split('_')[0])
test_fns = ['dog_0.jpg','cat_0.jpg','cat_2.jpg','cat_1.jpg','dog_1.jpg']
tcat = _Cat()
# Check that tfms are sorted by `order`
tl = TfmdList(test_fns, [tcat,_lbl])

# `vocab` is reachable on the transform, on the pipeline and on the list itself
exp_voc = ['cat','dog']
test_eq(tcat.vocab, exp_voc)
test_eq(tl.tfms.vocab, exp_voc)
test_eq(tl.vocab, exp_voc)

In [None]:
# The list holds category codes ('cat' is 0, 'dog' is 1) in file order
test_eq(tl, (1,0,0,0,1))
t = L(tl)
test_eq(t, [1,0,0,0,1])
test_eq(tl[-1], 1)
test_eq(tl[0,1], (1,0))
# Decoding maps the codes back to the label strings
test_eq([tl.decode(o) for o in t], ('dog','cat','cat','cat','dog'))
test_stdout(lambda:show_at(tl, 0), "dog")
tl

TfmdList: ['dog_0.jpg', 'cat_0.jpg', 'cat_2.jpg', 'cat_1.jpg', 'dog_1.jpg']
tfms - (#2) [Transform: True {'object': '_lbl'} {},_Cat: True {'object': 'encodes'} {'object': 'decodes'}]

In [None]:
# 'kid_05.jpg' sits only in subset 1, so its label must not reach the vocab
test_fns = ['dog_0.jpg','cat_0.jpg','cat_2.jpg','cat_1.jpg','dog_1.jpg','kid_05.jpg']
tcat = _Cat()
tl = TfmdList(test_fns, [tcat,_lbl], filts=[[0,1,2,3,4], [5]])
#Check only the training set is taken into account for setup
test_eq(tcat.vocab, ['cat','dog'])

In [None]:
# `tfm` is restricted to `filt=1`, so the first batch comes back un-negated
tfm = NegTfm(filt=1)
tds = TfmdList(start, A())
tdl = TfmdDL(tds, after_batch=tfm, bs=4)
x = tdl.one_batch()
test_eq(x, torch.arange(4))
# With the dataset's `filt` set to 1 the batch is negated
tds.filt = 1
x = tdl.one_batch()
test_eq(x, -torch.arange(4))
# Switching back to `filt=0` disables the negation again
tds.filt = 0
x = tdl.one_batch()
test_eq(x, torch.arange(4))

In [None]:
# Basic loader checks over a `TfmdList`: dataset access, length, batch size and `show_batch` output
tds = TfmdList(start, A())
tdl = TfmdDL(tds, after_batch=NegTfm(), bs=4)
test_eq(tdl.dataset[0], start[0])
# Number of batches is the item count divided by 4, rounded up
test_eq(len(tdl), (len(tds)-1)//4+1)
test_eq(tdl.bs, 4)
test_stdout(tdl.show_batch, '0\n1\n2\n3')

In [None]:
# Render the documentation entry for `TfmdList.subset`
show_doc(TfmdList.subset)

<h4 id="TfmdList.subset" class="doc_header"><code>TfmdList.subset</code><a href="https://github.com/fastai/fastai_dev/tree/master/dev/__main__.py#L13" class="source_link" style="float:right">[source]</a></h4>

> <code>TfmdList.subset</code>(**`i`**)

New [`TfmdList`](/data.core.html#TfmdList) that only includes subset `i`

## DataSource -

In [None]:
#export
@docs
@delegates(TfmdList)
class DataSource(FilteredBase):
    "A dataset that creates a tuple from `items`, with one element per `TfmdList` in `tls` (one is built for each entry of `tfms`)"
    def __init__(self, items=None, tfms=None, tls=None, **kwargs):
        # Use the given `tls` as they are; otherwise build one `TfmdList` over `items` per entry of `tfms`
        # (`[None]` when `tfms` is None), forwarding `kwargs` to each of them
        # NOTE(review): `super().__init__` is not called, so `FilteredBase.__init__` never runs here
        self.tls = L(tls if tls else [TfmdList(items, t, **kwargs) for t in L(ifnone(tfms,[None]))])

    def __getitem__(self, it):
        # Query every `TfmdList` with the same index
        res = tuple([tl[it] for tl in self.tls])
        # A single index gives one tuple; otherwise the per-list results are transposed into a list of tuples
        return res if is_indexer(it) else list(zip(*res))
    
    # Attributes not found on the instance are resolved by `gather_attrs` over `tls`
    def __getattr__(self,k): return gather_attrs(self, k, 'tls')
    # Length is taken from the first `TfmdList`
    def __len__(self): return len(self.tls[0])
    def __iter__(self): return (self[i] for i in range(len(self)))
    def __repr__(self): return coll_repr(self)
    # Each element of `o` is decoded by the `TfmdList` at the same position
    def decode(self, o): return tuple(tl.decode(o_) for o_,tl in zip(o,self.tls))
    # Same type of object, built from subset `i` of every `TfmdList`
    def subset(self, i): return type(self)(tls=L(tl.subset(i) for tl in self.tls))
    # NOTE(review): `FilteredBase._new` forwards to `super()._new`, and no further base is visible for this class;
    # confirm this method is reachable and works
    def _new(self, items, *args, **kwargs): return super()._new(items, tfms=self.tfms, do_setup=False, **kwargs)
    # `filts` and `filt` are both read from the first `TfmdList`
    @property
    def filts(self): return self.tls[0].filts
    @property
    def filt(self): return self.tls[0].tfms.filt
    
    def show(self, o, ctx=None, **kwargs):
        # Thread `ctx` through every `TfmdList.show`, so each element is shown in the context returned by the previous one
        for o_,tl in zip(o,self.tls): ctx = tl.show(o_, ctx=ctx, **kwargs)
        return ctx

    # NOTE(review): the `decode` entry mentions `tuple_tfms` and `i`, neither of which appears in this class
    _docs=dict(
        decode="Compose `decode` of all `tuple_tfms` then all `tfms` on `i`",
        show="Show item `o` in `ctx`",
        databunch="Get a `DataBunch`",
        subset="New `DataSource` that only includes subset `i`")

A `DataSource` creates a tuple from `items` (typically input,target) by applying each list of `Transform` (or `Pipeline`) in `tfms` to them. Note that if `tfms` contains only one list of `tfms`, the items given by `DataSource` will be tuples of one element.

In [None]:
# A single list of transforms gives one-element tuples
items = [1,2,3,4]
dsrc = DataSource(items, [[neg_tfm,int_tfm]])
test_eq(dsrc[0], (-1,))
test_eq(dsrc[0,1,2], [(-1,),(-2,),(-3,)])

In [None]:
# Standardizes with the mean `m` and standard deviation `s` computed from the items in `setups`
class Norm(Transform):
    def encodes(self, o): return (o-self.m)/self.s
    def decodes(self, o): return (o*self.s)+self.m
    def setups(self, items):
        its = tensor(items).float()
        self.m,self.s = its.mean(),its.std()

In [None]:
# Two pipelines give (x, y) pairs; `nrm` is set up on the already negated items
items = [1,2,3,4]
nrm = Norm()
dsrc = DataSource(items, [[neg_tfm,int_tfm], [neg_tfm,nrm]])

x,y = zip(*dsrc)
# The normalized targets have mean 0 and standard deviation 1
test_close(tensor(y).mean(), 0)
test_close(tensor(y).std(), 1)
test_eq(x, (-1,-2,-3,-4,))
test_eq(nrm.m, -2.5)
test_stdout(lambda:show_at(dsrc, 1), '-2')

# Attributes of the transforms are reachable from the `DataSource` and from its subsets
test_eq(dsrc.m, nrm.m)
test_eq(dsrc.norm.m, nrm.m)
test_eq(dsrc.train.norm.m, nrm.m)

In [None]:
#hide
#Check filtering is properly applied
# Redefines `B` and `add1`: this version is restricted to subset 1 through the constructor
class B(Transform):
    def encodes(self, x)->None:  return int(x+1)
    def decodes(self, x):        return Int(x-1)
add1 = B(filt=1)

dsrc = DataSource(items, [neg_tfm, [neg_tfm,int_tfm,add1]], filts=[[3],[0,1,2]])
# `add1` changes the second element only when reading through `valid`
test_eq(dsrc[1], [-2,-2])
test_eq(dsrc.valid[1], [-2,-1])
test_eq(dsrc.valid[[1,1]], [[-2,-1], [-2,-1]])
test_eq(dsrc.train[0], [-4,-4])

In [None]:
#hide
#Test setup works with train attribute
# Redefines `_lbl` to return a plain `str` instead of `Str`
def _lbl(o): return o.split('_')[0]

test_fns = ['dog_0.jpg','cat_0.jpg','cat_2.jpg','cat_1.jpg','kid_1.jpg']
tcat = _Cat()
dsrc = DataSource(test_fns, [[tcat,_lbl]], filts=[[0,1,2], [3,4]])
# 'kid' only occurs in subset 1, so it is absent from the vocab
test_eq(tcat.vocab, ['cat','dog'])
test_eq(dsrc.train, [(1,),(0,),(0,)])
test_eq(dsrc.valid[0], (0,))
test_stdout(lambda: show_at(dsrc.train, 0), "dog")

In [None]:
# With `tfms=[None]` items come back unchanged, wrapped in one-element tuples
inp = [0,1,2,3,4]
dsrc = DataSource(inp, tfms=[None])

test_eq(*dsrc[2], 2)          # Retrieve one item (subset 0 is the default)
test_eq(dsrc[1,2], [(1,),(2,)])    # Retrieve two items by index
mask = [True,False,False,True,False]
test_eq(dsrc[mask], [(0,),(3,)])   # Retrieve two items by mask

In [None]:
# Same checks over a `DataFrame`, reading column `a` and going through `subset(0)`
inp = pd.DataFrame(dict(a=[5,1,2,3,4]))
dsrc = DataSource(inp, tfms=attrgetter('a')).subset(0)
test_eq(*dsrc[2], 2)          # Retrieve one item (subset 0 is the default)
test_eq(dsrc[1,2], [(1,),(2,)])    # Retrieve two items by index
mask = [True,False,False,True,False]
test_eq(dsrc[mask], [(5,),(3,)])   # Retrieve two items by mask

In [None]:
# filts can be indices
dsrc = DataSource(range(5), tfms=[None], filts=[tensor([0,2]), [1,3,4]])

test_eq(dsrc.subset(0), [(0,),(2,)])
test_eq(dsrc.train, [(0,),(2,)])       # Subset 0 is aliased to `train`
test_eq(dsrc.subset(1), [(1,),(3,),(4,)])
test_eq(dsrc.valid, [(1,),(3,),(4,)])     # Subset 1 is aliased to `valid`
test_eq(*dsrc.valid[2], 4)
#assert '[(1,),(3,),(4,)]' in str(dsrc) and '[(0,),(2,)]' in str(dsrc)
# The repr shown below lists all five items, not the individual subsets
dsrc

(#5) [(0,),(1,),(2,),(3,),(4,)]

In [None]:
# filts can be boolean masks (they don't have to cover all items, but must be disjoint)
# Here item 3 belongs to neither subset
filts = [[False,True,True,False,True], [True,False,False,False,False]]
dsrc = DataSource(range(5), tfms=[None], filts=filts)

test_eq(dsrc.train, [(1,),(2,),(4,)])
test_eq(dsrc.valid, [(0,)])

In [None]:
# apply transforms to all items
# Each item is doubled and then incremented
tfm = [[lambda x: x*2,lambda x: x+1]]
filts = [[1,2],[0,3,4]]
dsrc = DataSource(range(5), tfm, filts=filts)
test_eq(dsrc.train,[(3,),(5,)])
test_eq(dsrc.valid,[(1,),(7,),(9,)])
# A boolean mask indexes within the subset
test_eq(dsrc.train[False,True], [(5,)])

In [None]:
# only transform subset 1
# Doubles on encode; halves and wraps in `Str` on decode
class _Tfm(Transform):
    filt=1
    def encodes(self, x): return x*2
    def decodes(self, x): return Str(x//2)

In [None]:
# Only the items read through `valid` are doubled
dsrc = DataSource(range(5), [_Tfm()], filts=[[1,2],[0,3,4]])
test_eq(dsrc.train,[(1,),(2,)])
test_eq(dsrc.valid,[(0,),(6,),(8,)])
test_eq(dsrc.train[False,True], [(2,)])
dsrc

(#5) [(0,),(1,),(2,),(3,),(4,)]

In [None]:
#hide
#Test DataSource pickles
# Round-trip through pickle and compare both subsets with the original
dsrc1 = pickle.loads(pickle.dumps(dsrc))
test_eq(dsrc.train, dsrc1.train)
test_eq(dsrc.valid, dsrc1.valid)

In [None]:
# Two pipelines: the first element is doubled on `valid` only, the second is left untouched by `noop`
dsrc = DataSource(range(5), [_Tfm(),noop], filts=[[1,2],[0,3,4]])
test_eq(dsrc.train,[(1,1),(2,2)])
test_eq(dsrc.valid,[(0,0),(6,3),(8,4)])

In [None]:
# A loader over a `DataSource`: decoded samples are one-element tuples
start = torch.arange(0,50)
tds = DataSource(start, [A()])
tdl = TfmdDL(tds, after_item=NegTfm(), bs=4)
b = tdl.one_batch()
test_eq(tdl.decode_batch(b), ((0,),(1,),(2,),(3,)))
test_stdout(tdl.show_batch, "0\n1\n2\n3")

In [None]:
# only transform subset 1
# Redefines `_Tfm` without a `decodes`
class _Tfm(Transform):
    filt=1
    def encodes(self, x): return x*2

dsrc = DataSource(range(8), [None], filts=[[1,2,5,7],[0,3,4,6]])
# `after_batch` reaches every loader of the bunch, but only the validation batch ends up doubled
dbch = dsrc.databunch(bs=4, after_batch=_Tfm(), shuffle_train=False)
test_eq(dbch.train_dl, [(tensor([1,2,5, 7]),)])
test_eq(dbch.valid_dl, [(tensor([0,6,8,12]),)])

### Methods

In [None]:
# Fixture for the method examples below
items = [1,2,3,4]
dsrc = DataSource(items, [[neg_tfm,int_tfm]])

In [None]:
# Render the documentation entry for `DataSource.decode`
show_doc(DataSource.decode)

<h4 id="DataSource.decode" class="doc_header"><code>DataSource.decode</code><a href="https://github.com/fastai/fastai_dev/tree/master/dev/__main__.py#L17" class="source_link" style="float:right">[source]</a></h4>

> <code>DataSource.decode</code>(**`o`**)

Compose `decode` of all `tuple_tfms` then all `tfms` on `i`

In [None]:
# Item 0 is encoded to -1, and decoding the tuple (-1,) gives back 1
test_eq(*dsrc[0], -1)
test_eq(*dsrc.decode((-1,)), 1)

In [None]:
# Render the documentation entry for `DataSource.show`
show_doc(DataSource.show)

<h4 id="DataSource.show" class="doc_header"><code>DataSource.show</code><a href="https://github.com/fastai/fastai_dev/tree/master/dev/__main__.py#L25" class="source_link" style="float:right">[source]</a></h4>

> <code>DataSource.show</code>(**`o`**, **`ctx`**=*`None`*, **\*\*`kwargs`**)

Show item `o` in `ctx`

In [None]:
# Showing item 1 prints '-2'
test_stdout(lambda:dsrc.show(dsrc[1]), '-2')

## Export -

In [None]:
#hide
from local.notebook.export import notebook2script
# Run the notebook export; the output below lists every converted notebook
notebook2script(all_fs=True)

Converted 00_test.ipynb.
Converted 01_core.ipynb.
Converted 01a_torch_core.ipynb.
Converted 02_script.ipynb.
Converted 03_dataloader.ipynb.
Converted 04_transform.ipynb.
Converted 05_data_core.ipynb.
Converted 06_data_transforms.ipynb.
Converted 07_vision_core.ipynb.
Converted 08_pets_tutorial.ipynb.
Converted 09_vision_augment.ipynb.
Converted 11_layers.ipynb.
Converted 11a_vision_models_xresnet.ipynb.
Converted 12_optimizer.ipynb.
Converted 13_learner.ipynb.
Converted 14_callback_schedule.ipynb.
Converted 15_callback_hook.ipynb.
Converted 16_callback_progress.ipynb.
Converted 17_callback_tracker.ipynb.
Converted 18_callback_fp16.ipynb.
Converted 19_callback_mixup.ipynb.
Converted 20_metrics.ipynb.
Converted 21_tutorial_imagenette.ipynb.
Converted 22_vision_learner.ipynb.
Converted 23_tutorial_transfer_learning.ipynb.
Converted 30_text_core.ipynb.
Converted 31_text_data.ipynb.
Converted 32_text_models_awdlstm.ipynb.
Converted 33_text_models_core.ipynb.
Converted 34_callback_rnn.ipynb.