In [None]:
# default_exp callback.lr_finder

In [None]:
#hide
from nbdev.showdoc import *

# LR Finder
> A duplicate of fast.ai's `lr_find`, except it can restore the dataloader and random state after running.

In [None]:
#export
from fastcore.xtras import is_listy
from fastcore.foundation import patch, docs
from fastcore.basics import tuplify
from fastai.callback.schedule import ParamScheduler, SchedExp, SuggestionMethod
from fastai.callback.core import *
from fastai.torch_core import tensor, get_random_states, set_random_states
from fastai.learner import Learner
from functools import partial
from copy import deepcopy
import os, torch, collections

In [None]:
#export
@docs
class LRFinder(ParamScheduler):
    "Train with an exponentially growing learning rate, optionally restoring dataloaders & random state afterwards"
    def __init__(self, start_lr=1e-7, end_lr=10, num_it=100, stop_div=True, restore_state=True):
        # Clamp the number of iterations to a minimum of 6
        num_it = max(num_it, 6)
        # A list of starting rates gets one exponential schedule per (start, end) pair
        if is_listy(start_lr):
            lr_sched = [SchedExp(lo, hi) for lo, hi in zip(start_lr, end_lr)]
        else:
            lr_sched = SchedExp(start_lr, end_lr)
        self.scheds = {'lr': lr_sched}
        self.num_it = num_it
        self.stop_div = stop_div
        self.restore_state = restore_state

    def before_fit(self):
        super().before_fit()
        if self.restore_state:
            # Snapshot the dataloaders and the random states so `after_fit` can put them back
            self.old_dls, self.states = deepcopy(self.learn.dls), get_random_states()
        # Checkpoint under the name '_tmp'; `after_fit` reloads and deletes it
        self.learn.save('_tmp')
        self.best_loss = float('inf')

    def before_batch(self):
        # Schedule position is the fraction of the `num_it` iterations done so far
        progress = self.train_iter/self.num_it
        self._update_val(progress)

    def after_batch(self):
        super().after_batch()
        loss = self.smooth_loss
        # Keep track of the lowest smoothed loss seen during this run
        if loss < self.best_loss:
            self.best_loss = loss
        # Stop once the loss exceeds four times the best one (only when `stop_div` is set)...
        if self.stop_div and loss > 4*self.best_loss:
            raise CancelFitException()
        # ...or once the requested number of iterations has been reached
        if self.train_iter >= self.num_it:
            raise CancelFitException()

    def before_validate(self):
        raise CancelValidException()

    def after_fit(self):
        # Gradients must be cleared before detaching the optimizer for future fits
        self.learn.opt.zero_grad()
        checkpoint = self.path/self.model_dir/'_tmp.pth'
        if checkpoint.exists():
            self.learn.load('_tmp', with_opt=True)
            os.remove(checkpoint)
        if not self.restore_state:
            return
        # Put back the dataloaders and random states captured in `before_fit`
        self.learn.dls = self.old_dls
        set_random_states(**self.states)

    _docs = {"before_fit": "Initialize container for hyper-parameters and save the model & optimizer, optionally saving dataloader & random state",
             "before_batch": "Set the proper hyper-parameters in the optimizer",
             "after_batch": "Record hyper-parameters of this batch and potentially stop training",
             "after_fit": "Save the hyper-parameters in the recorder if there is one and load the original model & optimizer, optionally restoring dataloader & random state",
             "before_validate": "Skip the validation part of training"}

In [None]:
show_doc(LRFinder.before_fit)

<h4 id="LRFinder.before_fit" class="doc_header"><code>LRFinder.before_fit</code><a href="__main__.py#L11" class="source_link" style="float:right">[source]</a></h4>

> <code>LRFinder.before_fit</code>()

Initialize container for hyper-parameters and save the model & optimizer, optionally saving dataloader & random state

In [None]:
show_doc(LRFinder.before_batch)

<h4 id="LRFinder.before_batch" class="doc_header"><code>LRFinder.before_batch</code><a href="__main__.py#L19" class="source_link" style="float:right">[source]</a></h4>

> <code>LRFinder.before_batch</code>()

Set the proper hyper-parameters in the optimizer

In [None]:
show_doc(LRFinder.after_batch)

<h4 id="LRFinder.after_batch" class="doc_header"><code>LRFinder.after_batch</code><a href="__main__.py#L21" class="source_link" style="float:right">[source]</a></h4>

> <code>LRFinder.after_batch</code>()

Record hyper-parameters of this batch and potentially stop training

In [None]:
show_doc(LRFinder.before_validate)

<h4 id="LRFinder.before_validate" class="doc_header"><code>LRFinder.before_validate</code><a href="__main__.py#L27" class="source_link" style="float:right">[source]</a></h4>

> <code>LRFinder.before_validate</code>()

Skip the validation part of training

In [None]:
show_doc(LRFinder.after_fit)

<h4 id="LRFinder.after_fit" class="doc_header"><code>LRFinder.after_fit</code><a href="__main__.py#L29" class="source_link" style="float:right">[source]</a></h4>

> <code>LRFinder.after_fit</code>()

Save the hyper-parameters in the recorder if there is one and load the original model & optimizer, optionally restoring dataloader & random state

## lr_find

In [None]:
#export
@patch
def lr_find(self:Learner, start_lr=1e-7, end_lr=10, num_it=100, stop_div=True, show_plot=True, suggest_funcs=(SuggestionMethod.Valley), restore_state=True):
    """Run a mock training with `LRFinder` and return the learning rates picked by `suggest_funcs` as a named tuple.

    Use `restore_state` to reset the dataloaders and random state after running.
    Returns `None` when `suggest_funcs` is `None`; a plot is drawn only if `show_plot` is true.
    """
    # Enough epochs to cover `num_it` batches of the training dataloader
    epochs = num_it//len(self.dls.train) + 1
    finder = LRFinder(start_lr=start_lr, end_lr=end_lr, num_it=num_it, stop_div=stop_div, restore_state=restore_state)
    with self.no_logging():
        self.fit(epochs, cbs=finder)

    # Without suggestion functions there is nothing to return: optionally plot and stop here
    if suggest_funcs is None:
        if show_plot: self.recorder.plot_lr_find()
        return None

    # Skip the first `num_it//10` recorded values and the last five
    window = slice(num_it//10, -5)
    lrs = tensor(self.recorder.lrs[window])
    losses = tensor(self.recorder.losses[window])

    # Truncate both series just before the first NaN loss, if any
    nan_positions = torch.nonzero(torch.isnan(losses.view(-1)))
    if len(nan_positions) > 0:
        first_nan = min(nan_positions)
        lrs, losses = lrs[:first_nan], losses[:first_nan]

    names, results = [], []
    for suggest in tuplify(suggest_funcs):
        # A `partial` has no `__name__`, so take the name of the function it wraps
        named = suggest.func if isinstance(suggest, partial) else suggest
        names.append(named.__name__)
        results.append(suggest(lrs, losses, num_it))

    SuggestedLRs = collections.namedtuple('SuggestedLRs', names)
    # Every suggestion is a (learning rate, plot point) pair
    chosen_lrs = [lr for lr, _ in results]
    points = [pnt for _, pnt in results]
    if show_plot:
        self.recorder.plot_lr_find(suggestions=points, nms=names)
    return SuggestedLRs(*chosen_lrs)

With `restore_state=False`, running `lr_find` advances both the random state and the dataloaders, behaving the same way as [fast.ai's](https://docs.fast.ai/callback.schedule.html#Learner.lr_find) `lr_find`. This means the following two code blocks:

In [None]:
#slow
# Build the dataloaders and the learner.
# NOTE(review): `no_random` presumably fixes the random seeds for the enclosed code — confirm against its definition.
with no_random():
    dls = get_dls()
    learn = Learner(dls, xresnet18(n_out=dls.c))

# Run the LR finder with state restoration disabled, then train for two epochs.
with no_random():
    learn.lr_find(restore_state=False)
    learn.fit_one_cycle(2, 3e-3)

In [None]:
#slow
# Build the dataloaders and the learner.
# NOTE(review): `no_random` presumably fixes the random seeds for the enclosed code — confirm against its definition.
with no_random():
    dls = get_dls()
    learn = Learner(dls, xresnet18(n_out=dls.c))

# Train for two epochs directly, with no `lr_find` call beforehand.
with no_random():
    learn.fit_one_cycle(2, 3e-3)

will result in different training output.

While the default of `restore_state=True` prevents this from occurring, it has the potential downside of showing less variance in learning rate results, as every call to `lr_find` will run over the same first `num_it` items using the same random state. Without `no_random` set, most of the variation appears to come from CUDA not being set in deterministic mode.

In [None]:
#hide
#slow
from nbdev.export import notebook2script; notebook2script()

Converted augment.tensor_item_tfm.ipynb.
Converted callback.cutmixup.ipynb.
Converted callback.lr_finder.ipynb.
Converted index.ipynb.
Converted schedulers.fit_flat_varied.ipynb.
