In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
#export
from exp.nb_07 import *

In [3]:
# Load the four data tensors and pass both input splits through normalize_to.
x_train, y_train, x_valid, y_valid = get_data()
x_train, x_valid = normalize_to(x_train, x_valid)

# Wrap each (inputs, targets) pair in a Dataset.
train_ds = Dataset(x_train, y_train)
valid_ds = Dataset(x_valid, y_valid)

# bs is handed to get_dls below; nh is not referenced again in the visible cells.
nh = 50
bs = 512
# One more than the largest label value found in the training targets.
c = y_train.max().item() + 1
loss_func = F.cross_entropy

data = DataBunch(*get_dls(train_ds, valid_ds, bs), c)

In [4]:
# Transform applied to each batch's inputs through BatchTransformXCallback.
mnist_view = view_tfm(1,28,28)

# Callback classes / factories handed to get_learn_run via `cbs=`.
cbfs = [
    Recorder,
    partial(AvgStatsCallback, accuracy),
    CudaCallback,
    partial(BatchTransformXCallback, mnist_view),
]

In [5]:
# Output channel count of each successive conv layer (see the printed
# ConvLayer list further down: 1->8, 8->16, 16->32, 32->64, 64->64).
nfs = [8,16,32,64,64]

In [6]:
class ConvLayer(nn.Module):
    """A Conv2d followed by a GeneralRelu, exposing `bias` and `weight`.

    `bias` is not the convolution's own bias: it is the negated `sub`
    attribute of the activation, so assigning to `bias` changes the value
    stored in `self.relu.sub`. `weight` is the convolution's weight tensor.
    """

    def __init__(self, ni, nf, ks=3, stride=2, sub=0., **kwargs):
        """Build the conv (padding `ks//2`) and the activation.

        `sub` and any extra keyword arguments are forwarded to GeneralRelu.
        """
        super().__init__()
        self.conv = nn.Conv2d(ni, nf, ks, padding=ks//2, stride=stride, bias=True)
        self.relu = GeneralRelu(sub=sub, **kwargs)

    def forward(self, x):
        """Apply the convolution, then the activation."""
        conv_out = self.conv(x)
        return self.relu(conv_out)

    @property
    def bias(self):
        """Negation of the activation's `sub` attribute."""
        return -self.relu.sub

    @bias.setter
    def bias(self, value):
        # Stored with the sign flipped so that the getter returns `value`.
        self.relu.sub = -value

    @property
    def weight(self):
        """Weight tensor of the wrapped convolution."""
        return self.conv.weight

In [7]:
# Build a (learner, runner) pair from the filter sizes, the data, ConvLayer
# and the callback list defined above.
# NOTE(review): the positional 0.6 is presumably the learning rate — confirm
# against get_learn_run's signature.
learn,run = get_learn_run(nfs, data, 0.6, ConvLayer, cbs=cbfs)

In [8]:
# Baseline run before any LSUV adjustment; the output below shows two
# train/valid result pairs.
run.fit(2, learn)

train: [2.3449075, tensor(0.1581, device='cuda:0')]
valid: [2.3429630859375, tensor(0.2208, device='cuda:0')]
train: [1.68988890625, tensor(0.4009, device='cuda:0')]
valid: [0.53120185546875, tensor(0.8373, device='cuda:0')]


In [9]:
# Call get_learn_run again with the same arguments and rebind learn/run.
# NOTE(review): presumably this yields a freshly initialised model so the
# steps below do not start from the weights trained above — confirm.
learn,run = get_learn_run(nfs, data, 0.6, ConvLayer, cbs=cbfs)

In [10]:
#export
def get_batch(dl, run):
    """Pull the first batch from `dl` and pass it through `run`'s 'begin_batch' stage.

    The batch is stored on `run.xb` / `run.yb`, every callback in `run.cbs`
    is attached to `run` via `set_runner`, and `run('begin_batch')` is
    invoked. The values of `run.xb` and `run.yb` after that call are returned.
    """
    first_batch = next(iter(dl))
    run.xb, run.yb = first_batch

    for callback in run.cbs:
        callback.set_runner(run)

    # Callbacks may replace run.xb / run.yb here, so read them back afterwards.
    run('begin_batch')
    return run.xb, run.yb

In [11]:
# First training batch, after the runner's 'begin_batch' stage has been applied.
xb,yb = get_batch(data.train_dl, run)

In [12]:
#export
def find_modules(m, cond):
    """Collect the modules under `m` (including `m` itself) for which `cond` is true.

    The search walks `m.children()` recursively and stops descending as soon
    as a module satisfies `cond`, so nothing nested inside a match is
    returned. Results are listed in traversal order; an empty list means no
    match.
    """
    if cond(m):
        return [m]

    found = []
    for child in m.children():
        found.extend(find_modules(child, cond))
    return found

def is_lin_layer(l):
    """Return True if `l` is an nn.Conv1d, nn.Conv2d, nn.Conv3d, nn.Linear or nn.ReLU.

    Note that nn.ReLU is accepted as well, despite the function's name.
    """
    return isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Conv3d, nn.Linear, nn.ReLU))

In [13]:
# Every ConvLayer found in the model (find_modules does not look inside a
# module that already matched).
mods = find_modules(learn.model, lambda o: isinstance(o,ConvLayer))

In [14]:
# Show the collected layers.
mods

[ConvLayer(
   (conv): Conv2d(1, 8, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
   (relu): GeneralRelu()
 ), ConvLayer(
   (conv): Conv2d(8, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
   (relu): GeneralRelu()
 ), ConvLayer(
   (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
   (relu): GeneralRelu()
 ), ConvLayer(
   (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
   (relu): GeneralRelu()
 ), ConvLayer(
   (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
   (relu): GeneralRelu()
 )]

In [15]:
#export
def append_stat(hook, mod, inp, outp):
    """Hook callback that records the mean and std of a module's output.

    The exported `lsuv_module` passes this function to `Hook`, so this cell
    carries the `#export` marker too; without it the generated module would
    reference an undefined name.

    Args:
        hook: object on which the statistics are stored.
        mod: the hooked module (unused).
        inp: the module's input (unused).
        outp: the module's output tensor.

    Side effects:
        Overwrites `hook.mean` and `hook.std` with Python floats computed over
        all elements of `outp`.
    """
    d = outp.data
    hook.mean,hook.std = d.mean().item(),d.std().item()

In [16]:
# Move the model to the GPU. lsuv_module below runs its forward passes
# through this global name `mdl`.
mdl = learn.model.cuda()

In [17]:
# Hook every collected layer, run one forward pass on the batch, then print
# each layer's recorded output mean and std.
with Hooks(mods, append_stat) as hooks:
    mdl(xb)
    for hook in hooks:
        print(hook.mean, hook.std)

0.49411851167678833 1.0664690732955933
0.5908210873603821 1.125556230545044
0.5269044637680054 1.1062132120132446
0.5653814077377319 0.9796994924545288
0.2989712655544281 0.4869980812072754


In [18]:
#export
def lsuv_module(m, xb, model=None):
    """Adjust layer `m` until its output on batch `xb` has mean ~0, then std ~1.

    A hook on `m` records the mean and std of its output on every forward
    pass. First `m.bias` is decreased by the observed mean until
    `abs(mean) <= 1e-3`; then `m.weight.data` is divided by the observed std
    until `abs(std - 1) <= 1e-3`. The std loop runs after the mean loop and
    does not re-centre, so the returned mean can drift away from zero again.

    Args:
        m: layer with an assignable `bias` and a `weight` tensor.
        xb: input batch fed through the whole model on every iteration.
        model: the network containing `m`. When omitted, the global `mdl` is
            used, which keeps existing `lsuv_module(m, xb)` calls working.

    Returns:
        `(mean, std)` of `m`'s output as recorded on the last forward pass.
    """
    if model is None: model = mdl
    h = Hook(m, append_stat)

    try:
        # `model(xb) is not None` exists only to trigger a forward pass, which
        # refreshes h.mean / h.std before each comparison.
        while model(xb) is not None and abs(h.mean)  > 1e-3: m.bias -= h.mean
        while model(xb) is not None and abs(h.std-1) > 1e-3: m.weight.data /= h.std
    finally:
        # Detach the hook even if a forward pass raises.
        h.remove()

    return h.mean,h.std


In [19]:
# Run LSUV on each collected layer in order and print the (mean, std) it returns.
for m in mods:
    print(lsuv_module(m, xb))

(-0.030796591192483902, 1.0)
(0.04239000007510185, 1.0)
(0.12393206357955933, 1.0)
(0.10479597747325897, 0.9999999403953552)
(0.3084784746170044, 0.9999999403953552)


In [20]:
%time run.fit(2, learn)

train: [0.473671328125, tensor(0.8441, device='cuda:0')]
valid: [0.220664501953125, tensor(0.9312, device='cuda:0')]
train: [0.117792373046875, tensor(0.9640, device='cuda:0')]
valid: [0.09378499145507813, tensor(0.9722, device='cuda:0')]
CPU times: user 1.78 s, sys: 127 ms, total: 1.91 s
Wall time: 1.85 s


In [22]:
!python notebook2script.py 07a_lsuv.ipynb

Converted 07a_lsuv.ipynb to exp/nb_07a.py
