# Test Mishify

Replaces RELU with Mish for standard RESNET and XRESNET models

In [1]:
#from fastai import *
from fastai.vision import *
from fastai.callbacks import *

In [2]:
from train import mishify

In [3]:
def get_data(size, woof, bs, workers=None):
    if   size<=128: path = URLs.IMAGEWOOF if woof else URLs.IMAGENETTE
    elif size<=224: path = URLs.IMAGEWOOF_320 if woof else URLs.IMAGENETTE_320
    else          : path = URLs.IMAGEWOOF     if woof else URLs.IMAGENETTE
    path = untar_data(path)

    n_gpus = num_distrib() or 1
    if workers is None: workers = min(8, num_cpus()//n_gpus)

    return (ImageList.from_folder(path).split_by_folder(valid='val')
            .label_from_folder().transform(([flip_lr(p=0.5)], []), size=size)
            .databunch(bs=bs, num_workers=workers)
            .presize(size, scale=(0.35,1))
            .normalize(imagenet_stats))

In [4]:
data = get_data(128, True, 8)

# Test using RESNET

In [5]:
selected_model = models.resnet18
# selected_model = models.resnet34
# selected_model = models.resnet50
# selected_model = models.resnet101
#selected_model = models.resnet152

learn = Learner(data, selected_model(), wd=1e-2, 
               bn_wd=False, true_wd=True, loss_func=LabelSmoothingCrossEntropy(),
               metrics=[accuracy])
learn.to_fp16()
my_model = learn.model
mishify(my_model, verbose=False)

In [6]:
my_model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): Mish()
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): Mish()
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): Mish()
      (conv2): Conv2d(64, 64, kernel_s

In [7]:
print('Performance using Mish')
learn.fit(1)

Performance using Mish


epoch,train_loss,valid_loss,accuracy,time
0,2.91485,2.933844,0.23,00:28


In [8]:
learn = Learner(data, selected_model(), wd=1e-2, 
               bn_wd=False, true_wd=True, loss_func=LabelSmoothingCrossEntropy(),
               metrics=[accuracy])
learn.to_fp16()
print('Performance using RELU')
learn.fit(1)

Performance using RELU


epoch,train_loss,valid_loss,accuracy,time
0,2.929266,2.93437,0.23,00:26


# Test using XRESNET

In [9]:
selected_model = partial(models.xresnet18, c_out=data.c)
# selected_model = partial(models.xresnet34, c_out=data.c)
# selected_model = partial(models.xresnet50, c_out=data.c)
# selected_model = partial(models.xresnet101, c_out=data.c)
# selected_model = partial(models.xresnet152, c_out=data.c)

learn = Learner(data, selected_model(), wd=1e-2, 
               bn_wd=False, true_wd=True, loss_func=LabelSmoothingCrossEntropy(),
               metrics=[accuracy])
learn.to_fp16()
my_model = learn.model
mishify(my_model)

In [10]:
my_model[0][2]

Mish()

In [11]:
my_model

XResNet(
  (0): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Mish()
  )
  (1): Sequential(
    (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Mish()
  )
  (2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): Mish()
  )
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): ResBlock(
      (convs): Sequential(
        (0): Sequential(
          (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running

In [12]:
print('Performance using Mish')
learn.fit(1)

Performance using Mish


epoch,train_loss,valid_loss,accuracy,time
0,2.086471,2.06156,0.238,00:30


In [13]:
learn = Learner(data, selected_model(), wd=1e-2, 
               bn_wd=False, true_wd=True, loss_func=LabelSmoothingCrossEntropy(),
               metrics=[accuracy])
learn.to_fp16()
print('Performance using RELU')
learn.fit(1)

Performance using RELU


epoch,train_loss,valid_loss,accuracy,time
0,1.971358,2.092928,0.274,00:29
