# [HarDNet: A Low Memory Traffic Network](https://arxiv.org/pdf/1909.00948.pdf)

*    [Official Pytorch Implementation](https://github.com/PingoLH/Pytorch-HarDNet)




### Setup [wandb](https://www.wandb.com/)


> Wandb will be used for all visualizations and hyperparameter tracking.


In [None]:
!pip install --upgrade --quiet wandb
!wandb login ed0f2f2088e4953ef9392d1a2141681a565aec45

In [None]:
import wandb
# Start the wandb run that every later wandb.log / wandb.config call in this
# notebook reports to.
wandb.init(project="hardnet", name="hardnet_classification_2")

In [None]:
!pip install --quiet pytorch_lightning

---



### Module Imports

In [None]:
## Imports
import argparse
import os
import random
import shutil
import time
import warnings
import sys
import numpy as np
from statistics import mean

import torch
import torchvision
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import torch.nn.init as init
from pytorch_lightning.metrics.functional import accuracy


## Data Preparation

1. Download  
2. Unzip / Format 
3. Pytorch Dataloader



## Train/Validate Models

In [None]:
## Hyperparameters

# Available architectures; the first entry is the one selected for this run.
model_names = ['hardnet39ds', 'hardnet68ds', 'hardnet68', 'hardnet85']
model_arch = model_names[0]
lr = 0.001
# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Everything recorded in the wandb run configuration.
hyperparams = {
    "workers": 8,
    "batch_size": 64,
    "max_epochs": 100,
    "start_epoch": 0,
    "learning_rate": lr,
    "momentum": 0.9,
    "weight_decay": 6e-5,
    "dropout": 0.5,
    "model_arch": model_arch,
    # names ending in "ds" are the depthwise-separable variants
    "depth_wise": 'ds' in model_arch,
    "pretrained": False,
    "dataset": "pokemon",
    "device": device,
}

wandb.config.update(hyperparams)

# Root folder of the image dataset and a short alias for the run config.
data = "drive/My Drive/training/pokemon_data/"
wc = wandb.config


In [None]:
class ImbalancedDatasetSampler(torch.utils.data.sampler.Sampler):
    """Sample indices so that every class is drawn with equal probability.

    Each candidate index gets the weight ``1 / (number of candidates sharing
    its label)``; indices are then drawn with replacement via
    ``torch.multinomial``.

    Arguments:
        dataset: dataset the labels are read from
        indices (list, optional): candidate indices; defaults to all of
            ``range(len(dataset))``
        num_samples (int, optional): number of samples to draw per iteration;
            defaults to ``len(indices)``
        callback_get_label (callable, optional): ``f(dataset, idx) -> label``.
            When given it takes precedence over the built-in label lookups.
    """

    def __init__(self, dataset, indices=None, num_samples=None, callback_get_label=None):
        # if indices is not provided, all elements of the dataset are considered
        self.indices = list(range(len(dataset))) if indices is None else indices

        # custom label lookup (must be set before _get_label is used below)
        self.callback_get_label = callback_get_label

        # if num_samples is not provided, draw `len(indices)` samples per iteration
        self.num_samples = len(self.indices) if num_samples is None else num_samples

        # look every label up once and reuse it for both counting and weighting
        labels = [self._get_label(dataset, idx) for idx in self.indices]
        label_to_count = {}
        for label in labels:
            label_to_count[label] = label_to_count.get(label, 0) + 1

        # weight for each sample: inverse frequency of its class
        self.weights = torch.DoubleTensor(
            [1.0 / label_to_count[label] for label in labels])

    def _get_label(self, dataset, idx):
        """Return the class label of element ``idx`` of ``dataset``.

        Raises:
            NotImplementedError: for dataset types without a built-in lookup
                when no ``callback_get_label`` was supplied.
        """
        # an explicit callback always wins over the built-in lookups
        if self.callback_get_label:
            return self.callback_get_label(dataset, idx)
        if isinstance(dataset, torch.utils.data.Subset):
            # idx is relative to the subset: translate it to the parent
            # dataset's index before reading the label
            return dataset.dataset.imgs[dataset.indices[idx]][1]
        if isinstance(dataset, torchvision.datasets.MNIST):
            # `targets` is the current name of the deprecated `train_labels`
            return dataset.targets[idx].item()
        if isinstance(dataset, torchvision.datasets.ImageFolder):
            return dataset.imgs[idx][1]
        raise NotImplementedError

    def __iter__(self):
        drawn = torch.multinomial(
            self.weights, self.num_samples, replacement=True)
        return (self.indices[i] for i in drawn.tolist())

    def __len__(self):
        return self.num_samples

In [None]:
def make_weights_for_balanced_classes(images, nclasses):
    """Return one sampling weight per image, inversely proportional to class size.

    Arguments:
        images: sequence of items whose element ``[1]`` is the integer class
            index (e.g. ``ImageFolder.imgs``)
        nclasses (int): total number of classes

    Returns:
        list of float: ``N / count[class]`` for every image, where ``N`` is the
        number of images. Classes without any image are given weight 0 instead
        of triggering a division by zero; an empty ``images`` yields ``[]``.
    """
    count = [0] * nclasses
    for item in images:
        count[item[1]] += 1

    total = float(sum(count))
    # a class with no samples never gets indexed below; 0.0 is a placeholder
    weight_per_class = [total / float(c) if c else 0.0 for c in count]
    return [weight_per_class[item[1]] for item in images]

In [None]:
# Debugging hooks: after each batch they hold the most recent targets (ki)
# and model outputs (kj) so they can be inspected from another cell.
ki = None
kj = None
def train(train_loader, model, criterion, optimizer, epoch):
    """
    Train an epoch

    Runs one optimisation step per batch of ``train_loader`` and logs the loss
    and top-1 accuracy to wandb, per iteration and averaged over the epoch.
    Relies on the notebook-level ``device``, ``accuracy`` and ``wandb`` names.

    Arguments:
        train_loader: iterable yielding ``(input, target)`` batches
        model: network to optimise (switched to training mode)
        criterion: loss function called as ``criterion(output, target)``
        optimizer: optimizer holding the parameters of ``model``
        epoch: index of the current epoch (not used inside the loop)
    """
    global ki, kj
    model.train()
    epoch_loss = []
    epoch_accuracy = []

    for images, target in train_loader:
        images = images.to(device)
        target = target.to(device)

        output = model(images)
        ki = target
        kj = output
        loss = criterion(output, target)
        epoch_loss.append(loss.item())

        acc1, _acc5 = accuracy(output, target, topk=(1, 5))
        epoch_accuracy.append(acc1.item())

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        wandb.log({"train_loss_per_iteration": loss.item(),
                   "train_accuracy_per_iteration": acc1.item()})
        # NOTE: the debug `break` that used to end the epoch after the first
        # batch was removed so that the whole loader is consumed.

    # mean() raises on an empty sequence, so skip the summary for an empty loader
    if epoch_loss:
        wandb.log({"train_loss_per_epoch": mean(epoch_loss),
                   "train_accuracy_per_epoch": mean(epoch_accuracy)})

        

In [None]:
def validate(val_loader, model, criterion):
    """Evaluate ``model`` on ``val_loader`` and return the mean top-1 accuracy.

    The model is put in eval mode and gradients are disabled. Loss and top-1
    accuracy are logged to wandb for every batch and, averaged, once at the
    end. Relies on the notebook-level ``device``, ``accuracy`` and ``wandb``.
    """
    model.eval()
    losses = []
    top1_scores = []

    with torch.no_grad():
        for batch, labels in val_loader:
            batch = batch.to(device)
            labels = labels.to(device)

            # forward pass and loss
            logits = model(batch)
            batch_loss = criterion(logits, labels).item()
            losses.append(batch_loss)

            # top-1 accuracy of this batch (top-5 is computed but unused)
            top1, _top5 = accuracy(logits, labels, topk=(1, 5))
            batch_acc = top1.item()
            top1_scores.append(batch_acc)

            wandb.log({"val_loss_per_iteration": batch_loss,
                       "val_accuracy_per_iteration": batch_acc})

    mean_accuracy = mean(top1_scores)
    wandb.log({"val_loss_per_epoch": mean(losses),
               "val_accuracy_per_epoch": mean_accuracy})

    return mean_accuracy

In [None]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar',
                    best_filename='model_best.pth.tar'):
    """Serialize ``state`` to ``filename`` and optionally keep it as the best model.

    Arguments:
        state: object passed to ``torch.save``
        is_best (bool): when true, the saved file is also copied to
            ``best_filename``
        filename (str): destination of the checkpoint
        best_filename (str): destination of the copy made when ``is_best`` is
            true; the default keeps the previously hard-coded path
    """
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, best_filename)

In [None]:
def adjust_learning_rate(optimizer, epoch, lr, max_epochs=None):
    """Set the cosine-decayed learning rate for ``epoch`` on ``optimizer``.

    The rate is ``0.5 * lr * (1 + cos(pi * epoch / max_epochs))``.

    Arguments:
        optimizer: object whose ``param_groups`` entries get their ``'lr'`` set
        epoch (int): current epoch index
        lr (float): the *base* (initial) learning rate. Passing the value
            returned by a previous call makes the decay compound.
        max_epochs (int, optional): length of the schedule; defaults to the
            notebook-level ``wc.max_epochs``

    Returns:
        The learning rate that was applied.
    """
    if max_epochs is None:
        max_epochs = wc.max_epochs
    # Cosine learning rate decay
    lr = 0.5 * lr * (1 + np.cos(np.pi * epoch / max_epochs))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr

In [None]:
def accuracy(output, target, topk=(1,)):
    """Compute the top-k accuracy (in percent) for each k in ``topk``.

    Arguments:
        output: score tensor; the k highest entries along dim 1 are taken as
            the predictions of each row
        target: tensor of true class indices, one per row of ``output``
        topk (tuple of int): the values of k to evaluate

    Returns:
        list of 1-element tensors, one per entry of ``topk``, each holding the
        percentage of rows whose target is among the k best predictions.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # reshape (not view): the slice of the transposed tensor is not
            # guaranteed to be contiguous, and view() raises in that case
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [None]:
def weights_init(m):
    """Re-initialise the entries of ``m.state_dict()`` in place, by key name.

    * keys ending in ``weight`` that contain ``conv``: Xavier-normal init
    * keys ending in ``weight`` that contain ``bn``: filled with 1
    * keys ending in ``bias``: filled with 0

    All other entries are left untouched.
    """
    state = m.state_dict()
    for name, tensor in state.items():
        leaf = name.rsplit('.', 1)[-1]
        if leaf == 'bias':
            tensor[...] = 0
            continue
        if leaf != 'weight':
            continue
        if 'conv' in name:
            init.xavier_normal_(tensor)
        if 'bn' in name:
            tensor[...] = 1

## HarDNet Network Definitions

In [None]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F

class Flatten(nn.Module):
    """Collapse every dimension after the first into a single one."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        leading = x.data.size(0)
        return x.view(leading, -1)



class CombConvLayer(nn.Sequential):
    """A ``ConvLayer`` (``layer1``) followed by a ``DWConvLayer`` (``layer2``).

    ``kernel`` is forwarded to the ConvLayer and ``stride`` to the
    DWConvLayer; ``dropout`` and ``bias`` are accepted but not used.
    """

    def __init__(self, in_channels, out_channels, kernel=1, stride=1, dropout=0.1, bias=False):
        super().__init__()
        pointwise = ConvLayer(in_channels, out_channels, kernel)
        depthwise = DWConvLayer(out_channels, out_channels, stride=stride)
        self.add_module('layer1', pointwise)
        self.add_module('layer2', depthwise)

    def forward(self, x):
        # plain sequential execution of layer1 then layer2
        return super().forward(x)

class DWConvLayer(nn.Sequential):
    """3x3 convolution with one group per input channel, then batch norm.

    The channel count of both sub-modules is ``in_channels``;
    ``out_channels`` is accepted but does not influence the layer.
    """

    def __init__(self, in_channels, out_channels,  stride=1,  bias=False):
        super().__init__()
        channels = in_channels

        depthwise = nn.Conv2d(channels, channels, kernel_size=3,
                              stride=stride, padding=1, groups=channels,
                              bias=bias)
        self.add_module('dwconv', depthwise)
        self.add_module('norm', nn.BatchNorm2d(channels))

    def forward(self, x):
        # plain sequential execution of dwconv then norm
        return super().forward(x)

class ConvLayer(nn.Sequential):
    """Convolution (``conv``), batch norm (``norm``) and in-place ReLU6 (``relu``).

    The convolution is padded with ``kernel // 2``. ``dropout`` is accepted
    but not used.
    """

    def __init__(self, in_channels, out_channels, kernel=3, stride=1, dropout=0.1, bias=False):
        super().__init__()
        conv = nn.Conv2d(in_channels, out_channels, kernel_size=kernel,
                         stride=stride, padding=kernel // 2, groups=1,
                         bias=bias)
        self.add_module('conv', conv)
        self.add_module('norm', nn.BatchNorm2d(out_channels))
        self.add_module('relu', nn.ReLU6(True))

    def forward(self, x):
        # plain sequential execution of conv, norm and relu
        return super().forward(x)


class HarDBlock(nn.Module):
    """Block of conv layers wired by power-of-two links.

    Layer ``k`` (1-based) takes as input the concatenation of the outputs of
    layers ``k - 2**i`` for every ``i`` with ``k % 2**i == 0``; layer 0 is the
    block input. The block output concatenates the odd-numbered layers, the
    last layer and, when ``keepBase`` is set, the block input.
    """

    def get_link(self, layer, base_ch, growth_rate, grmul):
        """Return ``(out_channels, in_channels, link)`` for layer ``layer``.

        ``link`` lists the source layers, ``in_channels`` is the sum of their
        output widths, and ``out_channels`` is ``growth_rate`` multiplied by
        ``grmul`` once per link beyond the first, truncated to an even number.
        Layer 0 is the block input with ``base_ch`` channels and no links.
        """
        if layer == 0:
            return base_ch, 0, []

        link = []
        out_channels = growth_rate
        for power in range(10):
            step = 2 ** power
            if layer % step != 0:
                continue
            link.append(layer - step)
            if power > 0:
                out_channels *= grmul
        out_channels = int(int(out_channels + 1) / 2) * 2

        in_channels = sum(
            self.get_link(src, base_ch, growth_rate, grmul)[0] for src in link)
        return out_channels, in_channels, link

    def get_out_ch(self):
        """Return the channel count of the block output."""
        return self.out_channels

    def __init__(self, in_channels, growth_rate, grmul, n_layers, keepBase=False, residual_out=False, dwconv=False):
        super().__init__()
        self.keepBase = keepBase
        self.links = []
        self.out_channels = 0

        # depthwise-separable blocks use CombConvLayer, the others ConvLayer
        make_layer = CombConvLayer if dwconv else ConvLayer
        built = []
        for idx in range(n_layers):
            outch, inch, link = self.get_link(idx + 1, in_channels, growth_rate, grmul)
            self.links.append(link)
            built.append(make_layer(inch, outch))

            # these layers are the ones forward() concatenates into the output
            if idx % 2 == 0 or idx == n_layers - 1:
                self.out_channels += outch
        self.layers = nn.ModuleList(built)

    def forward(self, x):
        outputs = [x]

        for sources, layer in zip(self.links, self.layers):
            inputs = [outputs[src] for src in sources]
            x = torch.cat(inputs, 1) if len(inputs) > 1 else inputs[0]
            outputs.append(layer(x))

        last = len(outputs) - 1
        kept = [
            tensor for pos, tensor in enumerate(outputs)
            if (pos == 0 and self.keepBase) or pos == last or pos % 2 == 1
        ]
        return torch.cat(kept, 1)
        
        
        
        
class HarDNet(nn.Module):
    """HarDNet classifier: stem, HarDBlocks with 1x1 transitions, pooled linear head.

    Arguments:
        depth_wise (bool): build the depthwise-separable variant (1x1 second
            stem conv, DWConv downsampling instead of max pooling)
        arch (int): 85 or 39 select those layouts; every other value uses the
            HarDNet68 layout
        pretrained (bool): load the published pretrained weights
        weight_path (str): prefix of the local weight file, only used when
            ``torch.hub`` is unavailable
        num_outputs (int): size of the final linear layer

    Raises:
        FileNotFoundError: ``pretrained`` is set, ``torch.hub`` is unavailable
            and the local weight file does not exist.
        RuntimeError: the pretrained weights do not match the network outside
            of the classifier head.
    """

    def __init__(self, depth_wise=False, arch=85, pretrained=True, weight_path='',
                 num_outputs=150):
        super().__init__()
        first_ch = [32, 64]
        second_kernel = 3
        max_pool = True
        grmul = 1.7
        drop_rate = 0.1

        # HarDNet68 (also the fallback for unrecognised `arch` values)
        ch_list = [128, 256, 320, 640, 1024]
        gr = [14, 16, 20, 40, 160]
        n_layers = [8, 16, 16, 16, 4]
        downSamp = [1, 0, 1, 1, 0]

        if arch == 85:
            # HarDNet85
            first_ch = [48, 96]
            ch_list = [192, 256, 320, 480, 720, 1280]
            gr = [24, 24, 28, 36, 48, 256]
            n_layers = [8, 16, 16, 16, 16, 4]
            downSamp = [1, 0, 1, 0, 1, 0]
            drop_rate = 0.2
        elif arch == 39:
            # HarDNet39
            first_ch = [24, 48]
            ch_list = [96, 320, 640, 1024]
            grmul = 1.6
            gr = [16, 20, 64, 160]
            n_layers = [4, 16, 8, 4]
            downSamp = [1, 1, 1, 0]

        if depth_wise:
            second_kernel = 1
            max_pool = False
            drop_rate = 0.05

        blks = len(n_layers)
        self.base = nn.ModuleList([])

        # First Layer: Standard Conv3x3, Stride=2
        self.base.append(
            ConvLayer(in_channels=3, out_channels=first_ch[0], kernel=3,
                      stride=2, bias=False))

        # Second Layer
        self.base.append(ConvLayer(first_ch[0], first_ch[1], kernel=second_kernel))

        # Maxpooling or DWConv3x3 downsampling
        if max_pool:
            self.base.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
        else:
            self.base.append(DWConvLayer(first_ch[1], first_ch[1], stride=2))

        # Build all HarDNet blocks; the order of self.base entries determines
        # the state_dict keys, so it must not change.
        ch = first_ch[1]
        for i in range(blks):
            blk = HarDBlock(ch, gr[i], grmul, n_layers[i], dwconv=depth_wise)
            ch = blk.get_out_ch()
            self.base.append(blk)

            if i == blks - 1 and arch == 85:
                self.base.append(nn.Dropout(0.1))

            # 1x1 transition to the stage width
            self.base.append(ConvLayer(ch, ch_list[i], kernel=1))
            ch = ch_list[i]
            if downSamp[i] == 1:
                if max_pool:
                    self.base.append(nn.MaxPool2d(kernel_size=2, stride=2))
                else:
                    self.base.append(DWConvLayer(ch, ch, stride=2))

        # Classifier head: global average pool -> flatten -> dropout -> linear
        ch = ch_list[blks - 1]
        self.base.append(
            nn.Sequential(
                nn.AdaptiveAvgPool2d((1, 1)),
                Flatten(),
                nn.Dropout(drop_rate),
                nn.Linear(ch, num_outputs)))

        if pretrained:
            self._load_pretrained(depth_wise, arch, weight_path)

    def _load_pretrained(self, depth_wise, arch, weight_path):
        """Fetch the pretrained weights (hub URL or local file) and load them."""
        if hasattr(torch, 'hub'):
            if arch == 68 and not depth_wise:
                checkpoint = 'https://ping-chao.com/hardnet/hardnet68-5d684880.pth'
            elif arch == 85 and not depth_wise:
                checkpoint = 'https://ping-chao.com/hardnet/hardnet85-a28faa00.pth'
            elif arch == 68 and depth_wise:
                checkpoint = 'https://ping-chao.com/hardnet/hardnet68ds-632474d2.pth'
            else:
                checkpoint = 'https://ping-chao.com/hardnet/hardnet39ds-0e6c6fa9.pth'
            weights = torch.hub.load_state_dict_from_url(checkpoint, progress=False)
        else:
            postfix = 'ds' if depth_wise else ''
            weight_file = '%shardnet%d%s.pth' % (weight_path, arch, postfix)
            if not os.path.isfile(weight_file):
                # exit(0) would report success to the caller; fail loudly instead
                raise FileNotFoundError('%s is not found' % weight_file)
            weights = torch.load(weight_file)

        self._load_weights(weights)

        postfix = 'DS' if depth_wise else ''
        print('ImageNet pretrained weights for HarDNet%d%s is loaded' % (arch, postfix))

    def _load_weights(self, weights):
        """Load ``weights``, tolerating a classifier head of a different size."""
        head_prefix = 'base.%d.' % (len(self.base) - 1)
        own = self.state_dict()
        # Head tensors whose shape differs from ours (e.g. a checkpoint trained
        # with another number of classes) cannot be loaded; everything else must.
        skipped = [key for key, value in weights.items()
                   if key.startswith(head_prefix)
                   and key in own and value.shape != own[key].shape]
        if not skipped:
            self.load_state_dict(weights)
            return

        kept = {key: value for key, value in weights.items() if key not in skipped}
        result = self.load_state_dict(kept, strict=False)
        problems = [key for key in result.missing_keys if key not in skipped]
        problems += list(result.unexpected_keys)
        if problems:
            raise RuntimeError(
                'pretrained weights do not match the model: %s' % ', '.join(problems))
        print('Classifier weights skipped (shape mismatch):', ', '.join(skipped))

    def forward(self, x):
        for layer in self.base:
            x = layer(x)
        return x
        
        

In [None]:

def hardnet68(pretrained=False, **kwargs):
    """ # This docstring shows up in hub.help()
    Harmonic DenseNet 68 model
    pretrained (bool): load pretrained weights into the model
    Extra keyword arguments are accepted but ignored.
    """
    # Standard (non depthwise) HarDNet with the 68-layer layout
    return HarDNet(depth_wise=False, arch=68, pretrained=pretrained)

def hardnet85(pretrained=False, **kwargs):
    """ # This docstring shows up in hub.help()
    Harmonic DenseNet 85 model
    pretrained (bool): load pretrained weights into the model
    Extra keyword arguments are accepted but ignored.
    """
    # Standard (non depthwise) HarDNet with the 85-layer layout
    return HarDNet(depth_wise=False, arch=85, pretrained=pretrained)

def hardnet68ds(pretrained=False, **kwargs):
    """ # This docstring shows up in hub.help()
    Harmonic DenseNet 68ds (Depthwise Separable) model
    pretrained (bool): load pretrained weights into the model
    Extra keyword arguments are accepted but ignored.
    """
    # Depthwise-separable HarDNet with the 68-layer layout
    return HarDNet(depth_wise=True, arch=68, pretrained=pretrained)

def hardnet39ds(pretrained=False, **kwargs):
    """ # This docstring shows up in hub.help()
    Harmonic DenseNet 39ds (Depthwise Separable) model
    pretrained (bool): load pretrained weights into the model
    Extra keyword arguments are accepted but ignored.
    """
    # Call the model, load pretrained weights
    model = HarDNet(depth_wise=True, arch=39, pretrained=pretrained)
    return model

In [None]:
## Load model and Start Training
is_best = False
best_acc1 = 0.0

# Build the network through its named factory. HarDNet's `arch` argument is
# compared against the integers 85 and 39, so passing the model *name*
# (e.g. 'hardnet39ds') silently fell back to the HarDNet68 layout.
model_factories = {
    'hardnet39ds': hardnet39ds,
    'hardnet68ds': hardnet68ds,
    'hardnet68': hardnet68,
    'hardnet85': hardnet85,
}
model = model_factories[wc.model_arch](pretrained=wc.pretrained)
wandb.watch(model)

# Use the device selected in the hyperparameter cell so the CPU fallback works
model.to(device)

# define loss function (criterion) and optimizer
criterion = nn.CrossEntropyLoss().to(device)

# The base learning rate stays fixed; the schedule derives each epoch's rate from it
base_lr = wc.learning_rate
optimizer = torch.optim.SGD(model.parameters(), base_lr,
                            momentum=wc.momentum,
                            nesterov=True,
                            weight_decay=wc.weight_decay)

# Random re-initialisation would overwrite pretrained weights
if not wc.pretrained:
    model.apply(weights_init)

total_params = sum(p.numel() for p in model.parameters())

print( "Parameters=", total_params )
cudnn.benchmark = True

# Data loading code
# NOTE(review): training and validation read the same directory, so the
# validation accuracy is measured on training images - confirm whether a
# separate validation split exists and point `valdir` at it.
traindir = os.path.join(data)
valdir = os.path.join(data)
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

train_dataset = datasets.ImageFolder(traindir,
                                     transforms.Compose([
                                         transforms.RandomResizedCrop(224),
                                         transforms.RandomHorizontalFlip(),
                                         transforms.ToTensor(),
                                         normalize,
                                     ]))

# For unbalanced dataset we create a weighted sampler
weights = make_weights_for_balanced_classes(train_dataset.imgs, len(train_dataset.classes))
weights = torch.DoubleTensor(weights)
train_sampler = torch.utils.data.sampler.WeightedRandomSampler(weights, len(weights))

train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=wc.batch_size,
                                           num_workers=wc.workers,
                                           pin_memory=True,
                                           sampler=train_sampler)

val_dataset = datasets.ImageFolder(valdir,
                                   transforms.Compose([
                                       transforms.Resize(256),
                                       transforms.CenterCrop(224),
                                       transforms.ToTensor(),
                                       normalize,
                                   ]))

val_loader = torch.utils.data.DataLoader(val_dataset,
                                         batch_size=wc.batch_size,
                                         shuffle=False,
                                         num_workers=wc.workers,
                                         pin_memory=True)

for epoch in range(wc.start_epoch, wc.max_epochs):
    # Always schedule from the base rate: feeding the previous epoch's rate
    # back in would multiply the cosine factors together epoch after epoch.
    lr = adjust_learning_rate(optimizer, epoch, base_lr)
    wandb.log({"learning_rate": lr})

    train(train_loader, model, criterion, optimizer, epoch)

    acc1 = validate(val_loader, model, criterion)

    # remember best acc@1 and save checkpoint
    is_best = acc1 > best_acc1
    best_acc1 = max(acc1, best_acc1)

    save_checkpoint({
        'epoch': epoch + 1,
        'arch': model_arch,
        'state_dict': model.state_dict(),
        'best_acc1': best_acc1,
        'optimizer': optimizer.state_dict(),
    }, is_best)

In [None]:
from torchviz import make_dot


In [None]:
!pip install torchviz

In [None]:
# Push one all-zero 1x3x224x224 batch through the model and render the
# autograd graph of the result with torchviz's make_dot.
x = torch.zeros(1, 3, 224, 224, dtype=torch.float, requires_grad=False).to(wc.device)
out = model(x)
make_dot(out)
