<a href="https://colab.research.google.com/github/vishal-burman/PyTorch-Architectures/blob/master/DenseNet_121_CIFAR10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import time

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Subset

from torchvision import datasets
from torchvision import transforms

import matplotlib.pyplot as plt
from PIL import Image

# Turn on cuDNN's `deterministic` flag, but only when a CUDA device is
# available; on CPU-only machines the flag is left untouched.
if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True

In [2]:
# ------------------------------------------------------------------
# Notebook-wide configuration
# ------------------------------------------------------------------

# Training hyperparameters
random_seed, num_epochs = 1, 20
batch_size = 128
learning_rate = 1e-3

# Model architecture: number of output classes
num_classes = 10

# Runtime: first CUDA device when one is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Input images are treated as colour (not single-channel) images
grayscale = False

In [3]:
####################
# CIFAR-10
####################

# Both splits live under ./data. Only the training split requests a
# download; the test split reuses whatever is already on disk.
train_and_valid = datasets.CIFAR10(root='data', train=True,
                                   transform=transforms.ToTensor(), download=True)
test_dataset = datasets.CIFAR10(root='data', train=False,
                                transform=transforms.ToTensor(), download=False)

# Indices 0..47999 of the training split are used for training and
# indices 48000..49999 are held out for validation.
train_indices = torch.arange(48000)
valid_indices = torch.arange(48000, 50000)
train_dataset = Subset(train_and_valid, train_indices)
valid_dataset = Subset(train_and_valid, valid_indices)

# Only the training loader shuffles; evaluation loaders keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/cifar-10-python.tar.gz to data


In [4]:
# Seed torch's global RNG before touching the shuffled training loader.
torch.manual_seed(0)

# Smoke test: for two epochs, fetch only the first mini-batch, report its
# size, and move it to the configured device.
for epoch in range(2):
    for batch_idx, (x, y) in enumerate(train_loader):
        print('Epoch: %d | Batch index: %d | Batch size: %d'
              % (epoch + 1, batch_idx, y.size(0)))
        x, y = x.to(device), y.to(device)
        break

Epoch: 1 | Batch index: 0 | Batch size: 128
Epoch: 2 | Batch index: 0 | Batch size: 128


In [5]:
# Shuffling sanity check: exhaust the training loader twice and print the
# first ten labels of the final batch of each pass. With shuffling active
# the two printed tensors are expected to differ.
for _pass in range(2):
    for images, labels in train_loader:
        pass
    print(labels[:10])

tensor([3, 0, 1, 3, 3, 5, 0, 4, 9, 4])
tensor([1, 0, 4, 1, 8, 2, 0, 3, 5, 3])


In [6]:
# Label-diversity check for the evaluation splits: exhaust the validation
# loader, then the test loader, printing the first ten labels of the final
# batch of each.
for _loader in (valid_loader, test_loader):
    for images, labels in _loader:
        pass
    print(labels[:10])

tensor([5, 0, 3, 6, 8, 7, 9, 5, 6, 6])
tensor([7, 5, 8, 0, 8, 2, 7, 0, 3, 5])


In [8]:
############################
# Model
###########################

import re
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.checkpoint as cp
from collections import OrderedDict

# Maybe hacky
def _bn_function_factory(norm, relu, conv):
    def bn_function(*inputs):
        concated_features = torch.cat(inputs, 1)
        bottleneck_output = conv(relu(norm(concated_features)))
        return bottleneck_output
    return bn_function

class _DenseLayer(nn.Sequential):
  def __init__(self, num_input_features, growth_rate, bn_size, drop_rate, memory_efficient=False):
    super(_DenseLayer, self).__init__()
    
    self.add_module('norm1', nn.BatchNorm2d(num_input_features)),
    self.add_module('relu1', nn.ReLU(inplace=True)),
    self.add_module('conv1', nn.Conv2d(num_input_features,
                                       bn_size * growth_rate,
                                       kernel_size=1,
                                       stride=1,
                                       bias=False)),
    self.add_module('norm2', nn.BatchNorm2d(bn_size * growth_rate)),
    self.add_module('relu2', nn.ReLU(inplace=True)),
    self.add_module('conv2', nn.Conv2d(bn_size * growth_rate,
                                       growth_rate,
                                       kernel_size=3,
                                       stride=1,
                                       padding=1,
                                       bias=False))
    self.drop_rate = drop_rate
    self.memory_efficient = memory_efficient

  def forward(self, *prev_features):
    bn_function = _bn_function_factory(self.norm1, self.relu_1, self.conv1)
    if self.memory_efficient and any(prev_feature.requires_grad for prev_feature in prev_features):
      bottleneck_output = cp.checkpoint(bn_function, *prev_features)
    else:
      bottleneck_output = bn_function(*prev_features)
    new_features = self.conv2(self.relu2(self.norm2(bottleneck_output)))
    if self.drop_rate > 0:
      new_features = F.dropout(new_features, p=self.drop_rate, training=self.training)
    return new_features

In [9]:
class _DenseBlock(nn.Module):
  def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate, memory_efficient=False):
    super(_DenseBlock, self).__init__()
    for i in range(num_layers):
      layer = _DenseLayer(
          num_input_features + i * growth_rate,
          growth_rate=growth_rate,
          bn_size=bn_size,
          drop_rate=drop_rate,
          memory_efficient=memory_efficient
      )
      self.add_module('denselayer%d' % (i+1), layer)
    
  def forward(self, init_features):
    features = [init_features]
    for name, layer in self.named_children():
      new_features = layer(*features)
      features.append(new_features)
    return torch.cat(features, 1)

In [None]:
class _Transition(nn.Sequential):
  def __init__(self, num_input_features, num_output_features):
    super(_Transition, self).__init__()
    self.add_module('norm', nn.BatchNorm2d(num_input_features))
    self.add_module('relu', nn.ReLU(inplace=True))
    self.add_module('conv', nn.Conv2d(num_input_features, 
                                      num_output_features, 
                                      kernel_size=1, 
                                      stride=1, 
                                      bias=False))
    self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2))

class DenseNet121(nn.Module):
  def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16),
               num_init_featuremaps=64, bn_size=4, drop_rate=0, num_classes=10, memory_efficient=False,
               grayscale=False):
    """Build the convolutional feature extractor and the linear classifier.

    Args:
      growth_rate: number of channels each dense layer adds.
      block_config: number of dense layers in each dense block; a
        transition stage is inserted after every block except the last.
      num_init_featuremaps: output channels of the first convolution.
      bn_size: bottleneck multiplier forwarded to every dense block.
      drop_rate: dropout probability forwarded to every dense block.
      num_classes: output size of the final linear layer.
      memory_efficient: forwarded to every dense block.
      grayscale: if True the first convolution expects 1 input channel,
        otherwise 3.
    """
    super(DenseNet121, self).__init__()

    # First Convolution
    in_channels = 1 if grayscale else 3

    self.features = nn.Sequential(OrderedDict([
        ('conv0', nn.Conv2d(in_channels=in_channels,
                            out_channels=num_init_featuremaps,
                            kernel_size=7,
                            stride=2,
                            padding=3, bias=False)),
        ('norm0', nn.BatchNorm2d(num_init_featuremaps)),
        ('relu0', nn.ReLU(inplace=True)),
        ('pool0', nn.AvgPool2d(kernel_size=3, stride=2, padding=1)),
    ]))

    # Each denseblock
    num_features = num_init_featuremaps
    for i, num_layers in enumerate(block_config):
      block = _DenseBlock(
          num_layers=num_layers,
          num_input_features=num_features,
          bn_size=bn_size,
          growth_rate=growth_rate,
          # drop_rate is a required argument of _DenseBlock.
          drop_rate=drop_rate,
          memory_efficient=memory_efficient
      )
      self.features.add_module('denseblock%d' % (i+1), block)
      # Every layer in the block appended growth_rate channels.
      num_features = num_features + num_layers * growth_rate
      if i != len(block_config) - 1:
        # Transition halves the channel count between consecutive blocks.
        trans = _Transition(num_input_features=num_features,
                            num_output_features=num_features//2)
        self.features.add_module('transition%d' % (i+1), trans)
        num_features = num_features // 2

    # Final Batch Norm
    self.features.add_module('norm5', nn.BatchNorm2d(num_features))

    # Linear Layer
    self.classifier = nn.Linear(num_features, num_classes)

    # Official init from torch repo
    for m in self.modules():
      if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight)
      elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)
      elif isinstance(m, nn.Linear):
        nn.init.constant_(m.bias, 0)
    
  def forward(self, x):
    features = self.features(x)
    out = F.relu(features, inplace=True)
    out = F.