<a href="https://colab.research.google.com/github/vishal-burman/PyTorch-Architectures/blob/master/DenseNet_121_CIFAR10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import time
from collections import OrderedDict

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.checkpoint as cp
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Subset

from torchvision import datasets
from torchvision import transforms

# Request deterministic cuDNN kernels, but only on hosts where CUDA is usable;
# on CPU-only machines the flag is left at its current value.
cuda_usable = torch.cuda.is_available()
if cuda_usable:
  torch.backends.cudnn.deterministic = True

In [None]:
###################
# Model Settings
###################

# Optimisation hyperparameters
RANDOM_SEED, NUM_EPOCHS = 1, 20
BATCH_SIZE = 128
LEARNING_RATE = 1e-3

# Architecture: number of target classes
NUM_CLASSES = 10

# Runtime options
# `grayscale` is read by DenseNet121 to choose 1 vs. 3 input channels.
grayscale = False
# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda:0")
else:
  device = torch.device("cpu")

In [None]:
####################
# CIFAR-10 Dataset
####################

# The training split is carved up by index:
#   samples 0..47999     -> training   (48000 samples)
#   samples 48000..49999 -> validation (2000 samples)
train_indices = torch.arange(0, 48000)
valid_indices = torch.arange(48000, 50000)

# download=True fetches the archive into ./data when it is not there yet.
train_and_valid = datasets.CIFAR10(
    root='data', train=True, download=True,
    transform=transforms.ToTensor())
train_dataset = Subset(train_and_valid, train_indices)
valid_dataset = Subset(train_and_valid, valid_indices)

# The test split is read from the same root directory, so no second
# download is requested here.
test_dataset = datasets.CIFAR10(
    root="data", train=False, download=False,
    transform=transforms.ToTensor())

# Only the training loader shuffles; validation and test keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                          shuffle=True, num_workers=4)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE,
                          shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE,
                         shuffle=False, num_workers=4)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/cifar-10-python.tar.gz to data


In [None]:
# Sanity check the loaders: look at just the first batch of two
# consecutive passes and make sure it can be moved to the target device.
torch.manual_seed(0)

for epoch in (0, 1):
  for batch_idx, (features, label) in enumerate(train_loader):
    print("Epoch: %d | Batch: %d | Batch Size: %d"
          % (epoch + 1, batch_idx, label.shape[0]))

    features, label = features.to(device), label.to(device)
    # One batch per pass is enough for this check.
    break

Epoch: 1 | Batch: 0 | Batch Size: 128
Epoch: 2 | Batch: 0 | Batch Size: 128


In [None]:
# Sanity check shuffling:
# run through the training loader twice and print the first ten labels of
# the final batch of each pass -- the two printed orders should differ.

for epoch_pass in range(2):
  for images, labels in train_loader:
    continue
  print(labels[:10])

tensor([3, 0, 1, 3, 3, 5, 0, 4, 9, 4])
tensor([1, 0, 4, 1, 8, 2, 0, 3, 5, 3])


In [None]:
# Sanity check ~ validation and test set should be diverse
# ~ the printed labels (first ten of each loader's final batch)
#   should span several classes

for loader in (valid_loader, test_loader):
  for images, labels in loader:
    pass
  print(labels[:10])

tensor([5, 0, 3, 6, 8, 7, 9, 5, 6, 6])
tensor([7, 5, 8, 0, 8, 2, 7, 0, 3, 5])


In [None]:
#######################
# Model
######################

def _bn_function_factory(norm, relu, conv):
  def bn_function(*inputs):
    # TODO Add dimension changes
    concated_features = torch.cat(inputs, 1)
    bottleneck_output = conv(relu(norm(concated_features)))
    return bottleneck_output
  return bn_function

class _DenseLayer(nn.Sequential):
  """One dense layer: BN -> ReLU -> 1x1 conv (bottleneck), then BN -> ReLU -> 3x3 conv.

  Args:
    num_input_features: channel count of the concatenated inputs.
    growth_rate: number of channels this layer produces.
    bn_size: the bottleneck conv outputs ``bn_size * growth_rate`` channels.
    drop_rate: dropout probability applied to the new features (0 disables it).
    memory_efficient: if True, run the bottleneck through
      ``torch.utils.checkpoint`` whenever any input requires grad.
  """
  def __init__(self, num_input_features, growth_rate, bn_size, drop_rate, memory_efficient=False):
    super(_DenseLayer, self).__init__()
    bottleneck_channels = bn_size * growth_rate
    self.add_module('norm1', nn.BatchNorm2d(num_input_features))
    self.add_module('relu1', nn.ReLU(inplace=True))
    self.add_module('conv1', nn.Conv2d(in_channels=num_input_features,
                                       out_channels=bottleneck_channels,
                                       kernel_size=1,
                                       stride=1,
                                       bias=False))
    self.add_module('norm2', nn.BatchNorm2d(bottleneck_channels))
    self.add_module('relu2', nn.ReLU(inplace=True))
    # padding=1 with a 3x3 kernel and stride 1 keeps the spatial size unchanged.
    self.add_module('conv2', nn.Conv2d(in_channels=bottleneck_channels,
                                       out_channels=growth_rate,
                                       kernel_size=3,
                                       stride=1,
                                       padding=1,
                                       bias=False))
    self.drop_rate = drop_rate
    self.memory_efficient = memory_efficient

  def forward(self, *prev_features):
    """Compute this layer's new feature maps from all earlier feature maps.

    Args:
      *prev_features: tensors that are concatenated along dim 1 before the
        bottleneck.

    Returns:
      Tensor with ``growth_rate`` channels.
    """
    # TODO Add dimensions
    # Fixed: was `self,relu1` (NameError on `relu1`).
    bn_function = _bn_function_factory(self.norm1, self.relu1, self.conv1)
    # Fixed: the generator iterated over the misspelled `prev_efatures`.
    if self.memory_efficient and any(prev_feature.requires_grad for prev_feature in prev_features):
      bottleneck_output = cp.checkpoint(bn_function, *prev_features)
    else:
      bottleneck_output = bn_function(*prev_features)
    new_features = self.conv2(self.relu2(self.norm2(bottleneck_output)))
    if self.drop_rate > 0:
      new_features = F.dropout(new_features, p=self.drop_rate,
                               training=self.training)
    return new_features

In [None]:
class _DenseBlock(nn.Module):
  def __init__(self, num_layers, num_input_features, bn_size, growth_rate, drop_rate, memory_efficient=False):
    super(_DenseBlock, self).__init__()
    for i in range(num_layers):
      layer = _DenseLayer(
          num_input_features + i * growth_rate,
          growth_rate=growth_rate,
          bn_size=bn_size,
          drop_rate=drop_rate,
          memory_efficient=memory_efficient
      )
      self.add_module('denselayer%d' % (i + 1), layer)
  
  def forward(self, init_features):
    # TODO Add dimension changes
    features = [init_features]
    for name, layer in self.named_children():
      new_features = layer(*features)
      features.append(new_features)
    return torch.cat(features, 1)

In [None]:
class _Transition(nn.Sequential):
  def __init__(self, num_input_features, num_output_features):
    super(_Transition, self).__init__()
    self.add_module('norm', nn.BatchNorm2d(num_input_features))
    self.add_module('relu', nn.ReLU(inplace=True))
    self.add_module('conv', nn.Conv2d(in_channels=num_input_features,
                                      out_channels=num_output_features,
                                      kernel_size=1,
                                      stride=1,
                                      bias=False))
    self.add_module('pool', nn.AvgPool2d(kernel_size=2,
                                         stride=2))

In [None]:
class DenseNet121(nn.Module):
  """DenseNet-121 style classifier.

  Args:
    growth_rate: channels added by every dense layer.
    block_config: number of dense layers in each dense block.
    num_init_featuremaps: output channels of the first convolution.
    bn_size: bottleneck multiplier forwarded to every dense layer.
    drop_rate: dropout probability forwarded to every dense layer.
    num_classes: size of the classifier output.
    memory_efficient: forwarded to every dense layer (enables checkpointing).

  The number of input channels is taken from the notebook-level ``grayscale``
  flag (1 channel if True, otherwise 3).
  """
  def __init__(self,
               growth_rate=32,
               block_config=(6, 12, 24, 16),
               num_init_featuremaps=64,
               bn_size=4,
               drop_rate=0,
               num_classes=1000,
               memory_efficient=False):
    super(DenseNet121, self).__init__()

    # First Convolution
    in_channels = 1 if grayscale else 3

    self.features = nn.Sequential(OrderedDict([
        ('conv0', nn.Conv2d(in_channels=in_channels,
                            out_channels=num_init_featuremaps,
                            kernel_size=7,
                            stride=2,
                            padding=3,
                            bias=False)),
        ('norm0', nn.BatchNorm2d(num_features=num_init_featuremaps)),
        ('relu0', nn.ReLU(inplace=True)),
        ('pool0', nn.MaxPool2d(kernel_size=3,
                               stride=2,
                               padding=1)),
    ]))

    # Each DenseBlock
    num_features = num_init_featuremaps
    last_block = len(block_config) - 1
    for i, num_layers in enumerate(block_config):
      block = _DenseBlock(
          num_layers=num_layers,
          num_input_features=num_features,
          bn_size=bn_size,
          growth_rate=growth_rate,
          drop_rate=drop_rate,
          memory_efficient=memory_efficient
      )
      self.features.add_module('denseblock%d' % (i + 1), block)
      # A block outputs its input plus growth_rate channels per layer.
      num_features = num_features + num_layers * growth_rate
      if i != last_block:
        # Every block except the last is followed by a transition that
        # halves the channel count (and, via pooling, the spatial size).
        trans = _Transition(num_input_features=num_features,
                            num_output_features=num_features // 2)
        self.features.add_module('transition%d' % (i + 1), trans)
        num_features = num_features // 2

    # Final batch norm
    self.features.add_module('norm5', nn.BatchNorm2d(num_features))

    # Linear classifier on the globally pooled features
    self.classifier = nn.Linear(num_features, num_classes)

  def forward(self, x):
    """Return class logits of shape ``(batch, num_classes)`` for input batch ``x``."""
    features = self.features(x)
    out = F.relu(features, inplace=True)
    # Global average pooling makes the classifier independent of the
    # spatial size left after the last dense block.
    out = F.adaptive_avg_pool2d(out, (1, 1))
    out = torch.flatten(out, 1)
    logits = self.classifier(out)
    return logits


In [None]:
# Fetch a single batch to inspect the input shape. Pulling one batch from a
# fresh iterator avoids walking the whole training set just to read a shape.
x, y = next(iter(train_loader))
print(x.shape)


torch.Size([128, 3, 32, 32])


In [None]:
# Shape probe: a convolution with the same kernel size, stride and padding as
# `conv0` in DenseNet121, applied to the sample batch.
# NOTE: `x` is overwritten, so re-run the batch-loading cell before re-running
# this one (the layer expects 3 input channels).
conv_sample = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3)
x = conv_sample(x)
# Print the resulting shape, as the following max-pool probe does.
print(x.shape)

torch.Size([128, 64, 16, 16])


In [None]:
# Shape probe: max-pool with the same kernel size, stride and padding as
# `pool0` in DenseNet121, applied to the output of the conv probe.
pool_sample = nn.MaxPool2d(3, stride=2, padding=1)
x = pool_sample(x)
print(x.size())

torch.Size([128, 64, 8, 8])
