In [5]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

In [6]:
# Train Phase transformations
train_transforms = transforms.Compose([
                                      #transforms.ToPILImage(),
                                      #transforms.Resize((28, 28)),
                                      #transforms.ColorJitter(brightness=0.10, contrast=0.1, saturation=0.10, hue=0.1),
                                       #transforms.RandomRotation((-7.0, 7.0), fill=(0.13,)),
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.1307,), (0.3081,)), # The mean and std have to be sequences (e.g., tuples), therefore you should add a comma after the values. 
                                       # Note the difference between (0.1307) and (0.1307,)
                                       #transforms.RandomRotation((-5.0, 5.0), fill=(-0.42,)),
                                       transforms.RandomAffine((-5,5), translate=None, scale=None, shear=5, resample=False, fillcolor=(-0.42,))
                                       ])

# Test Phase transformations
test_transforms = transforms.Compose([
                                      #  transforms.Resize((28, 28)),
                                      #  transforms.ColorJitter(brightness=0.10, contrast=0.1, saturation=0.10, hue=0.1),
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.1307,), (0.3081,))
                                       ])

train = datasets.MNIST('./data', train=True, download=True, transform=train_transforms)
test = datasets.MNIST('./data', train=False, download=True, transform=test_transforms)

SEED = 1

# CUDA?
cuda = torch.cuda.is_available()
print("CUDA Available?", cuda)

# For reproducibility
torch.manual_seed(SEED)

if cuda:
    torch.cuda.manual_seed(SEED)

# dataloader arguments - something you'll fetch these from cmdprmt
dataloader_args = dict(shuffle=True, batch_size=64, num_workers=4, pin_memory=True) if cuda else dict(shuffle=True, batch_size=64)

# train dataloader
train_loader = torch.utils.data.DataLoader(train, **dataloader_args)

# test dataloader
test_loader = torch.utils.data.DataLoader(test, **dataloader_args)

CUDA Available? True


  "Argument resample is deprecated and will be removed since v0.10.0. Please, use interpolation instead"
  "Argument fillcolor is deprecated and will be removed since v0.10.0. Please, use fill instead"
  cpuset_checked))


In [7]:
# from tqdm.notebook import tqdm

#Use scheduler
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    # pbar = tqdm(train_loader)
    train_loss = 0
    correct = 0
    num_loops = 0
    criterion = nn.CrossEntropyLoss()
    # for batch_idx, (data, target) in enumerate(pbar):
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        # loss = F.nll_loss(output, target)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        # pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')
        train_loss += loss.item()
        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()
        num_loops +=1
 
    train_loss /= num_loops
    # scheduler.step()
    # scheduler.step(train_loss)
    hist_train_loss.append(train_loss)
    hist_train_acc.append(100. * correct / len(train_loader.dataset))
    print("Training Average loss: {:.6f}, Accuracy = ({:.6f}%)".format(train_loss, 100. * correct / len(train_loader.dataset)))

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    hist_test_loss.append(test_loss)
    hist_test_acc.append(100. * correct / len(test_loader.dataset))

    print('Test set: Average loss: {:.6f}, Accuracy: {}/{} ({:.6f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    
import numpy as np
import matplotlib.pyplot as plt

plt.rcParams['figure.figsize'] = [15, 6]

def training_curves():
  plt.subplot(1,2, 1)

  plt.plot(np.array(hist_test_acc))
  plt.plot(np.array(hist_train_acc), 'r')
  plt.legend(["test_acc", "train_acc"])
  plt.title("Accuracy per epoch")

  plt.subplot(1,2,2)
  plt.plot(hist_test_loss)
  plt.plot(hist_train_loss, 'r')
  plt.legend(["test_loss", "train_loss"])
  plt.title("Loss per epoch")

In [8]:
class Net1(nn.Module):

    def __init__(self):
      
        super(Net1, self).__init__()

        # Input Block
        self.convblock1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=8, kernel_size=(3, 3), padding=0, bias=False), #
            nn.ReLU(),
            # LNorm()
            # nn.LayerNorm((8,26,26)),
            nn.BatchNorm2d(8),
        ) # output_size = 26

        # TRANSITION BLOCK 1
        self.pool1 = nn.MaxPool2d(2, 2) # output_size = 13

        # CONVOLUTION BLOCK 2
        self.convblock2 = nn.Sequential(
            nn.Conv2d(in_channels=8, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
            nn.ReLU(),
            # nn.LayerNorm((16,11,11)),
            nn.BatchNorm2d(16),
        ) # output_size = 11

        # TRANSITION BLOCK 2
        self.pool2 = nn.MaxPool2d(2, 2) # output_size = 6

        self.convblock3 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            # nn.LayerNorm((16,3,3)),
        ) # output_size = 4

        self.gap = nn.Sequential(
            nn.AvgPool2d(kernel_size=3)
        ) # output_size = 1

        self.fc1 = nn.Linear(16, 10)

    def forward(self, x):
        x = self.convblock1(x)
        x = self.pool1(x)
        x = self.convblock2(x)
        x = self.pool2(x)
        x = self.convblock3(x)

        x = self.gap(x)
        x = x.view(-1, x.shape[1]*x.shape[2]*x.shape[3])
        x = self.fc1(x)
        x = x.view(-1, 10)
        return F.log_softmax(x, dim=-1)

model = Net1().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]              72
              ReLU-2            [-1, 8, 26, 26]               0
       BatchNorm2d-3            [-1, 8, 26, 26]              16
         MaxPool2d-4            [-1, 8, 13, 13]               0
            Conv2d-5           [-1, 16, 11, 11]           1,152
              ReLU-6           [-1, 16, 11, 11]               0
       BatchNorm2d-7           [-1, 16, 11, 11]              32
         MaxPool2d-8             [-1, 16, 5, 5]               0
            Conv2d-9             [-1, 16, 3, 3]           2,304
             ReLU-10             [-1, 16, 3, 3]               0
      BatchNorm2d-11             [-1, 16, 3, 3]              32
        AvgPool2d-12             [-1, 16, 1, 1]               0
           Linear-13                   [-1, 10]             170
Total params: 3,778
Trainable params: 3

In [9]:
class Print(nn.Module):
    def forward(self, x):
        print(x.size())
        return x

def get_output_shape(model, image_dim):
    return model(torch.rand(*(image_dim))).data.shape

# def conv_output_shape(h_w, kernel_size=1, stride=1, pad=0, dilation=1):
#     from math import floor
#     if type(kernel_size) is not tuple:
#         kernel_size = (kernel_size, kernel_size)
#     h = floor( ((h_w[0] + (2 * pad) - ( dilation * (kernel_size[0] - 1) ) - 1 )/ stride) + 1)
#     w = floor( ((h_w[1] + (2 * pad) - ( dilation * (kernel_size[1] - 1) ) - 1 )/ stride) + 1)
#     return h, w

def conv_output_shape(h_w, kernel_size=1, stride=1, pad=0, dilation=1):
    """
    Utility function for computing output of convolutions
    takes a tuple of (h,w) and returns a tuple of (h,w)
    """
    
    if type(h_w) is not tuple:
        h_w = (h_w, h_w)
    
    if type(kernel_size) is not tuple:
        kernel_size = (kernel_size, kernel_size)
    
    if type(stride) is not tuple:
        stride = (stride, stride)
    
    if type(pad) is not tuple:
        pad = (pad, pad)
    
    h = (h_w[0] + (2 * pad[0]) - (dilation * (kernel_size[0] - 1)) - 1)// stride[0] + 1
    w = (h_w[1] + (2 * pad[1]) - (dilation * (kernel_size[1] - 1)) - 1)// stride[1] + 1
    
    return h, w

In [10]:
x, y = conv_output_shape(28, 3, 1,0,1)
x, y


(26, 26)

In [11]:
cfg = [8, "M", 16, 'M', 16]


cnt=0
for x in cfg:
  cnt+= 1
  print(cnt)
  len(cfg)
  if cnt == len(cfg):
    print('reached the end')

1
2
3
4
5
reached the end


In [75]:
cfg = {
    'basic': [8, "M", 16, 'M', 16]
}


class Building_Net(nn.Module):
  def __init__(self, m_name, img):
    super(Building_Net, self).__init__()
    self.shape = img.shape[1:]
    print(self.shape)
    self.features = self._make_layers(cfg[m_name], self.shape)

    self.fc1 = nn.Linear(16, 10)


  def _make_layers(self, cfg, initial_shape):
    layers = []
    in_channels = 1
    updated_shape = initial_shape[1]
    # print('updated shape', updated_shape)
    cnt=0
    
    for x in cfg:
      cnt += 1
      if x == 'M':
          layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
          updated_shape = conv_output_shape(updated_shape, 2, 2)
          # print('max pool', updated_shape)

      else:
          layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=0, bias=False)]
          layers += [nn.ReLU()]
          # if cnt != len(cfg):
          layers += [nn.BatchNorm2d(x)]
          updated_shape = conv_output_shape(updated_shape, 3, 1)
          # print('conv ', updated_shape)
                      
          in_channels = x

          # print(layers)
      
    layers += [nn.AvgPool2d(kernel_size=3)]
    return nn.Sequential(*layers)


  def forward(self,x):
    x = self.features(x)
    # x = self.gap(x)
    x = x.view(-1, x.shape[1]*x.shape[2]*x.shape[3])
    x = self.fc1(x)
    x = x.view(-1, 10)
    return F.log_softmax(x, dim=-1)



In [74]:
model2 = Building_Net('basic',torch.rand(2,1,28,28)).to(device)
summary(model2, input_size=(1, 28, 28))

torch.Size([1, 28, 28])
[Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), bias=False), ReLU(), BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)]
[Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), bias=False), ReLU(), BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False), Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), bias=False), ReLU(), BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)]
[Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), bias=False), ReLU(), BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False), Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), bias=False), ReLU(), BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=Fals

In [67]:
model2

Building_Net(
  (features): Sequential(
    (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (5): ReLU()
    (6): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (9): ReLU()
    (10): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): AvgPool2d(kernel_size=3, stride=3, padding=0)
  )
  (fc1): Linear(in_features=16, out_features=10, bias=True)
)

In [68]:
model

Net1(
  (convblock1): Sequential(
    (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (1): ReLU()
  )
  (bn1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (convblock2): Sequential(
    (0): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (1): ReLU()
  )
  (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (convblock3): Sequential(
    (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (1): ReLU()
  )
  (gap): Sequential(
    (0): AvgPool2d(kernel_size=3, stride=3, padding=0)
  )
  (fc1): Linear(in_features=16, out_features=10, bias=True)
)

In [69]:
hist_test_loss = []
hist_test_acc = []
hist_train_loss = []
hist_train_acc = []

In [70]:
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.optim.lr_scheduler import ExponentialLR

optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9,)

scheduler =ReduceLROnPlateau(optimizer=optimizer, patience=2, verbose=True)

for epoch in range(1, 3):
    print("Epoch: ", epoch)
    train(model2, device, train_loader, optimizer, epoch)
    print("learning rate", optimizer.param_groups[0]['lr'])
    test(model2, device, test_loader)


Epoch:  1


  cpuset_checked))


Training Average loss: 2.361981, Accuracy = (9.371667%)
learning rate 0.1
Test set: Average loss: 2.359781, Accuracy: 1015/10000 (10.150000%)

Epoch:  2


KeyboardInterrupt: ignored

In [None]:
class Gen_BN(nn.Module):
    def forward(self, x):
        print(x.size())
        return x


In [85]:
class Net1(nn.Module):

    def conv_block(self, in_features, out_features, kernel_size, BN_flag):
      layers = []
      layers = [nn.Conv2d(in_features, out_features, kernel_size, padding=0, bias=False),
                nn.ReLU()]

      # pass the image as parameter and get the size of the image
      if BN_flag:
        layers.append(nn.BatchNorm2d(out_features))
      
      block = nn.Sequential(*layers)

      return block


    def __init__(self):
        super(Net1, self).__init__()

        self.convblock1 = self.conv_block(1,8,3,True)
        self.pool1 = nn.MaxPool2d(2, 2) # output_size = 13
        self.convblock2 = self.conv_block(8, 16, 3, True)
        self.pool2 = nn.MaxPool2d(2, 2) # output_size = 6
        self.convblock3 = self.conv_block(16, 16, 3, True)

        self.gap = nn.Sequential(
            nn.AvgPool2d(kernel_size=3)
        ) # output_size = 1

        self.fc1 = nn.Linear(16, 10)

    def forward(self, x):
        x = self.convblock1(x)
        x = self.pool1(x)
        x = self.convblock2(x)
        x = self.pool2(x)
        x = self.convblock3(x)

        x = self.gap(x)
        x = x.view(-1, x.shape[1]*x.shape[2]*x.shape[3])
        x = self.fc1(x)
        x = x.view(-1, 10)
        return F.log_softmax(x, dim=-1)

model = Net1().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]              72
              ReLU-2            [-1, 8, 26, 26]               0
       BatchNorm2d-3            [-1, 8, 26, 26]              16
         MaxPool2d-4            [-1, 8, 13, 13]               0
            Conv2d-5           [-1, 16, 11, 11]           1,152
              ReLU-6           [-1, 16, 11, 11]               0
       BatchNorm2d-7           [-1, 16, 11, 11]              32
         MaxPool2d-8             [-1, 16, 5, 5]               0
            Conv2d-9             [-1, 16, 3, 3]           2,304
             ReLU-10             [-1, 16, 3, 3]               0
      BatchNorm2d-11             [-1, 16, 3, 3]              32
        AvgPool2d-12             [-1, 16, 1, 1]               0
           Linear-13                   [-1, 10]             170
Total params: 3,778
Trainable params: 3

In [86]:
model

Net1(
  (convblock1): Sequential(
    (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (1): ReLU()
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (convblock2): Sequential(
    (0): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (1): ReLU()
    (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (convblock3): Sequential(
    (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (1): ReLU()
    (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (gap): Sequential(
    (0): AvgPool2d(kernel_size=3, stride=3, padding=0)
  )
  (fc1): Linear(in_features=16, out_features=10, bias=True)
)

In [87]:
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.optim.lr_scheduler import ExponentialLR

optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9,)

scheduler =ReduceLROnPlateau(optimizer=optimizer, patience=2, verbose=True)

for epoch in range(1, 3):
    print("Epoch: ", epoch)
    train(model, device, train_loader, optimizer, epoch)
    print("learning rate", optimizer.param_groups[0]['lr'])
    test(model, device, test_loader)


Epoch:  1


  cpuset_checked))


Training Average loss: 0.160809, Accuracy = (95.123333%)
learning rate 0.1
Test set: Average loss: 0.062201, Accuracy: 9790/10000 (97.900000%)

Epoch:  2
Training Average loss: 0.070589, Accuracy = (97.746667%)
learning rate 0.1
Test set: Average loss: 0.063156, Accuracy: 9804/10000 (98.040000%)



In [221]:

'''VGG11/13/16/19 in Pytorch.'''
import torch
import torch.nn as nn


cfg = {
    'VGG11': [8 ,'M',16,  16, 'M', 16,16, 16, 'M', 16, 16, 16],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


class VGG(nn.Module):
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(16, 10)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        layers = []
        in_channels = 1
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=3, stride=1)]
        return nn.Sequential(*layers)


def test():
    net = VGG('VGG11')
    x = torch.randn(2,3,32,32)
    y = net(x)
    print(y.size())


In [222]:
model2 = VGG('VGG11').to(device)
summary(model2, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              80
       BatchNorm2d-2            [-1, 8, 28, 28]              16
              ReLU-3            [-1, 8, 28, 28]               0
         MaxPool2d-4            [-1, 8, 14, 14]               0
            Conv2d-5           [-1, 16, 14, 14]           1,168
       BatchNorm2d-6           [-1, 16, 14, 14]              32
              ReLU-7           [-1, 16, 14, 14]               0
            Conv2d-8           [-1, 16, 14, 14]           2,320
       BatchNorm2d-9           [-1, 16, 14, 14]              32
             ReLU-10           [-1, 16, 14, 14]               0
        MaxPool2d-11             [-1, 16, 7, 7]               0
           Conv2d-12             [-1, 16, 7, 7]           2,320
      BatchNorm2d-13             [-1, 16, 7, 7]              32
             ReLU-14             [-1, 1

In [223]:
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.optim.lr_scheduler import ExponentialLR

optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9,)

scheduler =ReduceLROnPlateau(optimizer=optimizer, patience=2, verbose=True)

for epoch in range(1, 3):
    print("Epoch: ", epoch)
    train(model2, device, train_loader, optimizer, epoch)
    print("learning rate", optimizer.param_groups[0]['lr'])
    # test(model2, device, test_loader)


Epoch:  1


  cpuset_checked))


Training Average loss: 2.342123, Accuracy = (9.566667%)
learning rate 0.1
Epoch:  2


Exception in thread Thread-78:
Traceback (most recent call last):
  File "/usr/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.7/dist-packages/torch/utils/data/_utils/pin_memory.py", line 25, in _pin_memory_loop
    r = in_queue.get(timeout=MP_STATUS_CHECK_INTERVAL)
  File "/usr/lib/python3.7/multiprocessing/queues.py", line 113, in get
    return _ForkingPickler.loads(res)
  File "/usr/local/lib/python3.7/dist-packages/torch/multiprocessing/reductions.py", line 282, in rebuild_storage_fd
    fd = df.detach()
  File "/usr/lib/python3.7/multiprocessing/resource_sharer.py", line 57, in detach
    with _resource_sharer.get_connection(self._id) as conn:
  File "/usr/lib/python3.7/multiprocessing/resource_sharer.py", line 87, in get_connection
    c = Client(address, authkey=process.current_process().authkey)
  File "/usr/lib/pytho

KeyboardInterrupt: ignored

In [None]:
import torch.nn as nn
import torch

In [None]:
input = torch.randn(20, 5, 10, 10)

In [None]:
input.shape

torch.Size([20, 5, 10, 10])

In [None]:
input.size()[1:]

torch.Size([5, 10, 10])

In [None]:
m = nn.LayerNorm(input.size()[1:])

In [None]:
m

LayerNorm((5, 10, 10), eps=1e-05, elementwise_affine=True)

In [None]:
output = m(input)

In [None]:
output.shape

torch.Size([20, 5, 10, 10])