<a href="https://colab.research.google.com/github/vinayk19/Assignment/blob/master/CNN_Inception_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms
import torch.nn.functional as F
import time
import torch.nn as nn

In [0]:
train_data = datasets.MNIST(root = "MNIST/processed/training.pt", train = True, transform=transforms.ToTensor(), download= True)
test_data = datasets.MNIST(root = "MNIST/processed/test.pt", train = False, transform=transforms.ToTensor(), download = True)

In [0]:
train_loader = DataLoader(dataset = train_data, batch_size= 64, shuffle= True)
test_loader = DataLoader(dataset = test_data, batch_size= 64, shuffle= False)

In [83]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("device is ", device)

device is  cuda


In [0]:
# class inception(torch.nn.Module):
#   def __init__(self, in_channel):
#     super(inception, self).__init__()

#     self.in_channel = in_channel

#     self.b1x1 = torch.nn.Conv2d(in_channels= self.in_channel, out_channels=16, kernel_size=1 )

#     self.b241x1 = torch.nn.Conv2d(in_channels=self.in_channel, out_channels= 24, kernel_size=1 )

#     self.b5x5_1 = torch.nn.Conv2d(in_channels=self.in_channel, out_channels=16, kernel_size=1 )
#     self.b5x5_2 = torch.nn.Conv2d(in_channels=16, out_channels=24, kernel_size=5, padding =2 )

#     self.b3x3_1 = torch.nn.Conv2d(in_channels=self.in_channel, out_channels=16, kernel_size=1 )
#     self.b3x3_2 = torch.nn.Conv2d(in_channels=16, out_channels=24, kernel_size=3 )
#     self.b3x3_3 = torch.nn.Conv2d(in_channels=24, out_channels=24, kernel_size=3 )
    
#     def forward(self, image):
#       in_size = image.size(0)# it will give input shape

#       branch1x1 = self.b1x1(image)

#       branch_pool = F.avg_pool2d(image, kernel_size=3, stride=1, padding=1)
#       branch241x1 = self.b241x1(branch_pool)

#       branch5x5 = self.b5x5_2(self.b5x5_1(image))

#       branch3x3 = self.b3x3_3(self.b3x3_2(self.b3x3_1(image)))

#       output = [branch1x1, branch5x5, branch3x3, branch241x1] #what is sape of each

#       return torch.cat(output, 1)

In [0]:
class InceptionA(torch.nn.Module): #https://cs231n.github.io/convolutional-networks/
    def __init__(self, in_channel):
      super(InceptionA, self).__init__()

      self.in_channel = in_channel

      self.b1x1 = torch.nn.Conv2d(in_channels= self.in_channel, out_channels=16, kernel_size=1 )

      self.b241x1 = torch.nn.Conv2d(in_channels=self.in_channel, out_channels= 24, kernel_size=1 )

      self.b5x5_1 = torch.nn.Conv2d(in_channels=self.in_channel, out_channels=16, kernel_size=1 )
      self.b5x5_2 = torch.nn.Conv2d(in_channels=16, out_channels=24, kernel_size=5, padding =2 )

      self.b3x3_1 = torch.nn.Conv2d(in_channels=self.in_channel, out_channels=16, kernel_size=1 )
      self.b3x3_2 = torch.nn.Conv2d(in_channels=16, out_channels=24, kernel_size=3, padding = 1 )
      self.b3x3_3 = torch.nn.Conv2d(in_channels=24, out_channels=24, kernel_size=3, padding = 1 )
    
    def forward(self, image): # IMAGE [64, 10, 12, 12] [64, 20, 4, 4]) &  BS, channel, h w
      in_size = image.size(0)# it will give input shape
      # print("image detal", type(image), image.shape )

      branch1x1 = self.b1x1(image) # out 64 16 12 12 (W2=(W1−F+2P)/S)+1
      # print("1x1 details", type(branch1x1), branch1x1.shape )

      branch_pool = F.avg_pool2d(image, kernel_size=3, stride=1, padding=1) # 64 10 (W1−F)/S+1=12 12 how 12?  
      # print("brach pool 24 details", type(branch_pool), branch_pool.shape )
      branch241x1 = self.b241x1(branch_pool)  #out 64 24 12 12
      # print("brach24 details", type(branch241x1), branch241x1.shape )

      branch5x5 = self.b5x5_2(self.b5x5_1(image)) #out 64 16 12 12 :> 64 24 W2=(W1−F+2P)/S+1=(12-5+4) +1=12 12 
      # print("5x5 details", type(branch5x5), branch5x5.shape )

      branch3x3 = self.b3x3_3(self.b3x3_2(self.b3x3_1(image))) #1 64 16 12 12 :> 64 24 12 12 : 64 24 12 12 
      # print("3x3 details", type(branch3x3), branch3x3.shape )

      output = [branch1x1, branch5x5, branch3x3, branch241x1] # out [bs=64 chanell=[16+24+24+24]=88 12 12] 2nd 64 88 4 4 
      return torch.cat(output, 1)

In [0]:
class Net(torch.nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    self.conv1 = nn.Conv2d(1,10,kernel_size=5)
    self.conv2 = nn.Conv2d(88,20,kernel_size=5 ) #why 88 becase 88 is out from incept 1

    self.incept1 = InceptionA(in_channel=10)
    self.incept2 = InceptionA(in_channel=20)

    self.mp = nn.MaxPool2d(2)
    self.fc = nn.Linear(1408,10) # 64 88 4 4 : 88*4*4 =1408

  def forward (self,x): #image 64 1 28 28
    in_size = x.size(0) # 64
    x = F.relu(self.mp(self.conv1(x))) # mp(64 10 24 24) : > 64 10 12 12
    x = self.incept1(x) # input 64 10 12 12 & out 64 88 12 12
    # print("x incept1 detail", type(x), x.shape)
    x = F.relu(self.mp(self.conv2(x))) # mp (64 20 12 12):> 64 20 6 6 :> 64 20 4 4 ?
    x = self.incept2(x) # input 64 20 4 4 & out 64 88 4 4
    # print("x incept2 detail", type(x), x.shape)
    x = x.view(in_size, -1) 
    x = self.fc(x)
    return F.log_softmax(x)

  


In [0]:
# class mnist(torch.nn.Module):
#   def __init__(self):
#     super(mnist,self).__init__()
#     self.incepA= inception(input_channel=1)
#     self.linear = torch.nn.Linear(24,10)

#   def forward(self, image):
#     in_size = image.size(0)
#     outputA = self.incepA(image)
#     outputA = self.linear(outputA)
#     outputA = F.relu(outputA)
#     outputA = outputA.view(in_size, -1)
#     outputA = self.linear(outputA)
#     return F.log_softmax(x)


In [88]:
model = Net()
model.to(device)

Net(
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(88, 20, kernel_size=(5, 5), stride=(1, 1))
  (incept1): InceptionA(
    (b1x1): Conv2d(10, 16, kernel_size=(1, 1), stride=(1, 1))
    (b241x1): Conv2d(10, 24, kernel_size=(1, 1), stride=(1, 1))
    (b5x5_1): Conv2d(10, 16, kernel_size=(1, 1), stride=(1, 1))
    (b5x5_2): Conv2d(16, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (b3x3_1): Conv2d(10, 16, kernel_size=(1, 1), stride=(1, 1))
    (b3x3_2): Conv2d(16, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (b3x3_3): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (incept2): InceptionA(
    (b1x1): Conv2d(20, 16, kernel_size=(1, 1), stride=(1, 1))
    (b241x1): Conv2d(20, 24, kernel_size=(1, 1), stride=(1, 1))
    (b5x5_1): Conv2d(20, 16, kernel_size=(1, 1), stride=(1, 1))
    (b5x5_2): Conv2d(16, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (b3x3_1): Conv2d(20, 16, kernel_size=(1, 1), stride=

Loss

In [0]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=.01, momentum=.4)

trainig

In [0]:

def train(epoch):
  model.train()
  for Epoch in range(5):
    for batch_idx, (data, target) in enumerate(train_loader):
      data, target = data.to(device), target.to(device)
      optimizer.zero_grad()
      logit = model(data)
      loss = criterion(logit, target)
      loss.backward()
      optimizer.step()
      if batch_idx % 500  == 0:
        # print('Train Epoch: {} | Batch Status: {}/{} ({:.0f}% | Loss: {:.6f}', format(
        #     epoch, batch_idx*len(data), len(train_loader.dataset), 100. *batch_idx / len(train_loader), loss.item()
        # ))
        print('Train Epoch: {} | Batch Status: {}/{} ({:.0f}%) | Loss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

In [0]:
def test():
  model.eval()  # what is meaning of it ?
  with torch.no_grad(): # its not compulsary but it will save some merory
    test_loss =0
    correct = 0
    for data, target in test_loader: # enumeration is not required as this is a single batch
      data, target = data.to(device), target.to(device)
      output = model(data)  # output.shape = 64,10
      #sum up the match loss
      test_loss += criterion(output, target).item()
      #get the index of the max value
      pred = output.data.max(1, keepdim=True)[1]  #output.data is ouput data/value ? what is max function https://www.journaldev.com/39463/pytorch-torch-max
      #1 is axis: 0 is row (ie colum wise max:> 1, 10, and 1 is colum ie row wise max)
      # provides max data and and its index
      correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(test_loader.dataset)
    print(f'===========================\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} '
            f'({100. * correct / len(test_loader.dataset):.0f}%)')
    # print(f'====================\n test set : Average loss: {test_loss: .4f}, Accuracy: {correct}/{len(test_loader.dataset)} ' 
            # f'({100. * correct/len(test_loader.dataset):. 0f}%)')
            
  #         ===========================
  # Test set: Average loss: 0.0016, Accuracy: 9787/10000 (98%)
  # Testing timr: 0m 25s

In [92]:
if __name__ == '__main__':
  since = time.time()
  for epoch in range(1,4):
    epoch_start = time.time()
    train(epoch)
    m, s = divmod(time.time() - epoch_start, 60)
    print(f'Training timr: {m:.0f}m {s:.0f}s')
    test()
    m, s = divmod(time.time() - epoch_start, 60)
    print(f'Testing timr: {m:.0f}m {s:.0f}s')
  m, s = divmod(time.time() - since, 60)
  print(f'Total Time: {m:.0f}m {s:.0f}s\nModel was trained on {device}!')

Train Epoch: 1 | Batch Status: 0/60000 (0%) | Loss: 2.308220




Train Epoch: 1 | Batch Status: 32000/60000 (53%) | Loss: 0.490040
Train Epoch: 1 | Batch Status: 0/60000 (0%) | Loss: 0.178098
Train Epoch: 1 | Batch Status: 32000/60000 (53%) | Loss: 0.139638
Train Epoch: 1 | Batch Status: 0/60000 (0%) | Loss: 0.062934
Train Epoch: 1 | Batch Status: 32000/60000 (53%) | Loss: 0.097600
Train Epoch: 1 | Batch Status: 0/60000 (0%) | Loss: 0.091277
Train Epoch: 1 | Batch Status: 32000/60000 (53%) | Loss: 0.123872
Train Epoch: 1 | Batch Status: 0/60000 (0%) | Loss: 0.036129
Train Epoch: 1 | Batch Status: 32000/60000 (53%) | Loss: 0.117809
Training timr: 1m 19s
Test set: Average loss: 0.0010, Accuracy: 9801/10000 (98%)
Testing timr: 1m 20s
Train Epoch: 2 | Batch Status: 0/60000 (0%) | Loss: 0.023398
Train Epoch: 2 | Batch Status: 32000/60000 (53%) | Loss: 0.079696
Train Epoch: 2 | Batch Status: 0/60000 (0%) | Loss: 0.041699
Train Epoch: 2 | Batch Status: 32000/60000 (53%) | Loss: 0.059996
Train Epoch: 2 | Batch Status: 0/60000 (0%) | Loss: 0.079186
Train Epo

KeyboardInterrupt: ignored