<a href="https://colab.research.google.com/github/utkarshpandey6/Project-Report-1-/blob/main/Report.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch, gc
import torchvision
import numpy
import torch.nn as nn
import matplotlib.pyplot as plt
from torchvision.datasets import MNIST
from torch.utils.data import random_split, DataLoader
from torchvision import transforms
import torch.nn.functional as F
import tarfile
import os
from torchvision.datasets.utils import download_url
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor

In [None]:
class Self_Attn(nn.Module):
    """Self-attention layer over the spatial positions of a 2-D feature map.

    Queries and keys are 1x1-convolution projections to ``in_dim // 8``
    channels; values keep ``in_dim`` channels. The attended values are scaled
    by the learnable scalar ``gamma`` (initialised to zero) and added back to
    the input.
    """

    def __init__(self, in_dim, activation):
        super().__init__()
        # Both arguments are kept as attributes; `activation` is only stored
        # and is never applied anywhere in this class.
        self.chanel_in = in_dim
        self.activation = activation

        reduced_dim = in_dim // 8
        self.query_conv = nn.Conv2d(in_channels=in_dim, out_channels=reduced_dim, kernel_size=1)
        self.key_conv = nn.Conv2d(in_channels=in_dim, out_channels=reduced_dim, kernel_size=1)
        self.value_conv = nn.Conv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1)
        # gamma starts at zero, so a freshly built layer returns its input.
        self.gamma = nn.Parameter(torch.zeros(1))

        # Normalise each query's scores over all key positions (last axis).
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        """Apply self-attention and add the result to the input.

        Args:
            x: 4-D feature map, unpacked here as (batch, channels, width, height).

        Returns:
            A tensor with the same shape as ``x``: ``gamma * attended + x``.
            Only this tensor is returned; the attention map is not.
        """
        batch, channels, width, height = x.size()
        n_positions = width * height  # N

        # Flatten the two spatial axes so each position becomes one token.
        queries = self.query_conv(x).view(batch, -1, n_positions).transpose(1, 2)  # B x N x C'
        keys = self.key_conv(x).view(batch, -1, n_positions)                       # B x C' x N
        values = self.value_conv(x).view(batch, -1, n_positions)                   # B x C x N

        scores = torch.bmm(queries, keys)   # B x N x N
        attention = self.softmax(scores)    # each row sums to 1

        # Mix the values with the (transposed) attention weights, then restore
        # the original 4-D layout.
        attended = torch.bmm(values, attention.transpose(1, 2))
        attended = attended.view(batch, channels, width, height)

        return self.gamma * attended + x

In [None]:
class block(nn.Module):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions.

    Every convolution is followed by batch normalisation. The block emits
    ``expansion * planes`` channels, and only the 3x3 convolution applies
    ``stride``.
    """

    expansion = 4

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        # The skip path is an empty Sequential (identity) unless the main
        # path changes the resolution or channel count; then a strided 1x1
        # convolution plus batch norm projects the input to the output shape.
        self.shortcut = nn.Sequential()
        shape_changes = stride != 1 or in_planes != out_planes
        if shape_changes:
            projection = nn.Conv2d(in_planes, out_planes,
                                   kernel_size=1, stride=stride, bias=False)
            self.shortcut = nn.Sequential(projection, nn.BatchNorm2d(out_planes))

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        hidden = self.conv1(x)
        hidden = F.relu(self.bn1(hidden))

        hidden = self.conv2(hidden)
        hidden = F.relu(self.bn2(hidden))

        hidden = self.conv3(hidden)
        hidden = self.bn3(hidden)

        # No ReLU before the addition: the activation follows the sum.
        hidden += self.shortcut(x)
        return F.relu(hidden)



In [None]:
class BasicBlock(nn.Module):
    """Residual block built from two 3x3 convolutions with batch norm.

    The first convolution applies ``stride``; the second always uses
    stride 1. The block emits ``expansion * planes`` channels.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # Identity skip (empty Sequential) unless the main path changes the
        # resolution or channel count; then project with a strided 1x1
        # convolution followed by batch norm.
        self.shortcut = nn.Sequential()
        shape_changes = stride != 1 or in_planes != out_planes
        if shape_changes:
            projection = nn.Conv2d(in_planes, out_planes,
                                   kernel_size=1, stride=stride, bias=False)
            self.shortcut = nn.Sequential(projection, nn.BatchNorm2d(out_planes))

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        hidden = self.conv1(x)
        hidden = F.relu(self.bn1(hidden))

        hidden = self.conv2(hidden)
        hidden = self.bn2(hidden)

        # The final ReLU is applied after the residual sum.
        hidden += self.shortcut(x)
        return F.relu(hidden)

In [None]:

class ResNet(nn.Module):
    """ResNet backbone: a 3x3 stem, four residual stages and a linear head.

    Args:
        block: residual block class; it is called as
            ``block(in_planes, planes, stride)`` and must expose ``expansion``.
        num_blocks: number of blocks in each of the four stages.
        num_classes: size of the output layer.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        # Channel count entering the next block; _make_layer keeps it updated.
        self.in_planes = 64

        # Stem: 3 input channels -> 64, resolution unchanged.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        # Stage widths double while stages 2-4 each open with a stride-2 block.
        depth1, depth2, depth3, depth4 = num_blocks[0], num_blocks[1], num_blocks[2], num_blocks[3]
        self.layer1 = self._make_layer(block, 64, depth1, stride=1)
        self.layer2 = self._make_layer(block, 128, depth2, stride=2)
        self.layer3 = self._make_layer(block, 256, depth3, stride=2)
        self.layer4 = self._make_layer(block, 512, depth4, stride=2)

        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``."""
        stage = []
        for block_stride in [stride] + [1]*(num_blocks-1):
            stage.append(block(self.in_planes, planes, block_stride))
            # The next block consumes what this one produced.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the class scores for a batch of 3-channel images."""
        features = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)

        # Fixed 4x4 average pooling, then flatten everything but the batch
        # axis. The linear head expects exactly 512 * expansion features, so
        # the pooled map must be 1x1.
        pooled = F.avg_pool2d(features, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)

In [None]:
def ResNet34():
    """Build a ResNet of BasicBlock units with stage depths [3, 4, 6, 3]."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(block=BasicBlock, num_blocks=stage_depths)


def ResNet50():
    """Build a ResNet of bottleneck `block` units with stage depths [3, 4, 6, 3]."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(block=block, num_blocks=stage_depths)

In [None]:
# Architecture under test; swap in ResNet50() to train the bottleneck variant.
# Use the GPU when one is present and fall back to the CPU otherwise, so this
# cell does not fail on a runtime without CUDA.
model = ResNet34().to('cuda' if torch.cuda.is_available() else 'cpu')
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=

In [None]:
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor

# Root of the image data; the 'train' subfolder is what gets loaded below.
data_dir = './data/cifar10'

# Training-time pipeline: random 32x32 crop after 4 pixels of padding, random
# horizontal flip, tensor conversion, then per-channel normalisation.
# NOTE(review): the mean/std constants are presumably CIFAR-10 channel
# statistics - confirm against the dataset actually stored in data_dir.
train_steps = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
]
transform_train = transforms.Compose(train_steps)
dataset = ImageFolder(root=data_dir+'/train', transform=transform_train)

In [None]:
def show_example(img, label):
    """Print the class name and index for `label`, then display `img`.

    The class name is looked up in the module-level `dataset.classes`.
    `img` must be a 3-D tensor; presumably (channels, height, width) -
    confirm against the dataset's transform.
    """
    class_name = dataset.classes[label]
    index_text = "("+str(label)+")"
    print('Label: ', class_name, index_text)
    # Move axis 0 to the end before handing the image to matplotlib.
    channels_last = img.permute(1, 2, 0)
    plt.imshow(channels_last)

In [None]:
def split(dataset, split=0.2):
  """Randomly partition `dataset` into a training and a validation subset.

  The global torch RNG is re-seeded with 42 on every call, so the same
  dataset length always yields the same partition.

  Args:
    dataset: any object supporting ``len()`` and indexing.
    split: fraction of the samples placed in the validation subset
      (rounded down).

  Returns:
    ``[train_subset, val_subset]`` as produced by ``random_split``.
  """
  torch.manual_seed(42)
  n_total = len(dataset)
  n_val = int(n_total * split)
  n_train = n_total - n_val
  return random_split(dataset, [n_train, n_val])

In [None]:
# Hold out 20% of the samples (split()'s default fraction) for validation.
train_set, val_set = split(dataset, split=0.2)
(len(train_set), len(val_set))

(40000, 10000)

In [None]:
batch_size = 128

# Options shared by both loaders.
loader_options = dict(num_workers=2, pin_memory=True)

# Training batches are shuffled; validation batches are twice as large.
train_dl = DataLoader(train_set, batch_size=batch_size, shuffle=True, **loader_options)
val_dl = DataLoader(val_set, batch_size=batch_size * 2, **loader_options)

In [None]:
def get_device():
  """Return the CUDA device when one is available, otherwise the CPU device."""
  name = "cuda" if torch.cuda.is_available() else "cpu"
  return torch.device(name)

def to_device(data, device):
  """Move `data` to `device`, recursing into lists and tuples.

  A list or tuple comes back as a list whose elements were moved the same
  way (so tuples become lists). Anything else must provide
  ``.to(device, non_blocking=...)``.
  """
  if not isinstance(data, (list, tuple)):
    return data.to(device, non_blocking = True)

  moved = []
  for item in data:
    moved.append(to_device(item, device))
  return moved

class DataLoaderWrapper:
  """Wrap an iterable of batches so each batch is moved to a device.

  Attributes are `dl` (the wrapped iterable) and `device` (the target handed
  to `to_device` for every batch).
  """

  def __init__(self, dl, device):
    self.dl, self.device = dl, device

  def __len__(self):
    """Return the length of the wrapped iterable."""
    return len(self.dl)

  def __iter__(self):
    """Yield the batches of `dl` one at a time, each passed through `to_device`."""
    for batch in self.dl:
      moved = to_device(batch, self.device)
      yield moved

In [None]:
# Wrap both loaders so each batch is moved to the selected device as it is
# yielded.
device = get_device()
train_loader = DataLoaderWrapper(train_dl, device)
val_loader = DataLoaderWrapper(val_dl, device)

In [None]:
def accuracy(outputs, labels):
  """Return the fraction of rows whose highest-scoring column equals the label.

  Args:
    outputs: score tensor reduced over dim 1 to obtain the predicted index.
    labels: tensor of target indices compared element-wise with the
      predictions.

  Returns:
    A 0-dim tensor holding ``correct / len(predictions)``.
  """
  preds = outputs.max(dim=1).indices
  n_correct = (preds == labels).sum().item()
  return torch.tensor(n_correct / len(preds))

In [None]:
@torch.no_grad()
def evaluate(val_loader, model):
  """Compute the mean loss and accuracy of `model` over `val_loader`.

  Both metrics are averaged per sample, not per batch, so a smaller final
  batch does not carry the same weight as a full one.

  Args:
    val_loader: iterable yielding ``(images, labels)`` batches.
    model: module called as ``model(images)``; it is switched to eval mode
      and left in that mode.

  Returns:
    ``(epoch_loss, epoch_acc)`` as 0-dim tensors: the cross-entropy averaged
    over all samples and the fraction of correct predictions.

  Raises:
    RuntimeError: if `val_loader` yields no samples.
  """
  model.eval()

  loss_sum = 0.0
  n_correct = 0
  n_seen = 0
  for images, labels in val_loader:
    output = model(images)

    # Sum (not mean) within the batch so the final division by the total
    # sample count weights every sample equally.
    loss_sum += F.cross_entropy(output, labels, reduction='sum').item()

    _, preds = torch.max(output, dim=1)
    n_correct += torch.sum(preds == labels).item()
    n_seen += len(labels)

  if n_seen == 0:
    raise RuntimeError("evaluate() needs at least one validation sample")

  epoch_loss = torch.tensor(loss_sum / n_seen)
  epoch_acc = torch.tensor(n_correct / n_seen)
  return epoch_loss, epoch_acc

In [None]:
def fit(model, epoches, lr, train_loader, val_loader, opt=torch.optim.SGD):
  """Train `model` for `epoches` epochs, validating after each one.

  Every call builds a fresh optimizer and cosine learning-rate schedule, so
  calling fit() again continues from the current weights but restarts the
  learning rate at `lr`.

  Args:
    model: module to train in place.
    epoches: number of passes over `train_loader`.
    lr: initial learning rate handed to the optimizer.
    train_loader: iterable yielding ``(images, labels)`` training batches.
    val_loader: iterable passed to ``evaluate`` after every epoch.
    opt: optimizer factory called as ``opt(params, lr, ...)``. SGD (and its
      subclasses) is configured with momentum 0.9 and weight decay 5e-4;
      any other factory receives only the weight decay and must accept a
      ``weight_decay`` keyword.

  Returns:
    A list with one ``{'val_loss': float, 'val_acc': float}`` dict per epoch.
  """
  params = model.parameters()
  # Honour the requested optimizer; `momentum` is passed only to the SGD
  # family.
  if isinstance(opt, type) and issubclass(opt, torch.optim.SGD):
    optimizer = opt(params, lr, momentum=0.9, weight_decay=5e-4)
  else:
    optimizer = opt(params, lr, weight_decay=5e-4)

  # NOTE(review): T_max is fixed at 200 scheduler steps regardless of
  # `epoches`, so a short run covers only the start of the cosine curve.
  scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

  history = []
  for epoch in range(epoches):
    # evaluate() leaves the model in eval mode, so re-enable training mode
    # once at the start of every epoch.
    model.train()
    for images, labels in train_loader:
      output = model(images)
      loss = F.cross_entropy(output, labels)
      loss.backward()
      optimizer.step()
      optimizer.zero_grad()

    epoch_loss, epoch_acc = evaluate(val_loader, model)
    print("Epoch [{}], epoch_loss: {:.4f}, epoch_acc: {:.4f}".format(epoch, epoch_loss, epoch_acc))
    history.append({'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()})

    # One scheduler step per epoch.
    scheduler.step()
  return history

In [None]:
# First training run: 10 epochs starting at a learning rate of 0.1.
history = fit(model, epoches=10, lr=0.1, train_loader=train_loader, val_loader=val_loader)

Epoch [0], epoch_loss: 1.4638, epoch_acc: 0.4695
Epoch [1], epoch_loss: 1.1449, epoch_acc: 0.5915
Epoch [2], epoch_loss: 0.9671, epoch_acc: 0.6551
Epoch [3], epoch_loss: 0.8760, epoch_acc: 0.6973
Epoch [4], epoch_loss: 0.8354, epoch_acc: 0.7050
Epoch [5], epoch_loss: 0.7179, epoch_acc: 0.7530
Epoch [6], epoch_loss: 0.6427, epoch_acc: 0.7792
Epoch [7], epoch_loss: 0.7423, epoch_acc: 0.7481
Epoch [8], epoch_loss: 0.6572, epoch_acc: 0.7789
Epoch [9], epoch_loss: 0.5485, epoch_acc: 0.8098


In [None]:
# Second run: 10 more epochs on the already-trained weights. fit() creates a
# new optimizer and scheduler, so the learning rate starts again at 0.1.
history1 = fit(model, epoches=10, lr=0.1, train_loader=train_loader, val_loader=val_loader)

Epoch [0], epoch_loss: 0.6844, epoch_acc: 0.7746
Epoch [1], epoch_loss: 0.7591, epoch_acc: 0.7431
Epoch [2], epoch_loss: 0.5964, epoch_acc: 0.7982
Epoch [3], epoch_loss: 0.7431, epoch_acc: 0.7607
Epoch [4], epoch_loss: 0.5247, epoch_acc: 0.8186
Epoch [5], epoch_loss: 0.6032, epoch_acc: 0.8032
Epoch [6], epoch_loss: 0.6408, epoch_acc: 0.7873
Epoch [7], epoch_loss: 0.5656, epoch_acc: 0.8052
Epoch [8], epoch_loss: 0.5840, epoch_acc: 0.8014
Epoch [9], epoch_loss: 0.7646, epoch_acc: 0.7508


In [None]:
# Third run: 20 more epochs on the current weights, again starting from a
# learning rate of 0.1 with a new optimizer and scheduler.
history2 = fit(model, epoches=20, lr=0.1, train_loader=train_loader, val_loader=val_loader)

Epoch [0], epoch_loss: 0.4924, epoch_acc: 0.8291
Epoch [1], epoch_loss: 0.5001, epoch_acc: 0.8338
Epoch [2], epoch_loss: 0.6404, epoch_acc: 0.7949
Epoch [3], epoch_loss: 0.5748, epoch_acc: 0.8111
Epoch [4], epoch_loss: 0.5461, epoch_acc: 0.8228
Epoch [5], epoch_loss: 0.5846, epoch_acc: 0.8088
Epoch [6], epoch_loss: 0.7989, epoch_acc: 0.7631
Epoch [7], epoch_loss: 0.5537, epoch_acc: 0.8143
Epoch [8], epoch_loss: 0.4873, epoch_acc: 0.8353
Epoch [9], epoch_loss: 0.4748, epoch_acc: 0.8418
Epoch [10], epoch_loss: 0.7791, epoch_acc: 0.7623
Epoch [11], epoch_loss: 0.5027, epoch_acc: 0.8309
Epoch [12], epoch_loss: 0.5994, epoch_acc: 0.8119
