In [None]:
# !wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_img_val.tar
# !wget https://image-net.org/data/ILSVRC/2012/ILSVRC2012_devkit_t12.tar.gz

In [None]:
!ls

ILSVRC2012_devkit_t12.tar.gz  meta.bin	   val
ILSVRC2012_img_val.tar	      sample_data


In [None]:
import torch
import torchvision
import torchvision.transforms as transforms

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225]),
])


dataset_val = torchvision.datasets.ImageNet(root= "./", split='val', download= None, transform = transform)
data_loader = torch.utils.data.DataLoader(dataset_val,
                                          batch_size=16,
                                          shuffle=False,
                                          num_workers=2)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


In [None]:
[4,8,2,3,6]

In [None]:
from tqdm.notebook import tqdm

def validate(model, dataset, device):
    correct, all = 0,0
    with torch.no_grad():
        for batch in tqdm(dataset):
            img , label = batch[0].to(device), batch[1].to(device)
            output = model(img)
            all += len(label)
            correct += sum(label == torch.argmax(output, dim=1))
    print('Accuracy : %.4f' % (correct/all))

In [None]:
import torchvision.models as models
from torchsummary import summary as summary_

model = models.alexnet(pretrained=True).to(device)
summary_(model,(3,224,224))
validate(model, data_loader, device)


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 55, 55]          23,296
              ReLU-2           [-1, 64, 55, 55]               0
         MaxPool2d-3           [-1, 64, 27, 27]               0
            Conv2d-4          [-1, 192, 27, 27]         307,392
              ReLU-5          [-1, 192, 27, 27]               0
         MaxPool2d-6          [-1, 192, 13, 13]               0
            Conv2d-7          [-1, 384, 13, 13]         663,936
              ReLU-8          [-1, 384, 13, 13]               0
            Conv2d-9          [-1, 256, 13, 13]         884,992
             ReLU-10          [-1, 256, 13, 13]               0
           Conv2d-11          [-1, 256, 13, 13]         590,080
             ReLU-12          [-1, 256, 13, 13]               0
        MaxPool2d-13            [-1, 256, 6, 6]               0
AdaptiveAvgPool2d-14            [-1, 25

HBox(children=(FloatProgress(value=0.0, max=3125.0), HTML(value='')))


Accuracy : 0.4798


In [None]:
model = models.vgg16(pretrained=True).to(device)
summary_(model,(3,224,224))
validate(model, data_loader, device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
             ReLU-14          [-1, 256,

HBox(children=(FloatProgress(value=0.0, max=3125.0), HTML(value='')))


Accuracy : 0.6000


In [None]:
model = models.resnet18(pretrained=True).to(device)
summary_(model,(3,224,224))
validate(model, data_loader, device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]          36,864
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
       BasicBlock-11           [-1, 64, 56, 56]               0
           Conv2d-12           [-1, 64, 56, 56]          36,864
      BatchNorm2d-13           [-1, 64, 56, 56]             128
             ReLU-14           [-1, 64,

HBox(children=(FloatProgress(value=0.0, max=3125.0), HTML(value='')))


Accuracy : 0.6727


In [None]:

model = models.googlenet(pretrained=True).to(device)
summary_(model,(3,224,224))
validate(model, data_loader, device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
       BasicConv2d-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]           4,096
       BatchNorm2d-6           [-1, 64, 56, 56]             128
       BasicConv2d-7           [-1, 64, 56, 56]               0
            Conv2d-8          [-1, 192, 56, 56]         110,592
       BatchNorm2d-9          [-1, 192, 56, 56]             384
      BasicConv2d-10          [-1, 192, 56, 56]               0
        MaxPool2d-11          [-1, 192, 28, 28]               0
           Conv2d-12           [-1, 64, 28, 28]          12,288
      BatchNorm2d-13           [-1, 64, 28, 28]             128
      BasicConv2d-14           [-1, 64,

HBox(children=(FloatProgress(value=0.0, max=3125.0), HTML(value='')))


Accuracy : 0.6697


# Custom DataLoader 1
```latex
├── data
│    ├── ILSVRC2012_img_val
│    │    ├── *.JPEG
│    ├── ILSVRC2012_bbox_val_v3/val
│    │    ├── val
│    │    │   ├── *.xml


In [None]:
from glob import glob
from skimage import io, transform
from xml.etree.ElementTree import parse
from torch.utils.data import Dataset, DataLoader




class ImageNetDataloader(Dataset):
    def __init__(self, img_dir, label_dir, sub_dir, transform = None):
        self.transform = transform
        f = open(sub_dir, 'r')
        code2label = {}
        for r in f.readlines():
            code2label[r.split(" ")[0]] = int(r.split(" ")[1])

        self.label_file = sorted(glob(label_dir +"/*.xml"))
        self.image_file = sorted(glob(img_dir +"/*.JPEG"))
        self.labels = [code2label[parse(f).getroot().find("object").findtext("name")] for f in self.label_file]
        
        
    def __len__(self):
        return len(self.lables)

    
    def __getitem__(self,idx):
        if self.transform == None:
            img = io.imread(self.image_file[idx])
        else:
            img = transform(io.imread(self.image_file[idx]))
        return img, self.labels[idx]
                                     

                                 
dataloader = ImageNetDataloader(img_dir = "ILSVRC2012_img_val/",  label_dir = "ILSVRC2012_bbox_val_v3/val/",
                                   sub_dir = "label.txt")                        

# Custom DataLoader 2
```latex
├── data
│    ├── n02119789 
│    │    ├── *.JPEG
│    ├── n02100735  
│    │    ├── *.JPEG
│    ├── n02110185  
│    │    ├── *.JPEG
│    ├── n02096294  
│    │    ├── *.JPEG
        ...


In [None]:
import os
import json
import random
from glob import glob
from skimage import io, transform
from xml.etree.ElementTree import parse
from torch.utils.data import Dataset, DataLoader
from PIL import Image




class ImageNetDataloader(Dataset):
    def __init__(self, data_dir, category_dir = "imageNetCategory.json", random_rate = 0.0,transform = None):
        self.transform = transform
        f = open(sub_dir, 'r')
        code2label = {}
        for r in f.readlines():
            code2label[r.split(" ")[0]] = int(r.split(" ")[1])
        self.labels = []
        self.img_dirs = []
        for files in os.listdir(data_dir):
            for f in os.listdir(data_dir + "/"+files):
                self.labels.append(code2label[files])
                self.img_dirs.append(data_dir + "/" + files + "/" + f)

                
    def __len__(self):
        return len(self.labels)

    
    def __getitem__(self,idx):
        if self.transform == None:
            img = Image.open(self.image_file[idx]).convert("RGB")
        else:
            img = self.transform(Image.open(self.image_file[idx]).convert("RGB"))
        return img, labels[idx]          

# dataloader_train = ImageNetDataloader(data_dir = "train/", random_rate = 0.5)

# dataloader_val = ImageNetDataloader(data_dir = "val/", random_rate = 0.5)