In [1]:
import albumentations as A
import numpy as np

import matplotlib.pyplot as plt
from torchvision.datasets import Caltech256 ,Caltech101 ,CIFAR100,CIFAR10,MNIST,ImageNet
import os
from PIL import Image
from urllib.request import urlretrieve
import requests
import tarfile
from sklearn.model_selection import train_test_split
from torch.utils.data import Subset

In [2]:
class Custom_Cifar_10(CIFAR10) :
    """CIFAR-10 dataset that runs an albumentations pipeline and yields CHW arrays.

    Args:
        root: Directory that holds (or will receive) the CIFAR-10 archive.
        transform: albumentations transform. Only used when ``multi`` is True;
            in single-scale mode it is replaced by the built-in pipeline.
        multi: When True, ``S`` is drawn at random from ``[s_min, s_max)`` and
            no default pipeline is built.
        s_max: Upper bound for the random scale (exclusive); 512 when None.
        s_min: Lower bound for the random scale, and the fixed scale ``S`` in
            single-scale mode.
        download: Forwarded to ``CIFAR10``.
        val: True for evaluation data; disables the RGB-shift augmentation.
        train: Forwarded to ``CIFAR10`` to select the train or test split.
    """
    def __init__(self,root,transform = None,multi=False,s_max=None,s_min=256,download=False,val=False,train=True):

        self.multi = multi
        # Honour the caller's bounds instead of silently overwriting them.
        self.s_max = 512 if s_max is None else s_max
        self.s_min = s_min
        if multi :
            # np.random.randint excludes `high`, so S lies in [s_min, s_max).
            self.S = np.random.randint(low=self.s_min,high=self.s_max)
        else :
            self.S = s_min
            transform = A.Compose(
                    [
                        A.Normalize(mean =(0.5071, 0.4867, 0.4408) , std = (0.2675, 0.2565, 0.2761)),
                        A.SmallestMaxSize(max_size=self.S),
                        A.RandomCrop(height =224,width=224),
                        A.HorizontalFlip(),
                    ]

            )
        super().__init__(root,transform=transform,train=train,download=download)
        # The flag must follow the `val` argument; it used to be set from
        # `train`, which switched the colour augmentation off for training data.
        self.val = val
        # Built once instead of on every __getitem__ call.
        # NOTE(review): this runs after Normalize on float data - confirm that the
        # default shift limits are sensible for normalised inputs.
        self._rgb_shift = A.RGBShift()

    def __getitem__(self, index: int) :
        """Return the sample at ``index``.

        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where image is a float32 array in
            channel-first layout and target is index of the target class.
        """
        img, target = self.data[index], self.targets[index]

        # Go through PIL so a single-channel image can be promoted to RGB.
        img = Image.fromarray(img)
        if img.mode == 'L' : img = img.convert('RGB')
        img = np.array(img,dtype=np.float32)

        if self.transform is not None:
            img = self.transform(image=img)['image']
            # Colour shift is a training-only augmentation.
            if img.ndim == 3 and not self.val :
                img = self._rgb_shift(image=img)['image']

        if self.target_transform is not None:
            target = self.target_transform(target)

        # HWC -> CHW
        img = img.transpose((2,0,1))

        return img, target

In [8]:
# CIFAR-10 training split, stored in (and downloaded to) the working directory.
cifar_10_custom = Custom_Cifar_10(
    root=os.getcwd(),
    download=True,
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /home/woongjoon/DeepLearningPaper-Reproducing/Vgg/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:10<00:00, 15915037.16it/s]


Extracting /home/woongjoon/DeepLearningPaper-Reproducing/Vgg/cifar-10-python.tar.gz to /home/woongjoon/DeepLearningPaper-Reproducing/Vgg


In [5]:
# CIFAR-10 test split used for validation: mark it as evaluation data and
# replace the training pipeline with a centre crop and no flip.
val_data = Custom_Cifar_10(
    root=os.getcwd(),
    train=False,
    download=True,
)
val_data.val = True
val_data.s_min = 256
val_data.transform = A.Compose([
    A.Normalize(
        mean=(0.5071, 0.4867, 0.4408),
        std=(0.2675, 0.2565, 0.2761),
    ),
    A.SmallestMaxSize(max_size=val_data.S),
    A.CenterCrop(height=224, width=224),
])

Files already downloaded and verified


In [6]:
# Number of samples in the validation split built above.
len(val_data)

10000

In [10]:
# Per-class sample count for the training split.
# Read the labels straight from `targets` (the list __getitem__ indexes) rather
# than iterating the dataset, which would normalise, resize and crop every
# image only to throw it away.
label_dict = {}
for label in cifar_10_custom.targets:
    label_dict[label] = label_dict.get(label, 0) + 1

In [7]:
class Cusotm_MNIST(MNIST) :
    """MNIST dataset that runs an albumentations pipeline and yields CHW arrays.

    Digits are promoted from single-channel to RGB before the pipeline runs.

    Args:
        root: Directory that holds (or will receive) the MNIST files.
        transform: albumentations transform. Only used when ``multi`` is True;
            in single-scale mode it is replaced by the built-in pipeline.
        multi: When True, ``S`` is drawn at random from ``[s_min, s_max)`` and
            no default pipeline is built.
        s_max: Upper bound for the random scale (exclusive); 512 when None.
        s_min: Lower bound for the random scale, and the fixed scale ``S`` in
            single-scale mode.
        download: Forwarded to ``MNIST``.
        val: True for evaluation data; disables the RGB-shift augmentation.
        train: Forwarded to ``MNIST`` to select the train or test split.
    """
    def __init__(self,root,transform = None,multi=False,s_max=None,s_min=256,download=False,val=False,train=True):

        self.multi = multi
        # Honour the caller's bounds instead of silently overwriting them.
        self.s_max = 512 if s_max is None else s_max
        self.s_min = s_min
        if multi :
            # np.random.randint excludes `high`, so S lies in [s_min, s_max).
            self.S = np.random.randint(low=self.s_min,high=self.s_max)
        else :
            self.S = s_min
            transform = A.Compose(
                    [
                        # NOTE(review): default Normalize statistics, not
                        # MNIST-specific ones - confirm this is intended.
                        A.Normalize(),
                        A.SmallestMaxSize(max_size=self.S),
                        A.RandomCrop(height =224,width=224),
                        A.HorizontalFlip(),
                    ]

            )
        super().__init__(root,transform=transform,train=train,download=download)
        # The flag must follow the `val` argument; it used to be set from
        # `train`, which switched the colour augmentation off for training data.
        self.val = val
        # Built once instead of on every __getitem__ call.
        self._rgb_shift = A.RGBShift()

    def __getitem__(self, index: int) :
        """Return the sample at ``index``.

        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where image is a float32 array in
            channel-first layout and target is index of the target class.
        """
        img, target = self.data[index], self.targets[index]

        # Convert to a numpy array, then through PIL so the single-channel
        # digit can be promoted to three channels.
        img = Image.fromarray(np.array(img))
        if img.mode == 'L' : img = img.convert('RGB')
        img = np.array(img,dtype=np.float32)

        if self.transform is not None:
            img = self.transform(image=img)['image']
            # Colour shift is a training-only augmentation.
            if img.ndim == 3 and not self.val :
                img = self._rgb_shift(image=img)['image']

        if self.target_transform is not None:
            target = self.target_transform(target)

        # HWC -> CHW
        img = img.transpose((2,0,1))

        return img, target

In [20]:
# MNIST training split, stored in (and downloaded to) the working directory.
custom_mnist = Cusotm_MNIST(
    root=os.getcwd(),
    download=True,
)

In [9]:
# MNIST test split used for validation: mark it as evaluation data and
# replace the training pipeline with a centre crop and no flip.
val_data = Cusotm_MNIST(
    root=os.getcwd(),
    train=False,
    download=True,
)
val_data.val = True
val_data.s_min = 256
val_data.transform = A.Compose([
    A.Normalize(),
    A.SmallestMaxSize(max_size=val_data.S),
    A.CenterCrop(height=224, width=224),
])

In [10]:
# Show the dataset summary.
# NOTE(review): the recorded output lists RandomCrop/HorizontalFlip even though
# `val_data.transform` was reassigned in the previous cell - presumably the repr
# reports the pipeline captured at construction time; verify before relying on it.
val_data

Dataset Cusotm_MNIST
    Number of datapoints: 10000
    Root location: /home/woongjoon/DeepLearningPaper-Reproducing/Vgg
    Split: Test
    StandardTransform
Transform: Compose([
             Normalize(always_apply=False, p=1.0, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, normalization='standard'),
             SmallestMaxSize(always_apply=False, p=1.0, max_size=[256], interpolation=1),
             RandomCrop(always_apply=False, p=1.0, height=224, width=224),
             HorizontalFlip(always_apply=False, p=0.5),
           ], p=1.0, bbox_params=None, keypoint_params=None, additional_targets={}, is_check_shapes=True)

In [2]:
class Cusotm_ImageNet(ImageNet) :
    """ImageNet dataset that runs an albumentations pipeline and yields CHW arrays.

    Args:
        root: Directory that holds the prepared ImageNet files.
        transform: albumentations transform. Only used when ``multi`` is True;
            in single-scale mode it is replaced by the built-in pipeline.
        multi: When True, ``S`` is drawn at random from ``[s_min, s_max)`` and
            no default pipeline is built.
        s_max: Upper bound for the random scale (exclusive); 512 when None.
        s_min: Lower bound for the random scale, and the fixed scale ``S`` in
            single-scale mode.
        split: Split name forwarded to ``ImageNet``; None selects ``'train'``.
        val: True for evaluation data; disables the RGB-shift augmentation.
    """
    def __init__(self,root,transform = None,multi=False,s_max=None,s_min=256,split=None,val=False):

        self.multi = multi
        # Honour the caller's bounds instead of silently overwriting them.
        self.s_max = 512 if s_max is None else s_max
        self.s_min = s_min
        if multi :
            # np.random.randint excludes `high`, so S lies in [s_min, s_max).
            self.S = np.random.randint(low=self.s_min,high=self.s_max)
        else :
            self.S = s_min
            transform = A.Compose(
                    [
                        A.Normalize(),
                        A.SmallestMaxSize(max_size=self.S),
                        A.RandomCrop(height =224,width=224),
                        A.HorizontalFlip(),
                    ]

            )
        # The parent class only accepts a split name, so map the None default
        # to the training split instead of forwarding it.
        if split is None :
            split = 'train'
        super().__init__(root,transform=transform,split=split)
        self.val = val
        # Built once instead of on every __getitem__ call.
        self._rgb_shift = A.RGBShift()

    def __getitem__(self, index: int) :
        """Return the sample at ``index``.

        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where image is a float32 array in
            channel-first layout and target is index of the target class.
        """
        path, target = self.samples[index]
        img = self.loader(path)

        # Normalise whatever the loader returned into a PIL image so a
        # single-channel picture can be promoted to RGB.
        img = Image.fromarray(np.array(img))
        if img.mode == 'L' :
            img = img.convert('RGB')
        img = np.array(img,dtype=np.float32)

        if self.transform is not None:
            img = self.transform(image=img)['image']
            # Colour shift is a training-only augmentation.
            if img.ndim == 3 and not self.val :
                img = self._rgb_shift(image=img)['image']

        if self.target_transform is not None:
            target = self.target_transform(target)

        # HWC -> CHW
        img = img.transpose((2,0,1))

        return img, target

In [6]:
# Plain torchvision ImageNet training split, read from the local 'ImageNet' directory.
train_data = ImageNet(
    'ImageNet',
    split='train',
)

In [8]:
# Number of images in the ImageNet training split.
len(train_data)

1281167

In [3]:
# ImageNet validation split. Pass val=True at construction so the RGB-shift
# augmentation is off from the start rather than only after a later cell
# flips the flag.
val_data = Cusotm_ImageNet('ImageNet', split='val', val=True)

In [4]:
# Evaluation settings for the ImageNet validation split: no colour shift,
# and a centre crop with no flip in place of the training pipeline.
val_data.val = True
val_data.s_min = 256
val_data.transform = A.Compose([
    A.Normalize(),
    A.SmallestMaxSize(max_size=val_data.S),
    A.CenterCrop(height=224, width=224),
])

In [7]:
# Number of images in the ImageNet training split (repeats an earlier cell).
len(train_data)

1281167

In [49]:

# Labels of every validation sample, in dataset order.
# Take them from `samples` (the (path, target) pairs that __getitem__ indexes)
# so no image is decoded or transformed, and no dataset length is hard-coded.
[label for _, label in val_data.samples]

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
