# Augmentation Tutorial | Classification Preperation

### In this notebook, my purpose is to show you to usage of albumentation with saved augmented data. That particular operation may be vital for our datasets because sometimes we can not get same results without saving the augmentations...

In [1]:
import torch
import torch.nn as nn

import numpy as np
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset, DataLoader

import io
import matplotlib.pyplot as plt
import time
import os
import copy
from glob import glob
from tqdm import tqdm
import warnings
import pandas as pd

import albumentations as A
from PIL import Image
import cv2
from albumentations.pytorch import ToTensorV2
warnings.simplefilter('ignore')


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
print(torch.cuda.get_device_name())


cuda:0
Tesla P100-PCIE-16GB


### We will specify our class labels in order to get correct labels when increasing our dataset size. In ClassificationDataset, we are aiming to preprocess the data and return label and images...

In [2]:
class ClassificationDataset(Dataset):
    def __init__(self, images_filepaths, transform=None):
        self.images_filepaths = images_filepaths
        self.transform = transform

    def __len__(self):
        return len(self.images_filepaths)

    def __getitem__(self, idx):
        image_filepath = self.images_filepaths[idx]
        image = cv2.imread(image_filepath)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if os.path.normpath(image_filepath).split(os.sep)[-2] == "birds":
            label = 0
        else:
            label = 1
        if self.transform is not None:
            image = self.transform(image=image)["image"]

        return image, label


### I generally use splitfolders in order to split my training-test-validation datas. 

### ratio=(train size,val size,test size)

In [4]:
!pip install split-folders
import splitfolders

path='../input/drone-bird-classification/drone_or_bird'
splitfolders.ratio(path,ratio=(0.7,0.2,0.1))

[0m

Copying files: 330 files [00:01, 227.24 files/s]


In [7]:
class_names = os.listdir('../input/drone-bird-classification/drone_or_bird')
class_names

['birds', 'drones']

### With glob we are taking our file paths...

In [8]:
datasets={
        'train':[],
        'val':[],
        'test':[]
    }
for phase in ['train','val','test']:
    l=[]
    for i in glob(f'./output/{phase}/**/*'):
        l.append(i)
    datasets[phase]=l

### We can determine our specific augmentation details like this. For more information you can visit https://albumentations.ai/docs/

In [10]:
train_transform = A.Compose(
    [   A.Resize(height=180, width=180),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
        A.RGBShift(r_shift_limit=15, g_shift_limit=15, b_shift_limit=15, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.ColorJitter(),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ]
)
original_transform = A.Compose(
    [   A.Resize(180,180),
        A.CenterCrop(height=128, width=128),
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        ToTensorV2(),
    ]
)
alb_dataset = ClassificationDataset(images_filepaths=datasets['train'], transform=train_transform)
original_dataset=ClassificationDataset(images_filepaths=datasets['train'], transform=original_transform)

dataset_sizes = {x: len(datasets[x]) for x in ['train', 'val']}
class_names = ['birds','drones']
dataset_sizes

{'train': 230, 'val': 66}

### We need folders for our augmented images so we are creating them and saving them with unique names.

In [13]:
import uuid
try:
    os.mkdir('./prepdata')
    os.mkdir('./prepdata/train')
    os.mkdir('./prepdata/train/birds')
    os.mkdir('./prepdata/train/drones')
   
except:
    print('Dosyalar var')

def OriginalSave(originalDataset,limit):
    s={0:'birds',1:'drones'}
    originalDataset.transform = A.Compose([t for t in originalDataset.transform if not isinstance(t, (A.Normalize, ToTensorV2))])
    
    for idx in range(limit):
        try:
            image,label=originalDataset[idx]

            cv2.imwrite(f'./prepdata/{s[label]}/{str(uuid.uuid4())}.jpg',image)
        except:
            print('Error')
OriginalSave(original_dataset, dataset_sizes['train'])

Dosyalar var
Error
Error
Error


### In this function, we will be generating the augmented images. Also with that operation, we can balance our data with for loops by specifying the range values.

In [15]:
def AlbSave(albDataset,limit):
    s={0:'birds',1:'drones'}
    sizes={'birds':165,'drones':165}

    albDataset.transform = A.Compose([t for t in albDataset.transform if not isinstance(t, (A.Normalize, ToTensorV2))])
    for idx in range(limit):
        for _ in range(4):
            try:
                image,label=albDataset[idx]
                if label==0:
                    cv2.imwrite(f'./prepdata/train/{s[label]}/{str(uuid.uuid4())}.jpg',image)
            except:
                print('Error')

        for _ in range(4):
            try:
                image,label=albDataset[idx]
                if label==1:
                    cv2.imwrite(f'./prepdata/train/{s[label]}/{str(uuid.uuid4())}.jpg',image)
            except:
                print('Error')


AlbSave(alb_dataset,dataset_sizes['train'])

Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error
Error


### It appears, that some images are problematic. These errors do not arise based on the code...

In [16]:
import shutil
shutil.move('./output/val','./prepdata/')

'./prepdata/val'