In [1]:
import numpy as np
import cv2                
import matplotlib.pyplot as plt
import torch
import torchvision.models as models
import torchvision.transforms as transforms
import os
import torch.nn as nn
import torch.nn.functional as F
import json
import torch.optim as optim
import torch.utils.data as data_utils

from glob import glob
from tqdm import tqdm
from PIL import Image, ImageFile 
from torchvision import datasets
from keras.utils import np_utils
from keras.preprocessing import image

                      
%matplotlib inline  

# Set PIL to be tolerant of image files that are truncated.
ImageFile.LOAD_TRUNCATED_IMAGES = True

Using TensorFlow backend.


In [31]:
# Number of images per batch for the loaders built in this cell.
batch_size = 32

# Per-split preprocessing pipelines. Every split ends with a 224x224 tensor
# normalized with the ImageNet channel mean / std.
image_transforms = dict(
    # Training split: random augmentation before the final 224 crop.
    train=transforms.Compose([
        transforms.RandomResizedCrop(256, scale=(0.8, 1.0)),
        transforms.RandomRotation(15),
        transforms.ColorJitter(),
        transforms.RandomHorizontalFlip(),
        transforms.CenterCrop(224),  # ImageNet input size
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),  # ImageNet statistics
    ]),
    # Validation split: deterministic resize + center crop, no augmentation.
    valid=transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]),
    # Test split: same deterministic pipeline as validation.
    test=transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]),
)
 
# Location of data
dog_file = '../../dogImages/'

# Build every split path the same way. dog_file already ends with a
# separator, so os.path.join (rather than string formatting) keeps the
# paths free of a doubled '/'.
train = os.path.join(dog_file, 'train')
valid = os.path.join(dog_file, 'valid')
test = os.path.join(dog_file, 'test')

# Datasets from each folder, each paired with the transform pipeline of its split
train_file = datasets.ImageFolder(train, transform=image_transforms['train'])
valid_file = datasets.ImageFolder(valid, transform=image_transforms['valid'])
test_file = datasets.ImageFolder(test, transform=image_transforms['test'])


# To avoid loading all of the data into memory at once, I use DataLoaders. First, I create
# a dataset object from each image folder, and then I pass it to a DataLoader.
# At training time, the DataLoader loads the images from disk, applies the transformations,
# and yields a batch. To train and validate, we iterate through all the batches in the respective
# DataLoader.
# One crucial aspect is to shuffle the data before passing it to the network.
# This means that the ordering of the image categories changes on each pass through the data
# (one pass through the data is one training epoch).



# One shuffled DataLoader per split, all sharing the same batch size.
loaders = {
    split: torch.utils.data.DataLoader(dataset, batch_size, shuffle=True)
    for split, dataset in (
        ('train', train_file),
        ('valid', valid_file),
        ('test', test_file),
    )
}

In [32]:
# Pull a single batch from the training loader to sanity-check what it yields.
data, target = next(iter(loaders['train']))

In [33]:
# Display the targets of the batch fetched in the previous cell.
target

tensor([ 20,  41,  16,  84, 105,  95,  11,  34,  81,  14,  22,  50,  56, 129,
        105, 100,  91, 127,  78,  62,  61,  45, 128,  70,  90,  24,  56,  16,
         52,  35,  80,  24])

In [44]:
# Root folders of the two image collections.
dog_data_dir = '../../dogImages'
human_data_dir = 'lfw'

def get_data_dir(is_dog):
    """Return the train/valid/test directories for the chosen image set.

    Args:
        is_dog: truthy selects the dog images, falsy the human images.

    Returns:
        dict with keys 'train_dir', 'valid_dir' and 'test_dir'. For dogs each
        key points at its own sub-folder of ``dog_data_dir``; for humans all
        three keys point at ``human_data_dir`` itself.
    """
    if is_dog:
        root = dog_data_dir
        return {
            'train_dir': f'{root}/train',
            'valid_dir': f'{root}/valid',
            'test_dir': f'{root}/test',
        }

    # The human set has no per-split sub-folders: every split shares one directory.
    return dict.fromkeys(('train_dir', 'valid_dir', 'test_dir'), human_data_dir)

# Channel-wise normalization shared by every pipeline below (ImageNet mean / std).
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Training: random rotation and random resized crop as data augmentation.
train_transforms = transforms.Compose([transforms.RandomRotation((0, 360)),
                                       transforms.RandomResizedCrop(224),
                                       transforms.ToTensor(),
                                       normalize])

# Validation: deterministic resize + center crop. Random augmentation here
# would make the validation metrics vary from run to run, so this pipeline
# mirrors the test pipeline instead of the training one.
valid_transforms = transforms.Compose([transforms.Resize(256),
                                       transforms.CenterCrop(224),
                                       transforms.ToTensor(),
                                       normalize])

# Test: deterministic resize + center crop.
test_transforms = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      normalize])

def get_data_loaders(is_dog, batch_sizes=(32, 32, 32)):
    """Build the train/valid/test DataLoaders for the dog or human image set.

    Args:
        is_dog: truthy selects the dog directories, falsy the human directory
            (both resolved through ``get_data_dir``).
        batch_sizes: sequence whose first three items are the batch sizes of
            the train, valid and test loaders, in that order.

    Returns:
        dict with keys 'train', 'valid' and 'test', each mapping to a
        ``torch.utils.data.DataLoader``. Only the training loader shuffles.
    """
    data_dir_dict = get_data_dir(is_dog)

    train_dir = data_dir_dict['train_dir']
    valid_dir = data_dir_dict['valid_dir']
    test_dir = data_dir_dict['test_dir']

    # Every dataset is read from the directory selected by is_dog and paired
    # with the transform pipeline of its own split.
    train_data = datasets.ImageFolder(train_dir, transform=train_transforms)
    valid_data = datasets.ImageFolder(valid_dir, transform=valid_transforms)
    test_data = datasets.ImageFolder(test_dir, transform=test_transforms)

    # The training loader wraps the dataset built above and honors
    # batch_sizes[0], rather than relying on notebook-level globals.
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=batch_sizes[0],
                                               shuffle=True)
    valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=batch_sizes[1])
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_sizes[2])

    return {
        'train': train_loader,
        'valid': valid_loader,
        'test': test_loader
    }

In [45]:
# Smoke-test the helper: fetch one batch from the dog training loader
# (is_dog=True) and display its targets.
data, target = next(iter(get_data_loaders(True)['train']))
target

tensor([  8,  87, 117,  79,  90,  36,   0,  58,  20, 100,  89,  88, 130,  55,
         97,  27, 100,  13,  76,  18,  95,  37,  20,  20,  81,  55, 115, 111,
         82,  42, 125, 111])