In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from glob import glob
import os
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from torchvision.datasets import ImageFolder

In [None]:
# Flag read by later cells to decide whether tensors/models are moved to the GPU.
is_cuda = torch.cuda.is_available()

# Load data

In [None]:
# Root folder of the image dataset; the cells below expect one sub-folder per
# category directly underneath it (globbed as '*/*.jpg' and read via ImageFolder).
# NOTE(review): machine-specific absolute path -- adjust before running elsewhere.
path = 'C:/Users/Yeonkang/Desktop/Deep_Learning/Image_Recognition/Vanilla_CNN/Python/data/101_ObjectCategories/dataset'

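- This notebook uses the Caltech-101 dataset (the `101_ObjectCategories` folder referenced in `path`). **Note:** the link originally given here, [Dogs vs. Cats on Kaggle](https://www.kaggle.com/c/dogs-vs-cats/data), points to a different dataset; download Caltech-101 instead.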

In [None]:
# Collect every JPEG that sits exactly one directory level below `path`.
# glob's result order depends on the file system, so the list is sorted to make
# the later seeded shuffle select the same files on every run and machine.
files = sorted(glob(os.path.join(path, '*/*.jpg')))
print(f'Total no of images {len(files)}')

In [None]:
# Derive the image count from the files actually found (8677 in the author's
# copy of the dataset) so the shuffle indices can never run past `files`.
no_of_images = len(files)

## Determine size

In [None]:
# Pipeline that only converts images to tensors (no resizing), so the native
# image dimensions can be inspected in the following cells.
null_transform = transforms.Compose([
    transforms.ToTensor(),
])
data = ImageFolder(root=path, transform=null_transform)

In [None]:
# Display the image tensor of sample 2100 (element 0 of the (image, label) pair).
data[2100][0]

In [None]:
# Record the native height and width of every image in the dataset.
height, width = [], []

for i in range(len(data)):
    # Index the dataset once per image: every data[i] access re-reads and
    # decodes the file, so the original two accesses doubled the I/O.
    image_shape = data[i][0].shape
    height.append(image_shape[1])
    width.append(image_shape[2])

In [None]:
# Distribution of the image heights collected above.
plt.hist(height)

In [None]:
# Distribution of the image widths collected above.
plt.hist(width)

In [None]:
def imshow(inp):
    """Plot an image tensor with matplotlib.

    Axis 0 of ``inp`` is moved to the last position (axes reordered to
    (1, 2, 0)) and the values are clipped to [0, 1] before plotting.
    """
    image = np.transpose(inp.numpy(), (1, 2, 0))
    plt.imshow(np.clip(image, 0, 1))

In [None]:
# Plot sample 2100 as loaded by the ToTensor-only pipeline.
imshow(data[2100][0]) #Image before resizing

In [None]:
# Reload the dataset with every image resized to 224x224 and preview the same
# sample. Note that `data` is rebound to the resized dataset here.
transform224 = transforms.Compose([transforms.Resize((224,224)), transforms.ToTensor()])
data = ImageFolder(path, transform224)
imshow(data[2100][0])

In [None]:
# Reload the dataset with every image resized to 448x448 and preview the same
# sample. `data` is rebound again, so the cells below see the 448x448 version.
transform448 = transforms.Compose([transforms.Resize((448,448)), transforms.ToTensor()])
data = ImageFolder(path, transform448)
imshow(data[2100][0])

## Compute mean and standard deviation of each RGB channel

In [None]:
# Gather the three colour planes of every image as NumPy arrays.
red, green, blue = [], [], []

for i in range(len(data)):
    # Load the image once: each data[i] access re-reads and decodes the file,
    # so the original three accesses tripled the I/O and kept three separate
    # full tensors alive per image.
    image = data[i][0]
    red.append(image[0].numpy())
    green.append(image[1].numpy())
    blue.append(image[2].numpy())

In [None]:
# Flatten ALL collected pixels of each channel into one 1-D array.
# The original indexed `[0]` after stacking, which kept only the first image,
# so the channel statistics were computed from a single picture rather than
# from the whole dataset.
red, green, blue = (
    np.array(channel).reshape(-1) for channel in (red, green, blue)
)

In [None]:
# Per-channel mean and (population) standard deviation of the pixel values.
r_mean, r_sd = red.mean(), red.std()
g_mean, g_sd = green.mean(), green.std()
b_mean, b_sd = blue.mean(), blue.std()

## Split into train and validation data

In [None]:
# Seed NumPy's global RNG so the permutation is repeatable, then draw a random
# ordering of the indices 0..no_of_images-1, used below to index `files`.
np.random.seed(0)
shuffle = np.random.permutation(no_of_images)

In [None]:
def get_folder(data):
    """Return the name of the directory containing an image, with a trailing '/'.

    ``data`` is a file path such as the entries produced by ``glob``; it may mix
    forward slashes and Windows back-slashes. The category folder is taken as
    the second-to-last path component, so the result no longer depends on how
    deep the dataset root sits in the file system (the original hard-coded the
    10th '/'-separated component and required back-slash separators after it).

    Raises IndexError if the path has no parent directory component.
    """
    # Normalise separators so Windows-style and POSIX-style paths parse alike.
    parts = data.replace('\\', '/').split('/')
    return parts[-2] + '/'

In [None]:
# Category folder (with trailing '/') for every image path in `files`.
lst_folder = [get_folder(file_path) for file_path in files]

In [None]:
# Collapse to the distinct folder names (order is arbitrary after set()) and
# show how many categories there are.
lst_folder = list(set(lst_folder))
len(lst_folder)

- 101 categories

In [None]:
# Rebind `path` to the parent of the 'dataset' folder; the 'train' and 'valid'
# directories are created under this location in the next cell.
# NOTE(review): machine-specific absolute path -- adjust before running elsewhere.
path = 'C:/Users/Yeonkang/Desktop/Deep_Learning/Image_Recognition/Vanilla_CNN/Python/data/101_ObjectCategories'

In [None]:
# Create <path>/train and <path>/valid plus one sub-folder per category.
# exist_ok=True keeps the cell re-runnable: plain os.mkdir raises
# FileExistsError as soon as the directories exist from a previous run.
for t in ['train', 'valid']:
    os.makedirs(os.path.join(path, t), exist_ok=True)
    for folder in lst_folder:
        os.makedirs(os.path.join(path, t, folder), exist_ok=True)

In [None]:
# Move the first 677 shuffled images into the validation split.
for i in shuffle[:677]:
    # Take the last two path components (category folder, file name) after
    # normalising back-slashes; the original relied on a hard-coded component
    # index that only matched one specific Windows directory depth.
    folder, image = files[i].replace('\\', '/').split('/')[-2:]
    os.rename(files[i], os.path.join(path, 'valid', folder, image))

In [None]:
# Move the remaining shuffled images (index 677 onwards) into the training split.
for i in shuffle[677:]:
    # Take the last two path components (category folder, file name) after
    # normalising back-slashes; the original relied on a hard-coded component
    # index that only matched one specific Windows directory depth.
    folder, image = files[i].replace('\\', '/').split('/')[-2:]
    os.rename(files[i], os.path.join(path, 'train', folder, image))

In [None]:
# Per-channel normalisation constants (hard-coded RGB mean / standard deviation).
norm_mean = [0.576, 0.537, 0.500]
norm_std = [0.224, 0.196, 0.188]

# Resize to 224x224, convert to a tensor, then normalise each channel.
simple_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
])

train = ImageFolder(root=os.path.join(path, 'train'), transform=simple_transform)
valid = ImageFolder(root=os.path.join(path, 'valid'), transform=simple_transform)

In [None]:
# Show the mapping from category folder name to integer class label.
train.class_to_idx

In [None]:
def imshow(inp):
    """Plot a normalised image tensor after undoing the normalisation.

    Axis 0 of ``inp`` is moved to the last position, each channel is multiplied
    by the hard-coded standard deviation and shifted by the hard-coded mean,
    and the result is clipped to [0, 1] before plotting. This definition
    replaces the earlier ``imshow`` of the same name.
    """
    mean = np.array([0.576,0.537,0.500])
    std = np.array([0.224,0.196,0.188])
    image = inp.numpy().transpose((1, 2, 0)) * std + mean
    plt.imshow(np.clip(image, 0, 1))

In [None]:
# Preview the first training image (element 0 of the (image, label) pair).
imshow(train[0][0])

In [None]:
# Both loaders share the same settings: mini-batches of 16, reshuffled every
# epoch, loaded by 3 worker processes.
loader_options = dict(batch_size=16, shuffle=True, num_workers=3)
train_data_loader = torch.utils.data.DataLoader(train, **loader_options)
valid_data_loader = torch.utils.data.DataLoader(valid, **loader_options)

# Build vanilla CNN

In [None]:
class Net(nn.Module):
    """Small CNN: two 5x5 convolutions followed by three fully connected layers.

    ``fc1`` expects 56180 flattened features, i.e. 20 feature maps of 53 x 53,
    which is what the two conv/pool stages produce for 224 x 224 inputs.
    The network outputs log-probabilities over 101 classes.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: 3 -> 10 -> 20 channels, 5x5 kernels, no padding.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=10, kernel_size=5, stride=1, padding=0)
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5, stride=1, padding=0)
        self.conv2_drop = nn.Dropout2d()
        # Classifier head: 56180 -> 5000 -> 500 -> 101.
        self.fc1 = nn.Linear(in_features=56180, out_features=5000)
        self.fc2 = nn.Linear(in_features=5000, out_features=500)
        self.fc3 = nn.Linear(in_features=500, out_features=101)

    def forward(self, x):
        """Return log-softmax class scores for a batch of images ``x``."""
        # Stage 1: conv -> 2x2 max-pool -> ReLU.
        stage1 = F.relu(F.max_pool2d(self.conv1(x), 2))

        # Stage 2: conv -> channel dropout -> 2x2 max-pool -> ReLU.
        dropped = self.conv2_drop(self.conv2(stage1))
        stage2 = F.relu(F.max_pool2d(dropped, 2))

        # Flatten everything except the batch dimension.
        flat = stage2.view(stage2.size(0), -1)

        # Fully connected layers, with dropout active only in training mode.
        hidden = F.dropout(F.relu(self.fc1(flat)), training=self.training)
        hidden = F.dropout(F.relu(self.fc2(hidden)), training=self.training)
        logits = self.fc3(hidden)
        return F.log_softmax(logits, dim=1)

1. convolutional layer1 - max pooling - ReLU activation
2. convolutional layer2 - dropout - max pooling - ReLU activation
3. view
4. linear layer1 - ReLU activation
5. dropout
6. linear layer2 - ReLU activation
7. dropout
8. linear layer3
9. log-softmax (the network returns log-probabilities, which is what `F.nll_loss` expects)

In [None]:
def fit(epoch, model, data_loader, phase='training', volatile=False):
    """Run one pass over ``data_loader`` and report the average loss and accuracy.

    Parameters
    ----------
    epoch : unused; kept so existing calls keep working.
    model : module whose output is log-probabilities (fed to ``F.nll_loss``).
    data_loader : iterable of ``(data, target)`` batches exposing ``.dataset``.
    phase : 'training' switches the model to train mode and updates weights via
        the module-level ``optimizer``; 'validation' switches it to eval mode.
    volatile : deprecated and ignored. It used to be forwarded to ``Variable``,
        which no longer honours it; gradient tracking is now disabled for every
        non-training phase with ``torch.set_grad_enabled``.

    Returns
    -------
    (loss, accuracy) : per-sample average loss as a Python float and accuracy
        in percent as a Python float.

    Relies on the module-level names ``is_cuda`` and ``optimizer``.
    """
    if phase == 'training':
        model.train()
    if phase == 'validation':
        model.eval()
    is_training = phase == 'training'
    running_loss = 0.0
    running_correct = 0

    for batch_idx, (data, target) in enumerate(data_loader):
        if is_cuda:
            data, target = data.cuda(), target.cuda()
        if is_training:
            optimizer.zero_grad()

        # Only build the autograd graph when the weights will be updated.
        with torch.set_grad_enabled(is_training):
            output = model(data)
            loss = F.nll_loss(output, target)

        with torch.no_grad():
            # Accumulate the SUM of per-sample losses. The original added the
            # per-batch mean and then divided by the dataset size, which
            # under-reported the loss by roughly the batch size.
            running_loss += F.nll_loss(output, target, reduction='sum').item()
            preds = output.max(dim=1, keepdim=True)[1]
            running_correct += preds.eq(target.view_as(preds)).sum().item()

        if is_training:
            loss.backward()
            optimizer.step()

    # Plain Python numbers: safe to print, append and plot, even on CUDA runs.
    loss = running_loss/len(data_loader.dataset)
    accuracy = 100. * running_correct/len(data_loader.dataset)

    print(f'{phase} loss is {loss:{5}.{2}} and {phase} accuracy is {running_correct}/{len(data_loader.dataset)}{accuracy:{10}.{4}}')
    return loss,accuracy

In [None]:
# Instantiate the network and move it to the GPU when one is available
# (nn.Module.cuda() moves the parameters and returns the module itself).
model = Net()
model = model.cuda() if is_cuda else model

In [None]:
# SGD with momentum; `fit` picks this optimizer up as a module-level name.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

# Per-epoch history used by the plotting cells below.
train_losses, val_losses = [], []
train_accuracy, val_accuracy = [], []

# Epochs 1..19: one training pass followed by one validation pass.
for epoch in range(1, 20):
    train_epoch_loss, train_epoch_accuracy = fit(
        epoch, model, train_data_loader, phase='training')
    val_epoch_loss, val_epoch_accuracy = fit(
        epoch, model, valid_data_loader, phase='validation')

    train_losses.append(train_epoch_loss)
    val_losses.append(val_epoch_loss)
    train_accuracy.append(train_epoch_accuracy)
    val_accuracy.append(val_epoch_accuracy)

In [None]:
# Loss per epoch: training as blue dots, validation as a red line.
plt.plot(range(1,len(train_losses)+1), train_losses, 'bo', label='training')
plt.plot(range(1,len(val_losses)+1), val_losses, 'r', label='validation')
plt.title('Loss')
plt.legend()

In [None]:
# Accuracy (percent) per epoch: training as blue dots, validation as a red line.
plt.plot(range(1,len(train_accuracy)+1), train_accuracy, 'bo', label='training')
plt.plot(range(1,len(val_accuracy)+1), val_accuracy, 'r', label='validation')
plt.title('Accuracy')
plt.legend()