## VGG implementation with SVM

*Python Modules*

In [170]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import sklearn.svm
import sklearn.cross_validation
plt.ion() 

use_gpu = torch.cuda.is_available()
if use_gpu:
    print("Using CUDA")

In [171]:
data_dir = "C:/Users/Umashanker Deekshith/Google Drive/Germany/Uni-Bonn/Semester 3/Deep Learning for VR/Exercise/DeepLearningWS/project/Deep-Learning-Project/src/images"
TRAIN = 'train'
TEST = 'test'

# VGG-16 Takes 224x224 images as input, so we resize all of them
data_transforms = {
    TRAIN: transforms.Compose([
        # Data augmentation is a good practice for the train set
        # Here, we randomly crop the image to 224x224 and
        # randomly flip it horizontally. 
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]),
    TEST: transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])
}

image_datasets = {
    x: datasets.ImageFolder(
        os.path.join(data_dir, x), 
        transform=data_transforms[x]
    )
    for x in [TRAIN, TEST]
}

dataloaders = {
    x: torch.utils.data.DataLoader(
        image_datasets[x], batch_size=1,
        shuffle=True, num_workers=1
    )
    for x in [TRAIN, TEST]
}

dataset_sizes = {x: len(image_datasets[x]) for x in [TRAIN, TEST]}

for x in [TRAIN, TEST]:
    print("Loaded {} images under {}".format(dataset_sizes[x], x))
    
print("Classes: ")
class_names = image_datasets[TRAIN].classes
print(image_datasets[TRAIN].classes)

Loaded 7500 images under train
Loaded 2500 images under test
Classes: 
['beef_tartare', 'caesar_salad', 'chocolate_cake', 'croque_madame', 'escargots', 'fried_calamari', 'macaroni_and_cheese', 'poutine', 'spring_rolls', 'tuna_tartare']


## Utils

Some utility function to visualize the dataset and the model's predictions

In [172]:
# def imshow(inp, title=None):
#     inp = inp.numpy().transpose((1, 2, 0))
#     # plt.figure(figsize=(10, 10))
#     plt.axis('off')
#     plt.imshow(inp)
#     if title is not None:
#         plt.title(title)
#     plt.pause(0.001)

# def show_databatch(inputs, classes):
#     out = torchvision.utils.make_grid(inputs)
#     imshow(out, title=[class_names[x] for x in classes])

# Get a batch of training data
inputs, classes = next(iter(dataloaders[TRAIN]))
inputs_test, classes_test = next(iter(dataloaders[TEST]))
# show_databatch(inputs, classes)

In [173]:
# def visualize_model(vgg, num_images=6):
#     was_training = vgg.training
    
#     # Set model for evaluation
#     vgg.train(False)
#     vgg.eval() 
    
#     images_so_far = 0

#     for i, data in enumerate(dataloaders[TEST]):
#         inputs, labels = data
#         size = inputs.size()[0]
        
#         if use_gpu:
#             inputs, labels = Variable(inputs.cuda(), volatile=True), Variable(labels.cuda(), volatile=True)
#         else:
#             inputs, labels = Variable(inputs, volatile=True), Variable(labels, volatile=True)
        
#         outputs = vgg(inputs)
        
#         _, preds = torch.max(outputs.data, 1)
#         predicted_labels = [preds[j] for j in range(inputs.size()[0])]
        
#         print("Ground truth:")
#         show_databatch(inputs.data.cpu(), labels.data.cpu())
#         print("Prediction:")
#         show_databatch(inputs.data.cpu(), predicted_labels)
        
#         del inputs, labels, outputs, preds, predicted_labels
#         torch.cuda.empty_cache()
        
#         images_so_far += size
#         if images_so_far >= num_images:
#             break
        
#     vgg.train(mode=was_training) # Revert model back to original training state

In [174]:
def eval_model(vgg, criterion):
    since = time.time()
    avg_loss = 0
    avg_acc = 0
    loss_test = 0
    acc_test = 0
    
    test_batches = len(dataloaders[TEST])
    print("Evaluating model")
    print('-' * 10)
    
    for i, data in enumerate(dataloaders[TEST]):
        if i % 100 == 0:
            print("\rTest batch {}/{}".format(i, test_batches), end='', flush=True)

        vgg.train(False)
        vgg.eval()
        inputs, labels = data

        if use_gpu:
            inputs, labels = Variable(inputs.cuda(), volatile=True), Variable(labels.cuda(), volatile=True)
        else:
            inputs, labels = Variable(inputs, volatile=True), Variable(labels, volatile=True)

        outputs = vgg(inputs)

        _, preds = torch.max(outputs.data, 1)
        loss = criterion(outputs, labels)

        loss_test += loss.data[0]
        acc_test += torch.sum(preds == labels.data)

        del inputs, labels, outputs, preds
        torch.cuda.empty_cache()
        
    avg_loss = loss_test / dataset_sizes[TEST]
    avg_acc = acc_test / dataset_sizes[TEST]
    
    elapsed_time = time.time() - since
    print()
    print("Evaluation completed in {:.0f}m {:.0f}s".format(elapsed_time // 60, elapsed_time % 60))
    print("Avg loss (test): {:.4f}".format(avg_loss))
    print("Avg acc (test): {:.4f}".format(avg_acc))
    print('-' * 10)

In [200]:
def set_up_network(net, freeze_training = True):
    if net == 'vgg16':
    # Load the pretrained model from pytorch
        network = models.vgg16(pretrained=True)

        # Freeze training for all layers
        # Newly created modules have require_grad=True by default
        if freeze_training:
            for param in network.features.parameters():
                param.require_grad = False

        
        features = list(network.classifier.children())[:-5] # Remove last layer
        network.classifier = nn.Sequential(*features) # Replace the model classifier
        # print(vgg16)
    
    elif net == 'alexnet':
        network = models.alexnet(pretrained=True)
        if freeze_training:
            for param in network.features.parameters():
                param.require_grad = False
        
        features = list(network.classifier.children())[:-4] # Remove last layer
        network.classifier = nn.Sequential(*features) # Replace the model classifier
        print(alex_net)
    return network


In [201]:
vgg16 = set_up_network('vgg16', freeze_training = True)
alex_net = set_up_network('alexnet', freeze_training = True)

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace)
  )
)


In [176]:
# If you want to train the model for more than 2 epochs, set this to True after the first run
resume_training = False

if resume_training:
    print("Loading pretrained model..")
    vgg16.load_state_dict(torch.load('../input/vgg16-transfer-learning-pytorch/VGG16_v2-OCT_Retina.pt'))
    print("Loaded!")

In [177]:
if use_gpu:
    vgg16.cuda() #.cuda() will move everything to the GPU side
    
criterion = nn.CrossEntropyLoss()

optimizer_ft = optim.SGD(vgg16.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [178]:
print("Test before training")
# eval_model(vgg16, criterion)

Test before training


In [179]:
# visualize_model(vgg16) #test before training

In [180]:
def train_model(vgg, criterion, optimizer, scheduler, num_epochs=10):
    since = time.time()
    best_model_wts = copy.deepcopy(vgg.state_dict())
    best_acc = 0.0
    
    avg_loss = 0
    avg_acc = 0
    avg_loss_val = 0
    avg_acc_val = 0
    
    train_batches = len(dataloaders[TRAIN])
    val_batches = len(dataloaders[VAL])
    
    for epoch in range(num_epochs):
        print("Epoch {}/{}".format(epoch, num_epochs))
        print('-' * 10)
        
        loss_train = 0
        loss_val = 0
        acc_train = 0
        acc_val = 0
        
        vgg.train(True)
        
        for i, data in enumerate(dataloaders[TRAIN]):
            if i % 100 == 0:
                print("\rTraining batch {}/{}".format(i, train_batches / 2), end='', flush=True)
                
            # Use half training dataset
            if i >= train_batches / 2:
                break
                
            inputs, labels = data
            
            if use_gpu:
                inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
            else:
                inputs, labels = Variable(inputs), Variable(labels)
            
            optimizer.zero_grad()
            
            outputs = vgg(inputs)
            
            _, preds = torch.max(outputs.data, 1)
            loss = criterion(outputs, labels)
            
            loss.backward()
            optimizer.step()
            
            loss_train += loss.data[0]
            acc_train += torch.sum(preds == labels.data)
            
            del inputs, labels, outputs, preds
            torch.cuda.empty_cache()
        
        print()
        # * 2 as we only used half of the dataset
        avg_loss = loss_train * 2 / dataset_sizes[TRAIN]
        avg_acc = acc_train * 2 / dataset_sizes[TRAIN]
        
        vgg.train(False)
        vgg.eval()
            
        for i, data in enumerate(dataloaders[VAL]):
            if i % 100 == 0:
                print("\rValidation batch {}/{}".format(i, val_batches), end='', flush=True)
                
            inputs, labels = data
            
            if use_gpu:
                inputs, labels = Variable(inputs.cuda(), volatile=True), Variable(labels.cuda(), volatile=True)
            else:
                inputs, labels = Variable(inputs, volatile=True), Variable(labels, volatile=True)
            
            optimizer.zero_grad()
            
            outputs = vgg(inputs)
            
            _, preds = torch.max(outputs.data, 1)
            loss = criterion(outputs, labels)
            
            loss_val += loss.data[0]
            acc_val += torch.sum(preds == labels.data)
            
            del inputs, labels, outputs, preds
            torch.cuda.empty_cache()
        
        avg_loss_val = loss_val / dataset_sizes[VAL]
        avg_acc_val = acc_val / dataset_sizes[VAL]
        
        print()
        print("Epoch {} result: ".format(epoch))
        print("Avg loss (train): {:.4f}".format(avg_loss))
        print("Avg acc (train): {:.4f}".format(avg_acc))
        print("Avg loss (val): {:.4f}".format(avg_loss_val))
        print("Avg acc (val): {:.4f}".format(avg_acc_val))
        print('-' * 10)
        print()
        
        if avg_acc_val > best_acc:
            best_acc = avg_acc_val
            best_model_wts = copy.deepcopy(vgg.state_dict())
        
    elapsed_time = time.time() - since
    print()
    print("Training completed in {:.0f}m {:.0f}s".format(elapsed_time // 60, elapsed_time % 60))
    print("Best acc: {:.4f}".format(best_acc))
    
    vgg.load_state_dict(best_model_wts)
    return vgg

In [191]:
def get_features(ipnet, train_batches = 10):
    
    imgfeatures = []
    imglabels = []
    for i, data in enumerate(dataloaders[TRAIN]):
        if i % 100 == 0:
            print("\rTraining batch {}/{}".format(i, train_batches / 2), end='', flush=True)

        # Use half training dataset
        if i > train_batches:
            break

        inputs, labels = data

        if use_gpu:
            inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
        else:
            inputs, labels = Variable(inputs), Variable(labels)

        feature = ipnet(inputs)
        print("The shape of output is: ", feature.shape)
        print(labels)
        imgfeatures.append(feature.detach().numpy().flatten())
        imglabels.append(labels.detach().numpy())
        del inputs, labels, feature

    return imgfeatures, imglabels


In [204]:
def fit_features_to_SVM(features, labels, train_batch_size, K=5 ):
    print("The shape of the class is", classes.shape)
    kf = sklearn.cross_validation.KFold(train_batch_size, n_folds=K)
    print("The split information is: ", kf)
    scores = []
    features = np.array(features)
    labels = np.array(labels)
    print(features.shape)
    print(labels.shape)

    i=0
    for train, test in kf:
        print(train)
        print(test)
        print(i,"/",K)
        i+=1
        model = sklearn.svm.SVC(C=100)#, C=1, gamma=0)
        model.fit(features[train, :], labels[train])
        s=model.score(features[test, :], labels[test])
        print("The score for this classification is: ", s)
        scores.append(s)
    return np.mean(scores), np.std(scores)


In [205]:
train_batch_size = 10

imgfeatures_vgg, imglabels_vgg = get_features(vgg16, train_batch_size)
mean_accuracy, sd = fit_features_to_SVM(imgfeatures_vgg, imglabels_vgg, train_batch_size, K=5 )
print("The mean and standard deviation of classification for vgg 16 is: ",mean_accuracy, sd)

imgfeatures_an, imglabels_an = get_features(alex_net, train_batch_size)
mean_accuracy, sd = fit_features_to_SVM(imgfeatures_an, imglabels_an, train_batch_size, K=5 )
print("The mean and standard deviation of classification for alexnet is: ",mean_accuracy, sd)


# vgg16 = train_model(vgg16, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=2)
torch.save(vgg16.state_dict(), 'VGG16_v2-OCT_Retina_half_dataset.pt')
torch.save(alex_net.state_dict(), 'ALEXNET_v2-OCT_Retina_half_dataset.pt')

Training batch 0/5.0The shape of output is:  torch.Size([1, 4096])
tensor([1])
The shape of output is:  torch.Size([1, 4096])
tensor([1])
The shape of output is:  torch.Size([1, 4096])
tensor([7])
The shape of output is:  torch.Size([1, 4096])
tensor([1])
The shape of output is:  torch.Size([1, 4096])
tensor([1])
The shape of output is:  torch.Size([1, 4096])
tensor([6])
The shape of output is:  torch.Size([1, 4096])
tensor([6])
The shape of output is:  torch.Size([1, 4096])
tensor([3])
The shape of output is:  torch.Size([1, 4096])
tensor([0])
The shape of output is:  torch.Size([1, 4096])
tensor([5])
The shape of output is:  torch.Size([1, 4096])
tensor([9])
The shape of the class is torch.Size([1])
The split information is:  sklearn.cross_validation.KFold(n=10, n_folds=5, shuffle=False, random_state=None)
(11, 4096)
(11, 1)
[2 3 4 5 6 7 8 9]
[0 1]
0 / 5
The score for this classification is:  1.0
[0 1 4 5 6 7 8 9]
[2 3]
1 / 5
The score for this classification is:  0.5
[0 1 2 3 6 7 8 

  y = column_or_1d(y, warn=True)


TypeError: 'NoneType' object is not iterable

In [None]:
# eval_model(vgg16, criterion)