In this tutorial, we are going to fine-tune the pre-trained VGG-Face descriptors for the task of classifying the gender of a person from his/her face image.

In [3]:
import cv2, math
import numpy as np




In [4]:
import torch
from torch.autograd import Variable
import torch.nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.serialization import load_lua
from torch.legacy import nn

In [5]:
# The code to load and pre-process image for VGG-Face has been encapsulated within a function

def loadImage(imgPath):
    inputImg = cv2.imread(imgPath)

    # re-scale the smaller dim (among width, height) to refSize
    refSize, targetSize = 256, 224
    imgRows, imgCols = inputImg.shape[0], inputImg.shape[1]
    if imgCols < imgRows: resizedImg = cv2.resize(inputImg, (refSize, refSize * imgRows / imgCols))
    else: resizedImg = cv2.resize(inputImg, (refSize * imgCols / imgRows, refSize))

    # center-crop
    oH, oW = targetSize, targetSize
    iH, iW = resizedImg.shape[0], resizedImg.shape[1]
    anchorH, anchorW = int(math.ceil((iH - oH)/2)), int(math.ceil((iW - oW) / 2))
    croppedImg = resizedImg[anchorH:anchorH+oH, anchorW:anchorW+oW]

    # convert shape from (height, width, 3) to (3, width, height)
    channel_1, channel_2, channel_3 = croppedImg[:, :, 0], croppedImg[:, :, 1], croppedImg[:, :, 2]
    croppedImg = np.empty([3, croppedImg.shape[0], croppedImg.shape[1]])
    croppedImg[0], croppedImg[1], croppedImg[2] = channel_1, channel_2, channel_3

    # subtract training mean
    inputImg = inputImg.astype(float)
    trainingMean = [129.1863, 104.7624, 93.5940]
    for i in range(3): croppedImg[i] = croppedImg[i] - trainingMean[i]
    return croppedImg

In [6]:
# Function to compute pre-trained VGG-Face descriptors for a set of images 

def getVggFeatures(imgPaths, preTrainedNet):
    nImgs = len(imgPaths)
    vggFace.modules[31] = nn.View(nImgs, 25088)
    
    batchInput = torch.Tensor(nImgs, 3, 224, 224)
    for i in range(nImgs): batchInput[i] = torch.from_numpy(loadImage(imgPaths[i]))
    
    batchOutput = preTrainedNet.forward(batchInput)
    return preTrainedNet.modules[35].output

In [7]:
# Network structure that we'll train for gender classification

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = torch.nn.Linear(4096, 2)
        self.softmax = torch.nn.LogSoftmax()

    def forward(self, x):
        x = self.fc1(x)
        x = self.softmax(x)
        return x

In [11]:
# fix the seeds of random number generators
torch.manual_seed(0)
np.random.seed(0)

# load the dataset and the pre-trained network
vggFace = load_lua("preTrainedNets/VGG_FACE_pyTorch_small.t7")
dataset = load_lua("datasets/Experiment_2/celeba-gender-dataset.t7")

# initialize the net, loss and optimizer (SGD)
net = Net()
criterion = torch.nn.NLLLoss()
optimizer = optim.SGD(net.parameters(), lr=0.00005, momentum=0.9, weight_decay=0.0005)
nEpochs, batchSize = 1, 10

# define a function to test the performance of our model on a separate test set
def evaluate(net, dataset):
    correctPreds = 0.0
    for startIdx in range(0, dataset['testset'].size, batchSize):
        endIdx = min(startIdx + batchSize - 1, dataset['testset'].size - 1)
        size = (endIdx - startIdx + 1)
        
        batchInput, batchLabel = torch.Tensor(size, 4096), torch.LongTensor(size)
        batchImgPaths = []
        for offset in range(size):
            imgPath = "datasets/Experiment_2/" + dataset['testset'].imgPaths[startIdx+offset]
            batchImgPaths.append(imgPath)
            label = dataset['testset'].labels[startIdx+offset]
            batchLabel[offset] = int(label)
        
        batchInput = getVggFeatures(batchImgPaths, vggFace)
        batchInput = Variable(batchInput)

        batchOutput = net(batchInput)
        batchOutput, batchLabel = batchOutput.data.numpy(), batchLabel.numpy()
        predictions = np.argmax(batchOutput, 1)
        correctPreds += np.sum(predictions == batchLabel)
    return correctPreds / dataset['testset'].size

print "accuracy (before training) = ", evaluate(net, dataset)

# start training

for epochCtr in range(nEpochs):
    
    shuffle = np.random.permutation(dataset['trainset'].size)
    runningLoss, iterCnt = 0.0, 0
    for startIdx in range(0, dataset['trainset'].size, batchSize):
        endIdx = min(startIdx + batchSize - 1, dataset['trainset'].size-1)
        size = (endIdx - startIdx + 1)
    
        batchInput, batchLabel = torch.Tensor(size, 4096), torch.LongTensor(size)
        batchImgPaths = []
        for offset in range(size):
            imgPath = "datasets/Experiment_2/" + dataset['trainset'].imgPaths[shuffle[startIdx+offset]]
            batchImgPaths.append(imgPath)
            label = dataset['trainset'].labels[shuffle[startIdx+offset]]
            batchLabel[offset] = int(label) 
        
        batchInput = getVggFeatures(batchImgPaths, vggFace)
        batchInput = Variable(batchInput)
        batchLabel = Variable(batchLabel)
        batchOutput = net(batchInput)
        loss = criterion(batchOutput, batchLabel)
        loss.backward()
        optimizer.step()

        runningLoss += loss.data[0]
        iterCnt += 1

    print "epoch ", (epochCtr+1), "/", nEpochs, ": loss = ", runningLoss/iterCnt

print "accuracy (after training) = ", evaluate(net, dataset)

 accuracy (before training) =  0.65
epoch  1 / 1 : loss =  0.635333937127
accuracy (after training) =  0.95
