In [1]:
import cv2
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.optim import Adam

import os
import pathlib
import shutil

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
"""
lfw/
    csv files
    lfw-deepfunneled/
data/
    train/
    val/
    test/
notebook
"""

'\nlfw/\n    csv files\n    lfw-deepfunneled/\ndata/\n    train/\n    val/\n    test/\nnotebook\n'

In [3]:
# device = ("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
data_folder = './lfw/'

In [5]:
lfw_allnames = pd.read_csv(data_folder+"lfw_allnames.csv")

image_paths = lfw_allnames.loc[lfw_allnames.index.repeat(lfw_allnames['images'])]
image_paths['image_path'] = 1 + image_paths.groupby('name').cumcount()
image_paths['image_path'] = image_paths.image_path.apply(lambda x: str(x).zfill(4))
image_paths['image_path'] = image_paths.name + "/" + image_paths.name + "_" + image_paths.image_path + ".jpg"
image_paths = image_paths.drop("images", axis=1)

In [6]:
print(image_paths['name'].value_counts()[:4])
list_people = list(image_paths['name'].value_counts()[:4].keys())
list_num_images = list(image_paths['name'].value_counts()[:4])
print(list_people, list_num_images)

George_W_Bush      530
Colin_Powell       236
Tony_Blair         144
Donald_Rumsfeld    121
Name: name, dtype: int64
['George_W_Bush', 'Colin_Powell', 'Tony_Blair', 'Donald_Rumsfeld'] [530, 236, 144, 121]


In [7]:
num_ppl = 4
num_for_each = 50
tmp_l = []
for name in list(image_paths['name'].value_counts()[:num_ppl].keys()):
    tmp_l.append(image_paths[image_paths.name==name].sample(num_for_each))
data = pd.concat(tmp_l)
print(data)

                 name                                image_path
1871    George_W_Bush      George_W_Bush/George_W_Bush_0225.jpg
1871    George_W_Bush      George_W_Bush/George_W_Bush_0426.jpg
1871    George_W_Bush      George_W_Bush/George_W_Bush_0219.jpg
1871    George_W_Bush      George_W_Bush/George_W_Bush_0437.jpg
1871    George_W_Bush      George_W_Bush/George_W_Bush_0108.jpg
...               ...                                       ...
1404  Donald_Rumsfeld  Donald_Rumsfeld/Donald_Rumsfeld_0096.jpg
1404  Donald_Rumsfeld  Donald_Rumsfeld/Donald_Rumsfeld_0049.jpg
1404  Donald_Rumsfeld  Donald_Rumsfeld/Donald_Rumsfeld_0072.jpg
1404  Donald_Rumsfeld  Donald_Rumsfeld/Donald_Rumsfeld_0004.jpg
1404  Donald_Rumsfeld  Donald_Rumsfeld/Donald_Rumsfeld_0027.jpg

[200 rows x 2 columns]


In [8]:
data_train, data_test = train_test_split(data, test_size=0.2)
data_train, data_val = train_test_split(data_train, test_size=0.2)

In [9]:
print(data_train.shape, data_val.shape, data_test.shape)

(128, 2) (32, 2) (40, 2)


In [10]:
data_root = './data/'

data_list = [data_train, data_val, data_test]
dirs = ['train', 'val', 'test']

"""             # (un)comment this line and run, to copy
                # first remove data directory if it exists
for i in range(len(dirs)):
    pathlib.Path(os.path.join(data_root, dirs[i])).mkdir(parents=True, exist_ok=True)
    
    for person in list_people:
        if len(data_train[data_train['name']==person])>0:
            pathlib.Path(os.path.join(data_root, dirs[i], person)).mkdir(parents=True, exist_ok=True)

    for im_path in data_list[i].image_path:
        name = data[data['image_path']==im_path]['name'].iloc[0]
        path_from = os.path.join(data_folder+'/lfw-deepfunneled/lfw-deepfunneled/', im_path)
        path_to = os.path.join(data_root, dirs[i], name)
        if not os.path.isfile(os.path.join(path_to, im_path)):
            shutil.copy(path_from, path_to)
# """

"             # uncomment this line and run, to copy\n                # first remove data directory if it exists\nfor i in range(len(dirs)):\n    pathlib.Path(os.path.join(data_root, dirs[i])).mkdir(parents=True, exist_ok=True)\n    \n    for person in list_people:\n        if len(data_train[data_train['name']==person])>0:\n            pathlib.Path(os.path.join(data_root, dirs[i], person)).mkdir(parents=True, exist_ok=True)\n\n    for im_path in data_list[i].image_path:\n        name = data[data['image_path']==im_path]['name'].iloc[0]\n        path_from = os.path.join(data_folder+'/lfw-deepfunneled/lfw-deepfunneled/', im_path)\n        path_to = os.path.join(data_root, dirs[i], name)\n        if not os.path.isfile(os.path.join(path_to, im_path)):\n            shutil.copy(path_from, path_to)\n# "

In [11]:
train_path = os.path.join(data_root, dirs[0])
val_path = os.path.join(data_root, dirs[1])
test_path = os.path.join(data_root, dirs[2])

train_transform = transforms.Compose(transforms=[
    # transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=0, std=255),      # output = (input-mean)/std
])
test_transform = transforms.Compose(transforms=[
    transforms.ToTensor(),
    transforms.Normalize(mean=0, std=255)
])

train_loader = DataLoader(
    torchvision.datasets.ImageFolder(train_path, train_transform), shuffle=True     # batch_size
)
val_loader = DataLoader(
    torchvision.datasets.ImageFolder(val_path, test_transform), shuffle=True
)
test_loader = DataLoader(
    torchvision.datasets.ImageFolder(test_path, test_transform), shuffle=True
)

In [12]:
for data in train_loader:
    print(data[0].shape, data[1].shape)
    break
# Total train data is of shape (128, 3, 250, 250)

torch.Size([1, 3, 250, 250]) torch.Size([1])


In [19]:
class FaceCNN(nn.Module):
    def __init__(self, num_classes, stride=1, padding=1):
        super().__init__()

        self.network = nn.Sequential(

        nn.Conv2d(in_channels=3, out_channels=50, kernel_size=3, stride=stride, padding=padding),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2),

        nn.Conv2d(in_channels=50, out_channels=20, kernel_size=3, stride=stride, padding=padding),
        nn.ReLU(),

        nn.Flatten(),
        nn.Linear(in_features=20*125*125, out_features=num_classes)

        )

    def forward(self, input):
        output = self.network(input)
        return output

In [20]:
model = FaceCNN(num_classes=len(list_people))

In [21]:
optimizer = Adam(model.parameters(), lr=1e-3, weight_decay=1e-3)
loss_fn = nn.CrossEntropyLoss()
num_epochs = 10

In [22]:
def evaluate(loader, model):

    model.eval()

    score = 0
    cnt = 0

    with torch.no_grad():       # not training, so no need to calculate gradients
        for data in loader:
            # images, labels = data
            output = model(data[0])
            _, pred = torch.max(output.data, 1)
            score += float(torch.sum(pred==data[1].data))
            cnt += data[0].shape[0]

    return score/cnt

In [23]:
def train():
    best_acc = 0.0
    
    for epoch in range(num_epochs):
        train_score = 0
        cnt = 0
        train_loss = 0

        model.train()
        
        for batch in train_loader:
            optimizer.zero_grad()
            
            output = model(batch[0])
            label = batch[1]
            
            loss = loss_fn(output, label)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()

            _, pred = torch.max(output.data, 1)
            train_score += float(torch.sum(pred==label.data))
            cnt += batch[0].shape[0]

        train_acc = train_score/cnt
        val_acc = evaluate(val_loader, model)
        
        print("Epoch:", epoch, "\tLoss:", train_loss, "\tTraining Acc:", train_acc, "\tVal Acc:", val_acc)

        if val_acc > best_acc:
            torch.save(model.state_dict(),'best.model')
            best_acc = val_acc

In [25]:
train()

Epoch: 0 	Loss: 188.8102216720581 	Training Acc: 0.265625 	Val Acc: 0.1875
Epoch: 1 	Loss: 182.70674860477448 	Training Acc: 0.28125 	Val Acc: 0.1875
Epoch: 2 	Loss: 177.01730728149414 	Training Acc: 0.28125 	Val Acc: 0.1875
Epoch: 3 	Loss: 176.987757563591 	Training Acc: 0.28125 	Val Acc: 0.1875
Epoch: 4 	Loss: 176.97773730754852 	Training Acc: 0.28125 	Val Acc: 0.1875
Epoch: 5 	Loss: 178.7574906349182 	Training Acc: 0.2734375 	Val Acc: 0.1875
Epoch: 6 	Loss: 226.97059273719788 	Training Acc: 0.3046875 	Val Acc: 0.1875
Epoch: 7 	Loss: 176.93903875350952 	Training Acc: 0.28125 	Val Acc: 0.1875
Epoch: 8 	Loss: 176.96955049037933 	Training Acc: 0.28125 	Val Acc: 0.1875
Epoch: 9 	Loss: 176.9579074382782 	Training Acc: 0.28125 	Val Acc: 0.1875
