In [1]:
import sys
import os
import math
import random
import cv2
import numpy as np
from PIL import Image
import pandas as pd
import matplotlib.pyplot as plt


def gallery(array, ncols=3):
    """Tile a batch of equally sized images into one grid image.

    Parameters
    ----------
    array : numpy.ndarray or list
        Either an array of shape ``(nindex, height, width, intensity)`` or a
        list of image file paths, each loaded with ``cv2.imread``. All images
        must share the same height, width and channel count.
    ncols : int, optional
        Number of tiles per grid row (default 3).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(height * nrows, width * ncols, intensity)`` with
        ``nrows = ceil(nindex / ncols)``. Tiles are placed row by row; unused
        cells at the end of the last row are filled with zeros.

    Raises
    ------
    ValueError
        If ``ncols`` is smaller than 1.
    FileNotFoundError
        If a path in ``array`` cannot be read as an image.
    """
    if ncols < 1:
        raise ValueError('ncols must be a positive integer, got {!r}'.format(ncols))

    if isinstance(array, list):
        images = []
        for path in array:
            img_np = cv2.imread(path)
            # cv2.imread reports an unreadable file by returning None rather
            # than raising; fail here so the offending path is named.
            if img_np is None:
                raise FileNotFoundError('cannot read image: {}'.format(path))
            images.append(img_np)
        array = np.array(images)

    nindex, height, width, intensity = array.shape
    nrows = math.ceil(nindex / ncols)

    # Fill the incomplete last row with blank tiles. Using the input dtype
    # keeps the result dtype equal to the input dtype.
    n_missing = nrows * ncols - nindex
    if n_missing > 0:
        padding = np.zeros((n_missing, height, width, intensity), dtype=array.dtype)
        array = np.concatenate((array, padding))

    # (nrows, ncols, h, w, c) -> (nrows, h, ncols, w, c) -> (h*nrows, w*ncols, c)
    result = (array.reshape(nrows, ncols, height, width, intensity)
              .swapaxes(1, 2)
              .reshape(height*nrows, width*ncols, intensity))
    return result


def show_group(df, N=49, title='', s=2, ncols=9):
    """Plot a random sample of the images listed in ``df`` as one tiled figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to sample from. Its ``file`` column holds image paths relative
        to the module-level ``images_path`` directory.
    N : int, optional
        Maximum number of images to show (default 49).
    title : str, optional
        Title placed above the figure.
    s : float, optional
        Size of one tile in inches (default 2).
    ncols : int, optional
        Number of tiles per row (default 9).

    Raises
    ------
    FileNotFoundError
        If a sampled image cannot be read.

    Notes
    -----
    All sampled images must have the same shape, because they are stacked
    into a single array before being tiled by ``gallery``.
    """
    # Sample from the frame that was passed in (not the global table), and
    # from a plain list so the caller's data is never reordered in place.
    all_paths = df['file'].tolist()
    n_show = min(N, len(all_paths))
    if n_show <= 0:
        print('show_group: no images to show for {!r}'.format(title))
        return
    img_paths = random.sample(all_paths, n_show)

    imgs = []
    for path in img_paths:
        full_path = os.path.join(images_path, path)
        img_bgr = cv2.imread(full_path)
        # cv2.imread returns None for a missing/unreadable file.
        if img_bgr is None:
            raise FileNotFoundError('cannot read image: {}'.format(full_path))
        # OpenCV loads BGR; matplotlib expects RGB.
        imgs.append(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))

    img_gallery = gallery(np.array(imgs), ncols=ncols)
    nrows = math.ceil(len(imgs) / ncols)
    plt.figure(figsize=[s*ncols, s*nrows])  # s inches per tile
    plt.imshow(img_gallery)
    plt.title(title)

# FairFace

In [2]:
# Directory that the `file` column of the label CSV is resolved against.
# images_path = '/data/face/aligned_rf-r50/fairface'
images_path = '/data/face/fairface/fairface-img-margin125-trainval'
# CSV with the training-split labels.
gt_path = '/data/face/fairface/fairface_label_train.csv'

# Load the labels and show the frame as this cell's output.
gt_df = pd.read_csv(gt_path)
gt_df

Unnamed: 0,file,age,gender,race,service_test
0,train/1.jpg,50-59,Male,East Asian,True
1,train/2.jpg,30-39,Female,Indian,False
2,train/3.jpg,3-9,Female,Black,False
3,train/4.jpg,20-29,Female,Indian,True
4,train/5.jpg,20-29,Female,Indian,True
...,...,...,...,...,...
86739,train/86740.jpg,20-29,Male,Indian,True
86740,train/86741.jpg,10-19,Male,Indian,True
86741,train/86742.jpg,more than 70,Female,Indian,True
86742,train/86743.jpg,10-19,Female,Black,True


## Male/Female

In [3]:
# One figure per gender label.
for gender in ('Male', 'Female'):
    show_group(gt_df[gt_df['gender'].eq(gender)], title=gender)

NameError: name 'random' is not defined

## Age

In [None]:
# One figure per age bucket, youngest to oldest.
for age in (
    '0-2', '3-9', '10-19', '20-29', '30-39',
    '40-49', '50-59', '60-69', 'more than 70',
):
    show_group(gt_df[gt_df['age'].eq(age)], title=age)

## Race

In [None]:
# One figure per race label.
for race in (
    'East Asian', 'Indian', 'Black', 'White',
    'Middle Eastern', 'Latino_Hispanic', 'Southeast Asian',
):
    show_group(gt_df[gt_df['race'].eq(race)], title=race)