 # Augments and saves KDEF Data to Preprocessed Folder

#### Running this notebook will:
- firstly create a pd dataframe with emotion labels and img paths for KDEF data
- augment and save to training/validation/test folders with an equal number of samples per emotion

#### NOTE: KDEF basically has the same number of images per class, so you can just run this notebook to directly split and save the data into the Preprocessed data folder

#### Dataset download instructions - do this before running notebook

- KDEF dataset: https://www.kdef.se/?fbclid=IwAR102R1eWOMWp87LQK83DDGRsNVLvofz1DdV6TtCGl5tFivNmo3KzEbJc84
Download 'KDEF_and_AKDEF' from above link, and put it under '../FER_Resnet'

In [1]:
# imports

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import os, os.path
import numpy as np
import cv2
import matplotlib.pyplot as plt
import pandas as pd
import scipy.ndimage
from PIL import Image

In [2]:
def get_label_KDEF(file_name):
    """Returns the label of KDEF image given its file name
    Args: file_name is a string of the full name of the file, e.g."AF03DIFL.JPG"
    """
    emos = {
        "AN": "0", # anger
        "DI": "1", # disgusted
        "AF": "2", # fear
        "HA": "3" , # happy
        "SA": "4" , # sad
        "SU": "5" , # surprise
        "NE": "6" , # neutral
                }
    return emos[str(file_name[4:6])]
    

In [3]:
get_label_KDEF("AF03DIFL.JPG")

'1'

#### Visualize a "black image" to see what the np values are like
- Note: black images are supposed to have all 0 entries, but this doesn't seem to be the case with our images
- We can manually delete all the iamges that are "black"

In [4]:
# Black image
img = Image.open("./KDEF_and_AKDEF/KDEF/AF01/AF01SUFR.JPG")
img_array = np.asarray(img)
print(img_array)

FileNotFoundError: [Errno 2] No such file or directory: './KDEF_and_AKDEF/KDEF/AF01/AF01SUFR.JPG'

In [5]:
# Non-black image

img = Image.open("./KDEF_and_AKDEF/KDEF/AF01/AF01AFFR.JPG")
img_array = np.asarray(img)
print(img_array)
print('shape:',img_array.shape)

[[[143 127 101]
  [144 128 102]
  [149 132 104]
  ...
  [144 129 108]
  [147 131 108]
  [146 130 107]]

 [[145 129 103]
  [141 125  99]
  [146 129 101]
  ...
  [141 126 103]
  [139 123 100]
  [140 124 101]]

 [[147 131 105]
  [143 127 101]
  [147 130 102]
  ...
  [140 125 102]
  [140 125 102]
  [142 127 104]]

 ...

 [[ 85  71  62]
  [ 78  64  55]
  [ 76  62  53]
  ...
  [123 107  84]
  [125 105  81]
  [122 102  77]]

 [[ 91  77  68]
  [ 87  73  64]
  [ 85  71  62]
  ...
  [119 103  80]
  [121 101  77]
  [122 100  77]]

 [[ 90  76  67]
  [ 87  73  64]
  [ 87  73  64]
  ...
  [123 107  84]
  [124 103  82]
  [125 103  80]]]
shape: (762, 562, 3)


#### Manually delete "black images"

In [6]:
black_imgs = ["BM24DIFL",'BM22DIHL','BM21DIFL','AM34DIFR','AM25DIFL','AM20DIHL','AF11NEHL','AF10AFFR','AF01SUFR']
for img in black_imgs:
    os.remove('./KDEF_and_AKDEF/KDEF/' + img[0:4] + '/' + img + '.JPG')
print("Successfully deleted black images")

FileNotFoundError: [WinError 2] The system cannot find the file specified: './KDEF_and_AKDEF/KDEF/BM24/BM24DIFL.JPG'

#### Create dataframe of KDEF images. Columns are emotion labels and img paths

In [7]:
def get_KDEF_df(sideview, halfside, straight):
    """
    Loads all relevant images into a dataframe consisting of emotion label and the image path.

    Args:
        sideview: True/False, includes datasets that are the full left/right profiles
        halfside: True/False, includes datasets that are the half left/right profiles
        straight: True/False, includes datasets that are a straight profile
    """
    
    KDEF_df = pd.DataFrame(columns=["emotion", "img_path"])
    
    # Path to KDEF folder
    KDEF_path = './KDEF_and_AKDEF/KDEF/'
        
    # initialize df row counter
    row = 0
        
    # Iterate through KDEF folder and append jpgs and their labels to the KDEF dataframe
    for folder in os.listdir(KDEF_path):
        path = KDEF_path + str(folder)
        
        for filename in os.listdir(path):
            
            if (filename.endswith('FL.JPG') or filename.endswith('FR.JPG')) and sideview==True:
                KDEF_df.loc[row] = [get_label_KDEF(filename), path + '/' + filename]
                
            elif (filename.endswith('HR.JPG') or filename.endswith('HL.JPG')) and halfside==True:
                KDEF_df.loc[row] = [get_label_KDEF(filename), path + '/' + filename]
                    
            elif filename.endswith('S.JPG') and straight==True:
                KDEF_df.loc[row] = [get_label_KDEF(filename), path + '/' + filename]
                
            row += 1
                    
    return KDEF_df
                

In [8]:
KDEF_df = get_KDEF_df(sideview=False, halfside=True, straight=True)

In [9]:
KDEF_df = KDEF_df[KDEF_df.emotion != '1']
KDEF_df["emotion"].value_counts()

3    420
2    420
0    420
4    419
6    419
5    419
Name: emotion, dtype: int64

In [10]:
KDEF_df.shape

(2517, 2)

In [11]:
print(KDEF_df.iloc[0])

emotion                                           2
img_path    ./KDEF_and_AKDEF/KDEF/AF01/AF01AFHL.JPG
Name: 2, dtype: object


In [12]:
print(KDEF_df.iloc[-1])

emotion                                          5
img_path    ./KDEF_and_AKDEF/KDEF/BM35/BM35SUS.JPG
Name: 4890, dtype: object


#### Augment KDEF images and save to train/validate/test folders 

In [13]:
'''
Reads and resizes an image from the mux dataset to 48 x 48
Also makes it grayscale
'''

def read_and_resize(img_path, plot_img):   
    """
    Args:
        img_path: relative path to the KDEF image
        
    Returns: the resized image as numpy array
    """ 
    # Read and resize image with OpenCV 
    img_pixels = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2GRAY) # cv2 reads in BGR, need to convert to grayscale

    # Perform a crop so that the image is square i.e. 560x560
    crop_img = np.asarray(img_pixels[100:660, 0:560])
    
    # Resize to 48 x 48
    crop_img = cv2.resize(crop_img, dsize=(48, 48), interpolation=cv2.INTER_CUBIC)
    img_data = np.asarray(crop_img)
    
    # Plot if plot_img = True
    #if plot_img == True:
        #plt.imshow(crop_img)
        #cv2.imshow("Converted Image",crop_img)
        #print("img array shape:",crop_img.shape)
        #cv2.waitKey(0)
           
    return img_data

In [14]:
read_and_resize(img_path="./KDEF_and_AKDEF/KDEF/AF01/AF01AFFR.JPG", plot_img = True)

array([[133, 131, 128, ..., 127, 129, 130],
       [129, 132, 133, ..., 128, 132, 130],
       [130, 136, 129, ..., 132, 128, 130],
       ...,
       [  8,   9,   9, ..., 114, 111, 117],
       [  6,   8,   9, ..., 114, 112, 115],
       [  9,  10,   8, ..., 115, 109, 115]], dtype=uint8)

In [15]:
'''
Returns an array of images as 3 pytorch tensors: [Original, Flipped, Rotated by 5 degrees]
'''

def augument(img):
    
    # Original Image and Add RGB channels
    img_tensor = torch.from_numpy(np.copy(img)).unsqueeze(0).repeat(3,1,1)
    # Rotate Image and add RGB channels
    img_rotated_tensor = torch.from_numpy(scipy.ndimage.rotate(np.copy(img), 5, order=1, reshape=False)).unsqueeze(0).repeat(3,1,1)
    # Flip image and add RGB channels
    img_flipped_tensor = (torch.from_numpy(np.fliplr(np.copy(img)).copy())).repeat(3,1,1)
    
    return [img_tensor, img_rotated_tensor, img_flipped_tensor]

In [16]:
def preprocess_and_save(df, total_images):
    
    """
    Preprocesses and saves KDEF data to the preprocessed data directory. Splits into train/validate/test and ensures equal 
    number of samples per class are saved for the KDEF dataset.
    
    Images are saved as "KDEF" + <integer> + <augmentation>

    Args: 
        KDEF dataframe created above
        total_images = number of images per class
    
    """
    
    # Paths to save processed tensor
    train_path = './ProcessedNoCutoffData/train/' 
    val_path = './ProcessedNoCutoffData/validate/' 
    test_path = './ProcessedNoCutoffData/test/' 
    
    # Check if the save paths exist, make them if they don't
    if not os.path.isdir(train_path):
        os.mkdir(train_path)
    if not os.path.isdir(val_path):
        os.mkdir(val_path)
    if not os.path.isdir(test_path):
        os.mkdir(test_path)
        
    # Decides when to save to which folder
    train_count = (total_images * 0.7)               
    val_count = train_count + total_images * 0.15    
    test_count = total_images
    
    # Current count - counts how many images in each class have been saved currently
    # First 70% saves to train_path, next 15% saves to val_path, next 15% saves to test path
    count = [0, 0, 0, 0, 0, 0, 0]
    
    num_imgs = len(df.index)
    
    for i in range(num_imgs):
        
        # retrieve img path 
        img_path = df.iloc[i]["img_path"]    
        
        # crop/resize to 48x48
        img_data = read_and_resize(img_path,plot_img=False)
        
        # Normalize to between 0 and 1
        img_data = img_data/255   # Since values are in (0, 255)
        
        # Augument: Add RGB, Flip, Rotate
        # Returns 3 tensors
        augumented_images = augument(img_data)
        emotion = df.iloc[i]["emotion"]
        
        # Decide whether to save to train, val, or test
        if(count[int(emotion)] < train_count):
            folder_name = train_path + str(emotion)
        elif(count[int(emotion)] < val_count):
            folder_name = val_path + str(emotion)
        elif(count[int(emotion)] < test_count):
            folder_name = test_path + str(emotion)
        
        # Save if total images for emotion is less than total images
        if(count[int(emotion)] < total_images):
            
            # Create directory for emotion if it doesn't already exist
            if not os.path.isdir(folder_name):
                os.mkdir(folder_name)

            # Save original and augmented images
            torch.save(augumented_images[0], folder_name + '/KDEF' + str(i) + '.tensor')
            torch.save(augumented_images[1], folder_name + '/KDEF' + str(i) + '_rotated.tensor')
            torch.save(augumented_images[2], folder_name + '/KDEF' + str(i) + '_flipped.tensor')
            count[int(emotion)] += 3

    return True

In [17]:
preprocess_and_save(df=KDEF_df, total_images=1257)

True