 # Augments and saves KDEF Data to Preprocessed Folder

#### Running this notebook will:
- firstly create a pd dataframe with emotion labels and img paths for KDEF data
- augment and save to training/validation/test folders with an equal number of samples per emotion

#### NOTE: KDEF basically has the same number of images per class, so you can just run this notebook to directly split and save the data into the Preprocessed data folder

#### Dataset download instructions - do this before running notebook

- KDEF dataset: https://www.kdef.se/?fbclid=IwAR102R1eWOMWp87LQK83DDGRsNVLvofz1DdV6TtCGl5tFivNmo3KzEbJc84
Download 'KDEF_and_AKDEF' from above link, and put it under '../FER_Resnet'

In [1]:
# imports

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import os, os.path
import numpy as np
import cv2
import matplotlib.pyplot as plt
import pandas as pd
import scipy.ndimage
from PIL import Image
import preprocess as p

#### Manually delete "black images"
- Note: black images are supposed to have all 0 entries, but this doesn't seem to be the case with our images
- We can manually delete all the iamges that are "black"

In [2]:
p.delete_black_images_KDEF()

Successfully deleted 0 black images


#### Create dataframe of KDEF images. Columns are emotion labels and img paths

In [3]:
KDEF_df = p.get_KDEF_df(sideview=False, halfside=True, straight=True)
KDEF_df["emotion"].value_counts()

3    420
2    420
0    420
5    419
6    419
4    419
1    418
Name: emotion, dtype: int64

In [4]:
KDEF_df

Unnamed: 0,emotion,img_path
2,2,../datasets/KDEF_and_AKDEF/KDEF/AF01/AF01AFHL.JPG
3,2,../datasets/KDEF_and_AKDEF/KDEF/AF01/AF01AFHR.JPG
4,2,../datasets/KDEF_and_AKDEF/KDEF/AF01/AF01AFS.JPG
7,0,../datasets/KDEF_and_AKDEF/KDEF/AF01/AF01ANHL.JPG
8,0,../datasets/KDEF_and_AKDEF/KDEF/AF01/AF01ANHR.JPG
...,...,...
4884,4,../datasets/KDEF_and_AKDEF/KDEF/BM35/BM35SAHR.JPG
4885,4,../datasets/KDEF_and_AKDEF/KDEF/BM35/BM35SAS.JPG
4888,5,../datasets/KDEF_and_AKDEF/KDEF/BM35/BM35SUHL.JPG
4889,5,../datasets/KDEF_and_AKDEF/KDEF/BM35/BM35SUHR.JPG


#### Augment KDEF images and save to train/validate/test folders 

In [5]:
'''
Reads and resizes an image from the mux dataset to 48 x 48
Also makes it grayscale
'''

def read_and_resize(img_path, plot_img):   
    """
    Args:
        img_path: relative path to the KDEF image
        
    Returns: the resized image as numpy array
    """ 
    # Read and resize image with OpenCV 
    img_pixels = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2GRAY) # cv2 reads in BGR, need to convert to grayscale

    # Perform a crop so that the image is square i.e. 560x560
    crop_img = np.asarray(img_pixels[100:660, 0:560])
    
    # Resize to 48 x 48
    crop_img = cv2.resize(crop_img, dsize=(48, 48), interpolation=cv2.INTER_CUBIC)
    img_data = np.asarray(crop_img)
    
    # Plot if plot_img = True
    #if plot_img == True:
        #plt.imshow(crop_img)
        #cv2.imshow("Converted Image",crop_img)
        #print("img array shape:",crop_img.shape)
        #cv2.waitKey(0)
           
    return img_data

In [6]:
read_and_resize(img_path="./KDEF_and_AKDEF/KDEF/AF01/AF01AFFR.JPG", plot_img = True)

error: OpenCV(4.4.0) C:\Users\appveyor\AppData\Local\Temp\1\pip-req-build-9d_dfo3_\opencv\modules\imgproc\src\color.cpp:182: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'


In [None]:
'''
Returns an array of images as 3 pytorch tensors: [Original, Flipped, Rotated by 5 degrees]
'''

def augument(img):
    
    # Original Image and Add RGB channels
    img_tensor = torch.from_numpy(np.copy(img)).unsqueeze(0).repeat(3,1,1)
    # Rotate Image and add RGB channels
    img_rotated_tensor = torch.from_numpy(scipy.ndimage.rotate(np.copy(img), 5, order=1, reshape=False)).unsqueeze(0).repeat(3,1,1)
    # Flip image and add RGB channels
    img_flipped_tensor = (torch.from_numpy(np.fliplr(np.copy(img)).copy())).repeat(3,1,1)
    
    return [img_tensor, img_rotated_tensor, img_flipped_tensor]

In [None]:
def preprocess_and_save(df, total_images):
    
    """
    Preprocesses and saves KDEF data to the preprocessed data directory. Splits into train/validate/test and ensures equal 
    number of samples per class are saved for the KDEF dataset.
    
    Images are saved as "KDEF" + <integer> + <augmentation>

    Args: 
        KDEF dataframe created above
        total_images = number of images per class
    
    """
    
    # Paths to save processed tensor
    train_path = './ProcessedNoCutoffData/train/' 
    val_path = './ProcessedNoCutoffData/validate/' 
    test_path = './ProcessedNoCutoffData/test/' 
    
    # Check if the save paths exist, make them if they don't
    if not os.path.isdir(train_path):
        os.mkdir(train_path)
    if not os.path.isdir(val_path):
        os.mkdir(val_path)
    if not os.path.isdir(test_path):
        os.mkdir(test_path)
        
    # Decides when to save to which folder
    train_count = (total_images * 0.7)               
    val_count = train_count + total_images * 0.15    
    test_count = total_images
    
    # Current count - counts how many images in each class have been saved currently
    # First 70% saves to train_path, next 15% saves to val_path, next 15% saves to test path
    count = [0, 0, 0, 0, 0, 0, 0]
    
    num_imgs = len(df.index)
    
    for i in range(num_imgs):
        
        # retrieve img path 
        img_path = df.iloc[i]["img_path"]    
        
        # crop/resize to 48x48
        img_data = read_and_resize(img_path,plot_img=False)
        
        # Normalize to between 0 and 1
        img_data = img_data/255   # Since values are in (0, 255)
        
        # Augument: Add RGB, Flip, Rotate
        # Returns 3 tensors
        augumented_images = augument(img_data)
        emotion = df.iloc[i]["emotion"]
        
        # Decide whether to save to train, val, or test
        if(count[int(emotion)] < train_count):
            folder_name = train_path + str(emotion)
        elif(count[int(emotion)] < val_count):
            folder_name = val_path + str(emotion)
        elif(count[int(emotion)] < test_count):
            folder_name = test_path + str(emotion)
        
        # Save if total images for emotion is less than total images
        if(count[int(emotion)] < total_images):
            
            # Create directory for emotion if it doesn't already exist
            if not os.path.isdir(folder_name):
                os.mkdir(folder_name)

            # Save original and augmented images
            torch.save(augumented_images[0], folder_name + '/KDEF' + str(i) + '.tensor')
            torch.save(augumented_images[1], folder_name + '/KDEF' + str(i) + '_rotated.tensor')
            torch.save(augumented_images[2], folder_name + '/KDEF' + str(i) + '_flipped.tensor')
            count[int(emotion)] += 3

    return True

In [None]:
preprocess_and_save(df=KDEF_df, total_images=1257)