This notebook tests the helper functions that convert the preprocessed UBFC2 frames and their PPG labels into per-subject HDF5 files.

In [1]:
import numpy as np
import h5py
from PIL import Image
import csv
from glob import iglob
import os
from pathlib import Path
from sklearn.model_selection import KFold

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline

### Reference: https://realpython.com/storing-images-in-python/#reading-many-images (Accessed 18/03/2022)

In [None]:
def display_frame(frame, title):
    """ Displays a single frame in a new matplotlib figure.
        Parameters:
        ---------------
        frame:  image data handed to plt.imshow.
        title:  text shown above the frame.
    """
    figure_size = (10, 10)

    plt.figure(figsize=figure_size)
    plt.title(title)
    plt.imshow(frame)

In [None]:
# with Image.open(r'D:\OneDrive\Documents\rPPG-Projects\Datasets-Preprocessed\UBFC2\DATASET_2\1\subject1\0.png') as image:
#     print(np.array(image))

In [None]:
# with Image.open(r'D:\OneDrive\Documents\rPPG-Projects\Datasets-Preprocessed\UBFC2\DATASET_2\1\subject1\0.png') as image:
#     image = np.array(image) /255
#     print(image)

In [None]:
def read_many_disk(num_images_required, imagesPath, gtPath):
    """ Reads the frames and labels of one recording and builds 6-channel inputs.

        Every frame is converted to RGB, resized to 36x36 and divided by 255. For every
        pair of adjacent frames c(t), c(t + 1) the normalized difference
        (c(t + 1) - c(t)) / (c(t) + c(t + 1)) is computed per channel and appended to
        frame c(t) as 3 extra channels. The last frame has no successor and is only used
        to normalize the 2nd last frame. Finally every channel of every image is shifted
        to zero mean and scaled to unit standard deviation.

        Parameters:
        ---------------
        num_images_required:  minimum number of images/labels to return; shorter sequences are
                              padded with all-zero images and 0.0 labels.
        imagesPath:           iterable of paths to the frame files. The frames MUST be given in
                              temporal order, because neighbouring entries are treated as
                              consecutive frames and entry k is paired with the k-th label.
        gtPath:               path to a comma separated file with one title row; the value in
                              the 3rd column (row[2]) of each following row is used as label.

        Returns:
        ----------
        images:  array of shape (N, 36, 36, 6) with N >= num_images_required.
        labels:  array of shape (N,) when the file holds enough rows.
    """
    frame_size = (36, 36)

    # Load every frame as a float array
    images = []
    for imagePath in imagesPath:
        # Reference: https://pillow.readthedocs.io/en/stable/reference/Image.html (Accessed 21/03/2022)
        with Image.open(imagePath) as image:
            # Force 3 channels so that palette / greyscale / alpha images also give (36, 36, 3)
            image_resized = image.convert("RGB").resize(frame_size)

            # Normalize the image
            images.append(np.array(image_resized) / 255.0)

    num_images = len(images)

    # Skip the ppg recording for the last frame as it doesn't have a successor for normalization.
    num_labels = max(num_images - 1, 0)

    labels = []
    with open(gtPath, "r", newline="") as csvfile:
        reader = csv.reader(csvfile, delimiter=",")

        # Skip the title row
        next(reader, None)

        for row in reader:
            if len(labels) >= num_labels:
                break
            labels.append(float(row[2]))  # row[2] is the column containing ppg signal (label)

    # List containing the images with normalized frames added in the 3rd dimension
    expanded_images = []

    for current, following in zip(images, images[1:]):
        difference = following - current
        total = current + following

        # Where both frames are 0 the quotient would be 0/0 = NaN (and one NaN poisons the
        # mean/std of the whole channel below); such pixels did not change, so they become 0.
        normalizedFrame = np.divide(
            difference, total, out=np.zeros_like(difference), where=total != 0
        )

        expanded_images.append(np.concatenate((current, normalizedFrame), axis=2))

    # Add blank images until the list is as long as the number of images required
    while len(expanded_images) < num_images_required:
        expanded_images.append(np.zeros(frame_size + (6,)))

    # Add blank labels until the list is as long as the number of images required
    while len(labels) < num_images_required:
        labels.append(0.0)

    # For each image, subtract the mean and scale to unit standard deviation per channel
    for image in expanded_images:
        for i in range(6):
            channel = image[:, :, i]  # view: the in-place operations below modify `image`
            channel -= np.mean(channel)

            # A constant channel (e.g. a blank padding image) has a standard deviation of 0;
            # it is left at 0 instead of being turned into NaN by a division by zero.
            deviation = np.std(channel)
            if deviation > 0:
                channel /= deviation

    return np.array(expanded_images), np.array(labels)

In [None]:
def store_many_hdf5(target_dir, subID, images, labels):
    """ Stores an array of images and the matching labels to HDF5.
        Parameters:
        ---------------
        target_dir:  path to the directory where the HDF5 file will be stored.
        subID:       subject ID, used as the file name (<subID>.h5).
        images       images array, (N, W, H, NC) to be stored (where N: number of images, W: width, H: height, NC: number of channels).
        labels       labels array to be stored, one label per image.

        Returns:
        ----------
        pathToTarget    path to the HDF5 file.
    """

    pathToTarget = os.path.join(target_dir, f"{subID}.h5")

    # Mode "w" creates a new file (an existing one is overwritten); the context
    # manager closes it even when writing one of the datasets fails.
    with h5py.File(pathToTarget, "w") as file:
        file.create_dataset("images", data=images)
        file.create_dataset("labels", data=labels)

    return pathToTarget

In [None]:
def read_hdf5(file_path):
    """ Reads images and labels from HDF5.
        Parameters:
        ---------------
        file_path   path to the HDF5 file holding the datasets "images" and "labels".

        Returns:
        ----------
        images       images array read from the "images" dataset, converted to float64.
        labels       labels array read from the "labels" dataset, converted to float64.
    """
    # Read-only access is sufficient; the context manager releases the file handle so
    # that the file can be overwritten or deleted afterwards.
    with h5py.File(file_path, "r") as file:
        # np.array copies the data into memory, so it stays valid after the file is closed
        images = np.array(file["/images"]).astype("float64")
        labels = np.array(file["/labels"]).astype("float64")

    return images, labels


In [None]:
# Load the stored images of subject 1; the labels are discarded.
# NOTE(review): this file looks like one of the outputs of the conversion loop further
# down in this notebook, so on a fresh machine this cell presumably only works after
# that loop has been run once - confirm.
images, _ = read_hdf5(r'D:\OneDrive\Documents\rPPG-Projects\Datasets-Preprocessed\hdf5\UBFC2\DATASET_2\subject1.h5')

In [None]:
def split_subj(data_dir, cv_split):
    """ Splits the entries of data_dir into a training and a testing list.

    The directory is listed once and the entries are sorted by name, so the same
    directory content always produces the same split.

    Parameters:
    ---------------
    data_dir:  path to the directory containing the data.
    cv_split:  fraction of the entries put into the training list (written as a float, e.g. 50% = 0.5);
               the number of training entries is rounded down, the rest forms the testing list.

    Returns:
    ----------
    subTrain: list of paths to the training data.
    subTest:  list of paths to the testing data.
    """
    # os.listdir returns the entries in arbitrary order, hence the explicit sort
    sub_paths = [os.path.join(data_dir, sub) for sub in sorted(os.listdir(data_dir))]

    # Get the no. of training paths
    num_train = int(len(sub_paths) * cv_split)

    subTrain = sub_paths[:num_train]
    subTest = sub_paths[num_train:]

    return subTrain, subTest


In [None]:
# # Function that returns the minimum number of frames of all videos in the dataset
# def get_min_num_of_frames(globExp):

#     # Variables that will store the min no. of frames and the corresponding directory of the subject 
#     min_num_files = float('inf')
#     folder_with_min_num_files = ''

#     # Get iterator over different subjects
#     imageDirs = iglob(globExp)

#     for path_ in imageDirs:

#         num_images = len(os.listdir(path_))
        
#         if num_images < min_num_files:
#             min_num_files = num_images
#             folder_with_min_num_files = path_

#     # print(f'folder with min images: { folder_with_min_num_files} \nfile count:{min_num_files} \n')

#     return min_num_files

In [None]:
def get_max_num_of_frames(globExp):
    """ Returns the largest number of directory entries found in any directory matching globExp.
        Parameters:
        ---------------
        globExp:  glob expression; every match is listed with os.listdir.

        Returns:
        ----------
        The highest entry count over all matches, or -1 when nothing matches.
    """
    entry_counts = (len(os.listdir(subject_dir)) for subject_dir in iglob(globExp))

    return max(entry_counts, default=-1)

In [None]:
# get_max_num_of_frames(r'D:\\OneDrive\\Documents\\rPPG-Projects\\Datasets-Preprocessed\\UBFC2\\DATASET_2\\[0-9]*\\subject[0-9]*')

In [None]:
# Glob pattern matching the frame directory of every subject (defined once so that the
# frame count and the conversion loop are guaranteed to look at the same directories)
SUBJECT_GLOB = "D:\\OneDrive\\Documents\\rPPG-Projects\\Datasets-Preprocessed\\UBFC2\\DATASET_2\\[0-9]*\\subject[0-9]*"

# Directory that receives one HDF5 file per subject (created once, before the loop)
hdf5_dir = Path(r'D:\OneDrive\Documents\rPPG-Projects\Datasets-Preprocessed\hdf5\UBFC2\DATASET_2')
hdf5_dir.mkdir(parents=True, exist_ok=True)


def _frame_sort_key(frame_path):
    """Sorts frames by their numeric file name (2.png before 10.png); other names come last."""
    stem = Path(frame_path).stem
    return (0, int(stem), "") if stem.isdecimal() else (1, 0, stem)


# Get the subject directories (sorted, because glob yields them in arbitrary order)
imageDirs = sorted(iglob(SUBJECT_GLOB))

# Lists that will contain the images, labels and the subject IDs
images, labels, subjects = [], [], []

# Get the maximum number of frames
max_num_images = get_max_num_of_frames(SUBJECT_GLOB)

# Round the maximum no. of images up to a multiple of 10 (extra frames are added as padding);
# a count that already is a multiple of 10 is kept unchanged
max_num_images = ((max_num_images + 9) // 10) * 10

# For each subject
for path_ in imageDirs:

    # Get the paths to the frames in temporal order. The order matters: adjacent frames are
    # normalized against each other and frame k is paired with the k-th label, and glob
    # on its own gives no ordering guarantee (and never a numeric one).
    imagesPath = sorted(iglob(os.path.join(path_, "*.png")), key=_frame_sort_key)

    # Get subject ID (last path component)
    subID = os.path.basename(path_)

    # Add the subject ID to subjects list
    subjects.append(subID)

    # Get the path to the csv file: <parent of the subject directory>\0\phys.csv
    gtPath = os.path.join(os.path.dirname(path_), "0", "phys.csv")

    images, labels = read_many_disk(max_num_images, imagesPath, gtPath)

    # Store the images and labels at the target path
    storedFilePath = store_many_hdf5(hdf5_dir, subID, images, labels)

    # Reading the stored data for each subject
    images_, labels_ = read_hdf5(storedFilePath)

    # Check the round trip element by element (shape included; NaNs at the same
    # positions count as equal). Comparing `a.all() == b.all()` would only compare
    # two booleans and pass for almost any pair of arrays.
    np.testing.assert_array_equal(images, images_)
    np.testing.assert_array_equal(labels, labels_)

# print(f'shape of images: {np.shape(images)}, type: {type(images)}')
# print(f'shape of labels: {np.shape(labels)}, type: {type(labels)}')


In [22]:
data_dir = r'D:\OneDrive\Documents\rPPG-Projects\Datasets-Preprocessed\hdf5\UBFC2\DATASET_2'

# Store the paths of each subject into a list. The entries are sorted because os.listdir
# returns them in arbitrary order, and index-based splits of this list are only
# reproducible when the order is fixed.
sub_paths = [os.path.join(data_dir, sub) for sub in sorted(os.listdir(data_dir))]



In [26]:
# 5-fold cross-validation over the subjects; shuffling with a fixed random_state
# yields the same folds on every run
kfold = KFold(n_splits=5, shuffle=True, random_state=1)

data = np.array(sub_paths)

# enumerate splits: `train` and `test` are index arrays into `data`
for train, test in kfold.split(data):
    # print(f'train: {data[train]}, test: {data[test]}')
    # The loop needs at least one statement, otherwise the cell does not even parse
    print(train)

[ 0  1  4  5  6  7  8  9 10 11 12 13 14 15 16 18 20 21 22 24 25 26 27 28
 30 32 33 34 36 37 38 40 41]
[ 0  1  2  3  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 23 24 25 26
 28 29 31 35 36 37 39 40 41]
[ 0  1  2  3  4  5  6  7  8  9 11 12 13 15 16 17 19 21 22 23 25 27 28 29
 30 31 32 33 34 35 37 38 39 40]
[ 0  2  3  4  5  8  9 10 11 12 14 15 17 18 19 20 21 22 23 24 26 27 29 30
 31 32 33 34 35 36 37 38 39 41]
[ 1  2  3  4  6  7 10 13 14 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
 31 32 33 34 35 36 38 39 40 41]
