In [None]:
import h5py
import io
import pandas as pd
import tensorflow as tf
from tensorflow.keras.utils import img_to_array, load_img

# Path of the HDF5 file holding the test images; passed to get_images_dataframe below.
test_images_path = 'test-image.hdf5'

def get_images_dataframe(hdf5_file_path: str, target_size: tuple = (32, 32)) -> pd.DataFrame:
    """Load every image stored in an HDF5 file into a table of flattened pixels.

    Each top-level entry of the file is read as the encoded bytes of one
    image. The image is decoded in grayscale, divided by 255.0, resized to
    ``target_size`` and flattened into a single row.

    Args:
        hdf5_file_path: Path of the HDF5 file to read.
        target_size: ``(height, width)`` every image is resized to before
            flattening. Defaults to ``(32, 32)``.

    Returns:
        A DataFrame with one row per entry: integer-named pixel columns
        followed by an ``image_code`` column holding the entry name as a
        string (the identifier of the image within the file).

    Raises:
        ValueError: If the file contains no entries.
    """
    pixel_rows = []
    image_codes = []

    with h5py.File(hdf5_file_path, 'r') as f:
        for dataset_name in f.keys():
            # 'f[dataset_name][()]' retrieves the stored binary image data;
            # 'io.BytesIO' exposes it as a file-like object load_img can decode.
            raw_image = io.BytesIO(f[dataset_name][()])
            image_grayscale = load_img(raw_image, color_mode='grayscale')

            image_array = img_to_array(image_grayscale) / 255.0

            resized_image = tf.image.resize(image_array, target_size)
            pixel_rows.append(resized_image.numpy().flatten())
            image_codes.append(str(dataset_name))

    if not pixel_rows:
        # Fail with an explicit message instead of pandas' generic
        # "No objects to concatenate" error that the empty case used to hit.
        raise ValueError(f"No images found in HDF5 file: {hdf5_file_path!r}")

    # Build the table once rather than concatenating one single-row frame per
    # image; the resulting rows, columns and index are the same.
    df_pixels = pd.DataFrame(pixel_rows)
    df_pixels["image_code"] = image_codes
    return df_pixels

# Build the flattened-pixel table for the test images (reads the HDF5 file when this cell runs).
df_images = get_images_dataframe(test_images_path)