## CSV to H5

This is Step 2 of the model training workflow.

In [2]:
import pandas as pd
import h5py
import os

In [3]:
def csv_to_h5(csv_path, h5_path):
    """Convert one CSV file into an HDF5 file with one dataset per column.

    The CSV is read with pandas and its last column is dropped by position
    (per the original author's note this is the 'label' column, which is not
    needed because the downstream model counts rather than classifies).
    Every remaining column is written as a dataset named after the column,
    with all values converted to strings and stored as UTF-8.

    Parameters
    ----------
    csv_path : path to the input CSV file.
    h5_path : path of the .h5 file to write; opened in 'w' mode, so an
        existing file at that path is replaced.

    Returns
    -------
    None. Prints a confirmation line once the file has been written.
    """
    # Positional slice: whatever the last column is, it is excluded.
    kept_columns = pd.read_csv(csv_path).iloc[:, :-1]

    # A single UTF-8 string dtype is shared by every dataset.
    utf8_strings = h5py.string_dtype(encoding = 'utf-8')

    with h5py.File(h5_path, 'w') as out_file:
        for column_name, series in kept_columns.items():
            # Stringify values so mixed/numeric columns are all stored uniformly
            values = series.astype(str).to_numpy()
            out_file.create_dataset(column_name, data=values, dtype=utf8_strings)

    print(f"Created .h5 file at {h5_path} excluding the last column from {csv_path}")

In [5]:
# Example usage

# Define folder paths - switch between train and test manually
csv_folder = r'csv_folder_with_rescaled_annotations'
h5_folder = r'destination_for_h5'
os.makedirs(h5_folder, exist_ok = True) # standard folder check

# Convert every CSV in csv_folder into an .h5 file with the same base name.
# sorted() gives a deterministic processing/log order (os.listdir order is arbitrary).
for csv_filename in sorted(os.listdir(csv_folder)):
    # Split off only the final extension, so a '.csv' appearing elsewhere in the
    # name is left alone; compare case-insensitively so '.CSV' files are included.
    base_name, extension = os.path.splitext(csv_filename)
    if extension.lower() != '.csv':
        continue
    csv_path = os.path.join(csv_folder, csv_filename)
    h5_path = os.path.join(h5_folder, base_name + '.h5')
    csv_to_h5(csv_path, h5_path)

Created .h5 file at D:\AI\CSRNet\dataset\test\ground_truth\20130320T004348.182606.Cam6_54.h5 excluding the last column from D:\AI\CSRNet\dataset\test\annotations\20130320T004348.182606.Cam6_54.csv
Created .h5 file at D:\AI\CSRNet\dataset\test\ground_truth\20130320T004349.135000.Cam6_11.h5 excluding the last column from D:\AI\CSRNet\dataset\test\annotations\20130320T004349.135000.Cam6_11.csv
Created .h5 file at D:\AI\CSRNet\dataset\test\ground_truth\20130320T004353.706594.Cam6_22.h5 excluding the last column from D:\AI\CSRNet\dataset\test\annotations\20130320T004353.706594.Cam6_22.csv
Created .h5 file at D:\AI\CSRNet\dataset\test\ground_truth\20130320T004359.801846.Cam6_21.h5 excluding the last column from D:\AI\CSRNet\dataset\test\annotations\20130320T004359.801846.Cam6_21.csv
Created .h5 file at D:\AI\CSRNet\dataset\test\ground_truth\20130320T004402.659157.Cam6_43.h5 excluding the last column from D:\AI\CSRNet\dataset\test\annotations\20130320T004402.659157.Cam6_43.csv
Created .h5 fil

You should expect to see your `.h5` destination folder (e.g. `ground_truth`) populated with `.h5` files of around 1-2 KB each. Now move on to `make_dataset.ipynb`.