In [2]:
import pandas as pd
import os
import scipy.io as sio
import numpy as np

In [3]:
# Define the path to the HIHD dataset.
# The machine-specific default below can be overridden by setting the
# HIHD_DATASET_PATH environment variable, so the notebook can run on another
# machine without editing this cell. An unset or empty variable falls back to
# the default.
dataset_path = os.environ.get("HIHD_DATASET_PATH") or (
    r"C:\Users\hp\Downloads\HR_IMU_falldetection_dataset-master\HR_IMU_falldetection_dataset-master"
)
dataset = []  # One feature dict per successfully processed file
missing_files = []  # Track missing/corrupt files
invalid_files = []  # Track files missing required data
total_expected_files = 0  # Track expected file count

In [4]:
def extract_features(ax, ay, az, heart):
    """Summarise one recording as a flat dictionary of statistics.

    Parameters
    ----------
    ax, ay, az : array-like
        Samples of the three acceleration components. They are combined
        element-wise, so they must have the same length (or broadcast).
    heart : array-like
        Samples of the heart signal; only its mean and standard deviation
        are used, so its length may differ from the acceleration signals.

    Returns
    -------
    dict
        Keys ``ax_mean``, ``ay_mean``, ``az_mean``, ``ax_std``, ``ay_std``,
        ``az_std``, ``smv`` (mean of ``sqrt(ax**2 + ay**2 + az**2)``),
        ``heart_mean`` and ``heart_std``.

    Raises
    ------
    ValueError
        If the acceleration arrays have incompatible shapes.
    """
    # Cast to float64 before any arithmetic: arrays loaded with an integer
    # dtype would otherwise wrap around silently when squared, and plain
    # Python lists do not support ``**`` at all.
    ax, ay, az, heart = (
        np.asarray(signal, dtype=np.float64) for signal in (ax, ay, az, heart)
    )

    magnitude = np.sqrt(ax**2 + ay**2 + az**2)

    return {
        'ax_mean': np.mean(ax), 'ay_mean': np.mean(ay), 'az_mean': np.mean(az),
        'ax_std': np.std(ax), 'ay_std': np.std(ay), 'az_std': np.std(az),
        'smv': np.mean(magnitude),  # Signal Magnitude Vector
        'heart_mean': np.mean(heart), 'heart_std': np.std(heart)
    }


In [5]:
# Reset every accumulator inside this cell so that re-running it does not
# double-count files or append duplicate rows to `dataset`.
dataset = []
missing_files = []  # Files that raised an error while being processed
invalid_files = []  # Files whose ax, ay, az, or heart array is absent/empty
total_expected_files = 0  # Number of .mat files discovered
file_count = 0  # Track processed files

# Lowercase folder name -> class label stored in the 'label' feature.
label_by_folder = {'fall': 1, 'non-fall': 0}

# Traverse subjects. os.listdir() returns entries in arbitrary order, so the
# listings are sorted to make the row order of `dataset` reproducible.
for subject in sorted(os.listdir(dataset_path)):
    subject_path = os.path.join(dataset_path, subject)
    if not os.path.isdir(subject_path):
        continue

    for label_folder, label in label_by_folder.items():
        label_path = os.path.join(subject_path, label_folder)
        if not os.path.isdir(label_path):
            continue

        # Count how many .mat files exist
        mat_files = sorted(f for f in os.listdir(label_path) if f.endswith('.mat'))
        total_expected_files += len(mat_files)  # Track total expected files

        for scenario_file in mat_files:
            scenario_path = os.path.join(label_path, scenario_file)
            try:
                data = sio.loadmat(scenario_path)

                # Extract required data; an absent key becomes an empty array
                ax = data.get('ax', np.array([])).flatten()
                ay = data.get('ay', np.array([])).flatten()
                az = data.get('az', np.array([])).flatten()
                heart = data.get('heart', np.array([])).flatten()

                # Check if all necessary data exists
                if min(ax.size, ay.size, az.size, heart.size) == 0:
                    invalid_files.append(scenario_path)  # Log incomplete files
                    continue

                features = extract_features(ax, ay, az, heart)
                features['label'] = label
                features['subject'] = subject
                features['scenario'] = scenario_file
                dataset.append(features)
                file_count += 1

            except Exception as e:
                # Deliberately broad: one unreadable or malformed file (including
                # a failure inside extract_features) must not abort the whole
                # run; the path is recorded and reported instead.
                print(f"Error processing {scenario_path}: {e}")
                missing_files.append(scenario_path)  # Log unreadable files

In [6]:
# Convert the dataset list to a DataFrame (one row per processed file)
df = pd.DataFrame(dataset)

# Save the dataset as a CSV file inside the dataset directory. The location is
# derived from dataset_path instead of repeating the absolute path, so the two
# cannot drift apart when the dataset is moved.
output_csv_path = os.path.join(dataset_path, "HIHD.csv")
df.to_csv(output_csv_path, index=False)

# Debug: Print missing/corrupt file report
print(f"Total Expected Files: {total_expected_files}")
print(f"Total Processed Files: {file_count}")
print(f"Missing/Unreadable Files: {len(missing_files)}")
print(f"Invalid Data Files (Missing ax, ay, az, or heart): {len(invalid_files)}")

# List the offending paths only when there is something to report
if missing_files:
    print("Unreadable Files:", missing_files)
if invalid_files:
    print("Files Missing Data:", invalid_files)

Total Expected Files: 349
Total Processed Files: 349
Missing/Unreadable Files: 0
Invalid Data Files (Missing ax, ay, az, or heart): 0
