In [7]:
import os
import numpy as np
import pandas as pd
import wfdb

# Convert every record of the dataset folder into two flat tables -- one row
# per ECG sample and one row per annotation -- and save both as CSV files.

# Path to the dataset
data_dir = "mit-bih-malignant-ventricular-ectopy-database/"

# Sampling rate written into both tables. Update this if the sampling rate is
# different (NOTE(review): it could presumably be read from the header fields
# returned by wfdb.rdsamp instead -- confirm before switching).
sampling_rate = 250

# os.listdir() returns entries in arbitrary order, so sort them: the
# participant IDs below come from the position in this list and must be the
# same on every run. endswith() avoids matching ".dat" in the middle of a name.
data_files = sorted(
    os.path.join(data_dir, file)
    for file in os.listdir(data_dir)
    if file.endswith(".dat")
)
if not data_files:
    # Fail early with a clear message instead of an opaque pd.concat() error.
    raise FileNotFoundError(f"No .dat records found in {data_dir!r}")

dfs_ecg = []
dfs_rhythms = []

for participant, file in enumerate(data_files):
    print(f"Participant: {participant + 1}/{len(data_files)}")

    # Read ECG signal
    record_name = os.path.splitext(file)[0]  # Drop ".dat" to get the record name
    sample, fields = wfdb.rdsamp(record_name)
    # Keep only the signal stored at column index 1 (i.e. the second channel of
    # a multi-channel record). NOTE(review): confirm this is the wanted lead.
    data = pd.DataFrame({"ECG": sample[:, 1]})

    # Initialize gender and age as unknown
    gender = "Unknown"
    age = "Unknown"

    # Parse gender and age from the header comments, but only when there are
    # any: an empty comment list is "no information", not a parsing failure.
    comments = fields.get("comments")
    if comments is not None:
        print(comments)
    if comments:
        try:
            # Assuming comments are structured like "25 Male" or "30 Female"
            tokens = comments[0].split(' ')
            age = int(tokens[0])
            gender = tokens[1]
        except (IndexError, ValueError):
            print(f"Warning: Could not parse age or gender in {record_name}.hea")

    # Add participant information
    participant_id = f"malignant_{participant:02d}"
    data["Participant"] = participant_id
    data["Sample"] = range(len(data))
    data["Sampling_Rate"] = sampling_rate
    data["Database"] = "malignant"
    data["Gender"] = gender
    data["Age"] = age

    # Read rhythm annotations
    anno = wfdb.rdann(record_name, 'atr')
    rhythm_annotations = pd.DataFrame({
        "Rpeaks": anno.sample,  # Sample index of each annotation
        "Rhythm": anno.aux_note  # Rhythm labels
    })
    rhythm_annotations["Participant"] = participant_id
    rhythm_annotations["Sampling_Rate"] = sampling_rate
    rhythm_annotations["Database"] = "malignant"
    rhythm_annotations["Gender"] = gender
    rhythm_annotations["Age"] = age

    # Store with the rest
    dfs_ecg.append(data)
    dfs_rhythms.append(rhythm_annotations)

# Save results
df_ecg = pd.concat(dfs_ecg)
df_ecg.to_csv("malignant_ECGs.csv", index=False)

# The rhythm table was collected but never written, although the final message
# announces it; save it so the message is true.
df_rhythms = pd.concat(dfs_rhythms)
df_rhythms.to_csv("malignant_Rhythms.csv", index=False)

print("Processing complete. Files saved as 'malignant_ECGs.csv' and 'malignant_Rhythms.csv'.")


Participant: 1/22
[]
Participant: 2/22
[]
Participant: 3/22
[]
Participant: 4/22
[]
Participant: 5/22
[]
Participant: 6/22
[]
Participant: 7/22
[]
Participant: 8/22
[]
Participant: 9/22
[]
Participant: 10/22
[]
Participant: 11/22
[]
Participant: 12/22
[]
Participant: 13/22
[]
Participant: 14/22
[]
Participant: 15/22
[]
Participant: 16/22
[]
Participant: 17/22
[]
Participant: 18/22
[]
Participant: 19/22
[]
Participant: 20/22
[]
Participant: 21/22
[]
Participant: 22/22
[]
Processing complete. Files saved as 'malignant_ECGs.csv' and 'malignant_Rhythms.csv'.


In [10]:
import os

# Dump the raw text of every .hea header file in the dataset folder.

# Path to the dataset
folder_path = "mit-bih-malignant-ventricular-ectopy-database/"  # replace with the actual path

# Collect the names of all .hea files in the folder
hea_files = list(filter(lambda name: name.endswith(".hea"), os.listdir(folder_path)))

# Print the content of each .hea file, followed by a separator line
for file in hea_files:
    print(f"Contents of {file}:\n")
    file_path = os.path.join(folder_path, file)
    with open(file_path, 'r') as f:
        content = f.read()
    # The file is already closed here; the text and the separator are emitted
    # on separate lines, exactly as two consecutive print() calls would do.
    print(content, "-" * 50, sep="\n")


Contents of 430.hea:

430 2 250 525000
430.dat 212 200 12 0 -72 30566 0 ECG
430.dat 212 200 12 0 -45 29259 0 ECG

--------------------------------------------------
Contents of 424.hea:

424 2 250 525000
424.dat 212 200 12 0 15 22752 0 ECG
424.dat 212 200 12 0 28 22930 0 ECG

--------------------------------------------------
Contents of 425.hea:

425 2 250 525000
425.dat 212 200 12 0 75 6731 0 ECG
425.dat 212 200 12 0 94 24939 0 ECG

--------------------------------------------------
Contents of 610.hea:

610 2 250 525000
610.dat 212 200 12 0 117 -3714 0 ECG
610.dat 212 200 12 0 127 -24049 0 ECG

--------------------------------------------------
Contents of 611.hea:

611 2 250 525000
611.dat 212 200 12 0 -8 -31739 0 ECG
611.dat 212 200 12 0 3 -13132 0 ECG

--------------------------------------------------
Contents of 427.hea:

427 2 250 525000
427.dat 212 200 12 0 -128 21014 0 ECG
427.dat 212 200 12 0 17 31678 0 ECG

--------------------------------------------------
Contents of 429