In [None]:
# ============================================================
# 0. 설정 및 라이브러리
# ============================================================
import h5py
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import json

# Path of the HDF5 file to analyze.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs as-is where this file exists at this location.
h5_path = "/home/coder/workspace/data/tykim/holter_h5_nosim/psvt/202006_69_3276007.h5"
segment_index = 0  # index of the segment to analyze (used as the key under ECG/segments)

In [None]:
# Inspect the patient record
def print_patient_info(h5_path):
    """Print every attribute attached to the file's top-level ``patient`` object.

    Args:
        h5_path: Path of the HDF5 file; it is opened read-only.
    """
    with h5py.File(h5_path, 'r') as h5_file:
        patient_attrs = h5_file["patient"].attrs
        print("🧑‍⚕️ Patient Info:")
        for attr_name in patient_attrs:
            attr_value = patient_attrs[attr_name]
            print(f" - {attr_name}: {attr_value}")
print_patient_info(h5_path)

In [None]:
# h5 파일 구조 출력
import h5py

def print_tree_structure(h5_path):
    """Print an indented tree of the HDF5 file, expanding only segment ``0``.

    Every object outside ``ECG/segments/`` is printed. Inside it, only the
    entry named ``0`` and its descendants are printed; the other segments are
    just counted, and the number of distinct segments is reported at the end.

    Args:
        h5_path: Path of the HDF5 file; it is opened read-only.
    """
    segments_prefix = "ECG/segments/"
    # Names of the direct children of ECG/segments seen during the walk.
    segment_ids = set()

    def print_item(name, obj):
        # Must always return None: visititems stops as soon as the callback
        # returns anything else.
        parts = name.split('/')
        if name.startswith(segments_prefix):
            # parts is ["ECG", "segments", "<segment id>", ...]
            segment_id = parts[2]
            if len(parts) == 3:
                segment_ids.add(segment_id)
            # Compare the segment id itself. A substring test on the whole
            # path would also let through segments such as "10" or "20".
            if segment_id != '0':
                return

        indent = "  " * name.count('/')
        kind = 'Group' if isinstance(obj, h5py.Group) else 'Dataset'
        print(f"{indent}- {parts[-1]} ({kind})")

    with h5py.File(h5_path, "r") as f:
        print("📂 HDF5 구조 (예: segments/0 만 출력):\n")
        f.visititems(print_item)
        print(f"\n🧩 총 세그먼트 개수: {len(segment_ids)}")

# 사용 예시
print_tree_structure(h5_path)


In [None]:
# ============================================================
# 1. HDF5 파일 구조 확인
# ============================================================
def explore_structure(h5_path):
    """Print one ``<path> - Group|Dataset`` line for every object in the file.

    Args:
        h5_path: Path of the HDF5 file; it is opened read-only.
    """
    def describe(path, node):
        if isinstance(node, h5py.Group):
            node_kind = 'Group'
        else:
            node_kind = 'Dataset'
        print(f"{path} - {node_kind}")

    with h5py.File(h5_path, 'r') as h5_file:
        h5_file.visititems(describe)

explore_structure(h5_path)



In [None]:
# lead 목록 확인
import h5py


# List the keys stored under the selected segment's "signal" group.
# Module-level names are kept as they were because notebook cells share globals.
with h5py.File(h5_path, 'r') as f:
    segment_key = str(segment_index)
    signal_group = f["ECG"]["segments"][segment_key]["signal"]

    print("✅ signal_group의 하위 키들:")
    for key in signal_group:
        print(f" - {key}")


In [None]:

# ============================================================
# 2. Signal 시각화 (leads)
# ============================================================


# Load every lead of the selected segment. `lead_names` and `signal` are left
# as notebook globals because later cells pass `signal` to the plot helpers.
with h5py.File(h5_path, 'r') as f:
    segment_key = str(segment_index)
    signal_group = f["ECG"]["segments"][segment_key]["signal"]

    # Lead names are taken from the file rather than assumed.
    lead_names = list(signal_group.keys())

    # One row per lead, in the same order as `lead_names`.
    # NOTE(review): assumes all leads have the same length - confirm.
    signal = np.array([signal_group[lead][:] for lead in lead_names])

# Plot each lead in its own subplot.
plt.figure(figsize=(12, 6))
for i, lead in enumerate(lead_names):
    plt.subplot(len(lead_names), 1, i + 1)
    plt.plot(signal[i], linewidth=0.8)
    plt.title(f"Lead {lead}")
# Lay out once after all subplots exist instead of once per iteration.
plt.tight_layout()
# Show the segment actually plotted rather than a hard-coded "0".
plt.suptitle(f"Available Leads - Segment {segment_index}", fontsize=15, y=1.02)
plt.show()

In [None]:
# Inspect the beat annotations
def print_beat_annotation(h5_path, segment_index):
    """Print every (sample, symbol) pair of a segment's ``beat_annotation``.

    Args:
        h5_path: Path of the HDF5 file; it is opened read-only.
        segment_index: Segment to read; converted with ``str`` to form the
            key under ``ECG/segments``.
    """
    with h5py.File(h5_path, 'r') as h5_file:
        annotation = h5_file["ECG"]["segments"][str(segment_index)]["beat_annotation"]
        sample_positions = annotation["sample"][:]
        # Symbols are decoded from bytes to str before printing.
        beat_symbols = [raw.decode() for raw in annotation["symbol"][:]]

    print("🔴 beat_annotation 목록:")
    for row, pair in enumerate(zip(sample_positions, beat_symbols)):
        position, symbol = pair
        print(f"  {row:2d}: sample = {position}, symbol = {symbol}")
print_beat_annotation(h5_path, segment_index)


In [None]:
# beat_annotation 전체 로드 및 출력
import h5py

def print_beat_annotation_full(h5_path, segment_index, limit=10):
    """Print a preview of every entry in a segment's ``beat_annotation`` group.

    Each entry is loaded in full; the first ``limit`` values are printed
    together with the total number of values.

    Args:
        h5_path: Path of the HDF5 file; it is opened read-only.
        segment_index: Segment to read; converted with ``str`` to form the
            key under ``ECG/segments``.
        limit: Number of leading values shown per entry.
    """
    with h5py.File(h5_path, 'r') as h5_file:
        annotation = h5_file["ECG"]["segments"][str(segment_index)]["beat_annotation"]

        print(f"🔍 beat_annotation 항목 목록 (segment {segment_index}):")
        for field in annotation.keys():
            values = annotation[field][:]  # load the whole entry
            # Fixed-width byte strings (dtype char 'S') are decoded to str.
            is_byte_string = values.dtype.char == 'S'
            if is_byte_string:
                values = [item.decode() for item in values]
            preview = values[:limit]
            print(f" - {field}:", preview, f"... (총 {len(values)}개)")

# 실행 예시
print_beat_annotation_full(h5_path, segment_index)


In [None]:
# annotation 시각화
import matplotlib.pyplot as plt

def plot_rpeaks(h5_path, segment_index, signal, lead_name="II", lead_names=None):
    """Plot one lead with its beat annotations marked on the trace.

    Args:
        h5_path: Path of the HDF5 file; it is opened read-only.
        segment_index: Segment to read; converted with ``str`` to form the
            key under ``ECG/segments``.
        signal: Sequence of lead traces; ``signal[i]`` is the trace of
            ``lead_names[i]``.
        lead_name: Name of the lead to plot.
        lead_names: Lead names in the row order of ``signal``. When omitted,
            the keys of the segment's ``signal`` group are used, so the
            function no longer depends on a notebook global.

    Raises:
        ValueError: If ``lead_name`` is not among the lead names.
    """
    with h5py.File(h5_path, 'r') as f:
        segment = f["ECG"]["segments"][str(segment_index)]
        if lead_names is None:
            lead_names = list(segment["signal"].keys())

        beat_ann = segment["beat_annotation"]
        r_peaks = beat_ann["sample"][:]
        symbols = [s.decode() for s in beat_ann["symbol"][:]]

        # "best_annotation" is optional; only its sample positions are plotted.
        best_peaks = None
        if "best_annotation" in segment:
            best_peaks = segment["best_annotation"]["sample"][:]

    lead_idx = list(lead_names).index(lead_name)
    lead_signal = signal[lead_idx]

    # Plot
    plt.figure(figsize=(15, 4))
    plt.plot(lead_signal, label=f"Lead {lead_name}", linewidth=0.8)
    plt.scatter(r_peaks, lead_signal[r_peaks], color='red', s=30, label="beat_annotation")
    for idx, sym in zip(r_peaks, symbols):
        # Offset the symbol slightly above the trace so it does not cover the marker.
        plt.text(idx, lead_signal[idx] + 0.15, sym, color="blue", fontsize=8, ha='center')

    if best_peaks is not None:
        plt.scatter(best_peaks, lead_signal[best_peaks], color='green', marker='x', s=40, label="best_annotation")

    plt.title(f"R-peak Annotation on Lead {lead_name}")
    plt.legend()
    plt.xlabel("Samples")
    plt.ylabel("Amplitude")
    plt.tight_layout()
    plt.show()

# 실행
plot_rpeaks(h5_path, segment_index, signal, lead_name="II")


In [None]:
#4. Fiducial_point
def print_fiducial_summary(h5_path, segment_index):
    """Print each fiducial label of a segment next to its sample position.

    Args:
        h5_path: Path of the HDF5 file; it is opened read-only.
        segment_index: Segment to read; converted with ``str`` to form the
            key under ``ECG/segments``.
    """
    with h5py.File(h5_path, 'r') as h5_file:
        fiducial_group = h5_file["ECG"]["segments"][str(segment_index)]["fiducial_point"]
        # Labels are decoded from bytes to str before printing.
        names = [raw.decode() for raw in fiducial_group["fiducial"][:]]
        positions = fiducial_group["fsample"][:]

    print(f"📍 Fiducial Points (segment {segment_index}):")
    for name, position in zip(names, positions):
        print(f" - {name:<10}: {position}")
print_fiducial_summary(h5_path, segment_index)


In [None]:
# fiducial points 시각화
import matplotlib.pyplot as plt
import numpy as np
import h5py

def plot_fiducial_points(h5_path, segment_index, signal, lead_name="II", lead_names=None):
    """Plot one lead with its fiducial points drawn as colored markers.

    Args:
        h5_path: Path of the HDF5 file; it is opened read-only.
        segment_index: Segment to read; converted with ``str`` to form the
            key under ``ECG/segments``.
        signal: Sequence of lead traces; ``signal[i]`` is the trace of
            ``lead_names[i]``.
        lead_name: Name of the lead to plot.
        lead_names: Lead names in the row order of ``signal``. When omitted,
            the keys of the segment's ``signal`` group are used instead of a
            fixed list, so the selected row matches what the file contains.

    Raises:
        ValueError: If ``lead_name`` is not among the lead names.
    """
    # Marker color per fiducial label; labels not listed here are drawn gray.
    color_dict = {
        "ECG_P_Onsets": "#1f77b4",
        "ECG_P_Peaks": "#2ca02c",
        "ECG_P_Offsets": "#17becf",
        "ECG_Q_Peaks": "#8c564b",
        "ECG_R_Peaks": "#9467bd",
        "ECG_S_Peaks": "#e377c2",
        "ECG_T_Onsets": "#7f7f7f",
        "ECG_T_Peaks": "#ff7f0e",
        "ECG_T_Offsets": "#bcbd22",
    }

    with h5py.File(h5_path, 'r') as f:
        segment = f["ECG"]["segments"][str(segment_index)]
        if lead_names is None:
            lead_names = list(segment["signal"].keys())
        group = segment["fiducial_point"]
        labels = [s.decode() for s in group["fiducial"][:]]
        fsample = group["fsample"][:]

    lead_idx = list(lead_names).index(lead_name)
    # An array allows all points of one label to be looked up in one step.
    lead_signal = np.asarray(signal[lead_idx])

    plt.figure(figsize=(15, 4))
    plt.plot(lead_signal, label=f"Lead {lead_name}", linewidth=0.8, color="black")

    # Group in-range positions by label (first-seen order) so each label is
    # drawn with a single scatter call and gets exactly one legend entry.
    points_by_label = {}
    for label, idx in zip(labels, fsample):
        if 0 <= idx < len(lead_signal):
            points_by_label.setdefault(label, []).append(int(idx))

    for label, positions in points_by_label.items():
        plt.scatter(positions, lead_signal[positions], label=label,
                    color=color_dict.get(label, "gray"), s=30)

    plt.title(f"Fiducial Points on Lead {lead_name} (Segment {segment_index})")
    plt.xlabel("Samples")
    plt.ylabel("Amplitude")
    plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left")
    plt.tight_layout()
    plt.show()

plot_fiducial_points(h5_path, segment_index, signal, lead_name="II")


In [None]:
#5. fiducial_feature
def print_fiducial_feature(h5_path, segment_index):
    """Print the attributes of a segment's ``fiducial_feature`` entry.

    A warning line is printed instead when the segment has no such entry.

    Args:
        h5_path: Path of the HDF5 file; it is opened read-only.
        segment_index: Segment to read; converted with ``str`` to form the
            key under ``ECG/segments``.
    """
    with h5py.File(h5_path, 'r') as h5_file:
        segment = h5_file["ECG"]["segments"][str(segment_index)]
        features = segment.get("fiducial_feature", None)

        if features is not None:
            print(f"📐 fiducial_feature (segment {segment_index}):")
            for name, value in features.attrs.items():
                print(f" - {name:<10}: {value}")
        else:
            print(f"⚠️ fiducial_feature가 segment {segment_index}에 존재하지 않습니다.")
print_fiducial_feature(h5_path, segment_index)


In [None]:
#6. signal_quality
def print_signal_quality(h5_path, segment_index):
    """Print the signal-quality entries stored for one segment.

    Prints ``nan_ratio``, the four amplitude statistics, and the two
    ``beat_similarity`` entries when that group is present.

    Args:
        h5_path: Path of the HDF5 file; it is opened read-only.
        segment_index: Segment to read; converted with ``str`` to form the
            key under ``ECG/segments``.
    """
    with h5py.File(h5_path, 'r') as h5_file:
        quality = h5_file["ECG"]["segments"][str(segment_index)]["signal_quality"]

        print("📊 Signal Quality")
        print(" - nan_ratio:", quality["nan_ratio"][:])

        amplitude = quality["amplitude"]
        for stat_name in ("amp_mean", "amp_std", "amp_skewness", "amp_kurtosis"):
            print(f" - {stat_name}:", amplitude[stat_name][:])

        # beat_similarity is optional.
        if "beat_similarity" in quality:
            similarity = quality["beat_similarity"]
            for metric_name in ("bs_correlation", "bs_dtw"):
                print(f" - {metric_name}:", similarity[metric_name][:])
print_signal_quality(h5_path, segment_index)

In [None]:
# Find the segments that carry beat similarity values
def find_segments_with_beat_similarity(h5_path):
    """Return the keys of segments whose beat similarity is not entirely NaN.

    A segment qualifies when its ``signal_quality`` has a ``beat_similarity``
    group and at least one of ``bs_correlation`` / ``bs_dtw`` holds a value
    that is not NaN.

    Args:
        h5_path: Path of the HDF5 file; it is opened read-only.

    Returns:
        List of segment keys, in the iteration order of ``ECG/segments``.
    """
    def has_value(values):
        return not np.all(np.isnan(values))

    matches = []
    with h5py.File(h5_path, 'r') as h5_file:
        segments = h5_file["ECG"]["segments"]
        for seg_key in segments:
            quality = segments[seg_key]["signal_quality"]
            if "beat_similarity" not in quality:
                continue
            similarity = quality["beat_similarity"]
            correlation = similarity["bs_correlation"][:]
            dtw_distance = similarity["bs_dtw"][:]
            if has_value(correlation) or has_value(dtw_distance):
                matches.append(seg_key)
    return matches
segments_with_similarity = find_segments_with_beat_similarity(h5_path)
print("✅ beat similarity 값이 존재하는 세그먼트:")
print(segments_with_similarity)


In [None]:
# Check the similarity values of a few sample segments
def debug_some_similarity(h5_path, sample_count=5):
    """Print the beat similarity entries of the first ``sample_count`` segments.

    Segments are taken in the iteration order of ``ECG/segments``. A segment
    without a ``beat_similarity`` group gets a line saying so.

    Args:
        h5_path: Path of the HDF5 file; it is opened read-only.
        sample_count: Maximum number of segments to report.
    """
    with h5py.File(h5_path, 'r') as h5_file:
        segments = h5_file["ECG"]["segments"]
        print("🛠️ 유사도 확인:")
        for position, seg_key in enumerate(segments):
            if position >= sample_count:
                break
            quality = segments[seg_key]["signal_quality"]
            if "beat_similarity" not in quality:
                print(f" - Segment {seg_key} → beat_similarity 없음")
                continue
            similarity = quality["beat_similarity"]
            correlation = similarity["bs_correlation"][:]
            dtw_distance = similarity["bs_dtw"][:]
            print(f" - Segment {seg_key} → corr: {correlation}, dtw: {dtw_distance}")
debug_some_similarity(h5_path, sample_count=10)


In [None]:
# 7. Inspect the report annotation
def print_report_annotation_summary(h5_path):
    """Print the summary stored under ``ECG/annotation``.

    Shows three top-level attributes (``N/A`` when one is absent), then the
    attributes of the ventricular and supraventricular beat-count groups,
    including those of their ``Runs`` child when it exists.

    Args:
        h5_path: Path of the HDF5 file; it is opened read-only.
    """
    def show_beat_group(beat_group):
        # Attributes of the group itself, then of its optional "Runs" child.
        for name, value in beat_group.attrs.items():
            print(f"   - {name}: {value}")
        if "Runs" in beat_group:
            print("   ➤ Runs:")
            for name, value in beat_group["Runs"].attrs.items():
                print(f"     • {name}: {value}")

    with h5py.File(h5_path, 'r') as h5_file:
        report = h5_file["ECG"]["annotation"]

        print("📄 Report Annotation Summary:")
        for attr_name in ("ann_len", "NoisePercentage", "AFAFLPercentage"):
            print(f" - {attr_name}:", report.attrs.get(attr_name, "N/A"))

        beat_count = report["beat_count"]

        print("\n🫀 Ventricular Beats:")
        show_beat_group(beat_count["VentricularBeat"])

        print("\n🫀 Supraventricular Beats:")
        show_beat_group(beat_count["SupraventricularBeat"])
print_report_annotation_summary(h5_path)


In [None]:
# 8. Inspect the metadata
def print_metadata_summary(h5_path):
    """Print the attributes and the entries of ``ECG/metadata``.

    Each entry is loaded in full; byte-string arrays are decoded to ``str``
    before printing.

    Args:
        h5_path: Path of the HDF5 file; it is opened read-only.
    """
    with h5py.File(h5_path, 'r') as h5_file:
        metadata = h5_file["ECG"]["metadata"]

        print("🧾 Metadata Summary:")
        for attr_name, attr_value in metadata.attrs.items():
            print(f" - {attr_name}: {attr_value}")

        print("\n📈 Dataset 내용:")
        for dataset_name in metadata:
            values = metadata[dataset_name][:]
            holds_byte_strings = values.dtype.char == 'S'  # fixed-width byte strings
            if holds_byte_strings:
                values = [raw.decode() for raw in values]
            print(f" - {dataset_name}: {values}")
print_metadata_summary(h5_path)


In [None]:
# 9. Inspect the patient record
# NOTE(review): this re-defines print_patient_info, which is already defined
# near the top of the notebook with the same body.
def print_patient_info(h5_path):
    """Print every attribute attached to the file's top-level ``patient`` object.

    Args:
        h5_path: Path of the HDF5 file; it is opened read-only.
    """
    with h5py.File(h5_path, 'r') as h5_file:
        patient_attrs = h5_file["patient"].attrs
        print("🧑‍⚕️ Patient Info:")
        for attr_name in patient_attrs:
            attr_value = patient_attrs[attr_name]
            print(f" - {attr_name}: {attr_value}")
print_patient_info(h5_path)

In [None]:
# 종합 요약
import h5py
import matplotlib.pyplot as plt
import numpy as np
import json

def plot_segment_summary(h5_path, segment_index, lead_names=["II", "V1", "V5"]):
    """Plot the requested leads of one segment with annotations, then print a summary.

    For every lead the trace is drawn with beat-annotation markers, their
    symbols, and a vertical line plus label per fiducial point. After the
    figure, the fiducial-feature attributes and per-lead signal-quality
    values are printed.

    Args:
        h5_path: Path of the HDF5 file; it is opened read-only.
        segment_index: Segment to read; converted with ``str`` to form the
            key under ``ECG/segments``.
        lead_names: Leads to plot, read from the segment's ``signal`` group.
            The default list is never mutated. The signal-quality arrays are
            indexed by position in this list.
            NOTE(review): that assumes the stored quality arrays follow this
            lead order - confirm.
    """
    with h5py.File(h5_path, 'r') as f:
        segment = f["ECG"]["segments"][str(segment_index)]

        # signal
        signal_data = {lead: segment["signal"][lead][()] for lead in lead_names}

        # beat_annotation
        beat_ann = segment["beat_annotation"]
        r_peaks = beat_ann["sample"][:]
        r_syms = [s.decode() for s in beat_ann["symbol"][:]]

        # fiducial_point (optional)
        if "fiducial_point" in segment:
            fids = segment["fiducial_point"]
            fid_locs = fids["fsample"][:]
            fid_labels = [s.decode() for s in fids["fiducial"][:]]
        else:
            fid_locs, fid_labels = [], []

        # fiducial_feature (optional; best effort - a read failure is reported
        # and the summary continues without features)
        feats = {}
        if "fiducial_feature" in segment:
            try:
                feat_grp = segment["fiducial_feature"]
                if isinstance(feat_grp, h5py.Group):
                    feats = dict(feat_grp.attrs.items())
            except Exception as e:
                print(f"⚠️ fiducial_feature 오류: {e}")

        # signal_quality
        quality = segment["signal_quality"]
        nan_ratio = quality["nan_ratio"][:]
        amp = quality["amplitude"]
        amp_stats = {
            "mean": amp["amp_mean"][:],
            "std": amp["amp_std"][:],
            "skew": amp["amp_skewness"][:],
            "kurt": amp["amp_kurtosis"][:],
        }

        # beat similarity (optional; NaN placeholders when absent)
        if "beat_similarity" in quality:
            bs = quality["beat_similarity"]
            corr = bs["bs_correlation"][:]
            dtw = bs["bs_dtw"][:]
        else:
            corr = dtw = [np.nan] * len(lead_names)

    # 1. Signal Plot
    plt.figure(figsize=(15, len(lead_names) * 2.5))
    for i, lead in enumerate(lead_names):
        plt.subplot(len(lead_names), 1, i + 1)
        sig = signal_data[lead]
        plt.plot(sig, label=f"Lead {lead}", linewidth=0.8)

        # R-peaks
        plt.scatter(r_peaks, sig[r_peaks], color='red', s=30, label="R-peaks")
        for idx, sym in zip(r_peaks, r_syms):
            plt.text(idx, sig[idx] + 0.1, sym, fontsize=7, color="blue", ha="center")

        # fiducials
        for fidx, flabel in zip(fid_locs, fid_labels):
            # Skip NaN or out-of-range positions: int() would raise on NaN and
            # a negative index would silently wrap to the end of the trace.
            if not (np.isfinite(fidx) and 0 <= fidx < len(sig)):
                continue
            plt.axvline(x=fidx, color='green', linestyle='--', linewidth=0.8)
            plt.text(fidx, sig[int(fidx)], flabel, fontsize=6, color="green")

        plt.title(f"Signal: {lead}")
        plt.legend()

    # Lay out once after all subplots exist instead of once per subplot.
    plt.tight_layout()
    plt.suptitle(f"ECG Segment {segment_index}: Signal + Annotations", fontsize=16, y=1.02)
    plt.show()

    # 2. Feature Summary
    print(f"\n📍 Fiducial Features (segment {segment_index})")
    for k, v in feats.items():
        # np.floating also covers numpy float types that do not subclass float.
        if isinstance(v, (float, np.floating)):
            print(f" - {k}: {v:.3f}")
        else:
            print(f" - {k}: {v}")

    print(f"\n📊 Signal Quality")
    for i, lead in enumerate(lead_names):
        print(f" - {lead}: nan_ratio={nan_ratio[i]:.3f}, mean={amp_stats['mean'][i]:.3f}, std={amp_stats['std'][i]:.3f}, "
              f"skew={amp_stats['skew'][i]:.3f}, kurt={amp_stats['kurt'][i]:.3f}, corr={corr[i]}, dtw={dtw[i]}")
plot_segment_summary(h5_path, segment_index, lead_names=["II", "V1", "V5"])
