In [5]:
import pandas as pd
import numpy as np
import requests
from tqdm import tqdm

# --- VitalDB endpoints ---
# CASE_URL and TRACK_URL are read as CSV tables; DATA_URL is a template that is
# filled with a track id (tid) to address a single track.
CASE_URL, TRACK_URL, DATA_URL = (
    "https://api.vitaldb.net/" + suffix for suffix in ("cases", "trks", "{}")
)

# --- Tracks of interest ---
# Maps the track name found in the track table ('tname') to the human-readable
# label used for the columns of the per-case correlation matrix.
PARAMETERS = dict([
    ("Solar8000/HR", "Heart Rate"),
    ("Solar8000/ART_MBP", "Arterial BP"),
    ("Solar8000/PLETH_SPO2", "Oxygen Saturation"),
    ("Solar8000/ETCO2", "End-Tidal CO2"),
    ("Solar8000/RR", "Respiratory Rate"),
    ("Solar8000/PLETH_HR", "Pleth HR"),
    ("Solar8000/ART_SBP", "Systolic BP"),
    ("Solar8000/ART_DBP", "Diastolic BP"),
    ("Solar8000/NIBP_MBP", "NIBP Mean BP"),
    ("Solar8000/CVP", "Central Venous Pressure"),
    ("Vigileo/CO", "Cardiac Output"),
    ("Vigileo/SV", "Stroke Volume"),
    ("Vigilance/HR_AVG", "Average HR"),
    ("CardioQ/HR", "CardioQ HR"),
])

# --- Fetch metadata and narrow it to the tracks of interest ---
# Case table: keep the first 500 distinct case ids, in order of appearance.
cases_df = pd.read_csv(CASE_URL)
all_caseids = cases_df['caseid'].unique()[:500].tolist()

# Track table: retain only rows that belong to a selected case AND whose
# track name is one of the PARAMETERS keys.
tracks_df = pd.read_csv(TRACK_URL)
filtered_tracks = tracks_df.loc[
    lambda t: t['caseid'].isin(all_caseids) & t['tname'].isin(PARAMETERS.keys())
]

# One group of track rows per case id.
grouped = filtered_tracks.groupby('caseid')

# --- Per-case correlation of every available vital against Heart Rate ---
# For each case: download every selected track, average it into fixed-width
# time bins, align the tracks on bin number, and record the largest absolute
# Pearson correlation between 'Heart Rate' and any other track.
BIN_SECONDS = 900  # width of one averaging bin (15 minutes)

results = []  # one (caseid, strongest |r| vs. Heart Rate) tuple per usable case

for caseid, group in tqdm(grouped, desc="Analyzing Correlations"):
    # label -> Series of per-bin mean values, indexed by bin number
    dfs = {}
    for tname, tid in zip(group['tname'], group['tid']):
        label = PARAMETERS[tname]
        try:
            df = pd.read_csv(DATA_URL.format(tid), header=None, names=['time', 'value'])
            # Coerce both columns to numbers; anything unparseable (or already
            # missing) becomes NaN and the whole row is dropped.
            df['time'] = pd.to_numeric(df['time'], errors='coerce')
            df['value'] = pd.to_numeric(df['value'], errors='coerce')
            df = df.dropna()
            df['bin'] = (df['time'] // BIN_SECONDS).astype(int)
            dfs[label] = df.groupby('bin')['value'].mean()
        except Exception as e:
            # Best effort: a track that cannot be downloaded or parsed is
            # skipped, but the failure is reported instead of being silently
            # swallowed. Catching Exception (not a bare except) keeps
            # KeyboardInterrupt working during this long-running loop.
            print(f"Error loading TID {tid} for case {caseid}: {e}")
            continue

    # Heart Rate is the reference signal; it needs at least one partner track.
    if 'Heart Rate' not in dfs or len(dfs) < 2:
        continue

    # Align on bin number and keep only bins where every track has a value.
    merged_df = pd.DataFrame(dfs).dropna()
    # NOTE(review): with exactly two overlapping bins Pearson's r is +/-1 (or
    # NaN) by construction, so such cases tie at the top of any ranking.
    # Consider requiring more bins; the threshold is left unchanged here.
    if merged_df.shape[0] < 2:
        continue

    corrs = merged_df.corr()['Heart Rate'].drop('Heart Rate', errors='ignore')
    if corrs.empty:
        continue

    strongest = corrs.abs().max()
    # max() skips NaN, so NaN here means every correlation was undefined
    # (e.g. a constant series). A NaN score cannot be ranked, so skip the case.
    if pd.isna(strongest):
        continue
    results.append((caseid, strongest))

# --- Rank cases by their strongest |correlation| with Heart Rate ---
# The slice size and the printed label are driven by one constant so they
# cannot disagree (the label previously said 20 while 50 ids were returned).
TOP_N = 50  # number of cases to report

# NaN compares False against everything, which makes the sort order undefined;
# drop unrankable scores before sorting. The sort is stable, so tied scores
# keep the order in which they were appended to `results`.
ranked = sorted(
    (item for item in results if not pd.isna(item[1])),
    key=lambda item: item[1],
    reverse=True,
)

# The `top20*` names are historical and are kept so that any code referring
# to them keeps working; they hold the top TOP_N entries.
top20 = ranked[:TOP_N]
top20_caseids = [caseid for caseid, _ in top20]
print(f"Top {TOP_N} correlated case IDs:", top20_caseids)


Analyzing Correlations: 100%|██████████| 500/500 [04:13<00:00,  1.97it/s]

Top 20 correlated case IDs: [59, 353, 163, 421, 279, 58, 183, 145, 221, 419, 425, 460, 92, 242, 384, 405, 413, 427, 96, 380, 60, 297, 375, 416, 435, 68, 215, 385, 458, 229, 150, 116, 139, 256, 239, 266, 203, 25, 161, 175, 135, 142, 323, 291, 137, 314, 243, 478, 2, 477]





In [None]:
# vital_trend_plot.py
import requests
import pandas as pd
import matplotlib.pyplot as plt
from io import BytesIO
from tqdm import tqdm
import os
import numpy as np

# --- VitalDB endpoints and output location ---
# CASE_URL and TRACK_URL are read as CSV tables; DATA_URL is a template that is
# filled with a track id (tid). OUTPUT_DIR receives one PNG per plotted case.
CASE_URL, TRACK_URL, DATA_URL = (
    "https://api.vitaldb.net/" + suffix for suffix in ("cases", "trks", "{}")
)
OUTPUT_DIR = "case_15min_avg_trends"

# --- Tracks to plot ---
# Track name as it appears in the track table ('tname') -> legend label.
PARAMETERS = dict([
    ("Solar8000/HR", "Heart Rate"),
    ("Solar8000/ART_MBP", "Arterial BP"),
    ("Solar8000/PLETH_SPO2", "Oxygen Saturation"),
    ("Solar8000/ETCO2", "End-Tidal CO2"),
    ("Solar8000/RR", "Respiratory Rate"),
    ("Solar8000/PLETH_HR", "Pleth HR"),
    ("Solar8000/ART_SBP", "Systolic BP"),
    ("Solar8000/ART_DBP", "Diastolic BP"),
    ("Solar8000/NIBP_MBP", "NIBP Mean BP"),
    ("Solar8000/CVP", "Central Venous Pressure"),
    ("Vigileo/CO", "Cardiac Output"),
    ("Vigileo/SV", "Stroke Volume"),
    ("Vigilance/HR_AVG", "Average HR"),
    ("CardioQ/HR", "CardioQ HR"),
])

# --- Line colours ---
# Legend label -> matplotlib colour name; one entry per PARAMETERS label.
COLOR_MAP = dict([
    ("Heart Rate", "red"),
    ("Arterial BP", "green"),
    ("Oxygen Saturation", "blue"),
    ("End-Tidal CO2", "purple"),
    ("Respiratory Rate", "orange"),
    ("Pleth HR", "brown"),
    ("Systolic BP", "darkgreen"),
    ("Diastolic BP", "lightgreen"),
    ("NIBP Mean BP", "cyan"),
    ("Central Venous Pressure", "gray"),
    ("Cardiac Output", "magenta"),
    ("Stroke Volume", "teal"),
    ("Average HR", "pink"),
    ("CardioQ HR", "gold"),
])

# --- Make sure the plot directory exists (no error if it already does) ---
os.makedirs(OUTPUT_DIR, exist_ok=True)

# --- Cases to plot ---
# Hard-coded list of 25 case ids selected for plotting.
target_caseids = [
    25, 58, 68, 92, 96,
    116, 135, 137, 142, 161,
    203, 229, 239, 243, 256,
    266, 279, 323, 375, 384,
    413, 416, 458, 460, 478,
]

# --- Download the metadata tables ---
# NOTE(review): cases_df is not referenced again in the visible part of this
# cell; it is still fetched so the name stays defined.
cases_df = pd.read_csv(CASE_URL)
tracks_df = pd.read_csv(TRACK_URL)

# Keep only track rows that belong to a target case AND whose track name is
# one of the PARAMETERS keys.
filtered_tracks = tracks_df.loc[
    lambda t: t['caseid'].isin(target_caseids) & t['tname'].isin(PARAMETERS.keys())
]

# --- One group of track rows per case id ---
case_grouped_tracks = filtered_tracks.groupby('caseid')

# --- Generate Trend Plots ---
# For each case: download every selected track, average it into fixed-width
# time bins, draw all tracks on one axes, and save the figure as a PNG in
# OUTPUT_DIR.
BIN_SECONDS = 900  # width of one averaging bin (15 minutes)

for caseid, case_tracks in tqdm(case_grouped_tracks, desc="Rendering 15-Min Trend Plots"):
    fig, ax = plt.subplots(figsize=(12, 5))
    try:
        fig.suptitle(f"Case {caseid}: 15-Minute Averaged Vital Trends")

        for tname, tid in zip(case_tracks['tname'], case_tracks['tid']):
            label = PARAMETERS[tname]
            try:
                df = pd.read_csv(DATA_URL.format(tid), header=None, names=['time', 'value'])
                # Coerce both columns to numbers; unparseable entries become
                # NaN and those rows are dropped.
                df['time'] = pd.to_numeric(df['time'], errors='coerce')
                df['value'] = pd.to_numeric(df['value'], errors='coerce')
                df = df.dropna()
                df['bin'] = (df['time'] // BIN_SECONDS).astype(int)
                df_avg = df.groupby('bin')['value'].mean().reset_index()
                # Plot each bin at its start time, in seconds.
                df_avg['time'] = df_avg['bin'] * BIN_SECONDS

                ax.plot(df_avg['time'], df_avg['value'], label=label, color=COLOR_MAP.get(label, 'black'))
            except Exception as e:
                # Best effort: report the failing track and carry on with the rest.
                print(f"Error loading TID {tid} for case {caseid}: {e}")

        ax.set_xlabel("Time (s)")
        ax.set_ylabel("Value")
        ax.set_title("Vitals (Averaged Every 15 Minutes)")
        # Only build a legend when at least one track was drawn; calling
        # legend() with no labelled artists just emits a warning.
        handles, _ = ax.get_legend_handles_labels()
        if handles:
            ax.legend(loc='upper right')
        # Leave room at the top for the figure-level suptitle.
        fig.tight_layout(rect=[0, 0.03, 1, 0.95])
        fig.savefig(os.path.join(OUTPUT_DIR, f"case_{caseid}_trends_top_corr.png"))
    finally:
        # Always release the figure, even if layout or saving fails, so open
        # figures do not accumulate across iterations.
        plt.close(fig)


Rendering 15-Min Trend Plots: 100%|██████████| 50/50 [00:22<00:00,  2.21it/s]
