In [None]:
import pandas as pd
import json
import os
from tqdm import tqdm
import urllib.request

# Track names to export, mapped to the human-readable label used as the key
# for that track in the output JSON. The keys are matched against the `tname`
# column of the track index downloaded from TRKS_CSV_URL.
PARAMETERS = {
    'Solar8000/HR': 'Heart Rate',
    'Solar8000/ART_MBP': 'Arterial BP',
    'Solar8000/PLETH_SPO2': 'Oxygen Saturation',
    'Solar8000/ETCO2': 'End-Tidal CO2',
    'Solar8000/RR': 'Respiratory Rate',
    'Solar8000/PLETH_HR': 'Pleth HR',
    'Solar8000/ART_SBP': 'Systolic BP',
    'Solar8000/ART_DBP': 'Diastolic BP',
    'Solar8000/NIBP_MBP': 'NIBP Mean BP',
    'Solar8000/CVP': 'Central Venous Pressure',
    'Vigileo/CO': 'Cardiac Output',
    'Vigileo/SV': 'Stroke Volume',
    'Vigilance/HR_AVG': 'Average HR',
    'CardioQ/HR': 'CardioQ HR'
}

# Case ids to export. They are matched against the `caseid` column of the
# track index and processed in the order listed here.
CASE_IDS = [25, 58, 68, 92, 96, 116, 135, 137, 142, 161, 203, 229, 239,
            243, 256, 266, 279, 323, 375, 384, 413, 416, 458, 460, 478]

# URL of the track index CSV (read directly with pandas).
TRKS_CSV_URL = 'https://api.vitaldb.net/trks'
# URL template for a single track's data; formatted with the track id (`tid`).
TRACK_DATA_URL = 'https://api.vitaldb.net/{}'


def interpolate_per_second(df):
    """Reduce a track to at most one sample per whole second.

    Despite the name, no interpolation is performed: each timestamp is
    truncated to an integer second and, when several samples fall into the
    same second, only the last one (in row order) is kept. Seconds without
    any sample are simply absent from the result.

    The input frame is left untouched.

    Args:
        df: DataFrame with a numeric ``time`` column (no NaN values, since
            the column is cast to ``int``) and a ``value`` column.

    Returns:
        A list of ``{'time': <int second>, 'value': <value>}`` dicts, in the
        row order of ``df``.
    """
    # Build the helper column on a copy so the caller's frame does not gain
    # a stray 'sec' column as a side effect.
    per_second = df.assign(sec=df['time'].astype(int))
    per_second = per_second.drop_duplicates(subset='sec', keep='last')
    return (
        per_second[['sec', 'value']]
        .rename(columns={'sec': 'time'})
        .to_dict(orient='records')
    )


def read_csv_with_headers(url):
    """Download a two-column CSV and return it as a DataFrame.

    The request is sent with a browser-like ``User-Agent`` and asks for a
    gzip-compressed response. The body is decompressed only when it really
    is gzip data, so a server or proxy that ignores ``Accept-Encoding`` and
    replies with plain CSV is handled as well.

    Args:
        url: Address of the CSV resource.

    Returns:
        A DataFrame with columns ``time`` and ``value``. Every line of the
        payload is treated as data (no header row is assumed), so a textual
        header line, if present, shows up as an ordinary row.

    Raises:
        urllib.error.URLError: If the request fails.
    """
    import gzip
    import io

    req = urllib.request.Request(url, headers={
        'User-Agent': 'Mozilla/5.0 (compatible; VitalScraper/1.0)',
        'Accept-Encoding': 'gzip'
    })
    with urllib.request.urlopen(req) as response:
        payload = response.read()

    # gzip streams always start with the magic bytes 0x1f 0x8b; anything
    # else is taken to be an uncompressed body.
    if payload[:2] == b'\x1f\x8b':
        payload = gzip.decompress(payload)

    return pd.read_csv(io.BytesIO(payload), names=['time', 'value'], header=None)

def _fetch_clean_track(tid):
    """Download one track and keep only rows whose time and value are numeric."""
    frame = read_csv_with_headers(TRACK_DATA_URL.format(tid))
    frame.dropna(inplace=True)
    # Non-numeric cells become NaN here and are removed by the second dropna.
    for column in ('time', 'value'):
        frame[column] = pd.to_numeric(frame[column], errors='coerce')
    frame.dropna(inplace=True)
    return frame


# Load the track index and narrow it to the requested cases and parameters.
tracks_df = pd.read_csv(TRKS_CSV_URL)
wanted_case = tracks_df['caseid'].isin(CASE_IDS)
wanted_param = tracks_df['tname'].isin(PARAMETERS.keys())
filtered_tracks = tracks_df[wanted_case & wanted_param]

# result: {case id as str: {parameter label: [{'time': ..., 'value': ...}, ...]}}
result = {}
for case_id in tqdm(CASE_IDS, desc="Cases"):
    case_tracks = filtered_tracks[filtered_tracks['caseid'] == case_id]
    if case_tracks.empty:
        print(f"[Case {case_id}] No matching parameters. Skipping.")
        continue

    case_data = {}
    for track_name, track_id in zip(case_tracks['tname'], case_tracks['tid']):
        label = PARAMETERS[track_name]
        # A later track with the same label replaces an earlier one.
        case_data[label] = interpolate_per_second(_fetch_clean_track(track_id))

    result[str(case_id)] = case_data

# Write everything out as one pretty-printed JSON document.
with open('vital_data.json', 'w') as out_file:
    json.dump(result, out_file, indent=2)









📥 Downloading track metadata...


Cases: 100%|██████████| 25/25 [01:34<00:00,  3.79s/it]


✅ vital_data.json saved.
