In [5]:
import pandas as pd
import json
import os
from tqdm import tqdm
import urllib.request

# VitalDB REST endpoints: the track index, and the per-track data URL
# (filled in with a track id via str.format).
TRKS_CSV_URL = 'https://api.vitaldb.net/trks'
TRACK_DATA_URL = 'https://api.vitaldb.net/{}'

# Cases to extract.
CASE_IDS = [
    25, 58, 68, 92, 96, 116, 135, 137, 142, 161, 203, 229, 239,
    243, 256, 266, 279, 323, 375, 384, 413, 416, 458, 460, 478,
]

# Track name in the index ('tname' column) -> label used as the JSON key.
# NOTE(review): the output recorded for this cell reports "No matching drug
# tracks" for the cases it lists, so these 'IVPump/...' names presumably do
# not exist in the track index. VitalDB infusion-pump tracks appear to be
# published under an 'Orchestra/' prefix (e.g. 'Orchestra/PPF20_RATE') --
# TODO confirm against tracks_df['tname'].unique() before relying on this map.
DRUG_CHANNELS = dict([
    ('IVPump/Propofol', 'Propofol'),
    ('IVPump/Remifentanil', 'Remifentanil'),
    ('IVPump/Norepinephrine', 'Norepinephrine'),
    ('IVPump/Phenylephrine', 'Phenylephrine'),
    ('IVPump/Midazolam', 'Midazolam'),
    ('IVPump/Fentanyl', 'Fentanyl'),
])


def interpolate_per_second(df):
    """Reduce a track to at most one sample per whole second.

    Despite the name, nothing is interpolated: each timestamp is cast to an
    integer second, and when several samples share a second only the last
    one (in row order) is kept. Seconds without any sample stay absent.

    Args:
        df: DataFrame with numeric, NaN-free 'time' and 'value' columns.
            It is not modified.

    Returns:
        A list of ``{'time': <int second>, 'value': <value>}`` dicts, in the
        row order of ``df``.
    """
    # assign() works on a copy, so the caller's frame does not silently gain
    # a 'sec' column (the previous version wrote it in place).
    per_second = (
        df.assign(sec=df['time'].astype(int))
          .drop_duplicates(subset='sec', keep='last')
    )
    return (
        per_second[['sec', 'value']]
        .rename(columns={'sec': 'time'})
        .to_dict(orient='records')
    )


def read_csv_with_headers(url):
    """Download a two-column CSV with custom request headers.

    The request carries a browser-like User-Agent and asks for gzip
    encoding. The body is gunzipped only when it really is gzip data, so an
    uncompressed reply is parsed as-is instead of raising ``BadGzipFile``.

    Args:
        url: URL of the CSV resource.

    Returns:
        DataFrame with columns 'time' and 'value'. The file is read with
        ``header=None``, so a header line in the payload comes back as an
        ordinary first row.

    Raises:
        urllib.error.URLError: if the request fails.
        pandas.errors.EmptyDataError: if the body is empty.
    """
    import gzip
    import io
    req = urllib.request.Request(url, headers={
        'User-Agent': 'Mozilla/5.0 (compatible; VitalScraper/1.0)',
        'Accept-Encoding': 'gzip'
    })
    with urllib.request.urlopen(req) as response:
        payload = response.read()

    # 'Accept-Encoding: gzip' is only a preference; detect compression from
    # the gzip magic number rather than assuming the server honoured it.
    if payload[:2] == b'\x1f\x8b':
        payload = gzip.decompress(payload)
    return pd.read_csv(io.BytesIO(payload), names=['time', 'value'], header=None)


# Fetch the track index and keep only rows that belong to one of our cases
# AND carry one of the configured drug track names.
tracks_df = pd.read_csv(TRKS_CSV_URL)
filtered_tracks = tracks_df[
    tracks_df['caseid'].isin(CASE_IDS) &
    tracks_df['tname'].isin(DRUG_CHANNELS.keys())
]

if filtered_tracks.empty:
    # Without this, a wrong set of track names only shows up as a wall of
    # per-case "Skipping" lines and an empty JSON file.
    print("Warning: none of the DRUG_CHANNELS track names were found for the "
          "requested cases; check them against tracks_df['tname'].")

# result maps str(case id) -> {drug label -> list of per-second samples}.
result = {}
for case_id in tqdm(CASE_IDS, desc="Cases"):
    case_tracks = filtered_tracks[filtered_tracks['caseid'] == case_id]
    if case_tracks.empty:
        print(f"[Case {case_id}] No matching drug tracks. Skipping.")
        continue

    case_data = {}
    for tname, tid in zip(case_tracks['tname'], case_tracks['tid']):
        label = DRUG_CHANNELS[tname]

        # One failing track must not abort the whole run and lose everything
        # collected so far; report it and move on (same policy as the proxy
        # channel extraction cell).
        try:
            df = read_csv_with_headers(TRACK_DATA_URL.format(tid))
            # Non-numeric cells (e.g. a header line read as data) become NaN
            # and are dropped together with genuinely missing values.
            df = df.assign(
                time=pd.to_numeric(df['time'], errors='coerce'),
                value=pd.to_numeric(df['value'], errors='coerce'),
            ).dropna()
            case_data[label] = interpolate_per_second(df)
        except Exception as e:
            print(f"[Case {case_id} - {label}] Error: {e}")

    # Leave out cases for which every track failed.
    if case_data:
        result[str(case_id)] = case_data

with open('drug_data.json', 'w') as f:
    json.dump(result, f, indent=2)



Cases: 100%|██████████| 25/25 [00:00<00:00, 12212.63it/s]

[Case 25] No matching drug tracks. Skipping.
[Case 58] No matching drug tracks. Skipping.
[Case 68] No matching drug tracks. Skipping.
[Case 92] No matching drug tracks. Skipping.
[Case 96] No matching drug tracks. Skipping.
[Case 116] No matching drug tracks. Skipping.
[Case 135] No matching drug tracks. Skipping.
[Case 137] No matching drug tracks. Skipping.
[Case 142] No matching drug tracks. Skipping.
[Case 161] No matching drug tracks. Skipping.
[Case 203] No matching drug tracks. Skipping.
[Case 229] No matching drug tracks. Skipping.
[Case 239] No matching drug tracks. Skipping.
[Case 243] No matching drug tracks. Skipping.
[Case 256] No matching drug tracks. Skipping.
[Case 266] No matching drug tracks. Skipping.
[Case 279] No matching drug tracks. Skipping.
[Case 323] No matching drug tracks. Skipping.
[Case 375] No matching drug tracks. Skipping.
[Case 384] No matching drug tracks. Skipping.
[Case 413] No matching drug tracks. Skipping.
[Case 416] No matching drug tracks. Ski




In [8]:
import pandas as pd
import json
import os
from tqdm import tqdm
import urllib.request
import io
import gzip

# VitalDB REST endpoints: the track index, and the per-track data URL
# (filled in with a track id via str.format).
TRKS_CSV_URL = 'https://api.vitaldb.net/trks'
TRACK_DATA_URL = 'https://api.vitaldb.net/{}'

# Cases to extract.
CASE_IDS = [
    25, 58, 68, 92, 96, 116, 135, 137, 142, 161, 203, 229, 239,
    243, 256, 266, 279, 323, 375, 384, 413, 416, 458, 460, 478,
]

# Tracks used as stand-ins for interventions: track name in the index
# ('tname' column) -> label used as the JSON key.
PROXY_CHANNELS = dict([
    ('Primus/MAC', 'MAC'),
    ('Primus/MV', 'Minute Ventilation'),
    ('Primus/ETCO2', 'ETCO2'),
    ('Primus/PEEP_MBAR', 'PEEP'),
    ('Primus/TV', 'Tidal Volume'),
    ('Primus/SET_FIO2', 'FiO2 Setting'),
    ('Primus/SET_INSP_TM', 'Insp Time Setting'),
])

def interpolate_per_second(df):
    """Reduce a track to at most one sample per whole second.

    Despite the name, nothing is interpolated: each timestamp is cast to an
    integer second, and when several samples share a second only the last
    one (in row order) is kept. Seconds without any sample stay absent.

    Args:
        df: DataFrame with numeric, NaN-free 'time' and 'value' columns.
            It is not modified.

    Returns:
        A list of ``{'time': <int second>, 'value': <value>}`` dicts, in the
        row order of ``df``.
    """
    # assign() works on a copy, so the caller's frame does not silently gain
    # a 'sec' column (the previous version wrote it in place).
    per_second = (
        df.assign(sec=df['time'].astype(int))
          .drop_duplicates(subset='sec', keep='last')
    )
    return (
        per_second[['sec', 'value']]
        .rename(columns={'sec': 'time'})
        .to_dict(orient='records')
    )

def read_csv_with_headers(url):
    """Download a two-column CSV with custom request headers.

    The request carries a browser-like User-Agent and asks for gzip
    encoding. The body is gunzipped only when it really is gzip data, so an
    uncompressed reply is parsed as-is instead of raising ``BadGzipFile``.

    Args:
        url: URL of the CSV resource.

    Returns:
        DataFrame with columns 'time' and 'value'. The file is read with
        ``header=None``, so a header line in the payload comes back as an
        ordinary first row.

    Raises:
        urllib.error.URLError: if the request fails.
        pandas.errors.EmptyDataError: if the body is empty.
    """
    req = urllib.request.Request(url, headers={
        'User-Agent': 'Mozilla/5.0 (compatible; VitalScraper/1.0)',
        'Accept-Encoding': 'gzip'
    })
    with urllib.request.urlopen(req) as response:
        payload = response.read()

    # 'Accept-Encoding: gzip' is only a preference; detect compression from
    # the gzip magic number rather than assuming the server honoured it.
    if payload[:2] == b'\x1f\x8b':
        payload = gzip.decompress(payload)
    return pd.read_csv(io.BytesIO(payload), names=['time', 'value'], header=None)

# Fetch the track index once, then narrow it to rows that belong to one of
# our cases AND carry one of the proxy channel names.
tracks_df = pd.read_csv(TRKS_CSV_URL)
wanted_case = tracks_df['caseid'].isin(CASE_IDS)
wanted_channel = tracks_df['tname'].isin(PROXY_CHANNELS.keys())
filtered_tracks = tracks_df[wanted_case & wanted_channel]

# result maps str(case id) -> {channel label -> list of per-second samples}.
result = {}
for case_id in tqdm(CASE_IDS, desc="Extracting Proxy Drug Data"):
    case_tracks = filtered_tracks[filtered_tracks['caseid'] == case_id]

    # A case with no matching tracks simply yields an empty case_data below.
    case_data = {}
    for tname, tid in zip(case_tracks['tname'], case_tracks['tid']):
        label = PROXY_CHANNELS[tname]
        try:
            df = read_csv_with_headers(TRACK_DATA_URL.format(tid))
            df.dropna(inplace=True)
            # Anything that is not a number becomes NaN and is dropped next.
            for column in ('time', 'value'):
                df[column] = pd.to_numeric(df[column], errors='coerce')
            df.dropna(inplace=True)
            case_data[label] = interpolate_per_second(df)
        except Exception as e:
            # Best effort: report the failing track and keep going.
            print(f"[Case {case_id} - {label}] Error: {e}")

    # Only cases that produced at least one track end up in the output.
    if case_data:
        result[str(case_id)] = case_data

# Persist everything that was collected.
with open('proxy_drug_data.json', 'w') as out_file:
    json.dump(result, out_file, indent=2)



Extracting Proxy Drug Data: 100%|██████████| 25/25 [01:51<00:00,  4.46s/it]
