In [1]:
import pandas as pd
import numpy as np
import vitaldb
import os
#import matplotlib.pyplot as plt

# Track list of the VitalDB open dataset; the code below uses its
# 'caseid', 'tname' and 'tid' columns.
df_trks = pd.read_csv("https://api.vitaldb.net/trks")

# A case is usable only if it recorded ALL of these tracks: the three
# Solar8000 numeric pressures (SBP, DBP, MBP) and the SNUADC/ART track they
# are compared against.
REQUIRED_TRACKS = (
    'Solar8000/ART_SBP',
    'Solar8000/ART_DBP',
    'Solar8000/ART_MBP',
    'SNUADC/ART',
)

# Intersect the per-track sets of case ids.  The result is sorted because set
# iteration order is an implementation detail; sorting makes the processing
# order (and therefore the row order of the report) deterministic.
caseids = sorted(set.intersection(
    *(set(df_trks.loc[df_trks['tname'] == tname, 'caseid']) for tname in REQUIRED_TRACKS)
))

# Accumulators pooled over every accepted case.  The per-case loop extends
# them with one value per accepted segment: the plain names take the values
# derived from the Solar8000 numeric tracks, the "art" names the values
# derived from the SNUADC/ART track.
all_sbps, all_dbps, all_mbps = [], [], []
all_art_sbps, all_art_dbps, all_art_mbps = [], [], []

# Header row of the tab-separated report; echoed to stdout here and reused
# as the first line of the report file.
header_columns = ['caseid', 'len', 'sbp', 'sbp%', 'dbp', 'dbp%', 'mbp', 'mbp%']
output = '\t'.join(header_columns) + '\n'
print(output, end='')

# Per-case QA: for every case, cut the recording into fixed-length segments,
# drop segments that look like artifacts, and compare the Solar8000 numeric
# SBP/DBP/MBP against max/min/mean of the SNUADC/ART track over the same
# segment.  One row per accepted case is written to qa.tsv and echoed to stdout.

# Cases with an id below this are skipped.
# NOTE(review): presumably a resume point left from an interrupted run — confirm.
START_CASEID = 4640

SRATE = 100   # tracks are loaded on a grid with interval 1 / SRATE
SEGLEN = 30   # segment length; one segment spans SEGLEN * SRATE rows
SEG_SAMPLES = SEGLEN * SRATE

# A segment is rejected when more than 10% of the expected values are missing.
# NOTE(review): the 0.5 factor presumably reflects numeric tracks arriving
# about once every two grid seconds — confirm against the device settings.
MIN_NUMERIC_SAMPLES = SEGLEN * 0.5 * 0.9
MIN_WAVE_SAMPLES = SEGLEN * SRATE * 0.9

# Plausibility limits applied to the numeric MBP and to the ART track.
BP_MAX = 200
BP_MIN = 30
# Largest allowed jump between consecutive numeric MBP values in a segment.
MAX_MBP_STEP = 10
# A case needs at least this many accepted segments to be reported.
MIN_SEGMENTS = 10

# Build the (caseid, track name) -> tid lookup once instead of scanning the
# whole track table four times per case.  setdefault keeps the first tid seen
# for each pair, i.e. the first matching row of df_trks.
_wanted_tracks = df_trks[df_trks['tname'].isin(
    ['Solar8000/ART_SBP', 'Solar8000/ART_DBP', 'Solar8000/ART_MBP', 'SNUADC/ART']
)]
tid_lookup = {}
for _cid, _tname, _tid in zip(_wanted_tracks['caseid'], _wanted_tracks['tname'], _wanted_tracks['tid']):
    tid_lookup.setdefault((_cid, _tname), _tid)

# The context manager guarantees the report is flushed and closed even if a
# download or computation raises part-way through the run.
with open('qa.tsv', 'w') as fo:
    fo.write(output)

    for caseid in caseids:
        if caseid < START_CASEID:
            continue

        sbp_tid = tid_lookup[(caseid, 'Solar8000/ART_SBP')]
        dbp_tid = tid_lookup[(caseid, 'Solar8000/ART_DBP')]
        mbp_tid = tid_lookup[(caseid, 'Solar8000/ART_MBP')]
        art_tid = tid_lookup[(caseid, 'SNUADC/ART')]

        # Columns of vals follow the order of the tid list: 0=SBP, 1=DBP, 2=MBP, 3=ART.
        vals = vitaldb.load_trks([sbp_tid, dbp_tid, mbp_tid, art_tid], 1 / SRATE)

        # extract segments
        case_sbps = []
        case_dbps = []
        case_mbps = []
        case_art_sbps = []
        case_art_dbps = []
        case_art_mbps = []

        # Raw (NaN-containing) segments, kept for the disabled plotting aid below.
        raw_sbps_list = []
        raw_dbps_list = []
        raw_mbps_list = []
        raw_arts_list = []

        # "+ 1" so that the final full segment is included when the recording
        # length is an exact multiple of the segment length.
        for i in range(0, len(vals) - SEG_SAMPLES + 1, SEG_SAMPLES):
            raw_sbps = vals[i:i + SEG_SAMPLES, 0]
            raw_dbps = vals[i:i + SEG_SAMPLES, 1]
            raw_mbps = vals[i:i + SEG_SAMPLES, 2]
            raw_arts = vals[i:i + SEG_SAMPLES, 3]

            sbps = raw_sbps[~np.isnan(raw_sbps)]
            dbps = raw_dbps[~np.isnan(raw_dbps)]
            mbps = raw_mbps[~np.isnan(raw_mbps)]
            arts = raw_arts[~np.isnan(raw_arts)]

            # exclude artifacts
            if min(len(sbps), len(dbps), len(mbps)) < MIN_NUMERIC_SAMPLES:
                continue  # more than 10% of a numeric track is missing
            if len(arts) < MIN_WAVE_SAMPLES:
                continue  # more than 10% of the ART track is missing
            if np.max(mbps) > BP_MAX or np.min(mbps) < BP_MIN:
                continue  # numeric MBP outside the plausible range
            if np.max(arts) > BP_MAX or np.min(arts) < BP_MIN:
                continue  # ART value outside the plausible range
            if (np.abs(np.diff(mbps)) > MAX_MBP_STEP).any():
                continue  # abrupt jump between consecutive MBP values

            raw_sbps_list.append(raw_sbps)
            raw_dbps_list.append(raw_dbps)
            raw_mbps_list.append(raw_mbps)
            raw_arts_list.append(raw_arts)

            # Numeric tracks versus the same statistic taken from the ART track.
            case_sbps.append(np.max(sbps))
            case_dbps.append(np.min(dbps))
            case_mbps.append(np.mean(mbps))
            case_art_sbps.append(np.max(arts))
            case_art_dbps.append(np.min(arts))
            case_art_mbps.append(np.mean(arts))

        # All case_* lists grow together, so checking one length is enough.
        if len(case_sbps) < MIN_SEGMENTS:
            continue

        all_sbps.extend(case_sbps)
        all_dbps.extend(case_dbps)
        all_mbps.extend(case_mbps)
        all_art_sbps.extend(case_art_sbps)
        all_art_dbps.extend(case_art_dbps)
        all_art_mbps.extend(case_art_mbps)

        # Mean absolute difference per case, and the same value as a
        # percentage of the mean numeric reading.
        err_sbp = np.mean(np.abs(np.array(case_sbps) - np.array(case_art_sbps)))
        err_dbp = np.mean(np.abs(np.array(case_dbps) - np.array(case_art_dbps)))
        err_mbp = np.mean(np.abs(np.array(case_mbps) - np.array(case_art_mbps)))
        err_sbp_perc = 100 * err_sbp / np.mean(case_sbps)
        err_dbp_perc = 100 * err_dbp / np.mean(case_dbps)
        err_mbp_perc = 100 * err_mbp / np.mean(case_mbps)

        # Disabled debugging aid: plot every accepted segment of a case whose
        # error exceeds 10% (requires the commented-out matplotlib import).
        # if err_sbp_perc > 10 or err_dbp_perc > 10 or err_mbp_perc > 10:
        #     imgdir = 'imgs/' + str(caseid)
        #     if not os.path.exists(imgdir):
        #         os.makedirs(imgdir)
        #
        #     for i in range(len(raw_sbps_list)):
        #         plt.figure(figsize=(20,5))
        #         plt.plot(raw_sbps_list[i], 'go')
        #         plt.plot(raw_dbps_list[i], 'go')
        #         plt.plot(raw_mbps_list[i], 'go')
        #         plt.plot(raw_arts_list[i], 'r-')
        #         plt.savefig(imgdir + "/" + str(i))
        #         plt.close()

        output = '{}\t{}\t{:.1f}\t{:.1f}%\t{:.1f}\t{:.1f}%\t{:.1f}\t{:.1f}%\n'.format(
            caseid, len(case_sbps), err_sbp, err_sbp_perc, err_dbp, err_dbp_perc, err_mbp, err_mbp_perc)
        fo.write(output)
        print(output, end='')

caseid	len	sbp	sbp%	dbp	dbp%	mbp	mbp%
4640	211	8.7	7.6%	5.0	7.9%	5.4	6.2%
4644	934	2.9	2.8%	1.3	2.2%	1.3	1.6%
4646	221	2.5	1.9%	3.6	7.8%	3.1	4.1%
4647	275	4.1	4.3%	0.8	2.1%	1.4	2.4%
4648	114	4.3	4.2%	6.1	14.6%	5.3	8.9%
4649	1805	7.4	7.6%	3.7	6.9%	4.9	6.9%
4650	385	4.7	4.1%	6.0	11.2%	5.4	7.6%
4652	589	7.2	7.1%	3.8	7.0%	4.9	6.9%
4653	403	3.2	2.9%	6.0	11.9%	4.4	6.2%
4654	435	2.9	2.4%	7.7	11.8%	3.9	4.5%
4655	167	4.4	3.9%	8.8	16.4%	5.4	7.0%
4656	327	2.6	2.3%	5.4	10.5%	4.1	5.4%
4657	316	3.6	2.9%	1.1	2.0%	1.6	2.3%
4658	314	4.8	3.7%	1.7	2.9%	2.0	2.3%
4660	463	4.2	3.6%	1.9	3.2%	2.5	3.1%
4662	310	5.4	4.4%	8.4	13.5%	6.7	8.1%
4665	562	1.8	1.5%	2.7	4.4%	0.8	1.0%
4666	734	3.4	2.5%	0.8	1.2%	1.5	1.6%
4670	897	4.8	4.2%	8.7	14.3%	6.2	7.6%
4673	181	7.5	6.1%	3.9	5.9%	4.7	5.4%
4678	274	7.7	5.9%	4.5	6.4%	5.3	5.7%
4683	257	2.2	2.0%	5.1	9.9%	2.8	4.1%
4684	706	7.1	6.2%	3.7	6.7%	5.0	6.9%
4686	265	7.4	5.6%	3.2	5.3%	4.4	5.3%
4687	228	2.9	2.4%	6.6	9.9%	4.5	5.1%
4690	86	3.7	2.7%	6.6	8.8%	3.8	3.8%
4694	502	3.4	2.4%	