In [1]:
import pandas as pd
import numpy as np
import vitaldb
import os
#import matplotlib.pyplot as plt

df_trks = pd.read_csv("https://api.vitaldb.net/trks")

# Keep only the cases that contain ALL four tracks compared below: the
# Solar8000 SBP / DBP / MBP tracks and the SNUADC/ART track.
required_tnames = (
    'Solar8000/ART_SBP',
    'Solar8000/ART_DBP',
    'Solar8000/ART_MBP',
    'SNUADC/ART',
)

# Intersect the per-track case-id sets. The result is sorted because a set has
# no guaranteed iteration order; sorting makes the processing order (and hence
# the row order of the report written later) reproducible between runs.
caseids = sorted(
    set.intersection(*(
        set(df_trks.loc[df_trks['tname'] == tname, 'caseid'])
        for tname in required_tnames
    ))
)

# Per-segment values pooled over every case that passes the checks below.
# all_sbps / all_dbps / all_mbps come from the Solar8000 tracks; the all_art_*
# lists hold the max / min / mean of the SNUADC/ART track over the same segment.
all_sbps = []
all_dbps = []
all_mbps = []
all_art_sbps = []
all_art_dbps = []
all_art_mbps = []

# ---- configuration -------------------------------------------------------
SRATE = 100    # rows per time unit requested from load_trks (interval = 1 / SRATE)
SEGLEN = 30    # segment length; presumably seconds -- TODO confirm against vitaldb docs
SEG_SAMPLES = SEGLEN * SRATE           # rows in one segment
# A segment is rejected when fewer than 90% of the expected samples are present.
# The 0.5 factor presumably reflects one Solar8000 value every 2 s -- TODO confirm.
MIN_NUMERIC_SAMPLES = SEGLEN * 0.5 * 0.9
MIN_ART_SAMPLES = SEGLEN * SRATE * 0.9
BP_MAX = 200       # upper plausibility limit applied to MBP and ART samples
BP_MIN = 30        # lower plausibility limit applied to MBP and ART samples
MAX_MBP_STEP = 10  # largest allowed jump between consecutive MBP samples
MIN_SEGMENTS = 10  # cases with fewer accepted segments are not reported
# NOTE(review): cases below this id are skipped. The output captured under this
# cell starts at caseid 1, so that run evidently did not use this filter, and
# because qa.tsv is opened with 'w' a run WITH the filter yields a file holding
# only cases >= MIN_CASEID. Set to 0 to process every case.
MIN_CASEID = 4640
# Column order of the array returned by load_trks in the loop below.
TRACK_NAMES = ('Solar8000/ART_SBP', 'Solar8000/ART_DBP', 'Solar8000/ART_MBP', 'SNUADC/ART')

output = 'caseid\tlen\tsbp\tsbp%\tdbp\tdbp%\tmbp\tmbp%\n'
print(output, end='')

# The context manager guarantees qa.tsv is flushed and closed even when a
# download or a computation inside the loop raises.
with open('qa.tsv', 'w') as fo:
    fo.write(output)

    for caseid in caseids:
        if caseid < MIN_CASEID:
            continue

        # Filter the track table once per case, then take the first tid of each
        # required track (same "first match" choice as .values[0] on the full table).
        case_trks = df_trks[df_trks['caseid'] == caseid]
        sbp_tid, dbp_tid, mbp_tid, art_tid = (
            case_trks.loc[case_trks['tname'] == tname, 'tid'].values[0]
            for tname in TRACK_NAMES
        )

        vals = vitaldb.load_trks([sbp_tid, dbp_tid, mbp_tid, art_tid], 1 / SRATE)

        # extract segments
        case_sbps = []
        case_dbps = []
        case_mbps = []
        case_art_sbps = []
        case_art_dbps = []
        case_art_mbps = []

        # Raw segments are only consumed by the disabled plotting code further down.
        raw_sbps_list = []
        raw_dbps_list = []
        raw_mbps_list = []
        raw_arts_list = []

        # "+ 1" so that the last complete segment is kept when len(vals) is an
        # exact multiple of SEG_SAMPLES; incomplete trailing data is still dropped.
        for i in range(0, len(vals) - SEG_SAMPLES + 1, SEG_SAMPLES):
            segment = vals[i:i + SEG_SAMPLES]
            raw_sbps = segment[:, 0]
            raw_dbps = segment[:, 1]
            raw_mbps = segment[:, 2]
            raw_arts = segment[:, 3]

            # Drop the NaN rows of each column.
            sbps = raw_sbps[~np.isnan(raw_sbps)]
            dbps = raw_dbps[~np.isnan(raw_dbps)]
            mbps = raw_mbps[~np.isnan(raw_mbps)]
            arts = raw_arts[~np.isnan(raw_arts)]

            # exclude artifacts
            if min(len(sbps), len(dbps), len(mbps)) < MIN_NUMERIC_SAMPLES:
                continue  # more than 10% of the SBP, DBP or MBP samples missing
            if len(arts) < MIN_ART_SAMPLES:
                continue  # more than 10% of the ART samples missing
            if np.max(mbps) > BP_MAX or np.min(mbps) < BP_MIN:
                continue  # MBP outside the plausible range
            if np.max(arts) > BP_MAX or np.min(arts) < BP_MIN:
                continue  # ART outside the plausible range
            if (np.abs(np.diff(mbps)) > MAX_MBP_STEP).any():
                continue  # abrupt jump between consecutive MBP samples

            raw_sbps_list.append(raw_sbps)
            raw_dbps_list.append(raw_dbps)
            raw_mbps_list.append(raw_mbps)
            raw_arts_list.append(raw_arts)

            # One value per segment: Solar8000 readings vs. the same statistic
            # computed directly from the ART samples.
            case_sbps.append(np.max(sbps))
            case_dbps.append(np.min(dbps))
            case_mbps.append(np.mean(mbps))
            case_art_sbps.append(np.max(arts))
            case_art_dbps.append(np.min(arts))
            case_art_mbps.append(np.mean(arts))

        # All six case_* lists grow together, so one length check covers them.
        if len(case_sbps) < MIN_SEGMENTS:
            continue

        all_sbps.extend(case_sbps)
        all_dbps.extend(case_dbps)
        all_mbps.extend(case_mbps)
        all_art_sbps.extend(case_art_sbps)
        all_art_dbps.extend(case_art_dbps)
        all_art_mbps.extend(case_art_mbps)

        # Mean absolute difference between the two sources, in absolute units
        # and as a percentage of the mean Solar8000 value for the case.
        err_sbp = np.mean(np.abs(np.array(case_sbps) - np.array(case_art_sbps)))
        err_dbp = np.mean(np.abs(np.array(case_dbps) - np.array(case_art_dbps)))
        err_mbp = np.mean(np.abs(np.array(case_mbps) - np.array(case_art_mbps)))
        err_sbp_perc = 100 * err_sbp / np.mean(case_sbps)
        err_dbp_perc = 100 * err_dbp / np.mean(case_dbps)
        err_mbp_perc = 100 * err_mbp / np.mean(case_mbps)

        # Disabled debug plotting of every accepted segment for poorly matching
        # cases (requires the matplotlib import that is commented out above).
        # if err_sbp_perc > 10 or err_dbp_perc > 10 or err_mbp_perc > 10:
        #     imgdir = 'imgs/' + str(caseid)
        #     if not os.path.exists(imgdir):
        #         os.makedirs(imgdir)

        #     for i in range(len(raw_sbps_list)):
        #         plt.figure(figsize=(20,5))
        #         plt.plot(raw_sbps_list[i], 'go')
        #         plt.plot(raw_dbps_list[i], 'go')
        #         plt.plot(raw_mbps_list[i], 'go')
        #         plt.plot(raw_arts_list[i], 'r-')
        #         plt.savefig(imgdir + "/" + str(i))
        #         plt.close()

        output = '{}\t{}\t{:.1f}\t{:.1f}%\t{:.1f}\t{:.1f}%\t{:.1f}\t{:.1f}%\n'.format(
            caseid, len(case_sbps), err_sbp, err_sbp_perc, err_dbp, err_dbp_perc, err_mbp, err_mbp_perc)
        fo.write(output)
        print(output, end='')

caseid	len	sbp	sbp%	dbp	dbp%	mbp	mbp%
1	298	3.7	2.90%	1	1.90%	1	1.20%
4	623	3.5	3.20%	1.1	2.10%	1.9	2.60%
7	411	5	4.00%	7.4	11.60%	6.3	7.20%
10	624	4.8	3.70%	1.1	1.90%	1.8	2.30%
12	610	1	1.00%	3.4	7.00%	1.7	2.60%
13	252	6.1	5.50%	9.2	15.70%	7	8.80%
16	373	8.7	6.80%	3.1	5.40%	4.9	6.10%
17	619	3.6	3.20%	8.8	17.20%	5.1	6.90%
19	842	5.6	4.90%	2.5	5.40%	3.5	5.20%
20	794	7.9	7.40%	4.4	8.60%	5.8	8.00%
22	420	1.4	1.10%	3.8	5.60%	2	2.20%
24	168	3.2	2.50%	1.1	1.50%	1.7	1.90%
25	431	6.1	5.20%	2.8	4.50%	4.2	5.00%
26	231	6	5.30%	3.3	5.90%	3.7	4.70%
27	532	3.5	3.10%	1	1.90%	1.6	2.00%
28	798	7.6	6.60%	3.9	6.50%	5.1	6.30%
29	619	7.6	6.40%	3.5	5.70%	4.8	5.70%
31	279	8	7.50%	4.5	7.90%	5.7	7.50%
34	680	3.6	2.80%	1.3	2.00%	1.7	1.90%
38	332	4.9	4.40%	2.8	5.30%	3.2	4.60%
43	432	3.2	2.70%	4.7	9.00%	3.9	5.10%
44	412	7.7	7.10%	4.4	6.80%	5.4	6.50%
46	267	8	6.40%	4.5	7.40%	5.7	6.50%
49	256	4.1	3.20%	5.8	9.70%	4.2	4.90%
50	473	3.6	3.20%	1.2	2.20%	1.7	2.20%
51	222	3.2	2.40%	1.4	1.90%	1.7	1.80%
52	461	4.4	3.60%	6	1