# Experiments on real-world data

In [7]:
%load_ext autoreload
%autoreload 2

from utils.train_medical import run_medical_experiments
from utils.results import (
    get_joint_medical_coverages, 
    get_medical_interval_widths, 
    load_medical_results, 
    get_uncorrected_medical_results
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


To obtain the results as presented in the paper, run the following three sections.

## COVID-19 dataset

In [8]:
# Run the COVID-19 experiment once per (baseline, seed) pair, asking the helper
# to save both the model and the results so the analysis cells below can load them.
for baseline in ('CFRNN',):
    for seed in range(5):
        run_medical_experiments(
            dataset='covid',
            baseline=baseline,
            seed=seed,
            save_model=True,
            save_results=True,
        )

Training CFRNN
Epoch: 0	Train loss: 19849.4248046875
Epoch: 50	Train loss: 14987.1103515625
Epoch: 100	Train loss: 12784.7021484375
Epoch: 150	Train loss: 16437.63427734375
Epoch: 200	Train loss: 11722.685546875
Epoch: 250	Train loss: 15581.306396484375
Epoch: 300	Train loss: 10658.34228515625
Epoch: 350	Train loss: 8696.23583984375
Epoch: 400	Train loss: 9109.50390625
Epoch: 450	Train loss: 10849.78076171875
Epoch: 500	Train loss: 11897.37255859375
Epoch: 550	Train loss: 8026.652099609375
Epoch: 600	Train loss: 7797.9775390625
Epoch: 650	Train loss: 6970.69287109375
Epoch: 700	Train loss: 10888.747314453125
Epoch: 750	Train loss: 5506.3414306640625
Epoch: 800	Train loss: 5750.919677734375
Epoch: 850	Train loss: 6214.250732421875
Epoch: 900	Train loss: 6035.7333984375
Epoch: 950	Train loss: 10334.299072265625
Training CFRNN
Epoch: 0	Train loss: 22146.2509765625
Epoch: 50	Train loss: 18750.36279296875
Epoch: 100	Train loss: 16809.369140625
Epoch: 150	Train loss: 14085.14453125
Epoch: 20

In [9]:
# Print the joint-coverage summary on the COVID-19 data for each listed
# baseline, formatted as a LaTeX "mean \(\pm\) std\%" table entry.
# NOTE(review): the saved output of this cell also lists QRNN and DPRNN, but
# only CFRNN is iterated here -- confirm which of the two is stale.
coverage_template = '{:.1f} \\(\\pm\\) {:.1f}\\%'
for baseline in ('CFRNN',):
    print(baseline)
    coverage_stats = get_joint_medical_coverages(baseline, 'covid', seeds=range(5))
    coverages_mean, coverages_std = coverage_stats

    print(coverage_template.format(coverages_mean, coverages_std))
    print()

CFRNN
89.0 \(\pm\) 6.0\%

QRNN
9.5 \(\pm\) 1.9\%

DPRNN
0.5 \(\pm\) 0.6\%



In [10]:
# Print the interval-width summary (the two values returned by
# get_medical_interval_widths for seeds 0-4) for each listed baseline.
# NOTE(review): the saved output of this cell also lists DPRNN and QRNN, but
# only CFRNN is iterated here -- confirm which of the two is stale.
for baseline in ('CFRNN',):
    print(baseline)
    width_stats = get_medical_interval_widths(baseline, 'covid', seeds=range(5))
    widths_mean, widths_std = width_stats

    # One value per line, followed by a blank separator line.
    print(widths_mean, widths_std, sep='\n')
    print()

CFRNN
741.2711116027832
580.8353769385714

DPRNN
62.15974764251709
32.4004625364606

QRNN
126.6640302886963
56.191443533133125



## Ablation: Uncorrected calibration scores

#### COVID-19

In [13]:
# Ablation reference point: CFRNN joint coverage on COVID-19 with
# correct_conformal=True (presumably the corrected calibration scores -- see
# get_joint_medical_coverages for the exact meaning of the flag).
corrected_stats = get_joint_medical_coverages(
    'CFRNN',
    'covid',
    seeds=range(5),
    correct_conformal=True,
)
coverages_mean, coverages_std = corrected_stats

print('{:.1f} \\(\\pm\\) {:.1f}\\%'.format(coverages_mean, coverages_std))

89.0 \(\pm\) 6.0\%


In [14]:
# Ablation: the same CFRNN joint-coverage query on COVID-19, but with
# correct_conformal=False (presumably the uncorrected calibration scores --
# see get_joint_medical_coverages for the exact meaning of the flag).
uncorrected_stats = get_joint_medical_coverages(
    'CFRNN',
    'covid',
    seeds=range(5),
    correct_conformal=False,
)
coverages_mean, coverages_std = uncorrected_stats

print('{:.1f} \\(\\pm\\) {:.1f}\\%'.format(coverages_mean, coverages_std))

54.5 \(\pm\) 10.0\%


In [15]:
# For every seed, load the saved CFRNN results on COVID-19 and print the
# smallest and largest 'Mean independent coverage' entry as percentages.
for seed in range(5):
    results = load_medical_results(dataset='covid', baseline='CFRNN', seed=seed)
    independent_coverages = results['Mean independent coverage']

    # Scale the extremes by 100 to express them as percentages.
    lowest_pct = independent_coverages.min() * 100
    highest_pct = independent_coverages.max() * 100
    print('[{:.1f}\\%, {:.1f}\\%]'.format(lowest_pct, highest_pct))

[97.5\%, 100.0\%]
[93.8\%, 100.0\%]
[96.2\%, 100.0\%]
[88.8\%, 98.8\%]
[96.2\%, 100.0\%]


In [16]:
# For every seed, fetch the uncorrected results on COVID-19 and print the
# smallest and largest 'Mean independent coverage' entry as percentages.
for seed in range(5):
    uncorrected_covid_results = get_uncorrected_medical_results(dataset='covid', seed=seed)
    independent_coverages = uncorrected_covid_results['Mean independent coverage']

    # Scale the extremes by 100 to express them as percentages.
    lowest_pct = independent_coverages.min() * 100
    highest_pct = independent_coverages.max() * 100
    print('[{:.1f}\\%, {:.1f}\\%]'.format(lowest_pct, highest_pct))

[77.5\%, 98.8\%]
[82.5\%, 98.8\%]
[86.2\%, 98.8\%]
[72.5\%, 95.0\%]
[88.8\%, 97.5\%]
