# Experiments on real-world data

In [23]:
%load_ext autoreload
%autoreload 2

from utils.train_medical import run_medical_experiments
from utils.results import (
    get_joint_medical_coverages, 
    get_medical_interval_widths, 
    load_medical_results, 
    get_uncorrected_medical_results
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


To reproduce the results presented in the paper, run the sections below in order.

## Electricity consumption dataset

In [24]:
# Run the experiment once per (baseline, seed) pair on the 'electricity'
# dataset. The options that never change between runs are collected once;
# save_model / save_results ask the runner to save the model and results.
run_options = dict(dataset='electricity', save_model=True, save_results=True)

for baseline in ['CFRNN']:
    for seed in range(5):
        run_medical_experiments(baseline=baseline, seed=seed, **run_options)

Training CFRNN
Epoch: 0	Train loss: 1896881.875
Epoch: 50	Train loss: 1867673.125
Epoch: 100	Train loss: 1839294.125
Epoch: 150	Train loss: 1812218.5
Epoch: 200	Train loss: 1785856.0
Epoch: 250	Train loss: 1760019.25
Epoch: 300	Train loss: 1734625.125
Epoch: 350	Train loss: 1709628.625
Epoch: 400	Train loss: 1685001.0
Epoch: 450	Train loss: 1660723.25
Epoch: 500	Train loss: 1636781.25
Epoch: 550	Train loss: 1613164.0
Epoch: 600	Train loss: 1589862.25
Epoch: 650	Train loss: 1566868.875
Epoch: 700	Train loss: 1544177.0
Epoch: 750	Train loss: 1521781.0
Epoch: 800	Train loss: 1499676.0
Epoch: 850	Train loss: 1477856.375
Epoch: 900	Train loss: 1456318.375
Epoch: 950	Train loss: 1435057.875
Epoch: 1000	Train loss: 1414070.75
Epoch: 1050	Train loss: 1393353.375
Epoch: 1100	Train loss: 1372902.375
Epoch: 1150	Train loss: 1352714.375
Epoch: 1200	Train loss: 1332786.125
Epoch: 1250	Train loss: 1313114.625
Epoch: 1300	Train loss: 1293696.75
Epoch: 1350	Train loss: 1274529.875
Epoch: 1400	Train lo

In [32]:
# Print, per baseline, the joint coverage (mean, std) pair returned by the
# helper for seeds 0-4, formatted as a LaTeX table cell and followed by a
# blank separator line.
coverage_template = '{:.1f} \\(\\pm\\) {:.1f}\\%'

for baseline in ['CFRNN']:
    print(baseline)
    coverages_mean, coverages_std = get_joint_medical_coverages(
        baseline,
        'electricity',
        seeds=range(5),
    )
    # end='\n\n' emits the same trailing blank line as a separate print().
    print(coverage_template.format(coverages_mean, coverages_std), end='\n\n')

CFRNN
30.0 \(\pm\) 40.0\%



In [31]:
# Print, per baseline, the interval-width (mean, std) pair returned by the
# helper for seeds 0-4: one value per line, then a blank separator line.
for baseline in ['CFRNN']:
    print(baseline)
    widths_mean, widths_std = get_medical_interval_widths(
        baseline,
        'electricity',
        seeds=range(5),
    )
    print(widths_mean, widths_std, sep='\n', end='\n\n')

CFRNN
2664.8687811279297
3014.737054807446



## Ablation: Uncorrected calibration scores

#### Electricity Consumption

In [33]:
# Joint coverage for CFRNN on 'electricity' with correct_conformal=True,
# printed as a LaTeX-ready "mean \(\pm\) std\%" cell.
coverage_template = '{:.1f} \\(\\pm\\) {:.1f}\\%'
coverages_mean, coverages_std = get_joint_medical_coverages(
    'CFRNN',
    'electricity',
    seeds=range(5),
    correct_conformal=True,
)
print(coverage_template.format(coverages_mean, coverages_std))

30.0 \(\pm\) 40.0\%


In [34]:
# Ablation counterpart: the same joint-coverage query for CFRNN on
# 'electricity', this time with correct_conformal=False.
coverage_query = dict(seeds=range(5), correct_conformal=False)
coverages_mean, coverages_std = get_joint_medical_coverages(
    'CFRNN', 'electricity', **coverage_query
)

summary_line = '{:.1f} \\(\\pm\\) {:.1f}\\%'.format(coverages_mean, coverages_std)
print(summary_line)

30.0 \(\pm\) 40.0\%


In [35]:
# For each seed, load the stored CFRNN results on 'electricity' and print the
# [min, max] of its 'Mean independent coverage' entry as percentages.
for seed in range(5):
    results = load_medical_results(
        dataset='electricity',
        baseline='CFRNN',
        seed=seed,
    )
    independent_coverages = results['Mean independent coverage']
    lowest_pct = independent_coverages.min() * 100
    highest_pct = independent_coverages.max() * 100
    print('[{:.1f}\\%, {:.1f}\\%]'.format(lowest_pct, highest_pct))

[100.0\%, 100.0\%]
[0.0\%, 0.0\%]
[0.0\%, 50.0\%]
[0.0\%, 50.0\%]
[50.0\%, 50.0\%]


In [36]:
# Same per-seed [min, max] report, but sourced from the uncorrected results
# helper for the 'electricity' dataset.
range_template = '[{:.1f}\\%, {:.1f}\\%]'

for seed in range(5):
    uncorrected_results = get_uncorrected_medical_results(dataset='electricity', seed=seed)
    independent_coverages = uncorrected_results['Mean independent coverage']
    lowest_pct = independent_coverages.min() * 100
    highest_pct = independent_coverages.max() * 100
    print(range_template.format(lowest_pct, highest_pct))

[100.0\%, 100.0\%]
[0.0\%, 0.0\%]
[0.0\%, 50.0\%]
[0.0\%, 50.0\%]
[50.0\%, 50.0\%]
