<h1>Simple and Scalable Predictive Uncertainty Estimation using Deep Ensembles</h1>

Balaji Lakshminarayanan Alexander Pritzel Charles Blundell

<h2>Classification with Wide ResNet and CIFAR10</h2>

In [None]:
import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torch.nn as nn
import torch.nn.functional as F
m = nn.Softplus()

In [None]:
# Mount Google Drive (Colab only) so that the '/content/drive/...' paths used
# by the data-loading cells below can be resolved.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
class Block(nn.Module):
    """Residual block with two 3x3 convolutions and a shortcut connection.

    The main branch runs BN -> ReLU -> conv3x3 -> Dropout -> BN -> ReLU ->
    conv3x3; the stride is applied by the second convolution.  The shortcut
    is an empty ``nn.Sequential`` (identity) unless the stride differs from 1
    or the channel count changes, in which case it is a strided 1x1
    convolution.

    Args:
      in_channels:  Number of input channels.
      out_channels: Number of output channels.
      dropout_rate: Dropout probability used between the two convolutions.
      stride:       Stride of the second convolution and of the shortcut.
    """

    def __init__(self, in_channels, out_channels, dropout_rate, stride=1):
        super().__init__()

        main_branch = [
            nn.BatchNorm2d(in_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.Dropout(p=dropout_rate),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
        ]
        self.conv = nn.Sequential(*main_branch)

        # A projection is only needed when the output shape differs from the input shape.
        shape_changes = stride != 1 or in_channels != out_channels
        if shape_changes:
            self.skip = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
            )
        else:
            self.skip = nn.Sequential()

    def forward(self, x):
        """Return ``conv(x) + skip(x)``."""
        summed = self.conv(x)
        summed += self.skip(x)
        return summed

class GroupOfBlocks(nn.Module):
    """A stack of residual ``Block`` modules applied one after another.

    Only the first block receives ``stride`` and the ``in_channels`` ->
    ``out_channels`` change; every following block uses stride 1 and
    ``out_channels`` on both sides.

    Args:
      in_channels:  Number of channels entering the group.
      out_channels: Number of channels produced by every block.
      n_blocks:     Number of blocks (converted with ``int``).
      dropout_rate: Dropout probability forwarded to each block.
      stride:       Stride of the first block.
    """

    def __init__(self, in_channels, out_channels, n_blocks, dropout_rate, stride=1):
        super().__init__()
        per_block_strides = [stride] + [1] * (int(n_blocks) - 1)

        layers = []
        current_channels = in_channels
        for block_stride in per_block_strides:
            layers.append(Block(current_channels, out_channels, dropout_rate, block_stride))
            current_channels = out_channels

        # Kept as an attribute, as before: the channel count after the last block.
        self.in_channels = current_channels
        self.group = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through all blocks of the group in order."""
        return self.group(x)

class WideResNet(nn.Module):
    """Wide residual network classifier for 3-channel images.

    Layout: a 3x3 stem convolution, three ``GroupOfBlocks`` stages of widths
    ``16*k``, ``32*k`` and ``64*k`` (the last two with stride 2), batch-norm,
    ReLU, global average pooling and a linear classifier.

    Args:
      depth:        Total depth; must satisfy ``depth = 6n + 4``, giving ``n``
                    blocks per stage.
      widen_factor: Width multiplier ``k``.
      dropout_rate: Dropout probability used inside every block.
      num_classes:  Number of output logits.

    Raises:
      AssertionError: If ``depth`` is not of the form ``6n + 4``.
    """

    def __init__(self, depth, widen_factor, dropout_rate, num_classes=10):
        super(WideResNet, self).__init__()
        assert ((depth-4)%6 == 0), "Depth should be 6n+4."
        # Integer division: the block count is a count, not a float.
        blocks_per_group = (depth - 4) // 6
        k = widen_factor
        nStages = [16, 16*k, 32*k, 64*k]

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=nStages[0], kernel_size=3, stride=1, padding=1, bias=False)
        self.group1 = GroupOfBlocks(nStages[0], nStages[1], blocks_per_group, dropout_rate)
        self.group2 = GroupOfBlocks(nStages[1], nStages[2], blocks_per_group, dropout_rate, stride=2)
        self.group3 = GroupOfBlocks(nStages[2], nStages[3], blocks_per_group, dropout_rate, stride=2)
        self.bn1 = nn.BatchNorm2d(nStages[3])

        self.relu = nn.ReLU(inplace=True)
        self.fc = nn.Linear(nStages[3], num_classes)
        self.nStage3 = nStages[3]

        # Initialize weights: convolutions get N(0, sqrt(2 / fan_out)),
        # batch-norm layers start as the identity transform.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
                module.weight.data.normal_(0, np.sqrt(2. / fan_out))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for images ``x``."""
        x = self.conv1(x)

        x = self.group1(x)
        x = self.group2(x)
        x = self.group3(x)
        x = self.relu(self.bn1(x))
        # Global average pooling. For 32x32 inputs the feature map is 8x8, so
        # this equals an 8x8 average pool; for other input sizes it keeps one
        # row per sample instead of spilling spatial positions into the batch.
        x = F.adaptive_avg_pool2d(x, 1)
        x = x.view(x.size(0), self.nStage3)
        return self.fc(x)

# Accuracy on a perturbed ("OOD") copy of the dataset served by `testloader`.
def compute_accuracy_ood(net, testloader, device=None, epsilon=None):
    """Return the fraction of correct predictions on perturbed inputs.

    Every batch is shifted by ``epsilon * sign(images)`` before it is fed to
    the network, i.e. each value is pushed away from zero by ``epsilon``.
    The shift depends only on the input itself; no gradients are involved.

    Args:
      net:        Model mapping an image batch to per-class scores.
      testloader: Iterable of ``(images, labels)`` batches.
      device:     Device to run on.  Defaults to the device of the model's
                  first parameter (CPU for a parameter-free model).
      epsilon:    Perturbation size.  Defaults to the notebook-level ``eps``.

    Returns:
      Accuracy as a float in ``[0, 1]``.

    Raises:
      ZeroDivisionError: If ``testloader`` yields no samples.
    """
    if device is None:
        first_param = next(net.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')
    if epsilon is None:
        epsilon = eps  # notebook-level default, looked up only when needed

    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            # Out-of-place shift; no clone/requires_grad is needed under no_grad.
            images = images + epsilon * torch.sign(images)
            predicted = net(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total

def compute_accuracy(net, testloader, device=None):
    """Return the fraction of samples in ``testloader`` classified correctly.

    Args:
      net:        Model mapping an image batch to per-class scores.
      testloader: Iterable of ``(images, labels)`` batches.
      device:     Device to run on.  Defaults to the device of the model's
                  first parameter (CPU for a parameter-free model), so the
                  function no longer depends on a global ``device``.

    Returns:
      Accuracy as a float in ``[0, 1]``.

    Raises:
      ZeroDivisionError: If ``testloader`` yields no samples.
    """
    if device is None:
        first_param = next(net.parameters(), None)
        device = first_param.device if first_param is not None else torch.device('cpu')

    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            images, labels = images.to(device), labels.to(device)
            predicted = net(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total

In [None]:
import torchvision
import torchvision.transforms as transforms
# Directory on the mounted Drive that is passed to torchvision as the dataset root.
data_dir = '/content/drive/My Drive/AALTO/cs4875-research/data/'
# Preprocessing applied to every image of both splits.
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert the image to a tensor
    transforms.Normalize((0.5,), (0.5,))  # (x - 0.5) / 0.5, i.e. [0, 1] -> [-1, 1]
])

# CIFAR-10 train and test splits; downloaded into data_dir when missing.
trainset = torchvision.datasets.CIFAR10(root=data_dir, train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root=data_dir, train=False, download=True, transform=transform)

# Class names; not referenced by the visible training/evaluation code.
# NOTE(review): presumably listed in CIFAR-10 label-index order - confirm.
classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# Training batches are shuffled; test batches keep dataset order.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=5, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


<h2>Ensemble without adversarial training</h2>

In [None]:
# Notebook-level settings read as globals by the training/evaluation helpers.
device = torch.device('cuda:0')  # first CUDA GPU; no CPU fallback here
loss_func = nn.CrossEntropyLoss()  # training loss, applied to the model's raw outputs
m = nn.LogSoftmax(dim=1)  # not referenced by the visible code in this cell
eps = 0.01*2 # input ranges from (-1, 1)
learning_rate = 0.01  # passed to the Adam optimizer in the training loop below

def compute_brier_score(p, y):
  """Return the mean Brier score of a batch as a 0-dim tensor.

  The score is the squared error between the softmax probabilities and the
  one-hot encoded labels, averaged over both classes and samples.  The
  previous implementation compared class *indices* (label minus arg-max),
  which is not a Brier score and grows with the number of classes.

  Args:
    p: Unnormalised class scores (logits) of shape ``(batch, num_classes)``.
    y: Integer class labels of shape ``(batch,)``.

  Returns:
    A scalar tensor in ``[0, 1]`` that does not require grad, so callers may
    convert it with ``.item()`` or ``.numpy()``.
  """
  with torch.no_grad():
    probs = torch.softmax(p, dim=1)
    one_hot = torch.nn.functional.one_hot(y.long(), num_classes=p.size(1)).to(probs.dtype)
    return torch.mean((probs - one_hot) ** 2)

def ensembleWithoutAdversarial(model, optimizer):
  """Train ``model`` with the plain classification loss for ``numEpochs`` epochs.

  Reads the notebook-level globals ``numEpochs``, ``trainloader``, ``device``,
  ``loss_func`` and ``compute_brier_score``.

  Args:
    model:     Network to train (updated in place).
    optimizer: Optimizer built over ``model.parameters()``.

  Returns:
    ``(loss, brier)``: the loss of the last mini-batch of the final epoch and
    the per-sample mean of ``compute_brier_score`` over the final epoch.  Both
    are ``0.0`` if nothing was trained.
  """
  final_loss = 0.0
  epoch_brier = 0.0
  model.train()
  for epoch in range(numEpochs):
    brier_sum = 0.0
    total = 0
    for x, y in trainloader:
      x, y = x.to(device), y.to(device)
      batch_size = y.size(0)
      optimizer.zero_grad()
      output = model(x)
      # compute_brier_score returns a batch mean; weight it by the batch size
      # so that dividing by the sample count below yields a true mean.
      brier_sum += compute_brier_score(output, y).item() * batch_size
      loss = loss_func(output, y)
      loss.backward()
      optimizer.step()
      total += batch_size
    epoch_brier = brier_sum / total if total else 0.0
    if epoch == (numEpochs-1) and total:
      final_loss = loss.item()
      print('Loss at epoch {} is {}'.format(epoch, final_loss))
      print('Brier score at epoch {} is {}'.format(epoch, epoch_brier))
  return final_loss, epoch_brier


# Train four independently initialised WideResNet(28, 4, 0.5) models without
# adversarial training and report per-model and averaged metrics.
# Each model is evaluated on its own; their predictions are not combined here.
numEpochs = 40  # read as a global by ensembleWithoutAdversarial
training_loss = []   # value returned as `loss` by each training run
training_brier = []  # value returned as `brier` by each training run
accuracys = []       # accuracy on the unmodified test set
accuracy_oods = []   # accuracy on the perturbed test set
time_all = []        # wall-clock training time per model, in seconds
for i in range(4):
  model = WideResNet(28, 4, 0.5)
  model.to(device)
  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
  # Time only the training call, not the evaluation that follows.
  t0 = time.time()
  loss, brier = ensembleWithoutAdversarial(model, optimizer)
  time_one = time.time() - t0
  training_loss.append(loss)
  training_brier.append(brier)
  accuracy = compute_accuracy(model, testloader)
  accuracy_ood = compute_accuracy_ood(model, testloader)

  accuracys.append(accuracy)
  accuracy_oods.append(accuracy_ood)
  time_all.append(time_one)
  print('Accuracy of the network on the test images: %.3f' % accuracy)
  print('Accuracy of the network on the OOD test images: %.3f' % accuracy_ood)
  print('NLL Loss is {}'.format(loss))
  print('Brier score is {}'.format(brier))
  print('Training time: {} seconds'.format(time_one))
# Summary over the four runs: metrics are averaged, training time is summed.
print('Mean:')
print('Accuracy of the network on the test images: %.3f' % np.mean(accuracys))
print('Accuracy of the network on the OOD test images: %.3f' % np.mean(accuracy_oods))
print('NLL Loss is {}'.format(np.mean(training_loss)))
print('Brier score is {}'.format(np.mean(training_brier)))
print('Training time: {} seconds'.format(np.sum(time_all)))


Loss at epoch 0 is 1.8651790618896484
Brier score at epoch 0 is 0.399615
Accuracy of the network on the test images: 0.167
Accuracy of the network on the OOD test images: 0.168
NLL Loss is [1.8651790618896484]
Brier score is [0.399615]
Training time: 250.71372509002686 seconds


KeyboardInterrupt: ignored

<h2>Ensemble with adversarial training</h2>

In [None]:
# Notebook-level settings read as globals by the training/evaluation helpers.
device = torch.device('cuda:0')  # first CUDA GPU; no CPU fallback here
loss_func = nn.CrossEntropyLoss()  # training loss, applied to the model's raw outputs
m = nn.LogSoftmax(dim=1)  # not referenced by the visible code in this cell
eps = 0.01*2 # input ranges from (-1, 1)
learning_rate = 0.01  # passed to the Adam optimizer in the training loop below

def compute_brier_score(p, y):
  """Return the mean Brier score of a batch as a 0-dim tensor.

  The score is the squared error between the softmax probabilities and the
  one-hot encoded labels, averaged over both classes and samples.  The
  previous implementation compared class *indices* (label minus arg-max),
  which is not a Brier score and grows with the number of classes.

  Args:
    p: Unnormalised class scores (logits) of shape ``(batch, num_classes)``.
    y: Integer class labels of shape ``(batch,)``.

  Returns:
    A scalar tensor in ``[0, 1]`` that does not require grad, so callers may
    convert it with ``.item()`` or ``.numpy()``.
  """
  with torch.no_grad():
    probs = torch.softmax(p, dim=1)
    one_hot = torch.nn.functional.one_hot(y.long(), num_classes=p.size(1)).to(probs.dtype)
    return torch.mean((probs - one_hot) ** 2)

def ensembleWithAdversarial(model, optimizer):
  """Train ``model`` on clean plus fast-gradient-sign adversarial examples.

  For every batch the loss gradient with respect to the input is computed,
  the input is shifted by ``eps * sign(gradient)``, and the parameters are
  updated on the sum of the clean and the adversarial loss.

  Reads the notebook-level globals ``numEpochs``, ``trainloader``, ``device``,
  ``loss_func``, ``eps`` and ``compute_brier_score``.

  Args:
    model:     Network to train (updated in place).
    optimizer: Optimizer built over ``model.parameters()``.

  Returns:
    ``(loss, brier)``: the combined loss of the last mini-batch of the final
    epoch and the per-sample mean of ``compute_brier_score`` (clean inputs)
    over the final epoch.  Both are ``0.0`` if nothing was trained.
  """
  final_loss = 0.0
  epoch_brier = 0.0
  model.train()
  for epoch in range(numEpochs):
    brier_sum = 0.0
    total = 0
    for x, y in trainloader:
      x, y = x.to(device), y.to(device)
      batch_size = y.size(0)
      # The input must be a leaf that tracks gradients to obtain d(loss)/d(x).
      x = x.clone().detach().requires_grad_(True)
      optimizer.zero_grad()
      output = model(x)
      # compute_brier_score returns a batch mean; weight it by the batch size
      # so that dividing by the sample count below yields a true mean.
      brier_sum += compute_brier_score(output, y).item() * batch_size
      loss = loss_func(output, y)
      # Keep the graph: `output` is reused in the combined loss below.
      loss.backward(retain_graph=True)
      # Detached so the second backward pass does not track input gradients.
      x_prime = (x + eps*(torch.sign(x.grad.data))).detach()
      # Discard the parameter gradients produced while probing the input.
      optimizer.zero_grad()
      output_prime = model(x_prime)
      loss = loss_func(output, y) + loss_func(output_prime, y)
      loss.backward()
      optimizer.step()
      total += batch_size
    epoch_brier = brier_sum / total if total else 0.0
    if epoch == (numEpochs-1) and total:
      final_loss = loss.item()
      print('Loss at epoch {} is {}'.format(epoch, final_loss))
      print('Brier score at epoch {} is {}'.format(epoch, epoch_brier))
  return final_loss, epoch_brier


# Train four independently initialised WideResNet(28, 4, 0.5) models with
# adversarial training and report per-model and averaged metrics.
# Each model is evaluated on its own; their predictions are not combined here.
numEpochs = 40  # read as a global by ensembleWithAdversarial
training_loss = []   # value returned as `loss` by each training run
training_brier = []  # value returned as `brier` by each training run
accuracys = []       # accuracy on the unmodified test set
accuracy_oods = []   # accuracy on the perturbed test set
time_all = []        # wall-clock training time per model, in seconds
for i in range(4):
  model = WideResNet(28, 4, 0.5)
  model.to(device)
  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
  # Time only the training call, not the evaluation that follows.
  t0 = time.time()
  loss, brier = ensembleWithAdversarial(model, optimizer)
  time_one = time.time() - t0
  training_loss.append(loss)
  training_brier.append(brier)
  accuracy = compute_accuracy(model, testloader)
  accuracy_ood = compute_accuracy_ood(model, testloader)
  accuracys.append(accuracy)
  accuracy_oods.append(accuracy_ood)
  time_all.append(time_one)
  print('Accuracy of the network on the test images: %.3f' % accuracy)
  print('Accuracy of the network on the OOD test images: %.3f' % accuracy_ood)
  print('NLL Loss is {}'.format(loss))
  print('Brier score is {}'.format(brier))
  print('Training time: {} seconds'.format(time_one))
# Summary over the four runs: metrics are averaged, training time is summed.
print('Mean:')
print('Accuracy of the network on the test images: %.3f' % np.mean(accuracys))
print('Accuracy of the network on the OOD test images: %.3f' % np.mean(accuracy_oods))
print('NLL Loss is {}'.format(np.mean(training_loss)))
print('Brier score is {}'.format(np.mean(training_brier)))
print('Training time: {} seconds'.format(np.sum(time_all)))


<h2>Time series prediction with MIMIC3 and LSTM</h2>

In [None]:
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from mydatasets import calculate_num_features, VisitSequenceWithLabelDataset, visit_collate_fn
# !pip3 install pickle5
import pickle5 as pickle
from torch.utils.data import DataLoader
import torch.optim as optim

Collecting pickle5
[?25l  Downloading https://files.pythonhosted.org/packages/f7/4c/5c4dd0462c8d3a6bc4af500a6af240763c2ebd1efdc736fc2c946d44b70a/pickle5-0.0.11.tar.gz (132kB)
[K     |██▌                             | 10kB 12.5MB/s eta 0:00:01[K     |█████                           | 20kB 18.3MB/s eta 0:00:01[K     |███████▍                        | 30kB 10.8MB/s eta 0:00:01[K     |██████████                      | 40kB 9.1MB/s eta 0:00:01[K     |████████████▍                   | 51kB 4.5MB/s eta 0:00:01[K     |██████████████▉                 | 61kB 5.0MB/s eta 0:00:01[K     |█████████████████▍              | 71kB 5.0MB/s eta 0:00:01[K     |███████████████████▉            | 81kB 5.5MB/s eta 0:00:01[K     |██████████████████████▎         | 92kB 5.8MB/s eta 0:00:01[K     |████████████████████████▉       | 102kB 6.1MB/s eta 0:00:01[K     |███████████████████████████▎    | 112kB 6.1MB/s eta 0:00:01[K     |█████████████████████████████▊  | 122kB 6.1MB/s eta 0:00:01

In [None]:
# The data preprocessing and the training procedure are adapted from https://github.com/jiaweizhu830/Time-Series-Mortality-Prediction-in-ICU-via-PyTorch
# Train : test = 8:2

In [None]:
class AverageMeter(object):
    """Keep the most recent value together with a running weighted average.

    Attributes:
      val:   Value passed to the latest ``update`` call.
      sum:   Weighted sum of all values seen since the last ``reset``.
      count: Total weight seen since the last ``reset``.
      avg:   ``sum / count``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running average."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

def compute_brier_score(p, y):
  """Return the mean Brier score of a batch as a 0-dim tensor.

  The score is the squared error between the softmax probabilities and the
  one-hot encoded labels, averaged over both classes and samples.  The
  previous implementation compared class *indices* (label minus arg-max),
  which is not a Brier score and grows with the number of classes.

  Args:
    p: Unnormalised class scores (logits) of shape ``(batch, num_classes)``.
    y: Integer class labels of shape ``(batch,)``.

  Returns:
    A scalar tensor in ``[0, 1]`` that does not require grad, so callers may
    convert it with ``.item()`` or ``.numpy()``.
  """
  with torch.no_grad():
    probs = torch.softmax(p, dim=1)
    one_hot = torch.nn.functional.one_hot(y.long(), num_classes=p.size(1)).to(probs.dtype)
    return torch.mean((probs - one_hot) ** 2)

def compute_batch_accuracy(output, target):
    """Return the percentage of rows of ``output`` whose arg-max equals ``target``.

    The result is a tensor; gradients are not tracked while computing it.
    """
    with torch.no_grad():
        predicted = output.max(1)[1]
        num_correct = predicted.eq(target).sum()
        num_samples = target.size(0)
        return num_correct * 100.0 / num_samples

In [None]:

# Fix the random seeds so that runs are repeatable.
torch.manual_seed(0)
if torch.cuda.is_available():
    torch.cuda.manual_seed(0)

# Set a correct path to the data files that you preprocessed
PATH_TRAIN_SEQS = "/content/drive/My Drive/AALTO/cs4875-research/data/features/train/mortality.seqs.train"
PATH_TRAIN_LABELS = "/content/drive/My Drive/AALTO/cs4875-research/data/features/train/mortality.labels.train"
PATH_TEST_SEQS = "/content/drive/My Drive/AALTO/cs4875-research/data/features/test/mortality.seqs.test"
PATH_TEST_LABELS = "/content/drive/My Drive/AALTO/cs4875-research/data/features/test/mortality.labels.test"
PATH_OUTPUT = "/content/drive/My Drive/AALTO/cs4875-research/output/"

NUM_EPOCHS = 1
BATCH_SIZE = 32
USE_CUDA = False  # Set 'True' if you want to use GPU
NUM_WORKERS = 0


def _load_pickle(path):
    """Unpickle the object stored at ``path`` and close the file afterwards."""
    # NOTE(review): unpickling can execute arbitrary code - only load files
    # produced by your own preprocessing step.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Data loading
print('===> Loading entire datasets')
train_seqs = _load_pickle(PATH_TRAIN_SEQS)
train_labels = _load_pickle(PATH_TRAIN_LABELS)
test_seqs = _load_pickle(PATH_TEST_SEQS)
test_labels = _load_pickle(PATH_TEST_LABELS)
print('===> done Loading')
# The feature count is derived from the training sequences and reused for the test set.
num_features = calculate_num_features(train_seqs)
print(num_features)

train_dataset = VisitSequenceWithLabelDataset(train_seqs, train_labels, num_features)
test_dataset = VisitSequenceWithLabelDataset(test_seqs, test_labels, num_features)
print('===> done datasets')

# Training batches are shuffled; the test set is served one sample at a time, in order.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=visit_collate_fn, num_workers=NUM_WORKERS)
test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False, collate_fn=visit_collate_fn, num_workers=NUM_WORKERS)




===> Loading entire datasets
===> done Loading
5067
===> done datasets


In [None]:
class MyLSTM(nn.Module):
    """Sequence classifier: linear embedding -> sigmoid -> LSTM -> linear head.

    The prediction for each sequence is read from the LSTM output at that
    sequence's last valid time step (``lengths[i] - 1``).

    Args:
      dim_input:    Number of features per time step.
      dropout_rate: Accepted for backward compatibility.  ``nn.LSTM`` applies
                    dropout only between stacked layers, so with the single
                    layer used here it never had an effect (it only triggered
                    a warning) and is no longer forwarded.
    """

    def __init__(self, dim_input, dropout_rate=0.1):
        super(MyLSTM, self).__init__()
        self.dropout_rate = dropout_rate
        self.fc1 = nn.Linear(in_features=dim_input, out_features=64)
        self.lstm = nn.LSTM(input_size=64, hidden_size=64, num_layers=1, batch_first=True)
        self.fc2 = nn.Linear(in_features=64, out_features=2)

    def forward(self, x, lengths):
        """Return two class scores per sequence.

        Args:
          x:       Padded batch of shape ``(batch, seq_len, dim_input)``.
          lengths: Tensor with the true length of each sequence.  It is passed
                   to ``pack_padded_sequence`` with its default
                   ``enforce_sorted=True``, so it must be in decreasing order.

        Returns:
          Tensor of shape ``(batch, 2)`` on the same device as ``x``.
        """
        lengths = lengths.long()
        batch_size, seq_len, _ = x.size()

        x = torch.sigmoid(self.fc1(x))
        # pack_padded_sequence expects the lengths on the CPU.
        packed = pack_padded_sequence(x, lengths.cpu(), batch_first=True)
        packed, _ = self.lstm(packed)
        x, _ = pad_packed_sequence(packed, batch_first=True, total_length=seq_len)

        # Pick each sequence's last valid output with one indexing operation.
        # This stays on x's device, unlike a freshly allocated CPU buffer.
        rows = torch.arange(batch_size, device=x.device)
        last_steps = (lengths - 1).to(x.device)
        return self.fc2(x[rows, last_steps])

In [None]:

def train(model, device, data_loader, criterion, optimizer, epoch, print_freq=10):
    """Run one training epoch over ``data_loader``.

    Args:
      model:       Network called as ``model(seqs, lengths)``.
      device:      Device the sequences and targets are moved to.
      data_loader: Iterable of ``((seqs, lengths), target)`` batches.
      criterion:   Loss function applied to ``(output, target)``.
      optimizer:   Optimizer updating the model parameters.
      epoch:       Unused; kept for interface compatibility.
      print_freq:  Unused; kept for interface compatibility.

    Returns:
      ``(mean_loss, mean_accuracy, mean_brier)``, each a sample-weighted
      average over the epoch.  The accuracy is the value produced by
      ``compute_batch_accuracy``.

    Raises:
      AssertionError: If the loss becomes NaN.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    accuracy = AverageMeter()
    brier = AverageMeter()

    model.train()

    end = time.time()
    for batch_input, target in data_loader:
        # measure data loading time
        data_time.update(time.time() - end)

        seqs, lengths = batch_input
        seqs = seqs.to(device)
        target = target.to(device)
        batch_size = target.size(0)

        optimizer.zero_grad()
        output = model(seqs, lengths)
        # compute_brier_score returns a batch mean; the meter weights it by
        # the batch size so the epoch value is a per-sample mean.
        brier.update(compute_brier_score(output, target).item(), batch_size)
        loss = criterion(output, target)
        assert not np.isnan(loss.item()), 'Model diverged with loss = NaN'

        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        losses.update(loss.item(), batch_size)
        accuracy.update(compute_batch_accuracy(output, target).item(), batch_size)

    return losses.avg, accuracy.avg, brier.avg

def advTrain(model, device, data_loader, criterion, optimizer, epoch, print_freq=10, eps=5067*0.01):
    """Run one epoch of training on clean plus fast-gradient-sign inputs.

    For every batch the loss gradient with respect to the input sequences is
    computed, the sequences are shifted by ``eps * sign(gradient)``, and the
    parameters are updated on the sum of the clean and the adversarial loss.

    Args:
      model:       Network called as ``model(seqs, lengths)``.
      device:      Device the sequences and targets are moved to.
      data_loader: Iterable of ``((seqs, lengths), target)`` batches.
      criterion:   Loss function applied to ``(output, target)``.
      optimizer:   Optimizer updating the model parameters.
      epoch:       Unused; kept for interface compatibility.
      print_freq:  Unused; kept for interface compatibility.
      eps:         Perturbation size.  The default keeps the previously
                   hard-coded value ``5067*0.01``.

    Returns:
      ``(mean_loss, mean_accuracy, mean_brier)``, each a sample-weighted
      average over the epoch.  The loss is the combined clean + adversarial
      loss; accuracy and Brier score are measured on the clean inputs.

    Raises:
      AssertionError: If the clean loss becomes NaN.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    accuracy = AverageMeter()
    brier = AverageMeter()

    model.train()

    end = time.time()
    for batch_input, target in data_loader:
        # measure data loading time
        data_time.update(time.time() - end)

        seqs, lengths = batch_input
        seqs = seqs.to(device)
        # The input must be a leaf that tracks gradients to obtain d(loss)/d(seqs).
        seqs = seqs.clone().detach().requires_grad_(True)
        target = target.to(device)
        batch_size = target.size(0)

        optimizer.zero_grad()
        output = model(seqs, lengths)
        # compute_brier_score returns a batch mean; the meter weights it by
        # the batch size so the epoch value is a per-sample mean.
        brier.update(compute_brier_score(output, target).item(), batch_size)
        loss = criterion(output, target)
        assert not np.isnan(loss.item()), 'Model diverged with loss = NaN'

        # Keep the graph: `output` is reused in the combined loss below.
        loss.backward(retain_graph=True)

        # Detached so the second backward pass does not track input gradients.
        seqs_prime = (seqs + eps*(torch.sign(seqs.grad.data))).detach()

        # Discard the parameter gradients produced while probing the input.
        optimizer.zero_grad()
        output_prime = model(seqs_prime, lengths)
        loss = criterion(output, target) + criterion(output_prime, target)
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        losses.update(loss.item(), batch_size)
        accuracy.update(compute_batch_accuracy(output, target).item(), batch_size)

    return losses.avg, accuracy.avg, brier.avg

In [None]:
# Adversarially trained LSTM runs.  `evaluate` and `advEvaluate` are defined
# in a later cell, which has to be executed before this one.
NUM_EPOCHS = 20
device = torch.device("cuda" if torch.cuda.is_available() and USE_CUDA else "cpu")
criterion = nn.CrossEntropyLoss()
criterion.to(device)

# The two loops used to share the name `i`; distinct names keep the same
# 3 x 4 = 12 independent runs without the shadowing.
for outer_run in range(1, 4):
    for inner_run in range(1, 5):
        model = MyLSTM(num_features)
        model.to(device)

        optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0004)
        # Validation bookkeeping; not updated anywhere in this cell.
        best_val_acc = 0.0
        valid_losses, valid_accuracies = [], []
        train_losses, train_accuracies = [], []
        training_brier = []

        t0 = time.time()
        for epoch in range(NUM_EPOCHS):
            train_loss, train_accuracy, brier = advTrain(model, device, train_loader, criterion, optimizer, epoch, print_freq=len(train_loader)-1)
            train_losses.append(train_loss)
            training_brier.append(brier)
            train_accuracies.append(train_accuracy)
        time_one = time.time() - t0

        # Training metrics are averaged over all epochs of this run.
        print('NLL Loss is {}'.format(np.mean(train_losses)))
        print('Brier score is {}'.format(np.mean(training_brier)))
        print('Training time: {} seconds'.format(time_one))
        test_loss, test_accuracy, test_results = advEvaluate(model, device, test_loader, criterion, print_freq=len(test_loader)-1)
        print('OOD accuracy {}'.format(test_accuracy))
        test_loss, test_accuracy, test_results = evaluate(model, device, test_loader, criterion, print_freq=len(test_loader)-1)
        print('test accuracy {}'.format(test_accuracy))


NLL Loss is 0.31624441407166676
Brier score is 0.0033095868338740573
Training time: 1049.9844000339508 seconds
Test: [0/9404]	Time 0.001 (0.001)	Loss 0.0000 (0.0000)	Accuracy 100.000 (100.000)
Test: [9403/9404]	Time 0.001 (0.001)	Loss 0.2669 (0.7036)	Accuracy 100.000 (77.393)
OOD accuracy 77.39259889408763
Test: [0/9404]	Time 0.002 (0.002)	Loss 0.0004 (0.0004)	Accuracy 100.000 (100.000)
Test: [9403/9404]	Time 0.001 (0.001)	Loss 0.1301 (0.3286)	Accuracy 100.000 (86.963)
test accuracy 86.9629944704381
NLL Loss is 0.3505539423413424
Brier score is 0.0033423735019666736
Training time: 1029.8340990543365 seconds
Test: [0/9404]	Time 0.002 (0.002)	Loss 0.0001 (0.0001)	Accuracy 100.000 (100.000)
Test: [9403/9404]	Time 0.001 (0.001)	Loss 0.0150 (0.6621)	Accuracy 100.000 (79.785)
OOD accuracy 79.78519778817524
Test: [0/9404]	Time 0.002 (0.002)	Loss 0.0008 (0.0008)	Accuracy 100.000 (100.000)
Test: [9403/9404]	Time 0.001 (0.001)	Loss 0.0732 (0.3244)	Accuracy 100.000 (86.559)
test accuracy 86.55891

In [None]:

def evaluate(model, device, data_loader, criterion, print_freq=10):
    """Evaluate ``model`` on ``data_loader`` without tracking gradients.

    Args:
      model:       Network called as ``model(seqs, lengths)``.
      device:      Device the sequences and targets are moved to.
      data_loader: Iterable of ``((seqs, lengths), target)`` batches.
      criterion:   Loss function applied to ``(output, target)``.
      print_freq:  A progress line is printed every ``print_freq`` batches.

    Returns:
      ``(mean_loss, mean_accuracy, results)`` where ``results`` is a list of
      ``(true_label, predicted_label)`` pairs, one per sample.
    """
    timer = AverageMeter()
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    report = ('Test: [{0}/{1}]\t'
              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Accuracy {acc.val:.3f} ({acc.avg:.3f})')
    label_pairs = []

    model.eval()

    with torch.no_grad():
        tick = time.time()
        for step, ((seqs, lengths), target) in enumerate(data_loader):
            seqs = seqs.to(device)
            target = target.to(device)

            output = model(seqs, lengths)
            loss = criterion(output, target)

            # elapsed time for this batch
            now = time.time()
            timer.update(now - tick)
            tick = time.time()

            n_samples = target.size(0)
            loss_meter.update(loss.item(), n_samples)
            acc_meter.update(compute_batch_accuracy(output, target).item(), n_samples)

            truths = target.detach().to('cpu').numpy().tolist()
            _, predicted = output.detach().to('cpu').max(1)
            label_pairs.extend(zip(truths, predicted.numpy().tolist()))

            if step % print_freq == 0:
                print(report.format(
                    step, len(data_loader), batch_time=timer, loss=loss_meter, acc=acc_meter))

    return loss_meter.avg, acc_meter.avg, label_pairs

def advEvaluate(model, device, data_loader, criterion, print_freq=10, eps=5067*0.01):
    """Evaluate ``model`` on perturbed inputs without tracking gradients.

    Every batch is shifted by ``eps * sign(seqs)`` before it is fed to the
    model, i.e. each value is pushed away from zero by ``eps``.  The shift
    depends only on the input itself; no gradients are involved.

    Args:
      model:       Network called as ``model(seqs, lengths)``.
      device:      Device the sequences and targets are moved to.
      data_loader: Iterable of ``((seqs, lengths), target)`` batches.
      criterion:   Loss function applied to ``(output, target)``.
      print_freq:  A progress line is printed every ``print_freq`` batches.
      eps:         Perturbation size.  The default keeps the previously
                   hard-coded value ``5067*0.01``.

    Returns:
      ``(mean_loss, mean_accuracy, results)`` where ``results`` is a list of
      ``(true_label, predicted_label)`` pairs, one per sample.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    accuracy = AverageMeter()
    results = []

    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (batch_input, target) in enumerate(data_loader):
            seqs, lengths = batch_input
            seqs = seqs.to(device)
            seqs = seqs + eps*(torch.sign(seqs))
            target = target.to(device)

            output = model(seqs, lengths)
            loss = criterion(output, target)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            losses.update(loss.item(), target.size(0))
            accuracy.update(compute_batch_accuracy(output, target).item(), target.size(0))

            y_true = target.detach().to('cpu').numpy().tolist()
            y_pred = output.detach().to('cpu').max(1)[1].numpy().tolist()
            results.extend(zip(y_true, y_pred))

            if i % print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
                    i, len(data_loader), batch_time=batch_time, loss=losses, acc=accuracy))

    return losses.avg, accuracy.avg, results

In [None]:
# Evaluate the current `model` on the unmodified test set, printing only the
# first and last batch; the bare expression makes the notebook display the accuracy.
test_loss, test_accuracy, test_results = evaluate(model, device, test_loader, criterion, print_freq = len(test_loader)-1)
test_accuracy

Test: [0/9404]	Time 0.002 (0.002)	Loss 0.0005 (0.0005)	Accuracy 100.000 (100.000)
Test: [9403/9404]	Time 0.001 (0.001)	Loss 0.5152 (0.3427)	Accuracy 100.000 (86.102)


86.10165886856657

In [None]:
# LSTM runs trained without adversarial examples, evaluated on both the
# perturbed and the unmodified test set.
NUM_EPOCHS = 20
device = torch.device("cuda" if torch.cuda.is_available() and USE_CUDA else "cpu")
criterion = nn.CrossEntropyLoss()
criterion.to(device)

# The two loops used to share the name `i`; distinct names keep the same
# 3 x 4 = 12 independent runs without the shadowing.
for outer_run in range(1, 4):
    for inner_run in range(1, 5):
        model = MyLSTM(num_features)
        model.to(device)

        optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0004)
        # Validation bookkeeping; not updated anywhere in this cell.
        best_val_acc = 0.0
        valid_losses, valid_accuracies = [], []
        train_losses, train_accuracies = [], []
        training_brier = []

        t0 = time.time()
        for epoch in range(NUM_EPOCHS):
            train_loss, train_accuracy, brier = train(model, device, train_loader, criterion, optimizer, epoch, print_freq=len(train_loader)-1)
            train_losses.append(train_loss)
            training_brier.append(brier)
            train_accuracies.append(train_accuracy)
        time_one = time.time() - t0

        # Training metrics are averaged over all epochs of this run.
        print('NLL Loss is {}'.format(np.mean(train_losses)))
        print('Brier score is {}'.format(np.mean(training_brier)))
        print('Training time: {} seconds'.format(time_one))
        test_loss, test_accuracy, test_results = advEvaluate(model, device, test_loader, criterion, print_freq=len(test_loader)-1)
        print('OOD accuracy {}'.format(test_accuracy))
        test_loss, test_accuracy, test_results = evaluate(model, device, test_loader, criterion, print_freq=len(test_loader)-1)
        print('test accuracy {}'.format(test_accuracy))


NLL Loss is 0.24228817570935463
Brier score is 0.0031832573028838566
Training time: 407.3228073120117 seconds
Test: [0/9404]	Time 0.002 (0.002)	Loss 0.0000 (0.0000)	Accuracy 100.000 (100.000)
Test: [9403/9404]	Time 0.001 (0.001)	Loss 0.5987 (0.6799)	Accuracy 100.000 (78.126)
OOD accuracy 78.12632922160783
Test: [0/9404]	Time 0.001 (0.001)	Loss 0.0001 (0.0001)	Accuracy 100.000 (100.000)
Test: [9403/9404]	Time 0.001 (0.001)	Loss 0.0245 (0.3427)	Accuracy 100.000 (86.803)
test accuracy 86.80348787749894
NLL Loss is 0.23818524594632504
Brier score is 0.0031351280397109217
Training time: 407.1355719566345 seconds
Test: [0/9404]	Time 0.001 (0.001)	Loss 0.0000 (0.0000)	Accuracy 100.000 (100.000)
Test: [9403/9404]	Time 0.001 (0.001)	Loss 0.0007 (0.7287)	Accuracy 100.000 (77.435)
OOD accuracy 77.43513398553807
Test: [0/9404]	Time 0.002 (0.002)	Loss 0.0001 (0.0001)	Accuracy 100.000 (100.000)
Test: [9403/9404]	Time 0.001 (0.001)	Loss 0.0277 (0.3383)	Accuracy 100.000 (86.857)
test accuracy 86.85665

In [None]:
# Evaluate the current `model` on the perturbed test set, printing only the
# first and last batch; the bare expression makes the notebook display the accuracy.
test_loss, test_accuracy, test_results = advEvaluate(model, device, test_loader, criterion, print_freq = len(test_loader)-1)
test_accuracy

Test: [0/9404]	Time 0.002 (0.002)	Loss 0.0000 (0.0000)	Accuracy 100.000 (100.000)
Test: [9403/9404]	Time 0.001 (0.001)	Loss 0.0955 (0.6803)	Accuracy 100.000 (78.754)


78.75372182050191

In [None]:
# Evaluate the current `model` on the unmodified test set, printing only the
# first and last batch; the bare expression makes the notebook display the accuracy.
test_loss, test_accuracy, test_results = evaluate(model, device, test_loader, criterion, print_freq = len(test_loader)-1)
test_accuracy

Test: [0/9404]	Time 0.002 (0.002)	Loss 0.0003 (0.0003)	Accuracy 100.000 (100.000)
Test: [9403/9404]	Time 0.001 (0.001)	Loss 0.0121 (0.3304)	Accuracy 100.000 (87.069)


87.06933219906423