<a href="https://colab.research.google.com/github/yoheikikuta/DeepGamblers-tensorflow/blob/master/colab/experiment_CIFAR10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Prerequisites

- Set up to connect to Google Drive
- Tensorflow preparation

In [0]:
# Install the PyDrive wrapper & import libraries.
# This only needs to be done once per notebook.
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [2]:
from google import colab
colab.drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [3]:
from __future__ import absolute_import, division, print_function, unicode_literals

try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

TensorFlow 2.x selected.
Num GPUs Available:  1


In [4]:
tf.executing_eagerly()

True

In [0]:
import os

import numpy as np
import tensorflow as tf
from absl import app, flags, logging
from tensorflow.keras import layers

## Model Definition

In [0]:
class VGGBlock(layers.Layer):
    """VGG model building block
    if last_layer of the block:
        conv -> relu -> batchnorm -> pooling
    else:
        conv -> relu -> batchnorm -> dropout
    """
    def __init__(self, name=None, num_filter=32,
                 dropout_rate=0.3, is_last=False, **kwargs):
        super(VGGBlock, self).__init__(name=name, **kwargs)
        self.conv = layers.Conv2D(filters=num_filter, kernel_size=3, padding="same",
                                  kernel_regularizer=tf.keras.regularizers.l2(5e-4))
        self.relu = layers.ReLU()
        self.batchnorm = layers.BatchNormalization(epsilon=1e-5)
        self.dropout = layers.Dropout(dropout_rate)
        self.pooling = layers.MaxPool2D(pool_size=(2, 2))
        self.is_last = is_last

    def call(self, inputs, training=True):
        x = self.conv(inputs)
        x = self.relu(x)
        x = self.batchnorm(x, training)
        if self.is_last:
            x = self.pooling(x)
        else:
            x = self.dropout(x, training)
        return x


class VGGBuilder(tf.keras.Model):
    """VGG model builder"""
    def __init__(self, name="vgg16", **kwargs):
        super(VGGBuilder, self).__init__(name=name, **kwargs)
        if name == "vgg16":
            # block num, filter num, dropout rate
            self.structures = {"stage1": [2, 64, 0.3],
                               "stage2": [2, 128, 0.4],
                               "stage3": [3, 256, 0.4],
                               "stage4": [3, 512, 0.4],
                               "stage5": [3, 512, 0.4]}
            self.blocks = []
            for key in self.structures:
                num_block, num_filter, dropout_rate = self.structures[key]
                for idx, block in enumerate(range(num_block), start=1):
                    is_last = (num_block == idx)
                    self.blocks.append(VGGBlock(f"{key}_{idx}", num_filter,
                                                dropout_rate, is_last))
        self.dropout = layers.Dropout(0.5)
        self.flatten = layers.Flatten()
        self.dense = layers.Dense(512,
                                  kernel_regularizer=tf.keras.regularizers.l2(5e-4))
        self.relu = layers.ReLU()
        self.batchnorm = layers.BatchNormalization(epsilon=1e-5)

    def call(self, inputs, training=True):
        x = inputs
        for layer in self.blocks:
            x = layer(x, training)
        x = self.dropout(x, training)
        x = self.flatten(x)
        x = self.dense(x)
        x = self.relu(x)
        x = self.batchnorm(x, training)
        return x


class VGGClassifier(tf.keras.Model):
    """Classifier that outputs softmax probabilities"""
    def __init__(self, name="vgg16_classifier",
                 input_shapes=(32, 32, 3), num_classes=10, **kwargs):
        super(VGGClassifier, self).__init__(name=name, **kwargs)
        self.input_layer = layers.InputLayer(input_shape=input_shapes)
        self.vgg = VGGBuilder()
        self.dropout = layers.Dropout(0.5)
        self.dense = layers.Dense(num_classes,
                                  kernel_regularizer=tf.keras.regularizers.l2(5e-4))
        self.softmax = layers.Softmax()

    def call(self, inputs, training=True):
        x = self.input_layer(inputs)
        x = self.vgg(x, training)
        x = self.dropout(x, training)
        x = self.dense(x)
        # x = self.softmax(x)
        return x

In [0]:
# Gambler loss that proposed in the paper
def gambler_loss(model, x, y, o):
    # \sum_{i=1}^{m} y_i log(p_i + (1 / o) p_{m+1})
    EPS = 1e-5
    logit = model(x)
    prob = tf.nn.softmax(logit)
    prob = tf.clip_by_value(prob, EPS, 1.0 - EPS)
    class_pred, abstention = tf.split(prob, [prob.shape[1] - 1, 1], 1)
    abstention /= o
    weighted_prob = tf.concat([class_pred, abstention], 1)

    label_shape = y.shape
    extended_label = tf.concat([y, tf.constant(1.0, shape=[label_shape[0], 1])], 1)

    log_arg = tf.reduce_sum(extended_label * weighted_prob, 1)
    loss = -tf.reduce_mean(tf.math.log(log_arg))

    return loss


def cross_entropy_loss(model, x, y):
    logit = model(x)
    class_logit, abstention = tf.split(logit, [logit.shape[1] - 1, 1], 1)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=class_logit))

    return loss


def train(model, optimizer, trainset, o, epoch, pretrain_num):
    for step, (x_batch_train, y_batch_train) in enumerate(trainset):
        with tf.GradientTape() as tape:
            if epoch <= pretrain_num:
                loss = cross_entropy_loss(model, x_batch_train, y_batch_train)
            else:
                loss = gambler_loss(model, x_batch_train, y_batch_train, o)
            # print(loss)

        grads = tape.gradient(loss, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))


def evaluate(model, testset):
    training = False
    predictions = np.array([], dtype=np.float32).reshape(0, 11)
    answers = np.array([], dtype=np.int8).reshape(0)
    for (x_batch_test, y_batch_test) in testset:
        preds = model(x_batch_test, training)
        predictions = np.vstack([predictions, preds.numpy()])
        answers = np.hstack([answers, y_batch_test.numpy().flatten()])

    return predictions, answers


def data_augmentation(x):
    x = tf.image.random_flip_left_right(x)
    x = tf.pad(x, tf.constant([[2, 2], [2, 2], [0, 0]]), "REFLECT")
    x = tf.image.random_crop(x, size=[32, 32, 3])
    return x


def load_dataset():
    MEAN = tf.constant([0.4914, 0.4822, 0.4465], dtype=tf.float32)
    STD = tf.constant([0.2023, 0.1994, 0.2010], dtype=tf.float32)
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
    trainset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    trainset = trainset.map(
        lambda image, label: (
            data_augmentation((tf.cast(image, tf.float32) / 255.0) - MEAN / STD),
            tf.squeeze(tf.cast(tf.one_hot(label, depth=10), tf.float32)))
    ).shuffle(buffer_size=1024).batch(128)

    testset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    testset = testset.map(
        lambda image, label: (
            (tf.cast(image, tf.float32) / 255.0) - MEAN / STD,
            label)
    ).batch(128)

    return trainset, testset

## Training

In [0]:
DRIVE_DIR = "gdrive/My Drive/DeepGamblers/"

if not os.path.exists(DRIVE_DIR):
    os.makedirs(DRIVE_DIR)

In [0]:
trainset, testset = load_dataset()
vgg16 = VGGClassifier(num_classes=10 + 1)  # +1 is for abstention class
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2, momentum=0.9)
decay_rate = 0.5
root = tf.train.Checkpoint(optimizer=optimizer, model=vgg16)

for epoch in range(300):
    print(f"Start of epoch {epoch + 1}")
    if (epoch + 1) in [25, 50, 75, 100, 125, 150, 175, 200, 225, 250, 275]:
        optimizer.lr = optimizer.lr * decay_rate
    train(vgg16, optimizer, trainset, o=2.2, epoch = epoch + 1, pretrain_num = 100)
    if (epoch + 1) % 25 == 0:
        root.save(os.path.join(DRIVE_DIR, "ckpt"))

Start of epoch 1
Start of epoch 2
Start of epoch 3
Start of epoch 4
Start of epoch 5
Start of epoch 6
Start of epoch 7
Start of epoch 8
Start of epoch 9
Start of epoch 10
Start of epoch 11
Start of epoch 12
Start of epoch 13
Start of epoch 14
Start of epoch 15
Start of epoch 16
Start of epoch 17
Start of epoch 18
Start of epoch 19
Start of epoch 20
Start of epoch 21
Start of epoch 22
Start of epoch 23
Start of epoch 24
Start of epoch 25
Start of epoch 26
Start of epoch 27
Start of epoch 28
Start of epoch 29
Start of epoch 30
Start of epoch 31
Start of epoch 32
Start of epoch 33
Start of epoch 34
Start of epoch 35
Start of epoch 36
Start of epoch 37
Start of epoch 38
Start of epoch 39
Start of epoch 40
Start of epoch 41
Start of epoch 42
Start of epoch 43
Start of epoch 44
Start of epoch 45
Start of epoch 46
Start of epoch 47
Start of epoch 48
Start of epoch 49
Start of epoch 50
Start of epoch 51
Start of epoch 52
Start of epoch 53
Start of epoch 54
Start of epoch 55
Start of epoch 56
S

## Evaluation

In [9]:
CKPT_PATH = "gdrive/My Drive/DeepGamblers/ckpt-12"

optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2, momentum=0.9)
vgg16 = VGGClassifier(num_classes=10 + 1)  # +1 is for abstention class

ckpt = tf.train.Checkpoint(optimizer=optimizer, model=vgg16)
ckpt.restore(CKPT_PATH)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fcd0135d588>

In [10]:
trainset, testset = load_dataset()

predictions, answers = evaluate(ckpt.model, testset)
print(sum([np.argmax(elem) for elem in predictions] == answers))

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
8415


In [11]:
predictions.shape

(10000, 11)

In [0]:
probs = tf.nn.softmax(predictions).numpy()
result = np.hstack([probs, answers.reshape(len(probs), 1)])
result = result[result[:,-2].argsort()]  # Sort by abstention score.

In [0]:
coverage_list = [1.0, 0.95, 0.90, 0.85, 0.80, 0.75, 0.70]

In [55]:
for coverage in coverage_list:
    sub_result = result[:int(len(result) * coverage)]
    acc = sum([np.argmax(elem) for elem in sub_result[:, :-2]] == sub_result[:, -1])
    print(f"Coverage: {coverage:.2f}, Error: {(1.0 - acc / len(sub_result)) * 100:.2f}%")

Coverage: 1.00, Error: 12.93%
Coverage: 0.95, Error: 10.59%
Coverage: 0.90, Error: 8.58%
Coverage: 0.85, Error: 7.06%
Coverage: 0.80, Error: 5.93%
Coverage: 0.75, Error: 4.88%
Coverage: 0.70, Error: 4.06%


# Trial and Errors

In [0]:
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2, momentum=0.9)
vgg16 = VGGClassifier(num_classes=10 + 1)  # +1 is for abstention class

test = tf.train.Checkpoint(optimizer=optimizer, model=vgg16)

In [13]:
test.restore("./gdrive/My Drive/DeepGamblers/ckpt-4")

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fcb8004c080>

In [14]:
test.model

<__main__.VGGClassifier at 0x7fcb803cb0f0>

In [15]:
test.optimizer.lr

<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.000625>

In [59]:
predictions, answers = evaluate(test.model, testset)
print(sum([np.argmax(elem) for elem in predictions[:, :-1]] == answers) / len(predictions) )

0.8593
