In [0]:
%tensorflow_version 1.x

import numpy as np
import keras
from keras.datasets import mnist
import sys
import scipy
#%matplotlib inline
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.regularizers import l2
from keras import backend as K


# Colab-only: mount Google Drive at /content/gdrive. The data, index and result paths
# used later in this notebook all live under that mount point.
from google.colab import drive
drive.mount("/content/gdrive", force_remount=True)

Mounted at /content/gdrive


In [0]:


def predict_with_uncertainty(f, x, n_iter=100):
    """Function generating non-deterministic predictions using MC dropout and returning the mean of these predictions
    Adapted from: https://stackoverflow.com/questions/43529931/how-to-calculate-prediction-uncertainty-using-keras
    #Arguments:
        f: function mapping model input and Keras backend learning_phase flag to model output;
           it is called as f((x, 1)) and the first element of its return value is used
        x: input
        n_iter: number of repeated MC dropout predictions per point (must be at least 1)
    #Returns:
        Mean of the n_iter MC dropout predictions (float64 array with the shape of a single prediction)
    #Raises:
        ValueError: if n_iter is smaller than 1 (there would be nothing to average)
    """
    if n_iter < 1:
        raise ValueError("n_iter must be at least 1, got " + str(n_iter))
    result = None
    for i in range(n_iter):
        # The flag 1 is passed in the learning_phase slot on every call so that the
        # forward pass stays stochastic (Keras training phase).
        predictions = np.array(f((x, 1))[0])
        if result is None:
            # Size the float64 buffer from the first prediction instead of assuming
            # 10 output classes, so models with any output shape are supported.
            result = np.zeros((n_iter,) + predictions.shape)
        result[i] = predictions
    return result.mean(axis=0)

def create_dropout_masks(dim, num_masks, drop_prob):
    """Draws random inverted-dropout masks that are shared by all model inputs (used when estimating Var(Y_{sample}))
    Note: this function is specific to keras models used in this experiment
    #Arguments
        dim: shape of the array the masks will be multiplied with. A length-4 shape yields masks of
             shape (dim[0], 1, 1, dim[3]); any other shape is treated as 5-D and yields masks of
             shape (num_masks, 1, dim[2], dim[3], dim[4])
        num_masks: the number of fixed dropout masks to be created (J); only used for non-4-D dim
        drop_prob: the dropout probability
    #Returns
        numpy array whose entries are 0 (with probability drop_prob) or 1/(1-drop_prob)
    """
    keep_prob = 1-drop_prob
    if len(dim) == 4:
        mask_shape = (dim[0], 1, 1, dim[3])
    else:
        mask_shape = (num_masks, 1, dim[2], dim[3], dim[4])
    # 0/1 keep indicators, rescaled so that kept units are multiplied by 1/keep_prob
    keep_flags = np.random.choice(2, size=mask_shape, p=[drop_prob, keep_prob])
    return 1/keep_prob*keep_flags

def apply_dropout_masks(a,b):
    """Multiplies a and b element-wise (numpy broadcasting applies) and removes every length-1 axis from the product
    #Arguments
        a: array-like of layer outputs
        b: array-like of dropout masks, broadcastable against a
    #Returns
        the squeezed element-wise product as a numpy array
    """
    masked = np.multiply(a, b)
    return np.squeeze(masked)

def multi_mask_predict(layer_fn, multi_mask_input):
    """Runs layer_fn once per fixed dropout mask and stacks the results
    #Arguments
        layer_fn: keras backend function applying the mapping corresponding to keras model layers;
                  it is called as layer_fn((inputs, 1))
        multi_mask_input: numpy array whose first axis indexes the J fixed dropout masks
    #Returns
        numpy array built from the list of layer_fn return values, one entry per mask
    """
    mask_count = multi_mask_input.shape[0]
    per_mask_outputs = [
        layer_fn((multi_mask_input[mask_idx], 1))
        for mask_idx in range(mask_count)
    ]
    return np.array(per_mask_outputs)

def fixed_mask_forward_pass (model, forward_pass_input, num_masks, dropout_prob, conv_masks, dense_masks):
    r"""Makes model predictions with J dropout masks that are fixed across points to enable estimation of Var(p_{sample})
    Function is specific to the given Keras model: it addresses layers by index and relies on the
    layer order built in run_model (Dropout layers at indices 5 and 9).
    #Arguments
        model: keras model
        forward_pass_input: X_{sample}; a 3-D input gets a channel axis inserted at position 1
        num_masks: J, the number of dropout masks being used in estimation of Var(p_{sample}) and calculation of the EI acquisition function
                   (not read by this function; the number of masks is implied by conv_masks / dense_masks)
        dropout_prob: dropout probability (not read by this function; the masks are already scaled)
        conv_masks: first set of dropout masks, applied after MaxPooling2D layer
        dense_masks: second set of dropout masks, applied after the first Dense layer
    #Returns
        MC dropout predictions across sample points enabling estimation of Var(p_{sample}), i.e. \hat{p}_{sample}
    """
    # The network is split into three backend functions that skip the two Dropout
    # layers (indices 5 and 9); the fixed masks are applied by hand in between.
    conv = K.function([model.layers[0].input, K.learning_phase()],
                      [model.layers[4].output])

    dense_1 = K.function([model.layers[6].input, K.learning_phase()],
                         [model.layers[8].output])

    dense_2 = K.function([model.layers[10].input, K.learning_phase()],
                         [model.layers[11].output])
    arr_input = np.array(forward_pass_input)
    num_points = arr_input.shape[0]
    if len(arr_input.shape) == 3:
        forward_pass_input = np.expand_dims(forward_pass_input, axis=1)
    conv_output = np.array(conv((forward_pass_input, 1)))
    dense_1_input = apply_dropout_masks(conv_output, conv_masks)
    if num_points == 1:
        # apply_dropout_masks squeezes away the length-1 point axis; put it back so
        # each per-mask slice is still a batch of one point.
        dense_1_input = np.expand_dims(dense_1_input, axis=1)
    dense_1_output = multi_mask_predict(dense_1, dense_1_input)
    dense_2_input = apply_dropout_masks(dense_1_output, dense_masks)
    if num_points == 1:
        # Same re-insertion of the squeezed point axis as above.
        dense_2_input = np.expand_dims(dense_2_input, axis=1)
    dense_2_output = multi_mask_predict(dense_2, dense_2_input)
    return np.squeeze(dense_2_output)

def run_model (train_data_indices):
  """Trains a Keras model with the training points specified by train_data_indices and evaluates the model on test data
  Reads the module-level arrays X_train_All, y_train_All, X_test and y_test_original.
  #Arguments:
      train_data_indices: indices of current training points within X_train_All
  #Returns:
      Two-element list [test accuracy, trained Keras model]; the accuracy is computed from the
      mean of 100 MC dropout predictions per test point
  """
  nb_classes = 10
  # Insert the channel axis expected by the channels_first convolution layers.
  X_train = np.expand_dims(X_train_All[train_data_indices], axis=1)
  y_train = y_train_All[train_data_indices]
  y_train = keras.utils.to_categorical(y_train, num_classes=nb_classes)
  train_size = y_train.shape[0]
  # L2 penalty on the Dense kernels shrinks as the training set grows.
  Weight_Decay = 0.01/train_size
  dropout_prob = 0.25
  nb_filters = 40
  nb_pool = 3
  nb_conv = 4
  img_rows = img_cols = 28
  # NOTE: fixed_mask_forward_pass addresses these layers by index; keep the order in sync.
  model = Sequential()
  model.add(Convolution2D(nb_filters, kernel_size=nb_conv, strides=1, data_format="channels_first", input_shape=(1, img_rows, img_cols)))
  model.add(Activation('relu'))
  model.add(Convolution2D(nb_filters, kernel_size=nb_conv, strides=2, data_format = "channels_first"))
  model.add(Activation('relu'))

  model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool), data_format="channels_first"))
  model.add(Dropout(dropout_prob))

  model.add(Flatten())
  # kernel_regularizer is the Keras 2 name of the former W_regularizer argument.
  model.add(Dense(128, kernel_regularizer=l2(Weight_Decay)))
  model.add(Activation('relu'))
  model.add(Dropout(dropout_prob))
  model.add(Dense(nb_classes, kernel_regularizer=l2(Weight_Decay)))
  model.add(Activation('softmax'))

  model.compile(loss='categorical_crossentropy', optimizer='adam')
  model.fit(X_train, y_train, epochs=300, batch_size=128, verbose=0)
  # Backend function taking the learning-phase flag explicitly, used for MC dropout at test time.
  f = K.function([model.layers[0].input,K.learning_phase()],[model.layers[-1].output])
  y_test_output = predict_with_uncertainty(f, X_test, n_iter=100)
  y_test_predictions = np.argmax(y_test_output, axis=1)
  return [np.sum(y_test_predictions==y_test_original)/(y_test_original.shape[0]), model]

def ei_batch_acquisition_unweighted(forward_pass_results, num_pool_samples, num_training_samples, tau_inverse, batch_size):
  """Given Var(p_{sample}), applies batch-mode EI active learning to query points
  #Arguments
    forward_pass_results: Fixed-mask predictions across sample points used in estimating Var(p_{sample})
                          (shape: [num_masks, num_points, num_classes]); the training points must come
                          first along axis 1, followed by the pool points. Any number of classes is supported.
    num_pool_samples: number of pool points in D_{sample}
    num_training_samples: number of training points in D_{sample}
    tau_inverse: smoothing parameter added to the diagonal of the empirical covariance
    batch_size: number of queried points per batch
  #Returns
    Two-element list [acq_vals, acq_ind]: the variance reductions for queried points and the indices of
    queried pool points (0-based positions among the pool points, in order of acquisition)
  #Raises
    IndexError: if more points are requested than there are pool points left to acquire
  """
  num_total_points = forward_pass_results.shape[1]
  num_masks = forward_pass_results.shape[0]
  flattened_forward_pass_results = forward_pass_results.reshape((num_masks, -1))
  # Number of outputs per point (10 for the MNIST model); derived from the data
  # rather than hard-coded so other class counts work as well.
  num_outputs = flattened_forward_pass_results.shape[1] // num_total_points
  total_dim = num_outputs*num_total_points
  univ_cov = np.cov(flattened_forward_pass_results, rowvar=False) + (tau_inverse*np.identity(total_dim))
  # Tiny diagonal term re-added after every update (presumably to keep the per-point
  # blocks invertible).
  delta_identity = 10.0**(-15.0)
  jitter = delta_identity * np.identity(univ_cov.shape[0])
  univ_covariance = univ_cov + jitter
  #Now starting batch mode acquisition loop
  print('univ covariance shape: ' + str(univ_covariance.shape))
  acq_ind = []
  acq_vals = []
  acquired = set()  # same content as acq_ind, for O(1) membership tests
  for acq_num in range(batch_size):
    # Already-acquired points keep a score of 0.0.
    all_acq_values = np.zeros(num_pool_samples)
    for new_pt_ind in range(num_pool_samples):
      if new_pt_ind in acquired:
        continue
      # Pool points are stored after the training points, one block of num_outputs columns each.
      start_ind = num_outputs*(num_training_samples+new_pt_ind)
      block = slice(start_ind, start_ind+num_outputs)
      new_pt_cov_inv = np.linalg.inv(univ_covariance[block, block])
      covariance_matrix = univ_covariance[:, block]
      first_matrix = np.matmul(covariance_matrix, new_pt_cov_inv)
      # Sum of element-wise products == trace of (C K^-1 C^T): total variance removed by this point.
      all_acq_values[new_pt_ind] = np.sum(np.multiply(first_matrix, covariance_matrix))
    sorted_top_ind = np.flip(np.argsort(all_acq_values))
    # Take the best-scoring point that has not been acquired yet.
    for new_top_ind in sorted_top_ind:
      if new_top_ind not in acquired:
        break
    else:
      raise IndexError('batch_size (' + str(batch_size) + ') exceeds the number of pool points ('
                       + str(num_pool_samples) + ')')
    acq_ind.append(new_top_ind)
    acq_vals.append(all_acq_values[new_top_ind])
    acquired.add(int(new_top_ind))
    # Condition the covariance on the newly acquired point (Schur-complement style update).
    update_start_ind = num_outputs*(num_training_samples+acq_ind[-1])
    update_block = slice(update_start_ind, update_start_ind+num_outputs)
    top_cov_matrix = univ_covariance[:, update_block]
    top_pt_cov_inv = np.linalg.inv(univ_covariance[update_block, update_block])
    first_matrix = np.matmul(top_cov_matrix, top_pt_cov_inv)
    univ_covariance = univ_covariance - np.matmul(first_matrix, top_cov_matrix.T) + jitter
  return [acq_vals, acq_ind]




In [0]:
#Active learning parameters/settings
# num_cand: number of pool candidates kept per acquisition round (the experiment loop
# draws int(num_cand*1.2) positions and truncates the pool part to num_cand).
num_cand = 2000
# batch_size: number of pool points queried per acquisition round.
batch_size = 100
# dropout_prob: used by the experiment loop to draw the fixed dropout masks.
# NOTE(review): run_model sets its own local dropout_prob (also 0.25); keep the two in sync.
dropout_prob = 0.25
# num_experiments: number of independent runs, each starting from its own saved index file.
num_experiments = 3
# num_acquisitions: total points to acquire per experiment.
# NOTE(review): the experiment loop re-assigns this to 1000 at the start of every experiment.
num_acquisitions = 1000 
# num_masks: number of fixed dropout masks (J) per forward pass.
num_masks = 50

#Note: tau_inverse is set to 0.00003 in paper results
# NOTE(review): the value below is 0.0003, ten times the value quoted above — confirm which is intended.
tau_inverse = 0.0003

#Keras Model Parameters
# NOTE(review): run_model defines its own local copies of these parameters and does not read
# the module-level ones; in particular it uses nb_pool = 3, not the nb_pool = 2 set here.
num_classes = 10
nb_filters = 40
nb_pool = 2
nb_conv = 4
img_rows = img_cols = 28

In [0]:
#Loading data
# Reads the MNIST train/test CSV files from the mounted Drive folder. In each row the
# first column is the label and the remaining columns are the pixels of one 28x28 image.

data_path = "/content/gdrive/My Drive/FINAL_PAPER_ACTIVE_LEARNING_EXP/MNIST/"
train_data = np.loadtxt(data_path + "mnist_train.csv", 
		  delimiter=",")
test_data = np.loadtxt(data_path + "mnist_test.csv", 
		 delimiter=",") 
y_train_All = train_data[:,0]
y_test = test_data[:,0]
# The reshapes hard-code 60000 training and 10000 test images.
# NOTE(review): pixel values are used exactly as loaded; no rescaling is applied in this notebook.
X_train_All = train_data[:,1:].reshape((60000,28,28))
X_test = test_data[:,1:].reshape((10000,28,28)) 
train_ind = np.arange(60000)
test_ind = np.arange(10000)

# y_test_original keeps the raw label column; run_model compares predictions against it.
y_test_original = y_test
y_test = keras.utils.to_categorical(y_test, num_classes=10)
# Insert the channel axis: X_test becomes (10000, 1, 28, 28) for the channels_first model.
X_test = np.expand_dims(X_test[test_ind], axis=1)
# folder_path holds the saved initial training-index files; out_folder_path receives the results.
folder_path = "/content/gdrive/My Drive/FINAL_PAPER_ACTIVE_LEARNING_EXP/MNIST/MultiClass_AL_Scripts_v1/"
out_folder_path = "/content/gdrive/My Drive/FINAL_PAPER_ACTIVE_LEARNING_EXP/MNIST/MultiClass_AL_Results_v1/"

print('Running')	  
# One entry per experiment: the training indices and the accuracy history.
all_tr_ind = []
all_acc = []

#Iterating across experiments, each of which begins with a different training set (that is balanced across classes)
# Per experiment: train on the saved initial indices, then repeat
# (sample candidates -> fixed-mask forward pass -> EI batch acquisition -> retrain -> save).

for e in range(num_experiments):
  # Overrides the module-level num_acquisitions for every experiment.
  num_acquisitions = 1000
  acc_file = "EIAccBS"+str(batch_size)+"Ind"+str(e+1)+".npy"
  ind_file = "EIIndBS"+str(batch_size)+"Ind"+str(e+1)+".npy"
  exp_acc = []
  # The two commented-out loads below (together with the commented num_acquisitions
  # adjustment further down) appear to be for resuming an interrupted experiment from saved results.
  #exp_acc = list(np.load(out_folder_path+acc_file))
  #train_data_indices = list(np.load(out_folder_path+ind_file))
  train_data_indices = list(np.load(folder_path + 'trainindices' + str(e+1) + '.npy'))
  # Pool = every training-set index that is not in the initial training set.
  pool_indices = [i for i in train_ind if i not in train_data_indices]
  #Evaluating initial model
  # model_results is [test accuracy, trained model].
  model_results = run_model(train_data_indices)
  print('trained model')
  exp_acc.append(model_results[0])
  all_acc.append(exp_acc)
  all_tr_ind.append(train_data_indices)
  print('Initial Acc: ' + str(all_acc))
  #num_acquisitions = num_acquisitions - batch_size * (len(exp_acc)-1)
  #Looping over acquisition iterations
  for acq in range(num_acquisitions//batch_size):
    #Randomly selecting sample points
    # Draw distinct positions from the combined range of training and pool points:
    # positions below len(train_data_indices) refer to training points, the rest to pool points.
    all_ind_ind = np.random.choice(len(pool_indices)+len(train_data_indices), int(num_cand*1.2), replace=False)
    # Ratio of drawn pool positions to num_cand; the drawn training positions are cut down
    # by the same ratio (presumably to keep the train:pool proportion after the pool part
    # is truncated to num_cand).
    pool_sample_ratio = len(all_ind_ind[all_ind_ind>=len(train_data_indices)])/num_cand
    train_sample_size = len(all_ind_ind[all_ind_ind<len(train_data_indices)])
    train_ind_ind = all_ind_ind[all_ind_ind<len(train_data_indices)][0:int(train_sample_size//pool_sample_ratio)]
    # Shift pool positions to be 0-based within pool_indices and keep at most num_cand of them.
    pool_ind_ind = (all_ind_ind[all_ind_ind>=len(train_data_indices)] - len(train_data_indices))[0:num_cand]
    pool_ind_sample = np.array(pool_indices)[pool_ind_ind]
    train_ind_sample = np.array(train_data_indices)[train_ind_ind]
    # Training points first, then pool points: ei_batch_acquisition_unweighted relies on this order.
    X_sample = np.concatenate((np.expand_dims(X_train_All[train_ind_sample],axis=1), np.expand_dims(X_train_All[pool_ind_sample], axis=1)))
    # Fixed masks shared by all points; entries are 0 or 1/(1-dropout_prob). The shapes are
    # hard-coded for this model (40x3x3 after pooling, 128 units in the first Dense layer).
    conv_masks = 1/(1-dropout_prob)*np.random.choice(2, size=((num_masks, 1, 40,3,3)), p=[dropout_prob, 1-dropout_prob])
    dense_masks = 1/(1-dropout_prob)*np.random.choice(2, size=((num_masks, 1, 1, 128)), p=[dropout_prob, 1-dropout_prob])
    pool_forward_pass_results = fixed_mask_forward_pass(model_results[1], X_sample, num_masks, dropout_prob, conv_masks, dense_masks)  
    #EI batch-mode acquisition
    # batch_results is [acquisition values, positions within pool_ind_sample].
    batch_results = ei_batch_acquisition_unweighted(pool_forward_pass_results, len(pool_ind_sample), len(train_ind_sample), tau_inverse, batch_size)
    batch_ind = pool_ind_sample[batch_results[1]]
    #Acquiring new points, retraining model, and saving results
    train_data_indices = list(train_data_indices) + list(batch_ind)
    # Set difference: the ordering of pool_indices is not preserved from here on.
    pool_indices = list(set(pool_indices) - set(batch_ind))      
    # Retrain with the enlarged training set.
    model_results = run_model(train_data_indices)
    exp_acc.append(model_results[0])
    all_acc[-1] = exp_acc
    all_tr_ind[-1] = train_data_indices
    # Results are written after every round, overwriting the previous files for this experiment.
    np.save(out_folder_path+acc_file, np.array(exp_acc))
    np.save(out_folder_path+ind_file, np.array(train_data_indices))
    print('Exp ' + str(e+1)+', Exp Acc: ' + str(exp_acc) + ', num training points: ' + str(len(train_data_indices)))
    #print('All Ind: ' + str(all_tr_ind))
  print('All Acc: ' + str(all_acc))



Running




trained model
Initial Acc: [[0.5465]]
avg variance: 0.03276441785917519
univ covariance shape: (20000, 20000)




Exp 1, Exp Acc: [0.5465, 0.6711], num training points: 130
avg variance: 0.019706579659053477
univ covariance shape: (20000, 20000)




Exp 1, Exp Acc: [0.5465, 0.6711, 0.852], num training points: 230
avg variance: 0.014720808856502796
univ covariance shape: (20040, 20040)




Exp 1, Exp Acc: [0.5465, 0.6711, 0.852, 0.8793], num training points: 330
avg variance: 0.013212270576527277
univ covariance shape: (20130, 20130)




Exp 1, Exp Acc: [0.5465, 0.6711, 0.852, 0.8793, 0.9191], num training points: 430
avg variance: 0.011004467864818507
univ covariance shape: (20150, 20150)




Exp 1, Exp Acc: [0.5465, 0.6711, 0.852, 0.8793, 0.9191, 0.9097], num training points: 530
avg variance: 0.011349830316401409
univ covariance shape: (20190, 20190)




Exp 1, Exp Acc: [0.5465, 0.6711, 0.852, 0.8793, 0.9191, 0.9097, 0.9347], num training points: 630
avg variance: 0.008498124516325025
univ covariance shape: (20260, 20260)




Exp 1, Exp Acc: [0.5465, 0.6711, 0.852, 0.8793, 0.9191, 0.9097, 0.9347, 0.9448], num training points: 730
avg variance: 0.009095591565141553
univ covariance shape: (20210, 20210)




Exp 1, Exp Acc: [0.5465, 0.6711, 0.852, 0.8793, 0.9191, 0.9097, 0.9347, 0.9448, 0.9507], num training points: 830
avg variance: 0.008186802442473745
univ covariance shape: (20330, 20330)




Exp 1, Exp Acc: [0.5465, 0.6711, 0.852, 0.8793, 0.9191, 0.9097, 0.9347, 0.9448, 0.9507, 0.9481], num training points: 930
avg variance: 0.008035829942750393
univ covariance shape: (20290, 20290)




Exp 1, Exp Acc: [0.5465, 0.6711, 0.852, 0.8793, 0.9191, 0.9097, 0.9347, 0.9448, 0.9507, 0.9481, 0.9525], num training points: 1030
All Acc: [[0.5465, 0.6711, 0.852, 0.8793, 0.9191, 0.9097, 0.9347, 0.9448, 0.9507, 0.9481, 0.9525]]




trained model
Initial Acc: [[0.5465, 0.6711, 0.852, 0.8793, 0.9191, 0.9097, 0.9347, 0.9448, 0.9507, 0.9481, 0.9525], [0.614]]
avg variance: 0.02993812783466699
univ covariance shape: (20000, 20000)




Exp 2, Exp Acc: [0.614, 0.5909], num training points: 130
avg variance: 0.02750636896233875
univ covariance shape: (20040, 20040)




Exp 2, Exp Acc: [0.614, 0.5909, 0.8434], num training points: 230
avg variance: 0.01831503749402304
univ covariance shape: (20050, 20050)




Exp 2, Exp Acc: [0.614, 0.5909, 0.8434, 0.8986], num training points: 330
avg variance: 0.013000892121270494
univ covariance shape: (20120, 20120)




Exp 2, Exp Acc: [0.614, 0.5909, 0.8434, 0.8986, 0.8979], num training points: 430
avg variance: 0.011884956047592298
univ covariance shape: (20110, 20110)




Exp 2, Exp Acc: [0.614, 0.5909, 0.8434, 0.8986, 0.8979, 0.9239], num training points: 530
avg variance: 0.011010424422673649
univ covariance shape: (20190, 20190)




Exp 2, Exp Acc: [0.614, 0.5909, 0.8434, 0.8986, 0.8979, 0.9239, 0.9391], num training points: 630
avg variance: 0.009863966500818253
univ covariance shape: (20210, 20210)




Exp 2, Exp Acc: [0.614, 0.5909, 0.8434, 0.8986, 0.8979, 0.9239, 0.9391, 0.9191], num training points: 730
avg variance: 0.011171212445273285
univ covariance shape: (20240, 20240)




Exp 2, Exp Acc: [0.614, 0.5909, 0.8434, 0.8986, 0.8979, 0.9239, 0.9391, 0.9191, 0.9416], num training points: 830
avg variance: 0.008887506716730786
univ covariance shape: (20330, 20330)




Exp 2, Exp Acc: [0.614, 0.5909, 0.8434, 0.8986, 0.8979, 0.9239, 0.9391, 0.9191, 0.9416, 0.9444], num training points: 930
avg variance: 0.008504692224848167
univ covariance shape: (20350, 20350)




Exp 2, Exp Acc: [0.614, 0.5909, 0.8434, 0.8986, 0.8979, 0.9239, 0.9391, 0.9191, 0.9416, 0.9444, 0.948], num training points: 1030
All Acc: [[0.5465, 0.6711, 0.852, 0.8793, 0.9191, 0.9097, 0.9347, 0.9448, 0.9507, 0.9481, 0.9525], [0.614, 0.5909, 0.8434, 0.8986, 0.8979, 0.9239, 0.9391, 0.9191, 0.9416, 0.9444, 0.948]]




trained model
Initial Acc: [[0.5465, 0.6711, 0.852, 0.8793, 0.9191, 0.9097, 0.9347, 0.9448, 0.9507, 0.9481, 0.9525], [0.614, 0.5909, 0.8434, 0.8986, 0.8979, 0.9239, 0.9391, 0.9191, 0.9416, 0.9444, 0.948], [0.5878]]
avg variance: 0.03217607389484551
univ covariance shape: (20000, 20000)




Exp 3, Exp Acc: [0.5878, 0.6973], num training points: 130
avg variance: 0.02259996522095504
univ covariance shape: (20050, 20050)




Exp 3, Exp Acc: [0.5878, 0.6973, 0.8759], num training points: 230
avg variance: 0.015183560987675905
univ covariance shape: (20080, 20080)




Exp 3, Exp Acc: [0.5878, 0.6973, 0.8759, 0.9229], num training points: 330
avg variance: 0.011437766481200356
univ covariance shape: (20100, 20100)




Exp 3, Exp Acc: [0.5878, 0.6973, 0.8759, 0.9229, 0.9012], num training points: 430
avg variance: 0.01237723466284134
univ covariance shape: (20160, 20160)




Exp 3, Exp Acc: [0.5878, 0.6973, 0.8759, 0.9229, 0.9012, 0.9169], num training points: 530
avg variance: 0.009604966772763547
univ covariance shape: (20170, 20170)




Exp 3, Exp Acc: [0.5878, 0.6973, 0.8759, 0.9229, 0.9012, 0.9169, 0.9381], num training points: 630
avg variance: 0.010269237592519706
univ covariance shape: (20160, 20160)




Exp 3, Exp Acc: [0.5878, 0.6973, 0.8759, 0.9229, 0.9012, 0.9169, 0.9381, 0.9359], num training points: 730
avg variance: 0.009566059040680792
univ covariance shape: (20270, 20270)




Exp 3, Exp Acc: [0.5878, 0.6973, 0.8759, 0.9229, 0.9012, 0.9169, 0.9381, 0.9359, 0.9523], num training points: 830
avg variance: 0.007180861921966668
univ covariance shape: (20280, 20280)




Exp 3, Exp Acc: [0.5878, 0.6973, 0.8759, 0.9229, 0.9012, 0.9169, 0.9381, 0.9359, 0.9523, 0.9497], num training points: 930
avg variance: 0.008586201198452787
univ covariance shape: (20370, 20370)




Exp 3, Exp Acc: [0.5878, 0.6973, 0.8759, 0.9229, 0.9012, 0.9169, 0.9381, 0.9359, 0.9523, 0.9497, 0.9414], num training points: 1030
All Acc: [[0.5465, 0.6711, 0.852, 0.8793, 0.9191, 0.9097, 0.9347, 0.9448, 0.9507, 0.9481, 0.9525], [0.614, 0.5909, 0.8434, 0.8986, 0.8979, 0.9239, 0.9391, 0.9191, 0.9416, 0.9444, 0.948], [0.5878, 0.6973, 0.8759, 0.9229, 0.9012, 0.9169, 0.9381, 0.9359, 0.9523, 0.9497, 0.9414]]
