In [0]:
!pip install -q tensorflow-gpu==1.14

In [0]:
import os
import shutil

import numpy as np
np.random.seed(seed=0)

import tensorflow as tf
import tensorflow.contrib.eager as tfe

tf.enable_eager_execution()
tf.set_random_seed(0)

import pandas as pd
import sklearn
import time

from IPython.display import clear_output

results_dir = './'
model_dir = './models/'

# The global step lives in the TensorFlow runtime, not in the notebook
# namespace. Fetching it (or creating it on first use) and resetting it makes
# the cell safe to re-run, including after the Python name `step_counter` has
# been cleared while the runtime still holds the variable.
step_counter = tf.train.get_or_create_global_step()
step_counter.assign(0)

In [0]:
#@title CelebA dataset download
#https://gist.github.com/charlesreid1/4f3d676b33b95fce83af08e4ec261822
import requests

def download_file_from_google_drive(id, destination):
  """Downloads a file shared on Google Drive to a local path

  A first request is sent with the file identifier. If the response carries a
  'download_warning' cookie, its value is sent back as the 'confirm' parameter
  in a second request. The final response body is streamed to disk.

  Args:
    id: The Google Drive identifier of the file
    destination: The local path the content is written to

  Raises:
    requests.HTTPError: If the final response has a 4xx/5xx status, so that an
    error page is not silently stored in place of the requested file
  """
  def get_confirm_token(response):
    # The confirmation token is the value of the 'download_warning*' cookie
    for key, value in response.cookies.items():
      if key.startswith('download_warning'):
        return value

    return None

  def save_response_content(response, destination):
    CHUNK_SIZE = 32768

    with open(destination, "wb") as f:
      for chunk in response.iter_content(CHUNK_SIZE):
        if chunk: # filter out keep-alive new chunks
          f.write(chunk)

  URL = "https://docs.google.com/uc?export=download"

  # The context manager releases the pooled connections once the download is
  # over, even when an exception is raised
  with requests.Session() as session:
    response = session.get(URL, params={'id': id}, stream=True)
    token = get_confirm_token(response)

    if token:
      # The first (streamed) response is not read: release it before retrying
      response.close()
      params = {'id': id, 'confirm': token}
      response = session.get(URL, params=params, stream=True)

    try:
      response.raise_for_status()
      save_response_content(response, destination)
    finally:
      response.close()

# Local file name -> Google Drive identifier of the corresponding CelebA file
files = {'readme.txt': '0B7EVK8r0v71pOXBhSUdJWU1MYUk',
         'celeba.zip': '0B7EVK8r0v71pZjFTYXZWM3FlRnM',
         'list_landmarks_align_celeba.txt': '0B7EVK8r0v71pd0FJY3Blby1HUTQ',
         'list_attr_celeba.txt': '0B7EVK8r0v71pblRyaVFSWGxPY0U',
         'list_eval_partition.txt': '0B7EVK8r0v71pY0NSMzRuSXJEVkk'}

for filename, drive_id in files.items():
  download_file_from_google_drive(drive_id, filename)

# Shell post-processing: unpack the images, then for both annotation files
# drop the first line and collapse runs of spaces into a single space
commands = ['unzip -oq celeba.zip > /dev/null',
            "sed -i '1d' list_landmarks_align_celeba.txt",
            "sed -i 's/  */ /g' list_landmarks_align_celeba.txt",
            "sed -i '1d' list_attr_celeba.txt",
            "sed -i 's/  */ /g' list_attr_celeba.txt"]

shell = get_ipython()
for command in commands:
  shell.system_raw(command)

The CelebA dataset is made available in a Google Drive folder which is subject to daily quotas. It might be temporarily unavailable until the quotas are replenished.

# Data preprocessing

In [0]:
# The binary CelebA attribute predicted by the primary task
attribute = 'Attractive'
# Number of differently initialised models every configuration is run with
iterations = 10

# Landmark coordinates, rescaled column-wise: columns at even positions are
# divided by 178 and columns at odd positions by 218 (presumably the width and
# height in pixels of the aligned images - TODO confirm)
landmarks_df = pd.read_csv('list_landmarks_align_celeba.txt', sep=' ')
landmarks_df.iloc[:, 0::2] = landmarks_df.iloc[:, 0::2]/178
landmarks_df.iloc[:, 1::2] = landmarks_df.iloc[:, 1::2]/218

# Attribute table: the column names are shifted by one position so that the
# first column is called 'name' and can be used as index. Only the selected
# attribute is kept and its -1 labels are recoded as 0.
attributes_df = pd.read_csv('list_attr_celeba.txt', sep=' ')
attributes_df.columns = ['name'] + list(attributes_df.columns)[:-1]
attributes_df.set_index('name', inplace=True)
attributes_df = attributes_df[[attribute]]
attributes_df.replace(to_replace=-1, value=0, inplace=True)

# Partition table: 'set' takes the values 0, 1 and 2, used below to select the
# training, validation and test examples respectively
eval_df = pd.read_csv('list_eval_partition.txt', sep=' ', names=['name', 'set'])
eval_df.set_index('name', inplace=True)

# 10000 examples are drawn from each partition with a fixed random state; the
# labels are looked up with the index of the sampled landmark rows
landmarks_train = landmarks_df[eval_df['set'] == 0].sample(n=10000,
                                                           random_state=0)
attributes_train = attributes_df.loc[landmarks_train.index]

landmarks_val = landmarks_df[eval_df['set'] == 1].sample(n=10000,
                                                         random_state=0)
attributes_val = attributes_df.loc[landmarks_val.index]

landmarks_test = landmarks_df[eval_df['set'] == 2].sample(n=10000,
                                                          random_state=0)
attributes_test = attributes_df.loc[landmarks_test.index]

def images(filenames):
  """Loads images as a single batch of 40x40 grayscale tensors

  Each file is read from 'img_align_celeba/', decoded as JPEG, resized to
  40x40 with bilinear interpolation and converted to grayscale when it has
  three channels.

  Args:
    filenames: An iterable of file names relative to 'img_align_celeba/'

  Returns:
    A float32 tensor with one 40x40 single-channel image per file name, with
    the pixel values divided by 255. An empty batch is returned when no file
    name is given.
  """
  frames = []
  for filename in filenames:
    image = tf.read_file('img_align_celeba/' + filename)
    image = tf.image.decode_jpeg(image)
    image = tf.image.resize_bilinear(tf.expand_dims(image, 0), [40, 40])
    if image.shape[3].value == 3:
      image = tf.image.rgb_to_grayscale(image)
    frames.append(image)
  if not frames:
    return tf.zeros([0, 40, 40, 1], dtype=tf.float32)
  # A single concatenation avoids copying the growing batch at every image
  return tf.constant(tf.concat(frames, axis=0)/255, dtype=tf.float32)

# Inputs: the images of the sampled examples of each partition
x_train = images(landmarks_train.index)
x_val = images(landmarks_val.index)
x_test = images(landmarks_test.index)

# Primary targets: the selected binary attribute
y_train = tf.constant(np.array(attributes_train), dtype=tf.float32)
y_val = tf.constant(np.array(attributes_val), dtype=tf.float32)
y_test = tf.constant(np.array(attributes_test), dtype=tf.float32)

# Auxiliary targets: the rescaled landmarks, only needed for training
z_train = tf.constant(np.array(landmarks_train), dtype=tf.float32)

# Training batches of 100 examples, shuffled with a buffer of 1000 elements
ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train, z_train))
ds_train = ds_train.shuffle(1000).batch(100)

In [0]:
#@title Visualization of examples

import matplotlib
import matplotlib.cm as cm
import matplotlib.pyplot as plt
matplotlib.rcParams['figure.figsize'] = 12, 4

def show(element, title=True):
  """Draws an image with its landmarks on the current matplotlib axes

  Args:
    element: A sequence starting with the image tensor, optionally followed by
    the true landmarks and the label, and then by the predicted landmarks
    title: Whether to display the value of the attribute as a title
  """
  side = 40
  pixels = element[0].numpy()
  n_fields = len(element)
  if n_fields > 1:
    landmarks, label = element[1], element[2]
    # True landmarks: green dots, coordinates scaled back to pixels
    true_style = dict(marker='o',
                      markerfacecolor='g',
                      markeredgecolor='g',
                      markersize=5,
                      linestyle='None')
    plt.plot(landmarks[0::2]*side, landmarks[1::2]*side, **true_style)
    if title:
      answer = {1: 'Yes', 0: 'No'}[label[0]]
      plt.title('%s: %s' % (attribute.replace('_', ' '), answer),
                fontdict={'weight': 'bold'})
  if n_fields > 3:
    # Predicted landmarks: red dots
    predicted = element[3]
    plt.plot(predicted[0::2]*side,
             predicted[1::2]*side,
             marker='o',
             markerfacecolor='r',
             linestyle='None')
  plt.imshow(pixels.reshape([side, side]), cmap=cm.gray)

  plt.axis('off')
  plt.grid(False)

# Four panels side by side: for two training examples, the original image
# followed by its 40x40 grayscale version with the landmarks
gs = matplotlib.gridspec.GridSpec(1, 4, width_ratios=[178, 218, 178, 218]) 
# First example: original image
i = 20
ax0 = plt.subplot(gs[0])
filename = landmarks_train.index[i]
image = tf.read_file('img_align_celeba/' + filename)
image = tf.image.decode_jpeg(image)
ax0.imshow(image)
ax0.axis('off')
ax0.grid(False)

# First example: preprocessed image. plt.subplot makes the panel current so
# that show() draws into it.
ax1 = plt.subplot(gs[1])
image = x_train[i, :, : , :]
lf = np.array(landmarks_train)[i, :]
c = np.array(attributes_train)[i, :]
show((image, lf, c), title=False)

# Second example: original image
i = 105
ax0 = plt.subplot(gs[2])
filename = landmarks_train.index[i]
image = tf.read_file('img_align_celeba/' + filename)
image = tf.image.decode_jpeg(image)
ax0.imshow(image)
ax0.axis('off')
ax0.grid(False)

# Second example: preprocessed image
ax1 = plt.subplot(gs[3])
image = x_train[i, :, : , :]
lf = np.array(landmarks_train)[i, :]
c = np.array(attributes_train)[i, :]
show((image, lf, c), title=False)

plt.tight_layout()
plt.savefig('faces.png', bbox_inches='tight')

# Model architecture

In [0]:
class FaceModel(tf.keras.Model):
  """Convolutional network with a primary head and an auxiliary head

  Four convolutions (the first three followed by max pooling) and a dense
  layer are shared by two sigmoid heads: a single unit for the primary task
  and one unit per column of landmarks_df for the auxiliary task.
  """
  def __init__(self):
    """Creates the shared layers and the two output heads"""
    super(FaceModel, self).__init__()
    kwargs = {'activation': 'relu'}
    self.conv1 = tf.keras.layers.Conv2D(16, 5, **kwargs)
    self.conv2 = tf.keras.layers.Conv2D(48, 3, **kwargs)
    self.conv3 = tf.keras.layers.Conv2D(64, 3, **kwargs)
    self.conv4 = tf.keras.layers.Conv2D(64, 2, **kwargs)
    # One pooling layer instance is reused after conv1, conv2 and conv3
    self.max = tf.keras.layers.MaxPooling2D()
    self.flatten = tf.keras.layers.Flatten()
    self.dense1 = tf.keras.layers.Dense(100, **kwargs)
    self.dense2_primary = tf.keras.layers.Dense(1, activation='sigmoid')
    self.dense2_auxiliary = tf.keras.layers.Dense(len(landmarks_df.columns),
                                                  activation='sigmoid')
  
  def call(self, img):
    """Computes the outputs of both heads

    Args:
      img: A batch of images

    Returns:
      A pair made of the primary head output and the auxiliary head output
    """
    x = self.max(self.conv1(img))
    x = self.max(self.conv2(x))
    x = self.max(self.conv3(x))
    x = self.conv4(x)
    x = self.flatten(x)
    x = self.dense1(x)
    return self.dense2_primary(x), self.dense2_auxiliary(x)

def cross_entropy(y, y_hat):
  """Computes the mean binary cross-entropy

  The probabilities are mapped to [eps, 1] before taking the logarithm, which
  keeps the loss finite when a prediction is exactly 0 or 1.

  Args:
    y: The binary targets
    y_hat: The predicted probabilities

  Returns:
    A scalar tensor containing the average loss
  """
  eps = 1e-6
  shrink = 1 - eps
  log_p_positive = tf.math.log(shrink*y_hat + eps)
  log_p_negative = tf.math.log(shrink*(1 - y_hat) + eps)
  return -tf.math.reduce_mean(y*log_p_positive + (1-y)*log_p_negative)

In [0]:
import os
# Every run index gets its own initial weights, stored once and reloaded by
# all the configurations. The check is made per file rather than on the
# directory, so that an interrupted run or a larger value of `iterations` only
# creates the files that are missing.
os.makedirs(model_dir, exist_ok=True)
for i in range(iterations):
  weights_path = model_dir + 'model_%i.h5' % i
  if os.path.exists(weights_path):
    continue
  model = FaceModel()
  # A forward pass on one image creates the variables before saving them
  _ = model(x_val[:1, :, : ,:])
  model.save_weights(weights_path)
  del model

# Experiments with *Projection*, *Unweighted cosine*, *Weighted cosine* and *Orthogonal*

In [0]:
def censored_vector(u, v, mode):
  """Adjusts the auxiliary loss gradient
  
  Adjusts the auxiliary loss gradient before adding it to the primary loss
  gradient and using a gradient descent-based method
  
  Args:
    u: A tensor representing the auxiliary loss gradient, or None
    v: A tensor representing the primary loss gradient, or None
    mode: The method used for the adjustment:
      - Single task: the auxiliary loss gradient is ignored
      - Multitask: the auxiliary loss gradient is kept as it is
      - Unweighted cosine: cf. https://arxiv.org/abs/1812.02224
      - Weighted cosine: cf. https://arxiv.org/abs/1812.02224
      - Orthogonal: https://arxiv.org/abs/1801.07593
      - Projection: cf. ICML submission
    
  Returns:
    The adjusted auxiliary loss gradient: 0 when it is ignored or missing, u
    itself when there is nothing to compare it with (Multitask, missing
    primary gradient, or one of the two gradients has a zero norm), and a
    tensor with the shape of u otherwise

  Raises:
    ValueError: If both gradients are given and mode is not one of the
    methods listed above
  """
  if mode == 'Single task' or u is None:
    return 0
  if mode == 'Multitask' or v is None:
    return u
  if mode not in ('Unweighted cosine',
                  'Weighted cosine',
                  'Projection',
                  'Orthogonal'):
    # Falling through would return None and fail later with an obscure error
    raise ValueError('Unknown mode: %r' % (mode,))
  # The dot product and the norms are taken over the flattened gradients;
  # flattening leaves one-dimensional gradients unchanged
  a, b = tf.reshape(u, [-1]), tf.reshape(v, [-1])
  u_dot_v, l_u, l_v = tf.reduce_sum(a*b), tf.norm(a), tf.norm(b)
  if l_u.numpy() == 0 or l_v.numpy() == 0:
    # Avoids dividing by zero below
    return u
  if mode == 'Unweighted cosine':
    return u if u_dot_v > 0 else tf.zeros_like(u)
  if mode == 'Weighted cosine':
    return tf.maximum(u_dot_v, 0)*u/l_u/l_v
  if mode == 'Projection':
    # Only a negative component along v is removed
    return u - tf.minimum(u_dot_v, 0)*v/l_v/l_v
  # Orthogonal: the whole component along v is removed
  return u - u_dot_v*v/l_v/l_v

def combined_grads(primary_grad,
                   average_primary_grad,
                   auxiliary_grad,
                   mode,
                   overall=False,
                   lam=1):
  """Combines auxiliary loss gradients and primary loss gradients
  
  Combines a sequence of auxiliary loss gradients and a sequence of primary
  loss gradients before performing a gradient descent step
  
  Args:
    primary_grad: A list of tensors (or None) corresponding to the primary
    loss gradient for the network's Keras variables
    average_primary_grad: A list of tensors corresponding to exponential
    moving averages of the elements above, or None to compare the auxiliary
    loss gradients with primary_grad itself
    auxiliary_grad: A list of tensors (or None) corresponding to the
    auxiliary loss gradient for the network's Keras variables
    mode: The method used for the adjustment:
      - Single task: the auxiliary loss gradient is ignored
      - Multitask: the auxiliary loss gradient is kept as it is
      - Unweighted cosine: cf. https://arxiv.org/abs/1812.02224
      - Weighted cosine: cf. https://arxiv.org/abs/1812.02224
      - Orthogonal: https://arxiv.org/abs/1801.07593
      - Projection: cf. ICML submission
    overall: True if the transformation takes place at the level of the whole
    parameter vector, i.e. the concatenation of all the Keras variables of the
    network
    lam: Float balancing the primary loss and the auxiliary loss
    
  Returns:
    A list combining the primary loss gradients and the auxiliary loss
    gradients and that can directly be used for the next gradient descent step
  """
  # Gradients the auxiliary gradients are compared with
  if average_primary_grad is None:
    reference_grad = primary_grad
  else:
    reference_grad = average_primary_grad

  result = [0]*len(primary_grad)
  # Flattened gradients of the variables handled at the level of the whole
  # parameter vector, together with what is needed to restore their shapes
  flat_primary, flat_reference, flat_auxiliary, shapes = [], [], [], []
  for i in range(len(primary_grad)):
    if auxiliary_grad[i] is None or mode == 'Single task':
      result[i] = primary_grad[i]
    elif primary_grad[i] is None:
      result[i] = lam*auxiliary_grad[i]
    elif mode == 'Multitask':
      result[i] = primary_grad[i] + lam*auxiliary_grad[i]
    elif not overall:
      result[i] = (primary_grad[i]
                   + lam*censored_vector(auxiliary_grad[i],
                                         reference_grad[i],
                                         mode))
    else:
      flat_primary.append(tf.reshape(primary_grad[i], [-1]))
      flat_reference.append(tf.reshape(reference_grad[i], [-1]))
      flat_auxiliary.append(tf.reshape(auxiliary_grad[i], [-1]))
      shapes.append((primary_grad[i].shape,
                     np.prod(primary_grad[i].shape.as_list()),
                     i))

  if len(shapes) > 0:
    # One concatenation per vector instead of one per variable
    a = tf.concat(flat_primary, axis=0)
    aa = tf.concat(flat_reference, axis=0)
    b = tf.concat(flat_auxiliary, axis=0)
    c = a + lam*censored_vector(b, aa, mode)
    # Cut the combined vector back into tensors shaped like the variables
    start = 0
    for shape, length, index in shapes:
      result[index] = tf.reshape(c[start:start+length], shape)
      start += length
  return result

def train_iteration(model,
                    average_primary_grad,
                    alpha,
                    optimizer,
                    writer,
                    step_counter,
                    mode,
                    overall=False,
                    lam=1):
  """Performs one pass over the training set

  Args:
    model: The Keras model being trained
    average_primary_grad: The exponential moving average of the primary loss
    gradient for each variable, or None if it has not been initialised yet
    alpha: The factor for the exponential moving average; no average is
    maintained when it is equal to 1
    optimizer: The optimizer being used
    writer: The writer collecting summaries
    step_counter: The global counter used by the optimizer
    mode: The method used for adjusting the auxiliary loss gradient:
      - Single task: the auxiliary loss gradient is ignored
      - Multitask: the auxiliary loss gradient is kept as it is
      - Unweighted cosine: cf. https://arxiv.org/abs/1812.02224
      - Weighted cosine: cf. https://arxiv.org/abs/1812.02224
      - Orthogonal: https://arxiv.org/abs/1801.07593
      - Projection: cf. ICML submission
    overall: Whether the method above is applied to the whole parameter vector
    rather than variable by variable
    lam: The weight of the auxiliary task

  Returns:
    The exponential moving average of the primary loss gradient for each
    variable after the pass
  """
  single_task = mode == 'Single task'
  with writer.as_default(), tf.contrib.summary.always_record_summaries():
    for x, y, z in ds_train.make_one_shot_iterator():
      # The tape is persistent because up to two gradients are taken from it
      with tf.GradientTape(persistent=True) as tape:
        y_hat, z_hat = model(x)
        primary_loss = cross_entropy(y, y_hat)
        if not single_task:
          auxiliary_loss = tf.reduce_mean((z_hat-z)**2)
      tf.contrib.summary.scalar('primary_loss', primary_loss)

      primary_grad = tape.gradient(primary_loss, model.variables)
      if single_task:
        optimizer.apply_gradients(zip(primary_grad, model.variables),
                                  global_step=step_counter)
        continue

      tf.contrib.summary.scalar('auxiliary_loss', auxiliary_loss)
      auxiliary_grad = tape.gradient(auxiliary_loss, model.variables)

      if alpha != 1:
        if average_primary_grad is None:
          # First batch: the average starts from the current gradient
          average_primary_grad = primary_grad
        else:
          for k in range(len(average_primary_grad)):
            current = primary_grad[k]
            if current is None:
              continue
            average_primary_grad[k] = ((1 - alpha)*average_primary_grad[k]
                                       + alpha*current)

      grad = combined_grads(primary_grad,
                            average_primary_grad,
                            auxiliary_grad,
                            mode,
                            overall=overall,
                            lam=lam)
      optimizer.apply_gradients(zip(grad, model.variables),
                                global_step=step_counter)
  return average_primary_grad

def get_metrics(dataset,
                model,
                writer,
                step_counter):
  """Evaluates the primary task on the validation set or on the test set

  Args:
    dataset: 'val' for the validation set, anything else for the test set
    model: The Keras model being evaluated
    writer: The writer collecting summaries
    step_counter: Not used by this function

  Returns:
    The primary loss, the accuracy at a 0.5 threshold and the area under the
    ROC curve
  """
  # `import sklearn` alone does not guarantee that the metrics submodule has
  # been loaded, so import it explicitly before use
  import sklearn.metrics

  x, y = (x_val, y_val) if dataset == 'val' else (x_test, y_test)
  with writer.as_default(), tf.contrib.summary.always_record_summaries():
    y_hat = model(x)[0]
    primary_loss = cross_entropy(y, y_hat)
    tf.contrib.summary.scalar('primary_loss', primary_loss)
    labels, scores = y.numpy(), y_hat.numpy()
    acc = sklearn.metrics.accuracy_score(labels, scores>0.5)
    tf.contrib.summary.scalar('accuracy', acc)
    fpr, tpr, _ = sklearn.metrics.roc_curve(labels, scores)
    auc = sklearn.metrics.auc(fpr, tpr)
    tf.contrib.summary.scalar('AUC', auc)
  return primary_loss.numpy(), acc, auc

In [0]:
def run_experiment(name, model, alpha, mode, overall, lam, output, lr):
  """Trains the model with early stopping on the validation AUC

  Training stops after 10 consecutive passes without improvement of the
  validation AUC. The weights of the best pass are then restored and evaluated
  on the test set.

  Args:
    name: The name to be used for the Tensorboard log files
    model: The Keras model being trained
    alpha: The factor for the exponential moving average
    mode: The method used for adjusting the auxiliary loss gradient:
      - Single task: the auxiliary loss gradient is ignored
      - Multitask: the auxiliary loss gradient is kept as it is
      - Unweighted cosine: cf. https://arxiv.org/abs/1812.02224
      - Weighted cosine: cf. https://arxiv.org/abs/1812.02224
      - Orthogonal: https://arxiv.org/abs/1801.07593
      - Projection: cf. ICML submission
    overall: Whether the method above is applied to the whole parameter vector
    lam: The weight of the auxiliary task
    output: The text printed again for the user after each pass
    lr: The learning rate for the optimizer

  Returns:
    A tuple made of the best validation AUC followed by the loss, the accuracy
    and the AUC obtained on the test set by the corresponding model
  """
  flush_every = 10000
  train_writer = tf.contrib.summary.create_file_writer('./log/train/' + name,
                                                       flush_millis=flush_every)
  val_writer = tf.contrib.summary.create_file_writer('./log/val/' + name,
                                                     flush_millis=flush_every)
  test_writer = tf.contrib.summary.create_file_writer('./log/test/' + name,
                                                      flush_millis=flush_every)
  step_counter.assign(0)
  optimizer = tf.train.AdamOptimizer(learning_rate=lr)

  # Checkpoints of previous experiments are discarded
  checkpoint_dir = 'model_celeba'
  shutil.rmtree(checkpoint_dir, ignore_errors=True)
  checkpoint_prefix = os.path.join(checkpoint_dir, 'model.ckpt')
  root = tf.contrib.eager.Checkpoint(optimizer=optimizer,
                                     model=model,
                                     optimizer_step=step_counter)

  average_primary_grad = None
  best_auc = 0
  epoch = 1
  stale_epochs = 0
  while stale_epochs < 10:
    average_primary_grad = train_iteration(model,
                                           average_primary_grad,
                                           alpha,
                                           optimizer,
                                           train_writer,
                                           step_counter,
                                           mode,
                                           overall=overall,
                                           lam=lam)
    val_loss, val_acc, val_auc = get_metrics('val',
                                             model,
                                             val_writer,
                                             step_counter)
    clear_output()
    print(output)
    print(epoch, val_loss, val_acc, val_auc)
    if val_auc > best_auc:
      # A checkpoint is only written on improvement, so the latest checkpoint
      # is always the best one
      best_auc = val_auc
      stale_epochs = 0
      root.save(file_prefix=checkpoint_prefix)
    else:
      stale_epochs += 1
    epoch += 1

  root.restore(tf.train.latest_checkpoint(checkpoint_dir))
  test_metrics = get_metrics('test', model, test_writer, step_counter)
  return (best_auc,) + tuple(test_metrics)

In [0]:
def run_experiments(configs, filename):
  """Trains models for a series of configurations

  The configurations already recorded in the results file are skipped, which
  makes it possible to resume an interrupted series.
   
  Args:
    configs: The combinations of hyper-parameters to use in the experiments,
    as tuples (iteration, mode, overall, lam, alpha, lr)
    filename: The name of the file used for recording the experiments' results
    
  Returns:
    Nothing
  """
  # Each finished experiment appends a newline character followed by one row,
  # so the number of newline characters is the number of configurations done.
  # Counting in Python avoids passing the file name through a shell.
  try:
    with open(filename, 'rb') as results_file:
      already_done = results_file.read().count(b'\n')
  except FileNotFoundError:
    already_done = 0

  output = ''

  for iteration, mode, overall, lam, alpha, lr in configs[already_done:]:

    start = time.time()

    # Start from the initial weights stored for this run index
    model = FaceModel()
    _ = model(x_val[:1, :, : ,:])
    model.load_weights(model_dir + 'model_%i.h5' % iteration)

    output += 'Iteration #%d: %s (overall: %s, %d)\n' % (iteration,
                                                          mode,
                                                          overall,
                                                          lam)
    name = '%s-%s-%s-%d-%f' % (iteration, mode, overall, lam, alpha)
    best_auc, test_loss, test_acc, test_auc = run_experiment(name,
                                                   model,
                                                   alpha,
                                                   mode,
                                                   overall,
                                                   lam,
                                                   output,
                                                   lr)
    template = 'Loss: %.3f, accuracy: %.1f%%, AUC: %.3f (%d seconds)\n\n'
    output += template % (test_loss, test_acc*100, test_auc, time.time()-start)
    line = "%s,%s,%f,%d,%f,%f,%f,%f,%f" % (mode,
                                           overall,
                                           alpha,
                                           lam,
                                           lr,
                                           best_auc,
                                           test_loss,
                                           test_acc,
                                           test_auc)
    with open(filename, 'a') as file:
      file.write(f'\n{line}')

In [0]:
# The results file is created with its header only the first time, so that
# re-running the cell resumes the series instead of starting over
filename = results_dir + 'real_dataset_experiments.csv'
header = 'mode,overall,alpha,lam,lr,best_auc,test_loss,test_acc,test_auc'

if not os.path.isfile(filename):
    with open(filename, 'w') as file:
        file.write(header)

# Base tuples are (mode, overall, lam, alpha). The order of the final list
# matters: run_experiments skips as many configurations as there are rows
# already recorded in the results file.
configs = [('Single task', True, 0, 1)]

configs += [('Multitask', True, lam, 1)
           for lam in [300, 1000, 3000, 10000, 30000]]

configs += [('Projection', overall, lam, 1)
           for overall in [True, False]
           for lam in [300, 1000, 3000, 10000, 30000]]

# Projection against an exponential moving average of the primary gradient
configs += [('Projection', True, lam, 0.01)
           for lam in [300, 1000, 3000, 10000, 30000]]

# Every base tuple is repeated for each run index, with a learning rate of
# 1e-3, giving tuples (iteration, mode, overall, lam, alpha, lr)
configs = [(iteration, mode, overall, lam, alpha, 1e-3)
           for iteration in range(iterations)
           for mode, overall, lam, alpha in configs]

configs2 = [(mode, overall, lam, 1)
           for overall in [True, False]
           for lam in [300, 1000, 3000, 10000, 30000]
           for mode in ['Unweighted cosine', 'Weighted cosine']]

configs2 = [(iteration, mode, overall, lam, alpha, 1e-3)
           for iteration in range(iterations)
           for mode, overall, lam, alpha in configs2]

configs3 = [(mode, overall, lam, alpha)
           for overall in [True, False]
           for lam in [300, 1000, 3000, 10000, 30000]
           for mode in ['Orthogonal']
           for alpha in [0.01, 1]]

configs3 = [(iteration, mode, overall, lam, alpha, 1e-3)
           for iteration in range(iterations)
           for mode, overall, lam, alpha in configs3]

configs = configs + configs2 + configs3
_ = run_experiments(configs, filename)

# Experiments with *Adaptive auxiliary task weighting*

In [0]:
def train_iteration_OL_AUX(model,
                           accumulator,
                           horizon,
                           lr,
                           beta,
                           optimizer,
                           writer,
                           step_counter,
                           log=False,
                           lam=1):
  """Trains the model for one epoch with Adaptive auxiliary task weighting
  (https://papers.nips.cc/paper/8724-adaptive-auxiliary-task-weighting-for-reinforcement-learning)
   
  Args:
    model: The Keras model being trained
    accumulator: The accumulated values of the dot products of the main loss gradients and the auxiliary loss gradients
    horizon: The horizon defined in the Adaptive auxiliary task weighting method
    lr: The learning rate of the optimizer
    beta: The learning rate for adjusting the weight of the auxiliary task
    optimizer: The optimizer being used
    writer: The writer collecting summaries
    step_counter: The global counter used by the optimizer
    log: A boolean indicating whether to apply a logarithm function on the losses
    lam: The weight of the auxiliary task  
    
  Returns:
    The updated accumulator and the updated value of the weight of the auxiliary task, in this order
  """
  with writer.as_default(), tf.contrib.summary.always_record_summaries():
    for x, y, z in ds_train.make_one_shot_iterator():
      with tf.GradientTape(persistent=True) as tape:
        y_hat, z_hat = model(x)
        if not log:
          primary_loss = cross_entropy(y, y_hat)
          auxiliary_loss = tf.reduce_mean((z_hat-z)**2)
        else:
          primary_loss = tf.log(cross_entropy(y, y_hat))
          auxiliary_loss = tf.log(tf.reduce_mean((z_hat-z)**2))
      tf.contrib.summary.scalar('primary_loss', primary_loss)
      tf.contrib.summary.scalar('auxiliary_loss', auxiliary_loss)

      primary_grad = tape.gradient(primary_loss, model.variables)
      auxiliary_grad = tape.gradient(auxiliary_loss, model.variables)

      # A gradient is None for the variables a loss does not depend on. The
      # missing term is skipped explicitly instead of being replaced by an
      # integer 0 and detected afterwards by comparing tensors with 0.
      total_grad = []
      for main, aux in zip(primary_grad, auxiliary_grad):
        if aux is None:
          total_grad.append(main)
        elif main is None:
          total_grad.append(lam*aux)
        else:
          total_grad.append(main + lam*aux)
      optimizer.apply_gradients(zip(total_grad, model.variables),
                                global_step=step_counter)

      # Dot product of the two gradients, over the variables that have both
      for main, aux in zip(primary_grad, auxiliary_grad):
        if main is not None and aux is not None:
          accumulator += tf.tensordot(main, aux, len(main.shape))

      # NOTE(review): apply_gradients has already incremented the counter at
      # this point, so the first update seems to happen after horizon - 1
      # steps - confirm that this is intended.
      if (step_counter.value().numpy() + 1) % horizon == 0:
        # NOTE(review): the weight decreases when the accumulated dot product
        # is positive, i.e. when the gradients are aligned - confirm the sign
        # against the referenced paper.
        lam -= lr*beta*accumulator
        accumulator = 0

  return accumulator, lam

def run_experiment_OL_AUX(name, model, horizon, beta, lam, output, lr, log=False):
  """Trains the model until early stopping with Adaptive auxiliary task weighting
  (https://papers.nips.cc/paper/8724-adaptive-auxiliary-task-weighting-for-reinforcement-learning)
   
  Args:
    name: The name to be used for the Tensorboard log files
    model: The Keras model being trained
    horizon: The horizon defined in the Adaptive auxiliary task weighting method
    beta: The learning rate for adjusting the weight of the auxiliary task
    lam: The initial weight of the auxiliary task
    output: The current output printed for the user during training
    lr: The learning rate for the optimizer
    log: A boolean indicating whether to apply a logarithm function on the losses
    
  Returns:
    The best AUC on the validation set, followed by the performance metrics on
    the test set for the best model on the validation set and the final lam
  """
  train_writer = tf.contrib.summary.create_file_writer('./log/train/' + name,
                                                       flush_millis=10000)
  val_writer = tf.contrib.summary.create_file_writer('./log/val/' + name,
                                                     flush_millis=10000)
  test_writer = tf.contrib.summary.create_file_writer('./log/test/' + name,
                                                      flush_millis=10000)
  step_counter.assign(0)
  optimizer = tf.train.AdamOptimizer(learning_rate=lr)
  # Checkpoints of previous experiments are discarded
  checkpoint_dir = 'model_celeba'
  shutil.rmtree(checkpoint_dir, ignore_errors=True)
  checkpoint_prefix = os.path.join(checkpoint_dir, 'model.ckpt')
  root = tf.contrib.eager.Checkpoint(optimizer=optimizer,
                                     model=model,
                                     optimizer_step=step_counter)
  
  accumulator = 0
  iteration, not_better, best_auc = 1, 0, 0
  # Stop after 10 consecutive epochs without improvement on the validation set
  while not_better < 10:
    # The caller's `log` flag is forwarded; it used to be overridden by a
    # hard-coded False, which silently disabled the logarithm of the losses
    accumulator, lam = train_iteration_OL_AUX(model,
                                              accumulator,
                                              horizon,
                                              lr,
                                              beta,
                                              optimizer,
                                              train_writer,
                                              step_counter,
                                              log=log,
                                              lam=lam)
    val_loss, val_acc, val_auc = get_metrics('val',
                                             model,
                                             val_writer,
                                             step_counter)
    clear_output()
    print(output)
    print(iteration, val_loss, val_acc, val_auc)
    if val_auc > best_auc:
      not_better, best_auc = 0, val_auc
      root.save(file_prefix=checkpoint_prefix)
    else:
      not_better += 1
    iteration += 1

  # A checkpoint is only written on improvement: the latest one is the best
  root.restore(tf.train.latest_checkpoint(checkpoint_dir))
  metrics = get_metrics('test', model, test_writer, step_counter)
  return (best_auc, *metrics, lam)

def run_experiments_OL_AUX(configs, filename):
  """Trains models for a series of configurations

  The configurations already recorded in the results file are skipped, which
  makes it possible to resume an interrupted series.
   
  Args:
    configs: The combinations of hyper-parameters to use in the experiments,
    as tuples (iteration, horizon, beta, lam, lr, log)
    filename: The name of the file used for recording the experiments' results
    
  Returns:
    Nothing
  """
  # Each finished experiment appends a newline character followed by one row,
  # so the number of newline characters is the number of configurations done.
  # Counting in Python avoids passing the file name through a shell.
  try:
    with open(filename, 'rb') as results_file:
      already_done = results_file.read().count(b'\n')
  except FileNotFoundError:
    already_done = 0

  output = ''

  for iteration, horizon, beta, lam, lr, log in configs[already_done:]:

    start = time.time()

    # Start from the initial weights stored for this run index
    model = FaceModel()
    _ = model(x_val[:1, :, : ,:])
    model.load_weights(model_dir + 'model_%i.h5' % iteration)

    output += 'Iteration #%d, beta: %f, lam: %d\n' % (iteration,
                                                         beta,
                                                         lam)
    name = '%s-%d-%d-%d-%f' % (iteration, horizon, beta, lam, lr)
    if log:
      # Runs on the logarithm of the losses would otherwise share the
      # Tensorboard directory of the run with the same other hyper-parameters
      name += '-log'
    best_auc, test_loss, test_acc, test_auc, final_lam = run_experiment_OL_AUX(name,
                                                                               model,
                                                                               horizon,
                                                                               beta,
                                                                               lam,
                                                                               output,
                                                                               lr,
                                                                               log)
    template = 'Loss: %.3f, accuracy: %.1f%%, AUC: %.3f, final Lam: %.3f (%d seconds)\n\n'
    output += template % (test_loss, test_acc*100, test_auc, final_lam, time.time()-start)
    line = "%f,%f,%f,%f,%s,%f,%f,%f,%f" % (lam,
                                        final_lam,
                                        beta,
                                        lr,
                                        log,
                                        best_auc,
                                        test_loss,
                                        test_acc,
                                        test_auc)
    with open(filename, 'a') as file:
      file.write(f'\n{line}')

# Adaptive auxiliary task weighting on the raw losses. The results file is
# created with its header only the first time, so that re-running resumes the
# series instead of starting over.
filename = results_dir + 'real_dataset_experiments2.csv'
header = 'lam,final_lam,beta,lr,log,best_auc,test_loss,test_acc,test_auc'

if not os.path.isfile(filename):
    with open(filename, 'w') as file:
        file.write(header)

lams = [300, 1000, 3000, 10000, 30000]

# Tuples are (iteration, horizon, beta, lam, lr, log). The run indices follow
# `iterations`, which is also the number of initial weight files available.
configs = [(iteration, 5, beta, lam, 1e-3, False)
           for iteration in range(iterations)
           for beta in [100, 1000, 10000, 100000]
           for lam in lams]

_ = run_experiments_OL_AUX(configs, filename)

In [0]:
# Adaptive auxiliary task weighting on the logarithm of the losses. The
# results file is created with its header only the first time, so that
# re-running resumes the series instead of starting over.
filename = results_dir + 'experiment-1.csv'
header = 'lam,final_lam,beta,lr,log,best_auc,test_loss,test_acc,test_auc'

if not os.path.isfile(filename):
    with open(filename, 'w') as file:
        file.write(header)

lams = [300, 1000, 3000, 10000, 30000]

# Tuples are (iteration, horizon, beta, lam, lr, log). The run indices follow
# `iterations`, which is also the number of initial weight files available.
configs = [(iteration, 5, beta, lam, 1e-3, True)
           for iteration in range(iterations)
           for beta in [100, 1000, 10000, 100000]
           for lam in lams]

_ = run_experiments_OL_AUX(configs, filename)