## Gradient Bidding.

In [1]:
import tensorflow as tf
import types
from utils import noisy_top_k_gating
from utils import SparseDispatcher
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("../MNIST_data/", one_hot=True)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ../MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting ../MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting ../MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [2]:

# FFNN with biases.
def expert(i, x, hparams):
    with tf.compat.v1.variable_scope("expert"):
        sizes = [hparams.n_inputs] + [hparams.e_hidden for _ in range(hparams.e_layers)] + [hparams.n_embedding]
        for i in range(len(sizes) - 1):
            w = tf.Variable(tf.truncated_normal([sizes[i], sizes[i+1]], stddev=0.1))
            b = tf.Variable(tf.constant(0.1, shape=[sizes[i+1]]))
            x = tf.matmul(x, w) + b
    return x

# Cross entropy loss + accuracy.
def target_loss(embedding, targets, hparams):
    with tf.compat.v1.variable_scope("target_loss"):
        w = tf.Variable(tf.truncated_normal([hparams.n_embedding, hparams.n_targets], stddev=0.1))
        b = tf.Variable(tf.constant(0.1, shape=[hparams.n_targets])),
        logits = tf.add(tf.matmul(embedding, w), b)
        target_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=targets, logits=logits))
        correct = tf.equal(tf.argmax(logits, 1), tf.argmax(targets, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
        return target_loss, accuracy

In [3]:
# Incentive function inputs weights, outputs revenue.
# This the most basic, just takes the inloop weight as your score
def incentive_fn(weights, hparams):
    weights = tf.linalg.normalize(weights)
    return tf.slice(weights, [0], [1])

In [5]:
def model_fn(hparams):    
    x_inputs = tf.placeholder("float", [None, hparams.n_inputs], 'inputs')
    y_targets = tf.placeholder("float", [None, hparams.n_targets], 'targets')    
    
    # Sparsely gated mixture of experts with choice k. Produces an importance score 
    # for each x_input then chooses the topk. These children recieve the outgoing query.
    # expert_inputs is a list of tensors, inputs for each expert.
    gates, load = noisy_top_k_gating(x_inputs, hparams.n_experts, train = True, k = hparams.k)
    dispatcher = SparseDispatcher(hparams.n_experts, gates)
    expert_inputs = dispatcher.dispatch(x_inputs)

    # Basic importance scores can attained from the gating network by summing over the importance 
    # of each example. We choose a 'self-importance' score here which counts as the in loop in our
    # incentive function. The network should try to maximize this value.
    importance = tf.linalg.normalize(tf.reduce_sum(gates, 0))[0]
    self_weight = tf.Variable(tf.constant([1.0]))
    weights = tf.linalg.normalize(tf.concat([self_weight, importance], axis=0))[0]
    revenue = tf.slice(weights, [0], [1])
    
    # Dispatch the inputs to the experts. We mask the responses with a faux-bidding system,
    # here, we set a mask w.r.t the bids with a hparams.market_shift shifted relu. Bids that
    # drop bellow the market shift should zero out.
    expert_outputs = []
    expert_masks = []
    for i in range(hparams.n_experts):
        expert_output = expert(i, expert_inputs[i], hparams)
        
        # Apply mask to the output.
        expert_mask = tf.nn.relu(tf.slice(weights, [i], [1]) - hparams.market_shift)
        masked_output = expert_mask * expert_output
        
        expert_masks.append(expert_mask)
        expert_outputs.append(masked_output)
    expert_masks = tf.concat(expert_masks, axis=0)

    
    # Combine the expert_inputs.
    embedding = dispatcher.combine(expert_outputs)
        
    # Loss and accuracy stuff.
    loss, accuracy = target_loss(embedding, y_targets, hparams)
    
    # Run the step: optimize for loss and revenue. 
    train_step = tf.train.AdamOptimizer(hparams.learning_rate).minimize(loss - revenue)
    
    metrics = {
        'loss': loss,
        'revenue': revenue,
        'accuracy': accuracy,
        'importance': importance,
        'weights': weights,
        'masks': expert_masks,
    }
    return train_step, metrics

In [6]:
hparams = types.SimpleNamespace( 
    n_inputs = 784,
    n_targets = 10,
    k = 3,
    n_experts = 3,
    e_layers = 2,
    e_hidden = 256,
    n_embedding = 256,
    batch_size=256,
    learning_rate=1e-3,
    n_iterations = 10000,
    n_print = 100,
    market_shift = 0.2,
)

graph = tf.Graph()
session = tf.Session(graph=graph)
with graph.as_default():
    train_step, metrics = model_fn(hparams)
    session.run(tf.global_variables_initializer())

for i in range(hparams.n_iterations):
    batch_x, batch_y = mnist.train.next_batch(hparams.batch_size)
    feeds = {'inputs:0': batch_x, 'targets:0': batch_y}
    session.run(train_step, feeds)

    if i % hparams.n_print == 0:
        feeds = {'inputs:0': batch_x, 'targets:0': batch_y}
        train_metrics = session.run(metrics, feeds)
        for key in train_metrics:
            print (str(key) + ":  " + str(train_metrics[key]))
        print ('-')




Instructions for updating:
Use `tf.cast` instead.

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.


Instructions for updating:
Use `tf.cast` instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
loss:  1.8384356
revenue:  [0.70746005]
accuracy:  0.4609375
importance:  [0.48973477 0.6229136  0.6100317 ]
weights:  [0.70746005 0.34612164 0.44024622 0.4311419 ]
masks:  [0.50746006 0.14612164 0.24024622]
-
loss:  0.34438166
revenue:  [0.74051046]
accuracy:  0.91015625
importance:  [0.531048   0.82449776 0.19542626]
weights:  [0.74051046 0.3568881  0.554

loss:  0.09206778
revenue:  [0.9271562]
accuracy:  0.96875
importance:  [0.66779685 0.674556   0.31467697]
weights:  [0.9271562  0.2502068  0.25273928 0.11790159]
masks:  [0.7271562  0.0502068  0.05273928]
-
loss:  0.11937548
revenue:  [0.9283053]
accuracy:  0.9609375
importance:  [0.65988934 0.68136275 0.31668735]
weights:  [0.9283053  0.24535926 0.25334346 0.1177503 ]
masks:  [0.72830534 0.04535925 0.05334346]
-
loss:  0.055246532
revenue:  [0.92831767]
accuracy:  0.98828125
importance:  [0.6555793  0.69668967 0.29127207]
weights:  [0.92831767 0.24373652 0.25902086 0.10829146]
masks:  [0.7283177  0.04373652 0.05902086]
-
loss:  0.06056623
revenue:  [0.9292525]
accuracy:  0.984375
importance:  [0.6328451  0.72911555 0.2605716 ]
weights:  [0.9292525  0.23380159 0.26936823 0.09626693]
masks:  [0.7292525  0.03380159 0.06936823]
-
loss:  0.10721418
revenue:  [0.9298683]
accuracy:  0.96875
importance:  [0.72065014 0.63846797 0.27022588]
weights:  [0.9298683  0.26512188 0.23488766 0.0994141

loss:  0.1434359
revenue:  [0.9377591]
accuracy:  0.9609375
importance:  [0.85080904 0.49539033 0.17524987]
weights:  [0.9377591  0.29547435 0.1720423  0.06086189]
masks:  [0.7377591  0.09547435 0.        ]
-
loss:  0.1352494
revenue:  [0.93788004]
accuracy:  0.95703125
importance:  [0.83523214 0.53616846 0.12210899]
weights:  [0.93788004 0.28979197 0.1860289  0.04236691]
masks:  [0.73788005 0.08979197 0.        ]
-
loss:  0.18740657
revenue:  [0.9374582]
accuracy:  0.9453125
importance:  [0.8147385  0.5654317  0.12840565]
weights:  [0.9374582  0.28360865 0.1968255  0.04469772]
masks:  [0.7374582  0.08360864 0.        ]
-
loss:  0.026457489
revenue:  [0.93757105]
accuracy:  0.99609375
importance:  [0.7115098  0.681526   0.17110305]
weights:  [0.93757105 0.24745871 0.23703054 0.05950858]
masks:  [0.73757106 0.04745871 0.03703053]
-
loss:  0.0700033
revenue:  [0.9378682]
accuracy:  0.984375
importance:  [0.70942134 0.6839412  0.17013498]
weights:  [0.9378682  0.2461632  0.23732181 0.0590