<a href="https://colab.research.google.com/github/unconst/GradientBidding/blob/master/Multibidder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Multi-bidder

In [0]:
import tensorflow as tf
import types
from utils_moe import noisy_top_k_gating
from utils_moe import SparseDispatcher
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("../MNIST_data/", one_hot=True)

Extracting ../MNIST_data/train-images-idx3-ubyte.gz
Extracting ../MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../MNIST_data/t10k-labels-idx1-ubyte.gz


In [0]:
# FFNN with biases.
def expert(i, xs, hparams):
    with tf.compat.v1.variable_scope("expert"):
        sizes = [hparams.n_inputs] + [hparams.e_hidden for _ in range(hparams.e_layers)] + [hparams.n_embedding]
        for i in range(len(sizes) - 1):    
            w = tf.Variable(tf.truncated_normal([sizes[i], sizes[i+1]], stddev=0.1))
            b = tf.Variable(tf.constant(0.1, shape=[sizes[i+1]]))
            for j in range(len(xs)):
                xs[j] = tf.matmul(xs[j], w) + b
    return xs

# Cross entropy loss + accuracy.
def target_loss(embedding, targets, hparams):
    with tf.compat.v1.variable_scope("target_loss"):
        w = tf.Variable(tf.truncated_normal([hparams.n_embedding, hparams.n_targets], stddev=0.1))
        b = tf.Variable(tf.constant(0.1, shape=[hparams.n_targets])),
        logits = tf.add(tf.matmul(embedding, w), b)
        target_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=targets, logits=logits))
        correct = tf.equal(tf.argmax(logits, 1), tf.argmax(targets, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
        return target_loss, accuracy

In [0]:
def model_fn(hparams):    
    x_inputs = tf.placeholder("float", [None, hparams.n_inputs], 'inputs')
    y_targets = tf.placeholder("float", [None, hparams.n_targets], 'targets')    
    
    
    expert_inputs = []
    client_revenue = []
    client_weights = []
    client_dispatchers = []
    for i in range(hparams.n_clients):
        # Sparsely gated mixture of experts with choice k. Produces an importance score 
        # for each x_input then chooses the topk. These children recieve the outgoing query.
        # expert_inputs is a list of tensors, inputs for each expert.
        gates, load = noisy_top_k_gating(x_inputs, hparams.n_experts, train = True, k = hparams.k)
        dispatcher = SparseDispatcher(hparams.n_experts, gates)
        dispactched_inputs = dispatcher.dispatch(x_inputs)
        
        
     
        
        # Basic importance scores can be attained from the gating network by summing over the importance 
        # of each example. We choose a 'self-importance' score here which counts as the in loop in our
        # incentive function. The network should try to maximize this value.
        importance = tf.linalg.normalize(tf.reduce_sum(gates, 0))[0]
        self_weight = tf.Variable(tf.constant([1.0]))
        weights = tf.linalg.normalize(tf.concat([self_weight, importance], axis=0))
    
        revenue = tf.slice(weights, [0], [1])
        
        # Save references.
        expert_inputs.append(dispactched_inputs)
        client_dispatchers.append(dispatcher)
        client_weights.append(weights)
        client_revenue.append(client_revenue)
        
    
    expert_outputs = []
    for i in range(hparams.n_experts):
        # Each expert takes a list of tensors one for each client and produces a list of tensors one
        # for each client.
        outputs = expert(i, expert_inputs[:][i], hparams)
        expert_outputs.append(outputs)
        
        
        
    client_embeddings = []
    for j in range(hparams.n_clients):
        masked_inputs = []
        for i in range(hparams.n_experts):
            # Each output is masked based on the bid.
            shift = tf.reduce_mean(client_weights[:][i])
            bid = tf.slice(client_weights[j][i], [0], [1])
            mask = tf.nn.relu(bid - shift)
            masked_input = mask * expert_outputs[i][j]
            masked_inputs.append(masked_input)
            
        # Combine the expert_inputs.
        embedding = client_dispatchers[j].combine(masked_inputs)
        client_embeddings.append(embedding)
        
        
    losses = []
    accuracies = []
    for j in range(hparams.n_clients):
        client_embedding = client_embeddings[j]
        client_revenue = client_revenues[j]
        
        # Loss and accuracy stuff.
        loss, accuracy = target_loss(client_embedding, y_targets, hparams)

        # Run the step: optimize for loss and revenue. 
        train_step = tf.train.AdamOptimizer(hparams.learning_rate).minimize(loss - client_revenue)
        losses.append(loss)
        accuracy.append(accuracy)
        
    
    metrics = {
        'losses': losses,
        'revenues': revenues,
        'accuracies': accuracies,
    }
    return train_step, metrics

In [0]:
hparams = types.SimpleNamespace( 
    n_inputs = 784,
    n_targets = 10,
    k = 3,
    n_experts = 3,
    n_clients = 3,
    e_layers = 2,
    e_hidden = 256,
    n_embedding = 256,
    batch_size=256,
    learning_rate=1e-3,
    n_iterations = 10000,
    n_print = 100,
    market_shift = 0.2,
)

graph = tf.Graph()
session = tf.Session(graph=graph)
with graph.as_default():
    train_step, metrics = model_fn(hparams)
    session.run(tf.global_variables_initializer())

for i in range(hparams.n_iterations):
    batch_x, batch_y = mnist.train.next_batch(hparams.batch_size)
    feeds = {'inputs:0': batch_x, 'targets:0': batch_y}
    session.run(train_step, feeds)

    if i % hparams.n_print == 0:
        feeds = {'inputs:0': batch_x, 'targets:0': batch_y}
        train_metrics = session.run(metrics, feeds)
        for key in train_metrics:
            print (str(key) + ":  " + str(train_metrics[key]))
        print ('-')


ValueError: Tried to convert 'input' to a tensor and failed. Error: Dimension 0 in both shapes must be equal, but are 4 and 1. Shapes are [4] and [1].
	From merging shape 0 with other shapes. for 'Slice/packed' (op: 'Pack') with input shapes: [4], [1].