<a href="https://colab.research.google.com/github/valeriodc95/Belief-Network/blob/master/bn_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import os, shutil
import tensorflow as tf
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

# tf.data.Dataset: input pipeline
Load the dataset, prepare the `input_fn` that feeds the estimator with data, and declare the serving input function.

In [0]:
# Download MNIST; `train` and `test` are (images, labels) pairs.
train, test = tf.keras.datasets.mnist.load_data()

# Flatten every 28x28 training image into one row and rescale pixels by 1/255.
tr_labels = train[1]
tr_images = np.reshape(train[0], (60000, -1)) / 255

# Hold out 10000 stratified examples for validation (fixed seed for repeatability).
tr_img, eval_img, tr_lab, eval_lab = train_test_split(
    tr_images,
    tr_labels,
    test_size=10000,
    random_state=42,
    stratify=tr_labels,
)

In [0]:
def data_input_fn(images, labels=None, phase='eval'):
    """Build an ``input_fn`` closure for a ``tf.estimator.Estimator``.

    The dataset itself is only created when the returned callable is invoked,
    so the global ``BATCH_SIZE`` is read at that moment (train phase only).

    Args:
        images: Array-like of images; served under the ``'image'`` feature key.
        labels: Optional labels aligned with ``images``. Mandatory for
            ``phase='train'``; optional for ``'eval'``; ignored for ``'test'``.
        phase: One of ``'train'``, ``'eval'`` or ``'test'``.

    Returns:
        A zero-argument callable returning a ``tf.data.Dataset``.

    Raises:
        ValueError: If ``phase`` is not one of the supported values, or if
            ``phase='train'`` is requested without labels.
    """
    if phase == 'train':
        if labels is None:
            # Fail here rather than deep inside TensorFlow when the dataset is built.
            raise ValueError("labels are required when phase='train'")

        def train_input_fn():
            dataset = tf.data.Dataset.from_tensor_slices(({'image': images}, labels))
            dataset = dataset.shuffle(buffer_size=5000, reshuffle_each_iteration=True)
            dataset = dataset.repeat()
            dataset = dataset.batch(batch_size=BATCH_SIZE)
            dataset = dataset.prefetch(buffer_size=-1) # autotunes the buffer size

            return dataset

        return train_input_fn

    if phase == 'eval':
        def eval_input_fn():
            if labels is not None:
                dataset = tf.data.Dataset.from_tensor_slices(({'image': images}, labels))
            else:
                dataset = tf.data.Dataset.from_tensor_slices(({'image': images}))
            # One batch holds the whole set; repeating lets the evaluator be called again.
            dataset = dataset.batch(batch_size=len(images))
            dataset = dataset.repeat()

            return dataset

        return eval_input_fn

    if phase == 'test':
        def test_input_fn():
            # Features only, served once as a single batch.
            dataset = tf.data.Dataset.from_tensor_slices(({'image': images}))
            dataset = dataset.batch(batch_size=len(images))

            return dataset

        return test_input_fn

    # Previously an unknown phase silently returned None and failed later in the estimator.
    raise ValueError(
        "phase must be one of 'train', 'eval' or 'test', got {!r}".format(phase))

def serving_input_rec_fn():
    """Return the serving receiver used when exporting a model.

    A single float32 placeholder of shape (None, 784) is exposed under the
    ``'image'`` key, both as the model feature and as the receiver tensor.
    """
    image_placeholder = tf.placeholder(shape=(None, 784), dtype=tf.float32)
    inputs = {'image': image_placeholder}
    return tf.estimator.export.ServingInputReceiver(
        features=inputs,
        receiver_tensors=inputs,
    )

# RBM Training config

In [4]:
# Hyperparameters for the RBM stage. The trailing `#@param` markers are Colab
# form annotations, so each assignment must stay on its own line.
BATCH_SIZE = 100 #@param {type:"integer"}
EPOCHS = 100 #@param {type:"integer"}
L_RATE = 0.01 #@param {type:'number'}
HIDDEN_NEURONS = 100 #@param {type:"integer"}
model_dir = "rbm_train_dir" #@param {type:"string"}
N_CLASSES = 10 #@param {type:"integer"}
# 50000 is the size of the training split: 60000 MNIST training images minus
# the 10000 held out for validation in the data-loading cell.
steps_per_epoch = 50000 // BATCH_SIZE

# Train and evaluate under a ParameterServerStrategy with one GPU per worker.
distribute = tf.contrib.distribute.DistributeConfig(
    train_distribute=tf.contrib.distribute.ParameterServerStrategy(num_gpus_per_worker=1),
    eval_distribute=tf.contrib.distribute.ParameterServerStrategy(num_gpus_per_worker=1))

# Summaries every 10 steps; one checkpoint per epoch.
run_config = tf.estimator.RunConfig(save_summary_steps=10,
                                    save_checkpoints_steps=steps_per_epoch,
                                    experimental_distribute=distribute)


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

INFO:tensorflow:ParameterServerStrategy with compute_devices = ('/replica:0/task:0/device:GPU:0',), variable_device = '/device:GPU:0'
INFO:tensorflow:ParameterServerStrategy with compute_devices = ('/replica:0/task:0/device:GPU:0',), variable_device = '/device:GPU:0'
INFO:tensorflow:Initializing RunConfig with distribution strategies.
INFO:tensorflow:Not using Distribute Coordinator.


# RBM model_fn
Implementation of the RBM inference, the Contrastive Divergence parameter updates, and the `train_op` of the tf.estimator.

In [0]:
def untrained_rbm_inference(input_):
    """Build the graph for one up-down-up pass of the RBM.

    Args:
        input_: Feature dict; the ``'image'`` entry is cast to float32 and
            multiplied with the (784, HIDDEN_NEURONS) weight matrix.

    Returns:
        Tuple ``(wake, dream, v_input, recon_input, h_act, recon_h_act)``:
        the per-example outer products of visible and hidden activations for
        the data and for the reconstruction, followed by the visible input,
        its reconstruction, and the two hidden activations.
    """
    visible = tf.cast(input_['image'], dtype=tf.float32)

    # Parameters live in the 'rbm' scope so other model functions can reuse them.
    with tf.variable_scope('rbm', reuse=tf.AUTO_REUSE):
        w = tf.get_variable(
            name='weights',
            shape=(784, HIDDEN_NEURONS),
            initializer=tf.keras.initializers.RandomNormal(stddev=0.01, dtype=tf.float32),
            trainable=True,
        )
        hidden_bias = tf.get_variable(
            name='bias',
            shape=(1, HIDDEN_NEURONS),
            initializer=tf.keras.initializers.Zeros(dtype=tf.float32),
            trainable=True,
        )
        visible_bias = tf.get_variable(
            name='tmp_v_bias',
            shape=(1, 784),
            initializer=tf.keras.initializers.Zeros(dtype=tf.float32),
            trainable=True,
        )

    # Positive phase: hidden probabilities given the data, and their outer
    # product with the visible units (one matrix per example).
    hidden_prob = tf.math.sigmoid((visible @ w) + hidden_bias)
    positive_assoc = tf.expand_dims(visible, axis=2) @ tf.expand_dims(hidden_prob, axis=1)

    # Sample binary hidden states: ceil(p - u) is 1 exactly when u < p.
    uniform_noise = tf.random_uniform(tf.shape(hidden_prob), maxval=1, dtype=tf.float32)
    hidden_sample = tf.math.ceil(hidden_prob - uniform_noise)

    # Project the sampled hidden states back onto the visible layer.
    visible_recon = tf.math.sigmoid((hidden_sample @ tf.transpose(w)) + visible_bias)

    # Negative phase: same statistics, computed from the reconstruction.
    hidden_recon_prob = tf.math.sigmoid((visible_recon @ w) + hidden_bias)
    negative_assoc = (tf.expand_dims(visible_recon, axis=2)
                      @ tf.expand_dims(hidden_recon_prob, axis=1))

    return (positive_assoc, negative_assoc, visible,
            visible_recon, hidden_prob, hidden_recon_prob)

In [0]:
def rbm_train_model_fn(features, labels=None, mode=None, params=None):
    """Estimator ``model_fn`` that trains the RBM with Contrastive Divergence.

    Args:
        features: Feature dict forwarded to ``untrained_rbm_inference``.
        labels: Unused; the RBM is trained without supervision.
        mode: A ``tf.estimator.ModeKeys`` value.
        params: Unused.

    Returns:
        An ``EstimatorSpec`` for TRAIN, EVAL or PREDICT. The reported loss is
        the mean squared error between the input and its reconstruction;
        PREDICT exposes the hidden activations under ``'h_act'``.
    """
    wake, dream, v_input, recon_input, h_act, recon_h_act = untrained_rbm_inference(features)
    err = tf.losses.mean_squared_error(v_input, recon_input)

    if mode == tf.estimator.ModeKeys.TRAIN:

        # Fetch the variables created by the inference function.
        with tf.variable_scope('rbm', reuse=tf.AUTO_REUSE):
            weights = tf.get_variable('weights')
            h_bias = tf.get_variable('bias')
            v_bias = tf.get_variable('tmp_v_bias')

        with tf.control_dependencies([wake, dream]):
            # Parameters are updated by direct assignment, not through an
            # optimizer: each update is the batch mean of (data - reconstruction).
            w_update = tf.assign_add(weights, L_RATE * tf.reduce_mean(wake - dream, axis=0))
            # `keepdims` replaces the deprecated `keep_dims` spelling; it keeps
            # the (1, N) shape of the bias variables.
            hb_update = tf.assign_add(h_bias, L_RATE * tf.reduce_mean(h_act - recon_h_act, axis=0, keepdims=True))
            vb_update = tf.assign_add(v_bias, L_RATE * tf.reduce_mean(v_input - recon_input, axis=0, keepdims=True))
            # No optimizer is involved, so the global step is advanced by hand.
            gs_update = tf.assign_add(tf.train.get_or_create_global_step(), 1)

        rbm_train_op = tf.group(w_update, hb_update, vb_update, gs_update)
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=err,
                                          train_op=rbm_train_op)

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=err)

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {'h_act': h_act}
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions)

# RBM tf.estimator construction and training
Now we put everything together in order to wrap our model into the tf.estimator interface.

In [11]:
# Start from a clean model directory so no stale checkpoint is picked up.
# The directory does not exist on a fresh runtime, so only remove it if present,
# and remove the directory the estimator actually writes to.
if os.path.isdir(model_dir):
    shutil.rmtree(model_dir)

rbm_estimator = tf.estimator.Estimator(model_fn=rbm_train_model_fn,
                                       model_dir=model_dir,
                                       config=run_config,
                                       params={},
                                       warm_start_from=None)
# Pre-create the default eval directory; exist_ok keeps the cell re-runnable.
os.makedirs(rbm_estimator.eval_dir(), exist_ok=True)

# Keep the three best exported models according to the evaluation loss.
exporter = tf.estimator.BestExporter(name="best_rbm",
                                     serving_input_receiver_fn=serving_input_rec_fn,
                                     exports_to_keep=3)

# NOTE(review): with no `eval_dir` argument the hook presumably reads metrics from
# rbm_estimator.eval_dir(), while the EvalSpec below is named 'Validation' and may
# write to a name-suffixed directory -- confirm that early stopping can fire.
early_stopping = tf.contrib.estimator.stop_if_no_decrease_hook(estimator=rbm_estimator,
                                                               metric_name='loss',
                                                               max_steps_without_decrease=5,
                                                               min_steps=5)

train_spec = tf.estimator.TrainSpec(input_fn=data_input_fn(tr_img, tr_lab, phase='train'),
                                    max_steps=EPOCHS * steps_per_epoch,
                                    hooks=[early_stopping])

# The eval input_fn yields the whole validation set as one batch, hence steps=1.
eval_spec = tf.estimator.EvalSpec(input_fn=data_input_fn(eval_img, eval_lab, phase='eval'),
                                  steps=1,
                                  name='Validation',
                                  hooks=None,
                                  exporters=[exporter],
                                  start_delay_secs=0,
                                  throttle_secs=0)

tf.estimator.train_and_evaluate(estimator=rbm_estimator,
                                train_spec=train_spec,
                                eval_spec=eval_spec)

INFO:tensorflow:Using config: {'_model_dir': 'rbm_train_dir', '_tf_random_seed': None, '_save_summary_steps': 10, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': <tensorflow.contrib.distribute.python.parameter_server_strategy.ParameterServerStrategy object at 0x7f886e39a2b0>, '_device_fn': None, '_protocol': None, '_eval_distribute': <tensorflow.contrib.distribute.python.parameter_server_strategy.ParameterServerStrategy object at 0x7f886e39a400>, '_experimental_distribute': DistributeConfig(train_distribute=<tensorflow.contrib.distribute.python.parameter_server_strategy.ParameterServerStrategy object at 0x7f886e39a2b0>, eval_distribute=<tensorflow.contrib.distribute.python.parameter_server_strategy.ParameterServerStrategy object a

({'global_step': 50000, 'loss': 0.016702455},
 [b'rbm_train_dir/export/best_rbm/1556352400'])

In [85]:
# Hyperparameters for the supervised belief-network stage. The trailing `#@param`
# markers are Colab form annotations, so each assignment stays on its own line.
BATCH_SIZE = 100 #@param {type:"integer"}
EPOCHS = 100 #@param {type:"integer"}
L_RATE = 0.01 #@param {type:'number'}
MOMENTUM = 0.9 #@param {type:"number"}
DEC_RATE = 0.99 #@param {type:"number"}
model_dir = "belief_net_dir" #@param {type:"string"}
# Use the newest RBM checkpoint instead of a fixed step number: the final step
# depends on BATCH_SIZE, EPOCHS and on whether training stopped early.
checkpoint = tf.train.latest_checkpoint('rbm_train_dir')
if checkpoint is None:
    raise FileNotFoundError(
        "No RBM checkpoint found in 'rbm_train_dir'; run the RBM training cell first.")
# 50000 is the size of the training split (60000 images minus 10000 for validation).
steps_per_epoch = 50000 // BATCH_SIZE

# Summaries every 10 steps; one checkpoint per epoch; same distribution config as the RBM stage.
run_config = tf.estimator.RunConfig(save_summary_steps=10,
                                    save_checkpoints_steps=steps_per_epoch,
                                    experimental_distribute=distribute)

INFO:tensorflow:Initializing RunConfig with distribution strategies.
INFO:tensorflow:Not using Distribute Coordinator.


# Belief Network model_fn
Implementation of the belief network with the trained RBM.

In [0]:
def belief_net_inference(input_):
    """Compute class logits from the RBM hidden layer followed by a dense layer.

    Args:
        input_: Feature dict; the ``'image'`` entry is cast to float32 and
            multiplied with the (784, HIDDEN_NEURONS) RBM weight matrix.

    Returns:
        The output of a ``N_CLASSES``-unit dense layer applied to the hidden
        activations (unnormalised logits).
    """
    visible = tf.cast(input_['image'], dtype=tf.float32)

    # Same variable names as the RBM stage, so they can be warm-started from its checkpoint.
    with tf.variable_scope('rbm', reuse=tf.AUTO_REUSE):
        rbm_weights = tf.get_variable(
            name='weights',
            shape=(784, HIDDEN_NEURONS),
            initializer=tf.keras.initializers.RandomNormal(stddev=0.01, dtype=tf.float32),
            trainable=True,
        )
        rbm_hidden_bias = tf.get_variable(
            name='bias',
            shape=(1, HIDDEN_NEURONS),
            initializer=tf.keras.initializers.Zeros(dtype=tf.float32),
            trainable=True,
        )

    hidden_features = tf.math.sigmoid((visible @ rbm_weights) + rbm_hidden_bias)

    # The classifier head lives in its own 'softmax' scope.
    with tf.variable_scope('softmax'):
        class_logits = tf.layers.Dense(units=N_CLASSES)(hidden_features)

    return class_logits

In [0]:
def belief_net_model_fn(features, labels=None, mode=None, params=None):
    """Estimator ``model_fn`` for the classifier built on top of the RBM.

    Args:
        features: Feature dict forwarded to ``belief_net_inference``.
        labels: Integer class labels; required for TRAIN and EVAL.
        mode: A ``tf.estimator.ModeKeys`` value.
        params: Unused.

    Returns:
        An ``EstimatorSpec`` for the requested mode. Predictions contain the
        per-example ``'classes'`` and the softmax ``'probabilities'``.
    """
    logits = belief_net_inference(features)

    # axis=1 picks the best class for each example; the default axis=0 would
    # reduce over the batch dimension and return one index per class instead.
    predictions = {'classes': tf.argmax(logits, axis=1),
                   'probabilities': tf.nn.softmax(logits)
                  }

    if mode == tf.estimator.ModeKeys.PREDICT:
        export_outputs = {
            'predict_output': tf.estimator.export.PredictOutput(predictions)
        }
        return tf.estimator.EstimatorSpec(mode=tf.estimator.ModeKeys.PREDICT,
                                          predictions=predictions,
                                          export_outputs=export_outputs)

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        # Loss and accuracy are identical for training and evaluation.
        cross_entropy = tf.losses.softmax_cross_entropy(tf.one_hot(labels, N_CLASSES), logits=logits)
        accuracy = tf.metrics.accuracy(labels=tf.cast(labels, tf.int32),
                                       predictions=tf.argmax(logits, axis=1),
                                       name='accuracy')
        eval_metric = {
            'accuracy': accuracy
        }

        if mode == tf.estimator.ModeKeys.TRAIN:
            # Staircase decay: the rate is multiplied by DEC_RATE once per epoch.
            lr = tf.train.exponential_decay(learning_rate=L_RATE,
                                            global_step=tf.train.get_or_create_global_step(),
                                            decay_steps=steps_per_epoch,
                                            decay_rate=DEC_RATE,
                                            staircase=True)

            nn_opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=MOMENTUM)
            # Only variables under the 'softmax' scope are optimised; the RBM
            # weights are left as loaded.
            nn_train_op = nn_opt.minimize(loss=cross_entropy,
                                          global_step=tf.train.get_or_create_global_step(),
                                          var_list=tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                                                     scope='softmax'))

            return tf.estimator.EstimatorSpec(mode=tf.estimator.ModeKeys.TRAIN,
                                              loss=cross_entropy,
                                              train_op=nn_train_op,
                                              eval_metric_ops=eval_metric)

        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          loss=cross_entropy,
                                          eval_metric_ops=eval_metric)


In [10]:
# Initialise only the RBM weights and hidden bias from the pre-trained checkpoint.
ws = tf.estimator.WarmStartSettings(ckpt_to_initialize_from=checkpoint,
                                    vars_to_warm_start=["rbm/weights", "rbm/bias"])

bn_estimator = tf.estimator.Estimator(model_fn=belief_net_model_fn,
                                      model_dir=model_dir,
                                      config=run_config,
                                      params={},
                                      warm_start_from=ws)
# Nothing removes this directory beforehand, so tolerate it already existing
# when the cell is re-run.
os.makedirs(bn_estimator.eval_dir(), exist_ok=True)


# Keep the three best exported models according to the evaluation loss.
exporter = tf.estimator.BestExporter(name="best_model",
                                     serving_input_receiver_fn=serving_input_rec_fn,
                                     exports_to_keep=3)

# NOTE(review): with no `eval_dir` argument the hook presumably reads metrics from
# bn_estimator.eval_dir(), while the EvalSpec below is named 'Validation' and may
# write to a name-suffixed directory -- confirm that early stopping can fire.
early_stopping = tf.contrib.estimator.stop_if_no_decrease_hook(estimator=bn_estimator,
                                                               metric_name='loss',
                                                               max_steps_without_decrease=5,
                                                               min_steps=5)

train_spec = tf.estimator.TrainSpec(input_fn=data_input_fn(tr_img, tr_lab, phase='train'),
                                    max_steps=EPOCHS * steps_per_epoch,
                                    hooks=[early_stopping])

# The eval input_fn yields the whole validation set as one batch, hence steps=1.
eval_spec = tf.estimator.EvalSpec(input_fn=data_input_fn(eval_img, eval_lab, phase='eval'),
                                  steps=1,
                                  name='Validation',
                                  hooks=None,
                                  exporters=[exporter],
                                  start_delay_secs=0,
                                  throttle_secs=0)

tf.estimator.train_and_evaluate(estimator=bn_estimator,
                                train_spec=train_spec,
                                eval_spec=eval_spec)

INFO:tensorflow:Using config: {'_model_dir': 'belief_net_dir', '_tf_random_seed': None, '_save_summary_steps': 10, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': <tensorflow.contrib.distribute.python.parameter_server_strategy.ParameterServerStrategy object at 0x7fc2e1c596a0>, '_device_fn': None, '_protocol': None, '_eval_distribute': <tensorflow.contrib.distribute.python.parameter_server_strategy.ParameterServerStrategy object at 0x7fc2e1c597f0>, '_experimental_distribute': DistributeConfig(train_distribute=<tensorflow.contrib.distribute.python.parameter_server_strategy.ParameterServerStrategy object at 0x7fc2e1c596a0>, eval_distribute=<tensorflow.contrib.distribute.python.parameter_server_strategy.ParameterServerStrategy object 

({'accuracy': 0.923, 'global_step': 50000, 'loss': 0.26785156},
 [b'belief_net_dir/export/best_model/1556354932'])

In [0]:
# Flatten the 10000 test images into rows and rescale pixels by 1/255,
# mirroring the preprocessing of the training images.
test_lab = test[1]
test_img = np.reshape(test[0], (10000, -1)) / 255

In [83]:
# Rebuild the estimator on the same model_dir (no warm start this time) and
# run prediction over the test images.
bn_estimator = tf.estimator.Estimator(
    model_fn=belief_net_model_fn,
    model_dir=model_dir,
    config=run_config,
    params={},
)
# yield_single_examples=False returns whole batches instead of single rows;
# the generator is materialised into a list right away.
predictions = list(
    bn_estimator.predict(
        input_fn=data_input_fn(images=test_img, phase='test'),
        yield_single_examples=False,
    )
)

INFO:tensorflow:Using config: {'_model_dir': 'belief_net_dir', '_tf_random_seed': None, '_save_summary_steps': 10, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': <tensorflow.contrib.distribute.python.parameter_server_strategy.ParameterServerStrategy object at 0x7fc2ac3073c8>, '_device_fn': None, '_protocol': None, '_eval_distribute': <tensorflow.contrib.distribute.python.parameter_server_strategy.ParameterServerStrategy object at 0x7fc2ac307518>, '_experimental_distribute': DistributeConfig(train_distribute=<tensorflow.contrib.distribute.python.parameter_server_strategy.ParameterServerStrategy object at 0x7fc2ac3073c8>, eval_distribute=<tensorflow.contrib.distribute.python.parameter_server_strategy.ParameterServerStrategy object 

In [0]:
# The first batch holds the predictions (the test input_fn batches all images
# together); take the most probable class of every row in one vectorised call.
classes = np.argmax(predictions[0]['probabilities'], axis=1)

In [79]:
# Fraction of test images whose predicted class matches the true label.
accuracy_score(test_lab, np.asarray(classes))

0.9304

In [80]:
# Rows are true digits, columns are predicted digits.
confusion_matrix(test_lab, np.asarray(classes))

array([[ 957,    0,    2,    0,    0,    9,    8,    1,    3,    0],
       [   0, 1116,    3,    5,    0,    2,    4,    0,    5,    0],
       [  12,    5,  937,   14,   10,    2,   11,   19,   20,    2],
       [   2,    0,   12,  932,    0,   27,    3,   13,   17,    4],
       [   0,    1,   13,    0,  915,    0,    4,    3,   12,   34],
       [   7,    3,    4,   32,    6,  798,   11,    3,   21,    7],
       [   9,    2,   10,    1,   10,    9,  908,    1,    8,    0],
       [   3,    6,   22,    4,   11,    1,    0,  947,    5,   29],
       [   7,    2,    4,   28,    7,   24,    9,    5,  880,    8],
       [   5,    5,    4,   11,   38,    6,    0,   17,    9,  914]])