diff --git a/examples/mnist/keras/README.md b/examples/mnist/keras/README.md
index 106605ae..d4ed2dd7 100644
--- a/examples/mnist/keras/README.md
+++ b/examples/mnist/keras/README.md
@@ -65,7 +65,7 @@ In this mode, Spark will distribute the MNIST dataset (as CSV) across the worker
     --conf spark.task.cpus=${CORES_PER_WORKER} \
     --conf spark.executorEnv.JAVA_HOME="$JAVA_HOME" \
     ${TFoS_HOME}/examples/mnist/keras/mnist_mlp.py \
-    --cluster_size 3 \
+    --cluster_size ${SPARK_WORKER_INSTANCES} \
     --input_mode spark \
    --images ${TFoS_HOME}/mnist/csv/train/images \
    --labels ${TFoS_HOME}/mnist/csv/train/labels \
diff --git a/examples/mnist/keras/mnist_mlp.py b/examples/mnist/keras/mnist_mlp.py
index 93ad6094..4e0fd3c6 100644
--- a/examples/mnist/keras/mnist_mlp.py
+++ b/examples/mnist/keras/mnist_mlp.py
@@ -13,6 +13,7 @@ def main_fun(args, ctx):
   import tensorflow as tf
   from tensorflow.python import keras
   from tensorflow.python.keras import backend as K
+  from tensorflow.python.keras.datasets import mnist
   from tensorflow.python.keras.models import Sequential, load_model, save_model
   from tensorflow.python.keras.layers import Dense, Dropout
   from tensorflow.python.keras.optimizers import RMSprop
@@ -51,7 +52,6 @@ def generate_rdd_data(tf_feed, batch_size):
   # the data, shuffled and split between train and test sets
   if args.input_mode == 'tf':
-    from tensorflow.python.keras.datasets import mnist
     (x_train, y_train), (x_test, y_test) = mnist.load_data()
     x_train = x_train.reshape(60000, 784)
     x_test = x_test.reshape(10000, 784)
@@ -64,6 +64,9 @@ def generate_rdd_data(tf_feed, batch_size):
   else:  # args.mode == 'spark'
     x_train = tf.placeholder(tf.float32, [None, IMAGE_PIXELS * IMAGE_PIXELS], name="x_train")
     y_train = tf.placeholder(tf.float32, [None, 10], name="y_train")
+    (_, _), (x_test, y_test) = mnist.load_data()
+    x_test = x_test.reshape(10000, 784)
+    y_test = keras.utils.to_categorical(y_test, num_classes)

   model = Sequential()
   model.add(Dense(512, activation='relu', input_shape=(784,)))
@@ -109,6 +112,7 @@ def save_checkpoint(epoch, logs=None):
                         steps_per_epoch=args.steps_per_epoch,
                         epochs=args.epochs,
                         verbose=1,
+                        validation_data=(x_test, y_test),
                         callbacks=callbacks)

   if args.export_dir and ctx.job_name == 'worker' and ctx.task_index == 0:
@@ -147,7 +151,7 @@ def save_checkpoint(epoch, logs=None):
   parser = argparse.ArgumentParser()
   parser.add_argument("--cluster_size", help="number of nodes in the cluster", type=int, default=num_executors)
   parser.add_argument("--epochs", help="number of epochs of training data", type=int, default=20)
-  parser.add_argument("--export_dir", help="directory to export saved_mode")
+  parser.add_argument("--export_dir", help="directory to export saved_model")
   parser.add_argument("--images", help="HDFS path to MNIST images in parallelized CSV format")
   parser.add_argument("--input_mode", help="input mode (tf|spark)", default="tf")
   parser.add_argument("--labels", help="HDFS path to MNIST labels in parallelized CSV format")
diff --git a/examples/mnist/keras/mnist_mlp_estimator.py b/examples/mnist/keras/mnist_mlp_estimator.py
index f948b351..7f3329b6 100644
--- a/examples/mnist/keras/mnist_mlp_estimator.py
+++ b/examples/mnist/keras/mnist_mlp_estimator.py
@@ -3,7 +3,6 @@
 from tensorflow.python import keras
 from tensorflow.python.keras.models import Sequential
 from tensorflow.python.keras.layers import Dense, Dropout
-from tensorflow.python.keras.optimizers import RMSprop

 from tensorflowonspark import TFNode

@@ -31,7 +30,7 @@ def main_fun(args, ctx):
   model.add(Dropout(0.2))
   model.add(Dense(10, activation='softmax'))
   model.compile(loss='categorical_crossentropy',
-                optimizer=RMSprop(),
+                optimizer=tf.train.RMSPropOptimizer(learning_rate=0.001),
                 metrics=['accuracy'])
   model.summary()

@@ -43,7 +42,7 @@ def main_fun(args, ctx):
   if args.input_mode == 'tf':
     # For InputMode.TENSORFLOW, just use data in memory
     train_input_fn = tf.estimator.inputs.numpy_input_fn(
-        x={"dense_1_input": x_train},
+        x={"dense_input": x_train},
         y=y_train,
         batch_size=128,
         num_epochs=None,
@@ -70,28 +69,17 @@ def train_input_fn():
     # eval_input_fn ALWAYS uses data loaded in memory, since InputMode.SPARK can only feed one RDD at a time
     eval_input_fn = tf.estimator.inputs.numpy_input_fn(
-        x={"dense_1_input": x_test},
+        x={"dense_input": x_test},
         y=y_test,
         num_epochs=args.epochs,
         shuffle=False)

-    # serving_input_receiver_fn ALWAYS expects serialized TFExamples in a placeholder.
-    def serving_input_receiver_fn():
-      """An input receiver that expects a serialized tf.Example."""
-      serialized_tf_example = tf.placeholder(dtype=tf.string,
-                                             shape=[args.batch_size],
-                                             name='input_example_tensor')
-      receiver_tensors = {'dense_1_input': serialized_tf_example}
-      feature_spec = {'dense_1_input': tf.FixedLenFeature(784, tf.string)}
-      features = tf.parse_example(serialized_tf_example, feature_spec)
-      return tf.estimator.export.ServingInputReceiver(features, receiver_tensors)
-
     # setup tf.estimator.train_and_evaluate() w/ FinalExporter
-    exporter = tf.estimator.FinalExporter("serving", serving_input_receiver_fn=serving_input_receiver_fn)
+    feature_spec = {'dense_input': tf.FixedLenFeature(784, tf.float32)}
+    exporter = tf.estimator.FinalExporter("serving", serving_input_receiver_fn=tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec))

     train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=args.steps)
     eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn, exporters=exporter)

     tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
-
   else:  # mode == 'inference'
     if args.input_mode == 'spark':
       tf_feed = TFNode.DataFeed(ctx.mgr)
@@ -137,7 +125,7 @@ def predict_input_fn():
   parser.add_argument("--input_mode", help="input mode (tf|spark)", default="tf")
   parser.add_argument("--labels", help="HDFS path to MNIST labels in parallelized CSV format")
   parser.add_argument("--model_dir", help="directory to write model checkpoints")
-  parser.add_argument("--mode", help="(train|inference")
+  parser.add_argument("--mode", help="(train|inference)", default="train")
   parser.add_argument("--output", help="HDFS path to save test/inference output", default="predictions")
   parser.add_argument("--num_ps", help="number of ps nodes", type=int, default=1)
   parser.add_argument("--steps", help="max number of steps to train", type=int, default=2000)
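Note on the hunk above: the hand-written `serving_input_receiver_fn` is replaced by TensorFlow's stock helper, which also fixes the old receiver's declaration of the 784 pixel values as `tf.string` features. As a rough sketch of what the one-liner expands to (TF 1.x API; `dense_input` and the feature spec come from the diff, the rest is illustrative):

    import tensorflow as tf

    # The helper returns a function that, when called by the exporter, builds a
    # receiver that parses serialized tf.Example protos into the feature dict.
    feature_spec = {'dense_input': tf.FixedLenFeature(784, tf.float32)}
    serving_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)

    receiver = serving_input_fn()
    # receiver.receiver_tensors: {'examples': <tf.string placeholder>}
    # receiver.features: {'dense_input': <float32 Tensor of shape [?, 784]>}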
diff --git a/examples/mnist/spark/mnist_dist.py b/examples/mnist/spark/mnist_dist.py
index d94930f2..8574a0d2 100755
--- a/examples/mnist/spark/mnist_dist.py
+++ b/examples/mnist/spark/mnist_dist.py
@@ -1,4 +1,4 @@
-# Copyright 2018 Yahoo Inc.
+# Copyright 2017 Yahoo Inc.
 # Licensed under the terms of the Apache 2.0 license.
 # Please see LICENSE file in the project root for terms.

@@ -6,173 +6,140 @@
 from __future__ import absolute_import
 from __future__ import division
-from __future__ import nested_scopes
 from __future__ import print_function

-from datetime import datetime
-import tensorflow as tf
-from tensorflowonspark import TFNode
-

 def print_log(worker_num, arg):
   print("{0}: {1}".format(worker_num, arg))


-class ExportHook(tf.train.SessionRunHook):
-  def __init__(self, export_dir, input_tensor, output_tensor):
-    self.export_dir = export_dir
-    self.input_tensor = input_tensor
-    self.output_tensor = output_tensor
-
-  def end(self, session):
-    print("{} ======= Exporting to: {}".format(datetime.now().isoformat(), self.export_dir))
-    signatures = {
-      tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: {
-        'inputs': {'image': self.input_tensor},
-        'outputs': {'prediction': self.output_tensor},
-        'method_name': tf.saved_model.signature_constants.PREDICT_METHOD_NAME
-      }
-    }
-    TFNode.export_saved_model(session,
-                              self.export_dir,
-                              tf.saved_model.tag_constants.SERVING,
-                              signatures)
-    print("{} ======= Done exporting".format(datetime.now().isoformat()))
-
-
 def map_fun(args, ctx):
+  from datetime import datetime
   import math
   import numpy
+  import tensorflow as tf
   import time

   worker_num = ctx.worker_num
   job_name = ctx.job_name
   task_index = ctx.task_index

-  # Delay PS nodes a bit, since workers seem to reserve GPUs more quickly/reliably (w/o conflict)
-  if job_name == "ps":
-    time.sleep((worker_num + 1) * 5)
-
   # Parameters
   IMAGE_PIXELS = 28
   hidden_units = 128
-  batch_size = args.batch_size

   # Get TF cluster and server instances
   cluster, server = ctx.start_cluster_server(1, args.rdma)

-  def feed_dict(batch):
-    # Convert from [(images, labels)] to two numpy arrays of the proper type
-    images = []
-    labels = []
-    for item in batch:
-      images.append(item[0])
-      labels.append(item[1])
-    xs = numpy.array(images)
-    xs = xs.astype(numpy.float32)
-    xs = xs / 255.0
-    ys = numpy.array(labels)
-    ys = ys.astype(numpy.uint8)
-    return (xs, ys)
+  # Create generator for Spark data feed
+  tf_feed = ctx.get_data_feed(args.mode == 'train')
+
+  def rdd_generator():
+    while not tf_feed.should_stop():
+      batch = tf_feed.next_batch(1)
+      if len(batch) == 0:
+        return
+      row = batch[0]
+      image = numpy.array(row[0]).astype(numpy.float32) / 255.0
+      label = numpy.array(row[1]).astype(numpy.int64)
+      yield (image, label)

   if job_name == "ps":
     server.join()
   elif job_name == "worker":
-    # Assigns ops to the local worker by default.
     with tf.device(tf.train.replica_device_setter(
         worker_device="/job:worker/task:%d" % task_index,
         cluster=cluster)):

-      # Placeholders or QueueRunner/Readers for input data
-      with tf.name_scope('inputs'):
-        x = tf.placeholder(tf.float32, [None, IMAGE_PIXELS * IMAGE_PIXELS], name="x")
-        y_ = tf.placeholder(tf.float32, [None, 10], name="y_")
-
-        x_img = tf.reshape(x, [-1, IMAGE_PIXELS, IMAGE_PIXELS, 1])
-        tf.summary.image("x_img", x_img)
-
-      with tf.name_scope('layer'):
-        # Variables of the hidden layer
-        with tf.name_scope('hidden_layer'):
-          hid_w = tf.Variable(tf.truncated_normal([IMAGE_PIXELS * IMAGE_PIXELS, hidden_units], stddev=1.0 / IMAGE_PIXELS), name="hid_w")
-          hid_b = tf.Variable(tf.zeros([hidden_units]), name="hid_b")
-          tf.summary.histogram("hidden_weights", hid_w)
-          hid_lin = tf.nn.xw_plus_b(x, hid_w, hid_b)
-          hid = tf.nn.relu(hid_lin)
-
-        # Variables of the softmax layer
-        with tf.name_scope('softmax_layer'):
-          sm_w = tf.Variable(tf.truncated_normal([hidden_units, 10], stddev=1.0 / math.sqrt(hidden_units)), name="sm_w")
-          sm_b = tf.Variable(tf.zeros([10]), name="sm_b")
-          tf.summary.histogram("softmax_weights", sm_w)
-          y = tf.nn.softmax(tf.nn.xw_plus_b(hid, sm_w, sm_b))
+      # Dataset for input data
+      ds = tf.data.Dataset.from_generator(rdd_generator, (tf.float32, tf.float32), (tf.TensorShape([IMAGE_PIXELS * IMAGE_PIXELS]), tf.TensorShape([10]))).batch(args.batch_size)
+      iterator = ds.make_one_shot_iterator()
+      x, y_ = iterator.get_next()

-      global_step = tf.train.get_or_create_global_step()
+      # Variables of the hidden layer
+      hid_w = tf.Variable(tf.truncated_normal([IMAGE_PIXELS * IMAGE_PIXELS, hidden_units],
+                          stddev=1.0 / IMAGE_PIXELS), name="hid_w")
+      hid_b = tf.Variable(tf.zeros([hidden_units]), name="hid_b")
+      tf.summary.histogram("hidden_weights", hid_w)
+
+      # Variables of the softmax layer
+      sm_w = tf.Variable(tf.truncated_normal([hidden_units, 10],
+                         stddev=1.0 / math.sqrt(hidden_units)), name="sm_w")
+      sm_b = tf.Variable(tf.zeros([10]), name="sm_b")
+      tf.summary.histogram("softmax_weights", sm_w)
+
+      x_img = tf.reshape(x, [-1, IMAGE_PIXELS, IMAGE_PIXELS, 1])
+      tf.summary.image("x_img", x_img)
+
+      hid_lin = tf.nn.xw_plus_b(x, hid_w, hid_b)
+      hid = tf.nn.relu(hid_lin)

-      with tf.name_scope('loss'):
-        loss = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
-        tf.summary.scalar("loss", loss)
+      y = tf.nn.softmax(tf.nn.xw_plus_b(hid, sm_w, sm_b))

-      with tf.name_scope('train'):
-        train_op = tf.train.AdagradOptimizer(0.01).minimize(loss, global_step=global_step)
+      global_step = tf.train.get_or_create_global_step()
+
+      loss = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
+      tf.summary.scalar("loss", loss)
+      train_op = tf.train.AdagradOptimizer(0.01).minimize(
+          loss, global_step=global_step)

       # Test trained model
       label = tf.argmax(y_, 1, name="label")
       prediction = tf.argmax(y, 1, name="prediction")
       correct_prediction = tf.equal(prediction, label)
-
       accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy")
       tf.summary.scalar("acc", accuracy)
+
+      saver = tf.train.Saver()
       summary_op = tf.summary.merge_all()
+      init_op = tf.global_variables_initializer()

+    # Create a "supervisor", which oversees the training process and stores model state into HDFS
     logdir = ctx.absolute_path(args.model)
     print("tensorflow model path: {0}".format(logdir))
+    summary_writer = tf.summary.FileWriter("tensorboard_%d" % worker_num, graph=tf.get_default_graph())
-    if job_name == "worker" and task_index == 0:
-      summary_writer = tf.summary.FileWriter(logdir, graph=tf.get_default_graph())
-
-    # The MonitoredTrainingSession takes care of session initialization, restoring from
-    # a checkpoint, and closing when done or an error occurs
     with tf.train.MonitoredTrainingSession(master=server.target,
-                                           is_chief=(task_index == 0),
-                                           checkpoint_dir=logdir,
-                                           save_checkpoint_secs=10,
-                                           hooks=[tf.train.StopAtStepHook(last_step=args.steps)],
-                                           chief_only_hooks=[ExportHook(ctx.absolute_path(args.export_dir), x, prediction)]) as mon_sess:
+                                           is_chief=(task_index == 0),
+                                           scaffold=tf.train.Scaffold(init_op=init_op, summary_op=summary_op, saver=saver),
+                                           checkpoint_dir=logdir,
+                                           hooks=[tf.train.StopAtStepHook(last_step=args.steps)]) as sess:
+      print("{} session ready".format(datetime.now().isoformat()))
+
+      # Loop until the session shuts down or feed has no more data
       step = 0
-      tf_feed = ctx.get_data_feed(args.mode == "train")
-      while not mon_sess.should_stop() and not tf_feed.should_stop():
-        # Run a training step asynchronously
+      while not sess.should_stop() and not tf_feed.should_stop():
+        # Run a training step asynchronously.
         # See `tf.train.SyncReplicasOptimizer` for additional details on how to
         # perform *synchronous* training.
-        # using feed_dict
-        batch_xs, batch_ys = feed_dict(tf_feed.next_batch(batch_size))
-        feed = {x: batch_xs, y_: batch_ys}
-
-        if len(batch_xs) > 0:
-          if args.mode == "train":
-            _, summary, step = mon_sess.run([train_op, summary_op, global_step], feed_dict=feed)
-            # print accuracy and save model checkpoint to HDFS every 100 steps
-            if (step % 100 == 0):
-              print("{0} step: {1} accuracy: {2}".format(datetime.now().isoformat(), step, mon_sess.run(accuracy, {x: batch_xs, y_: batch_ys})))
-
-            if task_index == 0:
-              summary_writer.add_summary(summary, step)
-          else:  # args.mode == "inference"
-            labels, preds, acc = mon_sess.run([label, prediction, accuracy], feed_dict=feed)
-
-            results = ["{0} Label: {1}, Prediction: {2}".format(datetime.now().isoformat(), l, p) for l, p in zip(labels, preds)]
-            tf_feed.batch_results(results)
-            print("results: {0}, acc: {1}".format(results, acc))
-
-      if mon_sess.should_stop() or step >= args.steps:
-        tf_feed.terminate()
-
-    # Ask for all the services to stop.
- print("{0} stopping MonitoredTrainingSession".format(datetime.now().isoformat())) - - if job_name == "worker" and task_index == 0: - summary_writer.close() + if args.mode == "train": + _, summary, step = sess.run([train_op, summary_op, global_step]) + if (step % 100 == 0): + print("{} step: {} accuracy: {}".format(datetime.now().isoformat(), step, sess.run(accuracy))) + if task_index == 0: + summary_writer.add_summary(summary, step) + else: # args.mode == "inference" + labels, preds, acc = sess.run([label, prediction, accuracy]) + results = ["{} Label: {}, Prediction: {}".format(datetime.now().isoformat(), l, p) for l, p in zip(labels, preds)] + tf_feed.batch_results(results) + print("acc: {}".format(acc)) + + print("{} stopping MonitoredTrainingSession".format(datetime.now().isoformat())) + + # WORKAROUND FOR https://github.com/tensorflow/tensorflow/issues/21745 + # wait for all other nodes to complete (via done files) + done_dir = "{}/{}/done".format(ctx.absolute_path(args.model), args.mode) + print("Writing done file to: {}".format(done_dir)) + tf.gfile.MakeDirs(done_dir) + with tf.gfile.GFile("{}/{}".format(done_dir, ctx.task_index), 'w') as done_file: + done_file.write("done") + + for i in range(60): + if len(tf.gfile.ListDirectory(done_dir)) < len(ctx.cluster_spec['worker']): + print("{} Waiting for other nodes {}".format(datetime.now().isoformat(), i)) + time.sleep(1) + else: + print("{} All nodes done".format(datetime.now().isoformat())) + break diff --git a/examples/mnist/spark/mnist_dist_dataset.py b/examples/mnist/spark/mnist_dist_dataset.py deleted file mode 100755 index 3a0d9c4c..00000000 --- a/examples/mnist/spark/mnist_dist_dataset.py +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright 2017 Yahoo Inc. -# Licensed under the terms of the Apache 2.0 license. -# Please see LICENSE file in the project root for terms. - -# Distributed MNIST on grid based on TensorFlow MNIST example - -from __future__ import absolute_import -from __future__ import division -from __future__ import nested_scopes -from __future__ import print_function - - -def print_log(worker_num, arg): - print("{0}: {1}".format(worker_num, arg)) - - -def map_fun(args, ctx): - from tensorflowonspark import TFNode - from datetime import datetime - import math - import numpy - import tensorflow as tf - - worker_num = ctx.worker_num - job_name = ctx.job_name - task_index = ctx.task_index - - # Parameters - IMAGE_PIXELS = 28 - hidden_units = 128 - - # Get TF cluster and server instances - cluster, server = TFNode.start_cluster_server(ctx, 1, args.rdma) - - # Create generator for Spark data feed - tf_feed = TFNode.DataFeed(ctx.mgr, args.mode == "train") - - def rdd_generator(): - while not tf_feed.should_stop(): - batch = tf_feed.next_batch(1)[0] - image = numpy.array(batch[0]) - image = image.astype(numpy.float32) / 255.0 - label = numpy.array(batch[1]) - label = label.astype(numpy.int64) - yield (image, label) - - if job_name == "ps": - server.join() - elif job_name == "worker": - - # Assigns ops to the local worker by default. 
diff --git a/examples/mnist/spark/mnist_dist_dataset.py b/examples/mnist/spark/mnist_dist_dataset.py
deleted file mode 100755
index 3a0d9c4c..00000000
--- a/examples/mnist/spark/mnist_dist_dataset.py
+++ /dev/null
@@ -1,160 +0,0 @@
-# Copyright 2017 Yahoo Inc.
-# Licensed under the terms of the Apache 2.0 license.
-# Please see LICENSE file in the project root for terms.
-
-# Distributed MNIST on grid based on TensorFlow MNIST example
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import nested_scopes
-from __future__ import print_function
-
-
-def print_log(worker_num, arg):
-  print("{0}: {1}".format(worker_num, arg))
-
-
-def map_fun(args, ctx):
-  from tensorflowonspark import TFNode
-  from datetime import datetime
-  import math
-  import numpy
-  import tensorflow as tf
-
-  worker_num = ctx.worker_num
-  job_name = ctx.job_name
-  task_index = ctx.task_index
-
-  # Parameters
-  IMAGE_PIXELS = 28
-  hidden_units = 128
-
-  # Get TF cluster and server instances
-  cluster, server = TFNode.start_cluster_server(ctx, 1, args.rdma)
-
-  # Create generator for Spark data feed
-  tf_feed = TFNode.DataFeed(ctx.mgr, args.mode == "train")
-
-  def rdd_generator():
-    while not tf_feed.should_stop():
-      batch = tf_feed.next_batch(1)[0]
-      image = numpy.array(batch[0])
-      image = image.astype(numpy.float32) / 255.0
-      label = numpy.array(batch[1])
-      label = label.astype(numpy.int64)
-      yield (image, label)
-
-  if job_name == "ps":
-    server.join()
-  elif job_name == "worker":
-
-    # Assigns ops to the local worker by default.
-    with tf.device(tf.train.replica_device_setter(
-        worker_device="/job:worker/task:%d" % task_index,
-        cluster=cluster)):
-
-      # Dataset for input data
-      ds = tf.data.Dataset.from_generator(rdd_generator, (tf.float32, tf.float32), (tf.TensorShape([IMAGE_PIXELS * IMAGE_PIXELS]), tf.TensorShape([10]))).batch(args.batch_size)
-      iterator = ds.make_one_shot_iterator()
-      x, y_ = iterator.get_next()
-
-      # Variables of the hidden layer
-      hid_w = tf.Variable(tf.truncated_normal([IMAGE_PIXELS * IMAGE_PIXELS, hidden_units],
-                          stddev=1.0 / IMAGE_PIXELS), name="hid_w")
-      hid_b = tf.Variable(tf.zeros([hidden_units]), name="hid_b")
-      tf.summary.histogram("hidden_weights", hid_w)
-
-      # Variables of the softmax layer
-      sm_w = tf.Variable(tf.truncated_normal([hidden_units, 10],
-                         stddev=1.0 / math.sqrt(hidden_units)), name="sm_w")
-      sm_b = tf.Variable(tf.zeros([10]), name="sm_b")
-      tf.summary.histogram("softmax_weights", sm_w)
-
-      # # Placeholders or QueueRunner/Readers for input data
-      # x = tf.placeholder(tf.float32, [None, IMAGE_PIXELS * IMAGE_PIXELS], name="x")
-      # y_ = tf.placeholder(tf.float32, [None, 10], name="y_")
-
-      x_img = tf.reshape(x, [-1, IMAGE_PIXELS, IMAGE_PIXELS, 1])
-      tf.summary.image("x_img", x_img)
-
-      hid_lin = tf.nn.xw_plus_b(x, hid_w, hid_b)
-      hid = tf.nn.relu(hid_lin)
-
-      y = tf.nn.softmax(tf.nn.xw_plus_b(hid, sm_w, sm_b))
-
-      global_step = tf.Variable(0)
-
-      loss = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
-      tf.summary.scalar("loss", loss)
-
-      train_op = tf.train.AdagradOptimizer(0.01).minimize(
-          loss, global_step=global_step)
-
-      # Test trained model
-      label = tf.argmax(y_, 1, name="label")
-      prediction = tf.argmax(y, 1, name="prediction")
-      correct_prediction = tf.equal(prediction, label)
-
-      accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy")
-      tf.summary.scalar("acc", accuracy)
-
-      saver = tf.train.Saver()
-      summary_op = tf.summary.merge_all()
-      init_op = tf.global_variables_initializer()
-
-    # Create a "supervisor", which oversees the training process and stores model state into HDFS
-    logdir = TFNode.hdfs_path(ctx, args.model)
-    print("tensorflow model path: {0}".format(logdir))
-    summary_writer = tf.summary.FileWriter("tensorboard_%d" % worker_num, graph=tf.get_default_graph())
-
-    if args.mode == "train":
-      sv = tf.train.Supervisor(is_chief=(task_index == 0),
-                               logdir=logdir,
-                               init_op=init_op,
-                               summary_op=None,
-                               saver=saver,
-                               global_step=global_step,
-                               stop_grace_secs=300,
-                               save_model_secs=10)
-    else:
-      sv = tf.train.Supervisor(is_chief=(task_index == 0),
-                               logdir=logdir,
-                               summary_op=None,
-                               saver=saver,
-                               global_step=global_step,
-                               stop_grace_secs=300,
-                               save_model_secs=0)
-
-    # The supervisor takes care of session initialization, restoring from
-    # a checkpoint, and closing when done or an error occurs.
-    with sv.managed_session(server.target) as sess:
-      print("{0} session ready".format(datetime.now().isoformat()))
-
-      # Loop until the supervisor shuts down or 1000000 steps have completed.
-      step = 0
-      while not sv.should_stop() and not tf_feed.should_stop() and step < args.steps:
-        # Run a training step asynchronously.
-        # See `tf.train.SyncReplicasOptimizer` for additional details on how to
-        # perform *synchronous* training.
-
-        if args.mode == "train":
-          _, summary, step = sess.run([train_op, summary_op, global_step])
-          # print accuracy and save model checkpoint to HDFS every 100 steps
-          if (step % 100 == 0):
-            print("{0} step: {1} accuracy: {2}".format(datetime.now().isoformat(), step, sess.run(accuracy)))
-
-          if sv.is_chief:
-            summary_writer.add_summary(summary, step)
-        else:  # args.mode == "inference"
-          labels, preds, acc = sess.run([label, prediction, accuracy])
-
-          results = ["{0} Label: {1}, Prediction: {2}".format(datetime.now().isoformat(), l, p) for l, p in zip(labels, preds)]
-          tf_feed.batch_results(results)
-          print("acc: {0}".format(acc))
-
-        if sv.should_stop() or step >= args.steps:
-          tf_feed.terminate()
-
-      # Ask for all the services to stop.
-      print("{0} stopping supervisor".format(datetime.now().isoformat()))
-      sv.stop()
diff --git a/examples/mnist/spark/mnist_spark.py b/examples/mnist/spark/mnist_spark.py
index c2757e1f..730f0b66 100755
--- a/examples/mnist/spark/mnist_spark.py
+++ b/examples/mnist/spark/mnist_spark.py
@@ -25,7 +25,6 @@
 parser = argparse.ArgumentParser()
 parser.add_argument("--batch_size", help="number of records per batch", type=int, default=100)
 parser.add_argument("--epochs", help="number of epochs", type=int, default=1)
-parser.add_argument("--export_dir", help="HDFS path to export saved_model", default="mnist_export")
 parser.add_argument("--format", help="example format: (csv|tfr)", choices=["csv", "tfr"], default="csv")
 parser.add_argument("--images", help="HDFS path to MNIST images in parallelized format")
 parser.add_argument("--labels", help="HDFS path to MNIST labels in parallelized format")
@@ -56,23 +55,18 @@ def toNumpy(bytestr):
     return (image, label)

   dataRDD = images.map(lambda x: toNumpy(bytes(x[0])))
-else:  # "csv"
-  print("zipping images and labels")
-  # If partitions of images/labels don't match, you can use the following code:
-  # images = sc.textFile(args.images).map(lambda ln: [int(x) for x in ln.split(',')]).zipWithIndex().map(lambda x: (x[1], x[0]))
-  # labels = sc.textFile(args.labels).map(lambda ln: [float(x) for x in ln.split(',')]).zipWithIndex().map(lambda x: (x[1], x[0]))
-  # dataRDD = images.join(labels).map(lambda x: (x[1][0], x[1][1]))
+else:  # args.format == "csv":
   images = sc.textFile(args.images).map(lambda ln: [int(x) for x in ln.split(',')])
   labels = sc.textFile(args.labels).map(lambda ln: [float(x) for x in ln.split(',')])
+  print("zipping images and labels")
   dataRDD = images.zip(labels)

-cluster = TFCluster.run(sc, mnist_dist.map_fun, args, args.cluster_size, num_ps, args.tensorboard, TFCluster.InputMode.SPARK, log_dir=args.model)
+cluster = TFCluster.run(sc, mnist_dist.map_fun, args, args.cluster_size, num_ps, args.tensorboard, TFCluster.InputMode.SPARK)
 if args.mode == "train":
   cluster.train(dataRDD, args.epochs)
-else:  # inference
-  predRDD = cluster.inference(dataRDD)
-  predRDD.saveAsTextFile(args.output)
-
-cluster.shutdown(grace_secs=30)
+else:
+  labelRDD = cluster.inference(dataRDD)
+  labelRDD.saveAsTextFile(args.output)
+cluster.shutdown()

 print("{0} ===== Stop".format(datetime.now().isoformat()))
diff --git a/examples/mnist/spark/mnist_spark_dataset.py b/examples/mnist/spark/mnist_spark_dataset.py
deleted file mode 100755
index ef6e23bd..00000000
--- a/examples/mnist/spark/mnist_spark_dataset.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# Copyright 2017 Yahoo Inc.
-# Licensed under the terms of the Apache 2.0 license.
-# Please see LICENSE file in the project root for terms.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from pyspark.context import SparkContext
-from pyspark.conf import SparkConf
-
-import argparse
-import numpy
-import tensorflow as tf
-from datetime import datetime
-
-from tensorflowonspark import TFCluster
-import mnist_dist_dataset
-
-sc = SparkContext(conf=SparkConf().setAppName("mnist_spark"))
-executors = sc._conf.get("spark.executor.instances")
-num_executors = int(executors) if executors is not None else 1
-num_ps = 1
-
-parser = argparse.ArgumentParser()
-parser.add_argument("--batch_size", help="number of records per batch", type=int, default=100)
-parser.add_argument("--epochs", help="number of epochs", type=int, default=1)
-parser.add_argument("--format", help="example format: (csv|tfr)", choices=["csv", "tfr"], default="csv")
-parser.add_argument("--images", help="HDFS path to MNIST images in parallelized format")
-parser.add_argument("--labels", help="HDFS path to MNIST labels in parallelized format")
-parser.add_argument("--model", help="HDFS path to save/load model during train/inference", default="mnist_model")
-parser.add_argument("--cluster_size", help="number of nodes in the cluster", type=int, default=num_executors)
-parser.add_argument("--output", help="HDFS path to save test/inference output", default="predictions")
-parser.add_argument("--readers", help="number of reader/enqueue threads", type=int, default=1)
-parser.add_argument("--steps", help="maximum number of steps", type=int, default=1000)
-parser.add_argument("--tensorboard", help="launch tensorboard process", action="store_true")
-parser.add_argument("--mode", help="train|inference", default="train")
-parser.add_argument("--rdma", help="use rdma connection", default=False)
-args = parser.parse_args()
-print("args:", args)
-
-print("{0} ===== Start".format(datetime.now().isoformat()))
-
-if args.format == "tfr":
-  images = sc.newAPIHadoopFile(args.images, "org.tensorflow.hadoop.io.TFRecordFileInputFormat",
-                               keyClass="org.apache.hadoop.io.BytesWritable",
-                               valueClass="org.apache.hadoop.io.NullWritable")
-
-  def toNumpy(bytestr):
-    example = tf.train.Example()
-    example.ParseFromString(bytestr)
-    features = example.features.feature
-    image = numpy.array(features['image'].int64_list.value)
-    label = numpy.array(features['label'].int64_list.value)
-    return (image, label)
-
-  dataRDD = images.map(lambda x: toNumpy(bytes(x[0])))
-else:  # args.format == "csv":
-  images = sc.textFile(args.images).map(lambda ln: [int(x) for x in ln.split(',')])
-  labels = sc.textFile(args.labels).map(lambda ln: [float(x) for x in ln.split(',')])
-  print("zipping images and labels")
-  dataRDD = images.zip(labels)
-
-cluster = TFCluster.run(sc, mnist_dist_dataset.map_fun, args, args.cluster_size, num_ps, args.tensorboard, TFCluster.InputMode.SPARK)
-if args.mode == "train":
-  cluster.train(dataRDD, args.epochs)
-else:
-  labelRDD = cluster.inference(dataRDD)
-  labelRDD.saveAsTextFile(args.output)
-cluster.shutdown()
-
-print("{0} ===== Stop".format(datetime.now().isoformat()))
diff --git a/examples/mnist/tf/mnist_dist.py b/examples/mnist/tf/mnist_dist.py
index 5048a483..2b1bdb1f 100644
--- a/examples/mnist/tf/mnist_dist.py
+++ b/examples/mnist/tf/mnist_dist.py
@@ -21,15 +21,10 @@ def map_fun(args, ctx):
   import tensorflow as tf
   import time

+  num_workers = len(ctx.cluster_spec['worker'])
   worker_num = ctx.worker_num
   job_name = ctx.job_name
   task_index = ctx.task_index
-  cluster_spec = ctx.cluster_spec
-  num_workers = len(cluster_spec['worker'])
-
-  # Delay PS nodes a bit, since workers seem to reserve GPUs more quickly/reliably (w/o conflict)
-  if job_name == "ps":
-    time.sleep((worker_num + 1) * 5)

   # Parameters
   IMAGE_PIXELS = 28
@@ -38,70 +33,26 @@ def map_fun(args, ctx):
   # Get TF cluster and server instances
   cluster, server = ctx.start_cluster_server(1, args.rdma)

-  def read_csv_examples(image_dir, label_dir, batch_size=100, num_epochs=None, task_index=None, num_workers=None):
-    print_log(worker_num, "num_epochs: {0}".format(num_epochs))
-    # Setup queue of csv image filenames
-    csv_file_pattern = os.path.join(image_dir, 'part-*')
-    images = tf.gfile.Glob(csv_file_pattern)
-    print_log(worker_num, "images: {0}".format(images))
-    image_queue = tf.train.string_input_producer(images, shuffle=False, capacity=1000, num_epochs=num_epochs, name="image_queue")
-
-    # Setup queue of csv label filenames
-    csv_file_pattern = os.path.join(label_dir, 'part-*')
-    labels = tf.gfile.Glob(csv_file_pattern)
-    print_log(worker_num, "labels: {0}".format(labels))
-    label_queue = tf.train.string_input_producer(labels, shuffle=False, capacity=1000, num_epochs=num_epochs, name="label_queue")
-
-    # Setup reader for image queue
-    img_reader = tf.TextLineReader(name="img_reader")
-    _, img_csv = img_reader.read(image_queue)
-    image_defaults = [[1.0] for col in range(784)]
-    img = tf.stack(tf.decode_csv(img_csv, image_defaults))
-    # Normalize values to [0,1]
+  def _parse_csv(ln):
+    splits = tf.string_split([ln], delimiter='|')
+    lbl = splits.values[0]
+    img = splits.values[1]
+    image_defaults = [[0.0] for col in range(IMAGE_PIXELS * IMAGE_PIXELS)]
+    image = tf.stack(tf.decode_csv(img, record_defaults=image_defaults))
     norm = tf.constant(255, dtype=tf.float32, shape=(784,))
-    image = tf.div(img, norm)
-    print_log(worker_num, "image: {0}".format(image))
-
-    # Setup reader for label queue
-    label_reader = tf.TextLineReader(name="label_reader")
-    _, label_csv = label_reader.read(label_queue)
-    label_defaults = [[1.0] for col in range(10)]
-    label = tf.stack(tf.decode_csv(label_csv, label_defaults))
-    print_log(worker_num, "label: {0}".format(label))
-
-    # Return a batch of examples
-    return tf.train.batch([image, label], batch_size, num_threads=args.readers, name="batch_csv")
-
-  def read_tfr_examples(path, batch_size=100, num_epochs=None, task_index=None, num_workers=None):
-    print_log(worker_num, "num_epochs: {0}".format(num_epochs))
-
-    # Setup queue of TFRecord filenames
-    tf_record_pattern = os.path.join(path, 'part-*')
-    files = tf.gfile.Glob(tf_record_pattern)
-    queue_name = "file_queue"
-
-    # split input files across workers, if specified
-    if task_index is not None and num_workers is not None:
-      num_files = len(files)
-      files = files[task_index:num_files:num_workers]
-      queue_name = "file_queue_{0}".format(task_index)
-
-    print_log(worker_num, "files: {0}".format(files))
-    file_queue = tf.train.string_input_producer(files, shuffle=False, capacity=1000, num_epochs=num_epochs, name=queue_name)
-
-    # Setup reader for examples
-    reader = tf.TFRecordReader(name="reader")
-    _, serialized = reader.read(file_queue)
-    feature_def = {'label': tf.FixedLenFeature([10], tf.int64), 'image': tf.FixedLenFeature([784], tf.int64)}
-    features = tf.parse_single_example(serialized, feature_def)
+    normalized_image = tf.div(image, norm)
+    label_value = tf.string_to_number(lbl, tf.int32)
+    label = tf.one_hot(label_value, 10)
+    return (normalized_image, label)
+
+  def _parse_tfr(example_proto):
+    feature_def = {"label": tf.FixedLenFeature(10, tf.int64),
+ "image": tf.FixedLenFeature(IMAGE_PIXELS * IMAGE_PIXELS, tf.int64)} + features = tf.parse_single_example(example_proto, feature_def) norm = tf.constant(255, dtype=tf.float32, shape=(784,)) image = tf.div(tf.to_float(features['image']), norm) - print_log(worker_num, "image: {0}".format(image)) label = tf.to_float(features['label']) - print_log(worker_num, "label: {0}".format(label)) - - # Return a batch of examples - return tf.train.batch([image, label], batch_size, num_threads=args.readers, name="batch") + return (image, label) if job_name == "ps": server.join() @@ -111,6 +62,22 @@ def read_tfr_examples(path, batch_size=100, num_epochs=None, task_index=None, nu worker_device="/job:worker/task:%d" % task_index, cluster=cluster)): + # Dataset for input data + image_dir = ctx.absolute_path(args.images_labels) + file_pattern = os.path.join(image_dir, 'part-*') + + ds = tf.data.Dataset.list_files(file_pattern) + ds = ds.shard(num_workers, task_index).repeat(args.epochs).shuffle(args.shuffle_size) + if args.format == 'csv2': + ds = ds.interleave(tf.data.TextLineDataset, cycle_length=args.readers, block_length=1) + parse_fn = _parse_csv + else: # args.format == 'tfr' + ds = ds.interleave(tf.data.TFRecordDataset, cycle_length=args.readers, block_length=1) + parse_fn = _parse_tfr + ds = ds.map(parse_fn).batch(args.batch_size) + iterator = ds.make_one_shot_iterator() + x, y_ = iterator.get_next() + # Variables of the hidden layer hid_w = tf.Variable(tf.truncated_normal([IMAGE_PIXELS * IMAGE_PIXELS, hidden_units], stddev=1.0 / IMAGE_PIXELS), name="hid_w") @@ -123,21 +90,6 @@ def read_tfr_examples(path, batch_size=100, num_epochs=None, task_index=None, nu sm_b = tf.Variable(tf.zeros([10]), name="sm_b") tf.summary.histogram("softmax_weights", sm_w) - # Placeholders or QueueRunner/Readers for input data - num_epochs = 1 if args.mode == "inference" else None if args.epochs == 0 else args.epochs - index = task_index if args.mode == "inference" else None - workers = num_workers if args.mode == "inference" else None - - if args.format == "csv": - images = ctx.absolute_path(args.images) - labels = ctx.absolute_path(args.labels) - x, y_ = read_csv_examples(images, labels, 100, num_epochs, index, workers) - elif args.format == "tfr": - images = ctx.absolute_path(args.images) - x, y_ = read_tfr_examples(images, 100, num_epochs, index, workers) - else: - raise("{0} format not supported for tf input mode".format(args.format)) - x_img = tf.reshape(x, [-1, IMAGE_PIXELS, IMAGE_PIXELS, 1]) tf.summary.image("x_img", x_img) @@ -146,7 +98,7 @@ def read_tfr_examples(path, batch_size=100, num_epochs=None, task_index=None, nu y = tf.nn.softmax(tf.nn.xw_plus_b(hid, sm_w, sm_b)) - global_step = tf.Variable(0) + global_step = tf.train.get_or_create_global_step() loss = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0))) tf.summary.scalar("loss", loss) @@ -167,67 +119,62 @@ def read_tfr_examples(path, batch_size=100, num_epochs=None, task_index=None, nu # Create a "supervisor", which oversees the training process and stores model state into HDFS logdir = ctx.absolute_path(args.model) print("tensorflow model path: {0}".format(logdir)) + summary_writer = tf.summary.FileWriter("tensorboard_%d" % worker_num, graph=tf.get_default_graph()) - if job_name == "worker" and task_index == 0: - summary_writer = tf.summary.FileWriter(logdir, graph=tf.get_default_graph()) - - if args.mode == "train": - sv = tf.train.Supervisor(is_chief=(task_index == 0), - logdir=logdir, - init_op=init_op, - summary_op=None, - summary_writer=None, 
-                               saver=saver,
-                               global_step=global_step,
-                               stop_grace_secs=300,
-                               save_model_secs=10)
-    else:
-      sv = tf.train.Supervisor(is_chief=(task_index == 0),
-                               logdir=logdir,
-                               summary_op=None,
-                               saver=saver,
-                               global_step=global_step,
-                               stop_grace_secs=300,
-                               save_model_secs=0)
+    if args.mode == 'inference':
       output_dir = ctx.absolute_path(args.output)
-      output_file = tf.gfile.Open("{0}/part-{1:05d}".format(output_dir, worker_num), mode='w')
-
-    # The supervisor takes care of session initialization, restoring from
-    # a checkpoint, and closing when done or an error occurs.
-    with sv.managed_session(server.target) as sess:
-      print("{0} session ready".format(datetime.now().isoformat()))
-
-      # Loop until the supervisor shuts down or 1000000 steps have completed.
+      print("output_dir: {}".format(output_dir))
+      tf.gfile.MkDir(output_dir)
+      output_file = tf.gfile.Open("{}/part-{:05d}".format(output_dir, task_index), mode='w')
+
+    with tf.train.MonitoredTrainingSession(master=server.target,
+                                           is_chief=(task_index == 0),
+                                           scaffold=tf.train.Scaffold(init_op=init_op, summary_op=summary_op, saver=saver),
+                                           checkpoint_dir=logdir,
+                                           hooks=[tf.train.StopAtStepHook(last_step=args.steps)]) as sess:
+      print("{} session ready".format(datetime.now().isoformat()))
+
+      # Loop until the session shuts down
       step = 0
       count = 0
-      while not sv.should_stop() and step < args.steps:
+      while not sess.should_stop():
+        # Run a training step asynchronously.
         # See `tf.train.SyncReplicasOptimizer` for additional details on how to
         # perform *synchronous* training.
-        # using QueueRunners/Readers

         if args.mode == "train":
           if (step % 100 == 0):
-            print("{0} step: {1} accuracy: {2}".format(datetime.now().isoformat(), step, sess.run(accuracy)))
+            print("{} step: {} accuracy: {}".format(datetime.now().isoformat(), step, sess.run(accuracy)))
           _, summary, step = sess.run([train_op, summary_op, global_step])
-          if sv.is_chief:
+          if task_index == 0:
             summary_writer.add_summary(summary, step)
         else:  # args.mode == "inference"
           labels, pred, acc = sess.run([label, prediction, accuracy])
           # print("label: {0}, pred: {1}".format(labels, pred))
-          print("acc: {0}".format(acc))
+          print("acc: {}".format(acc))
           for i in range(len(labels)):
             count += 1
-            output_file.write("{0} {1}\n".format(labels[i], pred[i]))
-          print("count: {0}".format(count))
+            output_file.write("{} {}\n".format(labels[i], pred[i]))
+          print("count: {}".format(count))

-    if args.mode == "inference":
+    if args.mode == 'inference':
       output_file.close()
-      # Delay chief worker from shutting down supervisor during inference, since it can load model, start session,
-      # run inference and request stop before the other workers even start/sync their sessions.
-      if task_index == 0:
-        time.sleep(60)
-
-    # Ask for all the services to stop.
- print("{0} stopping supervisor".format(datetime.now().isoformat())) - sv.stop() + + print("{} stopping MonitoredTrainingSession".format(datetime.now().isoformat())) + + # WORKAROUND for https://github.com/tensorflow/tensorflow/issues/21745 + # wait for all other nodes to complete (via done files) + done_dir = "{}/{}/done".format(ctx.absolute_path(args.model), args.mode) + print("Writing done file to: {}".format(done_dir)) + tf.gfile.MakeDirs(done_dir) + with tf.gfile.GFile("{}/{}".format(done_dir, ctx.task_index), 'w') as done_file: + done_file.write("done") + + for i in range(60): + if len(tf.gfile.ListDirectory(done_dir)) < len(ctx.cluster_spec['worker']): + print("{} Waiting for other nodes {}".format(datetime.now().isoformat(), i)) + time.sleep(1) + else: + print("{} All nodes done".format(datetime.now().isoformat())) + break diff --git a/examples/mnist/tf/mnist_dist_dataset.py b/examples/mnist/tf/mnist_dist_dataset.py deleted file mode 100644 index d85ea35d..00000000 --- a/examples/mnist/tf/mnist_dist_dataset.py +++ /dev/null @@ -1,185 +0,0 @@ -# Copyright 2017 Yahoo Inc. -# Licensed under the terms of the Apache 2.0 license. -# Please see LICENSE file in the project root for terms. - -# Distributed MNIST on grid based on TensorFlow MNIST example - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -def print_log(worker_num, arg): - print("%d: " % worker_num, end=" ") - print(arg) - - -def map_fun(args, ctx): - from tensorflowonspark import TFNode - from datetime import datetime - import math - import os - import tensorflow as tf - import time - - num_workers = args.cluster_size if args.driver_ps_nodes else args.cluster_size - args.num_ps - worker_num = ctx.worker_num - job_name = ctx.job_name - task_index = ctx.task_index - - # Parameters - IMAGE_PIXELS = 28 - hidden_units = 128 - - # Get TF cluster and server instances - cluster, server = TFNode.start_cluster_server(ctx, 1, args.rdma) - - def _parse_csv(ln): - splits = tf.string_split([ln], delimiter='|') - lbl = splits.values[0] - img = splits.values[1] - image_defaults = [[0.0] for col in range(IMAGE_PIXELS * IMAGE_PIXELS)] - image = tf.stack(tf.decode_csv(img, record_defaults=image_defaults)) - norm = tf.constant(255, dtype=tf.float32, shape=(784,)) - normalized_image = tf.div(image, norm) - label_value = tf.string_to_number(lbl, tf.int32) - label = tf.one_hot(label_value, 10) - return (normalized_image, label) - - def _parse_tfr(example_proto): - feature_def = {"label": tf.FixedLenFeature(10, tf.int64), - "image": tf.FixedLenFeature(IMAGE_PIXELS * IMAGE_PIXELS, tf.int64)} - features = tf.parse_single_example(example_proto, feature_def) - norm = tf.constant(255, dtype=tf.float32, shape=(784,)) - image = tf.div(tf.to_float(features['image']), norm) - label = tf.to_float(features['label']) - return (image, label) - - if job_name == "ps": - server.join() - elif job_name == "worker": - # Assigns ops to the local worker by default. 
diff --git a/examples/mnist/tf/mnist_dist_dataset.py b/examples/mnist/tf/mnist_dist_dataset.py
deleted file mode 100644
index d85ea35d..00000000
--- a/examples/mnist/tf/mnist_dist_dataset.py
+++ /dev/null
@@ -1,185 +0,0 @@
-# Copyright 2017 Yahoo Inc.
-# Licensed under the terms of the Apache 2.0 license.
-# Please see LICENSE file in the project root for terms.
-
-# Distributed MNIST on grid based on TensorFlow MNIST example
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-
-def print_log(worker_num, arg):
-  print("%d: " % worker_num, end=" ")
-  print(arg)
-
-
-def map_fun(args, ctx):
-  from tensorflowonspark import TFNode
-  from datetime import datetime
-  import math
-  import os
-  import tensorflow as tf
-  import time
-
-  num_workers = args.cluster_size if args.driver_ps_nodes else args.cluster_size - args.num_ps
-  worker_num = ctx.worker_num
-  job_name = ctx.job_name
-  task_index = ctx.task_index
-
-  # Parameters
-  IMAGE_PIXELS = 28
-  hidden_units = 128
-
-  # Get TF cluster and server instances
-  cluster, server = TFNode.start_cluster_server(ctx, 1, args.rdma)
-
-  def _parse_csv(ln):
-    splits = tf.string_split([ln], delimiter='|')
-    lbl = splits.values[0]
-    img = splits.values[1]
-    image_defaults = [[0.0] for col in range(IMAGE_PIXELS * IMAGE_PIXELS)]
-    image = tf.stack(tf.decode_csv(img, record_defaults=image_defaults))
-    norm = tf.constant(255, dtype=tf.float32, shape=(784,))
-    normalized_image = tf.div(image, norm)
-    label_value = tf.string_to_number(lbl, tf.int32)
-    label = tf.one_hot(label_value, 10)
-    return (normalized_image, label)
-
-  def _parse_tfr(example_proto):
-    feature_def = {"label": tf.FixedLenFeature(10, tf.int64),
-                   "image": tf.FixedLenFeature(IMAGE_PIXELS * IMAGE_PIXELS, tf.int64)}
-    features = tf.parse_single_example(example_proto, feature_def)
-    norm = tf.constant(255, dtype=tf.float32, shape=(784,))
-    image = tf.div(tf.to_float(features['image']), norm)
-    label = tf.to_float(features['label'])
-    return (image, label)
-
-  if job_name == "ps":
-    server.join()
-  elif job_name == "worker":
-    # Assigns ops to the local worker by default.
-    with tf.device(tf.train.replica_device_setter(
-        worker_device="/job:worker/task:%d" % task_index,
-        cluster=cluster)):
-
-      # Dataset for input data
-      image_dir = TFNode.hdfs_path(ctx, args.images_labels)
-      file_pattern = os.path.join(image_dir, 'part-*')
-
-      ds = tf.data.Dataset.list_files(file_pattern)
-      ds = ds.shard(num_workers, task_index).repeat(args.epochs).shuffle(args.shuffle_size)
-      if args.format == 'csv2':
-        ds = ds.interleave(tf.data.TextLineDataset, cycle_length=args.readers, block_length=1)
-        parse_fn = _parse_csv
-      else:  # args.format == 'tfr'
-        ds = ds.interleave(tf.data.TFRecordDataset, cycle_length=args.readers, block_length=1)
-        parse_fn = _parse_tfr
-      ds = ds.map(parse_fn).batch(args.batch_size)
-      iterator = ds.make_initializable_iterator()
-      x, y_ = iterator.get_next()
-
-      # Variables of the hidden layer
-      hid_w = tf.Variable(tf.truncated_normal([IMAGE_PIXELS * IMAGE_PIXELS, hidden_units],
-                          stddev=1.0 / IMAGE_PIXELS), name="hid_w")
-      hid_b = tf.Variable(tf.zeros([hidden_units]), name="hid_b")
-      tf.summary.histogram("hidden_weights", hid_w)
-
-      # Variables of the softmax layer
-      sm_w = tf.Variable(tf.truncated_normal([hidden_units, 10],
-                         stddev=1.0 / math.sqrt(hidden_units)), name="sm_w")
-      sm_b = tf.Variable(tf.zeros([10]), name="sm_b")
-      tf.summary.histogram("softmax_weights", sm_w)
-
-      x_img = tf.reshape(x, [-1, IMAGE_PIXELS, IMAGE_PIXELS, 1])
-      tf.summary.image("x_img", x_img)
-
-      hid_lin = tf.nn.xw_plus_b(x, hid_w, hid_b)
-      hid = tf.nn.relu(hid_lin)
-
-      y = tf.nn.softmax(tf.nn.xw_plus_b(hid, sm_w, sm_b))
-
-      global_step = tf.Variable(0)
-
-      loss = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
-      tf.summary.scalar("loss", loss)
-      train_op = tf.train.AdagradOptimizer(0.01).minimize(
-          loss, global_step=global_step)
-
-      # Test trained model
-      label = tf.argmax(y_, 1, name="label")
-      prediction = tf.argmax(y, 1, name="prediction")
-      correct_prediction = tf.equal(prediction, label)
-      accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy")
-      tf.summary.scalar("acc", accuracy)
-
-      saver = tf.train.Saver()
-      summary_op = tf.summary.merge_all()
-      init_op = tf.global_variables_initializer()
-
-    # Create a "supervisor", which oversees the training process and stores model state into HDFS
-    logdir = TFNode.hdfs_path(ctx, args.model)
-    print("tensorflow model path: {0}".format(logdir))
-    summary_writer = tf.summary.FileWriter("tensorboard_%d" % worker_num, graph=tf.get_default_graph())
-
-    if args.mode == "train":
-      sv = tf.train.Supervisor(is_chief=(task_index == 0),
-                               logdir=logdir,
-                               init_op=init_op,
-                               summary_op=None,
-                               saver=saver,
-                               global_step=global_step,
-                               stop_grace_secs=300,
-                               save_model_secs=10)
-    else:
-      sv = tf.train.Supervisor(is_chief=(task_index == 0),
-                               logdir=logdir,
-                               summary_op=None,
-                               saver=saver,
-                               global_step=global_step,
-                               stop_grace_secs=300,
-                               save_model_secs=0)
-      output_dir = TFNode.hdfs_path(ctx, args.output)
-      tf.gfile.MkDir(output_dir)
-      output_file = tf.gfile.Open("{0}/part-{1:05d}".format(output_dir, worker_num), mode='w')
-
-    # The supervisor takes care of session initialization, restoring from
-    # a checkpoint, and closing when done or an error occurs.
-    with sv.managed_session(server.target) as sess:
-      print("{0} session ready".format(datetime.now().isoformat()))
-
-      # Loop until the supervisor shuts down or 1000000 steps have completed.
-      sess.run(iterator.initializer)
-      step = 0
-      count = 0
-      while not sv.should_stop() and step < args.steps:
-
-        # Run a training step asynchronously.
-        # See `tf.train.SyncReplicasOptimizer` for additional details on how to
-        # perform *synchronous* training.
-
-        if args.mode == "train":
-          if (step % 100 == 0):
-            print("{0} step: {1} accuracy: {2}".format(datetime.now().isoformat(), step, sess.run(accuracy)))
-          _, summary, step = sess.run([train_op, summary_op, global_step])
-          if sv.is_chief:
-            summary_writer.add_summary(summary, step)
-        else:  # args.mode == "inference"
-          labels, pred, acc = sess.run([label, prediction, accuracy])
-          # print("label: {0}, pred: {1}".format(labels, pred))
-          print("acc: {0}".format(acc))
-          for i in range(len(labels)):
-            count += 1
-            output_file.write("{0} {1}\n".format(labels[i], pred[i]))
-          print("count: {0}".format(count))
-
-      if args.mode == "inference":
-        output_file.close()
-        # Delay chief worker from shutting down supervisor during inference, since it can load model, start session,
-        # run inference and request stop before the other workers even start/sync their sessions.
-        if task_index == 0:
-          time.sleep(60)
-
-      # Ask for all the services to stop.
-      print("{0} stopping supervisor".format(datetime.now().isoformat()))
-      sv.stop()
diff --git a/examples/mnist/tf/mnist_spark.py b/examples/mnist/tf/mnist_spark.py
index e217a20a..2a25d6e3 100644
--- a/examples/mnist/tf/mnist_spark.py
+++ b/examples/mnist/tf/mnist_spark.py
@@ -21,24 +21,27 @@
 num_ps = 1

 parser = argparse.ArgumentParser()
-parser.add_argument("--epochs", help="number of epochs", type=int, default=0)
-parser.add_argument("--format", help="example format: (csv|pickle|tfr)", choices=["csv", "pickle", "tfr"], default="tfr")
-parser.add_argument("--images", help="HDFS path to MNIST images in parallelized format")
-parser.add_argument("--labels", help="HDFS path to MNIST labels in parallelized format")
-parser.add_argument("--model", help="HDFS path to save/load model during train/test", default="mnist_model")
+parser.add_argument("--batch_size", help="number of records per batch", type=int, default=100)
 parser.add_argument("--cluster_size", help="number of nodes in the cluster (for Spark Standalone)", type=int, default=num_executors)
+parser.add_argument("--driver_ps_nodes", help="""run tensorflow PS node on driver locally.
+    You will need to set cluster_size = num_executors + num_ps""", default=False)
+parser.add_argument("--epochs", help="number of epochs", type=int, default=1)
+parser.add_argument("--format", help="example format: (csv2|tfr)", choices=["csv2", "tfr"], default="tfr")
+parser.add_argument("--images_labels", help="HDFS path to MNIST image_label files in parallelized format")
+parser.add_argument("--mode", help="train|inference", default="train")
+parser.add_argument("--model", help="HDFS path to save/load model during train/test", default="mnist_model")
+parser.add_argument("--num_ps", help="number of ps nodes", default=1)
 parser.add_argument("--output", help="HDFS path to save test/inference output", default="predictions")
-parser.add_argument("--readers", help="number of reader/enqueue threads", type=int, default=1)
+parser.add_argument("--rdma", help="use rdma connection", default=False)
+parser.add_argument("--readers", help="number of reader/enqueue threads per worker", type=int, default=10)
+parser.add_argument("--shuffle_size", help="size of shuffle buffer", type=int, default=1000)
 parser.add_argument("--steps", help="maximum number of steps", type=int, default=1000)
 parser.add_argument("--tensorboard", help="launch tensorboard process", action="store_true")
-parser.add_argument("--mode", help="train|inference", default="train")
-parser.add_argument("--rdma", help="use rdma connection", default=False)
-parser.add_argument("--driver_ps_nodes", help="run tensorflow PS node on driver locally", default=False)
 args = parser.parse_args()
 print("args:", args)

 print("{0} ===== Start".format(datetime.now().isoformat()))
-cluster = TFCluster.run(sc, mnist_dist.map_fun, args, args.cluster_size, num_ps, args.tensorboard, TFCluster.InputMode.TENSORFLOW,
-                        driver_ps_nodes=args.driver_ps_nodes, log_dir=args.model)
+cluster = TFCluster.run(sc, mnist_dist.map_fun, args, args.cluster_size, args.num_ps, args.tensorboard,
+                        TFCluster.InputMode.TENSORFLOW, driver_ps_nodes=args.driver_ps_nodes)
 cluster.shutdown()
 print("{0} ===== Stop".format(datetime.now().isoformat()))
diff --git a/examples/mnist/tf/mnist_spark_dataset.py b/examples/mnist/tf/mnist_spark_dataset.py
deleted file mode 100644
index 8a03ab15..00000000
--- a/examples/mnist/tf/mnist_spark_dataset.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright 2017 Yahoo Inc.
-# Licensed under the terms of the Apache 2.0 license.
-# Please see LICENSE file in the project root for terms.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from pyspark.context import SparkContext
-from pyspark.conf import SparkConf
-
-import argparse
-from datetime import datetime
-
-from tensorflowonspark import TFCluster
-import mnist_dist_dataset
-
-sc = SparkContext(conf=SparkConf().setAppName("mnist_tf"))
-executors = sc._conf.get("spark.executor.instances")
-num_executors = int(executors) if executors is not None else 1
-num_ps = 1
-
-parser = argparse.ArgumentParser()
-parser.add_argument("--batch_size", help="number of records per batch", type=int, default=100)
-parser.add_argument("--cluster_size", help="number of nodes in the cluster (for Spark Standalone)", type=int, default=num_executors)
-parser.add_argument("--driver_ps_nodes", help="""run tensorflow PS node on driver locally.
-    You will need to set cluster_size = num_executors + num_ps""", default=False)
-parser.add_argument("--epochs", help="number of epochs", type=int, default=1)
-parser.add_argument("--format", help="example format: (csv2|tfr)", choices=["csv2", "tfr"], default="tfr")
-parser.add_argument("--images_labels", help="HDFS path to MNIST image_label files in parallelized format")
-parser.add_argument("--mode", help="train|inference", default="train")
-parser.add_argument("--model", help="HDFS path to save/load model during train/test", default="mnist_model")
-parser.add_argument("--num_ps", help="number of ps nodes", default=1)
-parser.add_argument("--output", help="HDFS path to save test/inference output", default="predictions")
-parser.add_argument("--rdma", help="use rdma connection", default=False)
-parser.add_argument("--readers", help="number of reader/enqueue threads per worker", type=int, default=10)
-parser.add_argument("--shuffle_size", help="size of shuffle buffer", type=int, default=1000)
-parser.add_argument("--steps", help="maximum number of steps", type=int, default=1000)
-parser.add_argument("--tensorboard", help="launch tensorboard process", action="store_true")
-args = parser.parse_args()
-print("args:", args)
-
-print("{0} ===== Start".format(datetime.now().isoformat()))
-cluster = TFCluster.run(sc, mnist_dist_dataset.map_fun, args, args.cluster_size, args.num_ps, args.tensorboard,
-                        TFCluster.InputMode.TENSORFLOW, driver_ps_nodes=args.driver_ps_nodes)
-cluster.shutdown()
-print("{0} ===== Stop".format(datetime.now().isoformat()))
diff --git a/examples/slim/BUILD b/examples/slim/BUILD
deleted file mode 100644
index e0f39d2a..00000000
--- a/examples/slim/BUILD
+++ /dev/null
@@ -1,371 +0,0 @@
-# Description:
-#   Contains files for loading, training and evaluating TF-Slim-based models.
-
-package(default_visibility = [":internal"])
-
-licenses(["notice"])  # Apache 2.0
-
-exports_files(["LICENSE"])
-
-package_group(name = "internal")
-
-py_library(
-    name = "dataset_utils",
-    srcs = ["datasets/dataset_utils.py"],
-)
-
-py_library(
-    name = "download_and_convert_cifar10",
-    srcs = ["datasets/download_and_convert_cifar10.py"],
-    deps = [":dataset_utils"],
-)
-
-py_library(
-    name = "download_and_convert_flowers",
-    srcs = ["datasets/download_and_convert_flowers.py"],
-    deps = [":dataset_utils"],
-)
-
-py_library(
-    name = "download_and_convert_mnist",
-    srcs = ["datasets/download_and_convert_mnist.py"],
-    deps = [":dataset_utils"],
-)
-
-py_binary(
-    name = "download_and_convert_data",
-    srcs = ["download_and_convert_data.py"],
-    deps = [
-        ":download_and_convert_cifar10",
-        ":download_and_convert_flowers",
-        ":download_and_convert_mnist",
-    ],
-)
-
-py_binary(
-    name = "cifar10",
-    srcs = ["datasets/cifar10.py"],
-    deps = [":dataset_utils"],
-)
-
-py_binary(
-    name = "flowers",
-    srcs = ["datasets/flowers.py"],
-    deps = [":dataset_utils"],
-)
-
-py_binary(
-    name = "imagenet",
-    srcs = ["datasets/imagenet.py"],
-    deps = [":dataset_utils"],
-)
-
-py_binary(
-    name = "mnist",
-    srcs = ["datasets/mnist.py"],
-    deps = [":dataset_utils"],
-)
-
-py_library(
-    name = "dataset_factory",
-    srcs = ["datasets/dataset_factory.py"],
-    deps = [
-        ":cifar10",
-        ":flowers",
-        ":imagenet",
-        ":mnist",
-    ],
-)
-
-py_library(
-    name = "model_deploy",
-    srcs = ["deployment/model_deploy.py"],
-)
-
-py_test(
-    name = "model_deploy_test",
-    srcs = ["deployment/model_deploy_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [":model_deploy"],
-)
-
-py_library(
-    name = "cifarnet_preprocessing",
-    srcs = ["preprocessing/cifarnet_preprocessing.py"],
-)
-
-py_library(
-    name = "inception_preprocessing",
-    srcs = ["preprocessing/inception_preprocessing.py"],
-)
-
-py_library(
-    name = "lenet_preprocessing",
-    srcs = ["preprocessing/lenet_preprocessing.py"],
-)
-
-py_library(
-    name = "vgg_preprocessing",
-    srcs = ["preprocessing/vgg_preprocessing.py"],
-)
-
-py_library(
-    name = "preprocessing_factory",
-    srcs = ["preprocessing/preprocessing_factory.py"],
-    deps = [
-        ":cifarnet_preprocessing",
-        ":inception_preprocessing",
-        ":lenet_preprocessing",
-        ":vgg_preprocessing",
-    ],
-)
-
-# Typical networks definitions.
-
-py_library(
-    name = "nets",
-    deps = [
-        ":alexnet",
-        ":cifarnet",
-        ":inception",
-        ":lenet",
-        ":overfeat",
-        ":resnet_v1",
-        ":resnet_v2",
-        ":vgg",
-    ],
-)
-
-py_library(
-    name = "alexnet",
-    srcs = ["nets/alexnet.py"],
-    srcs_version = "PY2AND3",
-)
-
-py_test(
-    name = "alexnet_test",
-    size = "medium",
-    srcs = ["nets/alexnet_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [":alexnet"],
-)
-
-py_library(
-    name = "cifarnet",
-    srcs = ["nets/cifarnet.py"],
-)
-
-py_library(
-    name = "inception",
-    srcs = ["nets/inception.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":inception_resnet_v2",
-        ":inception_v1",
-        ":inception_v2",
-        ":inception_v3",
-        ":inception_v4",
-    ],
-)
-
-py_library(
-    name = "inception_utils",
-    srcs = ["nets/inception_utils.py"],
-    srcs_version = "PY2AND3",
-)
-
-py_library(
-    name = "inception_v1",
-    srcs = ["nets/inception_v1.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":inception_utils",
-    ],
-)
-
-py_library(
-    name = "inception_v2",
-    srcs = ["nets/inception_v2.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":inception_utils",
-    ],
-)
-
-py_library(
-    name = "inception_v3",
-    srcs = ["nets/inception_v3.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":inception_utils",
-    ],
-)
-
-py_library(
-    name = "inception_v4",
-    srcs = ["nets/inception_v4.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":inception_utils",
-    ],
-)
-
-py_library(
-    name = "inception_resnet_v2",
-    srcs = ["nets/inception_resnet_v2.py"],
-    srcs_version = "PY2AND3",
-)
-
-py_test(
-    name = "inception_v1_test",
-    size = "large",
-    srcs = ["nets/inception_v1_test.py"],
-    shard_count = 3,
-    srcs_version = "PY2AND3",
-    deps = [":inception"],
-)
-
-py_test(
-    name = "inception_v2_test",
-    size = "large",
-    srcs = ["nets/inception_v2_test.py"],
-    shard_count = 3,
-    srcs_version = "PY2AND3",
-    deps = [":inception"],
-)
-
-py_test(
-    name = "inception_v3_test",
-    size = "large",
-    srcs = ["nets/inception_v3_test.py"],
-    shard_count = 3,
-    srcs_version = "PY2AND3",
-    deps = [":inception"],
-)
-
-py_test(
-    name = "inception_v4_test",
-    size = "large",
-    srcs = ["nets/inception_v4_test.py"],
-    shard_count = 3,
-    srcs_version = "PY2AND3",
-    deps = [":inception"],
-)
-
-py_test(
-    name = "inception_resnet_v2_test",
-    size = "large",
-    srcs = ["nets/inception_resnet_v2_test.py"],
-    shard_count = 3,
-    srcs_version = "PY2AND3",
-    deps = [":inception"],
-)
-
-py_library(
-    name = "lenet",
-    srcs = ["nets/lenet.py"],
-)
-
-py_library(
-    name = "overfeat",
-    srcs = ["nets/overfeat.py"],
-    srcs_version = "PY2AND3",
-)
-
-py_test(
-    name = "overfeat_test",
-    size = "medium",
-    srcs = ["nets/overfeat_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [":overfeat"],
-)
-
-py_library(
-    name = "resnet_utils",
-    srcs = ["nets/resnet_utils.py"],
-    srcs_version = "PY2AND3",
-)
-
-py_library(
-    name = "resnet_v1",
-    srcs = ["nets/resnet_v1.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":resnet_utils",
-    ],
-)
-
-py_test(
-    name = "resnet_v1_test",
-    size = "medium",
-    srcs = ["nets/resnet_v1_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [":resnet_v1"],
-)
-
-py_library(
-    name = "resnet_v2",
-    srcs = ["nets/resnet_v2.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        ":resnet_utils",
-    ],
-)
-
-py_test(
-    name = "resnet_v2_test",
-    size = "medium",
-    srcs = ["nets/resnet_v2_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [":resnet_v2"],
-)
-
-py_library(
-    name = "vgg",
-    srcs = ["nets/vgg.py"],
-    srcs_version = "PY2AND3",
-)
-
-py_test(
-    name = "vgg_test",
-    size = "medium",
-    srcs = ["nets/vgg_test.py"],
-    srcs_version = "PY2AND3",
-    deps = [":vgg"],
-)
srcs_version = "PY2AND3", - deps = [":vgg"], -) - -py_library( - name = "nets_factory", - srcs = ["nets/nets_factory.py"], - deps = [":nets"], -) - -py_test( - name = "nets_factory_test", - size = "medium", - srcs = ["nets/nets_factory_test.py"], - srcs_version = "PY2AND3", - deps = [":nets_factory"], -) - -py_binary( - name = "train_image_classifier", - srcs = ["train_image_classifier.py"], - deps = [ - ":dataset_factory", - ":model_deploy", - ":nets_factory", - ":preprocessing_factory", - ], -) - -py_binary( - name = "eval_image_classifier", - srcs = ["eval_image_classifier.py"], - deps = [ - ":dataset_factory", - ":model_deploy", - ":nets_factory", - ":preprocessing_factory", - ], -) diff --git a/examples/slim/README.md b/examples/slim/README.md deleted file mode 100644 index ec4815d0..00000000 --- a/examples/slim/README.md +++ /dev/null @@ -1,74 +0,0 @@ -# TF-Slim Inception - -Original Source: https://github.com/tensorflow/models/tree/master/slim - -This example demonstrates the conversion of a TF-Slim image classification application. - -Please ensure that you have followed [these instructions](https://github.com/yahoo/TensorFlowOnSpark/wiki/GetStarted_YARN) first. -And, you will need to [download an image dataset](https://github.com/tensorflow/models/tree/master/slim) per the original instructions. - -#### Package the code as a Python zip/module - - export TFoS_HOME= - pushd ${TFoS_HOME}/examples/slim; zip -r ~/slim.zip .; popd - -#### Train TF-Slim Classifier - - # set environment variables (if not already done) - export PYTHON_ROOT=~/Python - export PYSPARK_PYTHON=${PYTHON_ROOT}/bin/python - export PATH=${PYTHON_ROOT}/bin/:$PATH - export QUEUE=gpu - export DATASET_DIR= - - # for CPU mode: - # export QUEUE=default - # --conf spark.executorEnv.LD_LIBRARY_PATH="$JAVA_HOME/jre/lib/amd64/server" \ - # remove --driver-library-path - - # hadoop fs -rm -r slim_train - export NUM_GPU=1 - export MEMORY=$((NUM_GPU * 27)) - ${SPARK_HOME}/bin/spark-submit --master yarn --deploy-mode cluster \ - --queue ${QUEUE} \ - --num-executors 3 \ - --executor-memory ${MEMORY}G \ - --py-files ${TFoS_HOME}/tfspark.zip,slim.zip \ - --conf spark.dynamicAllocation.enabled=false \ - --conf spark.yarn.maxAppAttempts=1 \ - --conf spark.ui.view.acls=* \ - --archives hdfs:///user/${USER}/Python.zip#Python \ - --conf spark.executorEnv.LD_LIBRARY_PATH="/usr/local/cuda-7.5/lib64:$JAVA_HOME/jre/lib/amd64/server" \ - --driver-library-path="/usr/local/cuda-7.5/lib64" \ - ${TFoS_HOME}/examples/slim/train_image_classifier.py \ - --dataset_dir ${DATASET_DIR} \ - --train_dir hdfs://default/user/${USER}/slim_train \ - --dataset_name imagenet \ - --dataset_split_name train \ - --model_name inception_v3 \ - --max_number_of_steps 1000 \ - --num_gpus ${NUM_GPU} \ - --batch_size 32 \ - --num_ps_tasks 1 - -#### Evaluate TF-Slim Classifier - - # hadoop fs -rm -r slim_eval - ${SPARK_HOME}/bin/spark-submit --master yarn --deploy-mode cluster \ - --queue ${QUEUE} \ - --num-executors 1 \ - --executor-memory 27G \ - --py-files ${TFoS_HOME}/tfspark.zip,slim.zip \ - --conf spark.dynamicAllocation.enabled=false \ - --conf spark.yarn.maxAppAttempts=1 \ - --conf spark.ui.view.acls=* \ - --archives hdfs:///user/${USER}/Python.zip#Python \ - --conf spark.executorEnv.LD_LIBRARY_PATH="/usr/local/cuda-7.5/lib64:$JAVA_HOME/jre/lib/amd64/server" \ - --driver-library-path="/usr/local/cuda-7.5/lib64" \ - ${TFoS_HOME}/examples/slim/eval_image_classifier.py \ - --dataset_dir ${DATASET_DIR} \ - --dataset_name imagenet \ - --dataset_split_name validation \ 
- --model_name inception_v3 \ - --checkpoint_path hdfs://default/user/${USER}/slim_train \ - --eval_dir hdfs://default/user/${USER}/slim_eval diff --git a/examples/slim/README_orig.md b/examples/slim/README_orig.md deleted file mode 100644 index 52eef6f4..00000000 --- a/examples/slim/README_orig.md +++ /dev/null @@ -1,385 +0,0 @@ -# TensorFlow-Slim image classification library - -[TF-slim](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim) -is a new lightweight high-level API of TensorFlow (`tensorflow.contrib.slim`) -for defining, training and evaluating complex -models. This directory contains -code for training and evaluating several widely used Convolutional Neural -Network (CNN) image classification models using TF-slim. -It contains scripts that will allow -you to train models from scratch or fine-tune them from pre-trained network -weights. It also contains code for downloading standard image datasets, -converting them -to TensorFlow's native TFRecord format and reading them in using TF-Slim's -data reading and queueing utilities. You can easily train any model on any of -these datasets, as we demonstrate below. We've also included a -[jupyter notebook](https://github.com/tensorflow/models/blob/master/slim/slim_walkthrough.ipynb), -which provides working examples of how to use TF-Slim for image classification. - -## Contacts - -Maintainers of TF-slim: - -* Nathan Silberman, - github: [nathansilberman](https://github.com/nathansilberman) -* Sergio Guadarrama, github: [sguada](https://github.com/sguada) - -## Table of contents - -Installation and setup
-Preparing the datasets
-Using pre-trained models
-Training from scratch
-Fine tuning to a new task
-Evaluating performance
- -# Installation - - -In this section, we describe the steps required to install the appropriate -prerequisite packages. - -## Installing latest version of TF-slim - -As of 8/28/16, the latest [stable release of TF](https://www.tensorflow.org/versions/r0.10/get_started/os_setup.html#pip-installation) -is r0.10, which contains most of TF-Slim but not some later additions. To obtain the -latest version, you must install the most recent nightly build of -TensorFlow. You can find the latest nightly binaries at -[TensorFlow Installation](https://github.com/tensorflow/tensorflow#installation) -in the section that reads "People who are a little more adventurous can -also try our nightly binaries". Copy the link address that corresponds to -the appropriate machine architecture and python version, and pip install -it. For example: - -```shell -export TF_BINARY_URL=https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp27-none-linux_x86_64.whl -sudo pip install --upgrade $TF_BINARY_URL -``` - -To test this has worked, execute the following command; it should run -without raising any errors. - -``` -python -c "import tensorflow.contrib.slim as slim; eval = slim.evaluation.evaluate_once" -``` - -## Installing the TF-slim image models library - -To use TF-Slim for image classification, you also have to install -the [TF-Slim image models library](https://github.com/tensorflow/models/tree/master/slim), -which is not part of the core TF library. -To do this, check out the -[tensorflow/models](https://github.com/tensorflow/models/) repository as follows: - -```bash -cd $HOME/workspace -git clone https://github.com/tensorflow/models/ -``` - -This will put the TF-Slim image models library in `$HOME/workspace/models/slim`. -(It will also create a directory called -[models/inception](https://github.com/tensorflow/models/tree/master/inception), -which contains an older version of slim; you can safely ignore this.) - -To verify that this has worked, execute the following commands; it should run -without raising any errors. - -``` -cd $HOME/workspace/models/slim -python -c "from nets import cifarnet; mynet = cifarnet.cifarnet" -``` - - -# Preparing the datasets - - -As part of this library, we've included scripts to download several popular -image datasets (listed below) and convert them to slim format. - -Dataset | Training Set Size | Testing Set Size | Number of Classes | Comments -:------:|:---------------:|:---------------------:|:-----------:|:-----------: -Flowers|2500 | 2500 | 5 | Various sizes (source: Flickr) -[Cifar10](https://www.cs.toronto.edu/~kriz/cifar.html) | 60k| 10k | 10 |32x32 color -[MNIST](http://yann.lecun.com/exdb/mnist/)| 60k | 10k | 10 | 28x28 gray -[ImageNet](http://www.image-net.org/challenges/LSVRC/2012/)|1.2M| 50k | 1000 | Various sizes - -## Downloading and converting to TFRecord format - -For each dataset, we'll need to download the raw data and convert it to -TensorFlow's native -[TFRecord](https://www.tensorflow.org/versions/r0.10/api_docs/python/python_io.html#tfrecords-format-details) -format. Each TFRecord contains a -[TF-Example](https://github.com/tensorflow/tensorflow/blob/r0.10/tensorflow/core/example/example.proto) -protocol buffer. Below we demonstrate how to do this for the Flowers dataset. 
- -```shell -$ DATA_DIR=/tmp/data/flowers -$ python download_and_convert_data.py \ - --dataset_name=flowers \ - --dataset_dir="${DATA_DIR}" -``` - -When the script finishes you will find several TFRecord files created: - -```shell -$ ls ${DATA_DIR} -flowers_train-00000-of-00005.tfrecord -... -flowers_train-00004-of-00005.tfrecord -flowers_validation-00000-of-00005.tfrecord -... -flowers_validation-00004-of-00005.tfrecord -labels.txt -``` - -These represent the training and validation data, sharded over 5 files each. -You will also find the `$DATA_DIR/labels.txt` file which contains the mapping -from integer labels to class names. - -You can use the same script to create the mnist and cifar10 datasets. -However, for ImageNet, you have to follow the instructions -[here](https://github.com/tensorflow/models/blob/master/inception/README.md#getting-started). -Note that you first have to sign up for an account at image-net.org. -Also, the download can take several hours, and uses about 500MB. - - -## Creating a TF-Slim Dataset Descriptor. - -Once the TFRecord files have been created, you can easily define a Slim -[Dataset](https://github.com/tensorflow/tensorflow/blob/r0.10/tensorflow/contrib/slim/python/slim/data/dataset.py), -which stores pointers to the data file, as well as various other pieces of -metadata, such as the class labels, the train/test split, and how to parse the -TFExample protos. We have included the TF-Slim Dataset descriptors -for -[Cifar10](https://github.com/tensorflow/models/blob/master/slim/datasets/cifar10.py), -[ImageNet](https://github.com/tensorflow/models/blob/master/slim/datasets/imagenet.py), -[Flowers](https://github.com/tensorflow/models/blob/master/slim/datasets/flowers.py), -and -[MNIST](https://github.com/tensorflow/models/blob/master/slim/datasets/mnist.py). -An example of how to load data using a TF-Slim dataset descriptor using a -TF-Slim -[DatasetDataProvider](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py) -is found below: - -```python -import tensorflow as tf -from datasets import flowers - -slim = tf.contrib.slim - -# Selects the 'validation' dataset. -dataset = flowers.get_split('validation', DATA_DIR) - -# Creates a TF-Slim DataProvider which reads the dataset in the background -# during both training and testing. -provider = slim.dataset_data_provider.DatasetDataProvider(dataset) -[image, label] = provider.get(['image', 'label']) -``` - - -# Pre-trained Models - - -Neural nets work best when they have many parameters, making them powerful -function approximators. -However, this means they must be trained on very large datasets. Because -training models from scratch can be a very computationally intensive process -requiring days or even weeks, we provide various pre-trained models, -as listed below. These CNNs have been trained on the -[ILSVRC-2012-CLS](http://www.image-net.org/challenges/LSVRC/2012/) -image classification dataset. - -In the table below, we list each model, the corresponding -TensorFlow model file, the link to the model checkpoint, and the top 1 and top 5 -accuracy (on the imagenet test set). -Note that the VGG and ResNet parameters have been converted from their original -caffe formats -([here](https://github.com/BVLC/caffe/wiki/Model-Zoo#models-used-by-the-vgg-team-in-ilsvrc-2014) -and -[here](https://github.com/KaimingHe/deep-residual-networks)), -whereas the Inception parameters have been trained internally at -Google. 
Also be aware that these accuracies were computed by evaluating using a
-single image crop. Some academic papers report higher accuracy by using multiple
-crops at multiple scales.
-
-Model | TF-Slim File | Checkpoint | Top-1 Accuracy | Top-5 Accuracy |
-:----:|:------------:|:----------:|:--------------:|:--------------:|
-[Inception V1](http://arxiv.org/abs/1409.4842v1)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/inception_v1.py)|[inception_v1_2016_08_28.tar.gz](http://download.tensorflow.org/models/inception_v1_2016_08_28.tar.gz)|69.8|89.6|
-[Inception V2](http://arxiv.org/abs/1502.03167)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/inception_v2.py)|[inception_v2_2016_08_28.tar.gz](http://download.tensorflow.org/models/inception_v2_2016_08_28.tar.gz)|73.9|91.8|
-[Inception V3](http://arxiv.org/abs/1512.00567)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/inception_v3.py)|[inception_v3_2016_08_28.tar.gz](http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz)|78.0|93.9|
-[Inception V4](http://arxiv.org/abs/1602.07261)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/inception_v4.py)|[inception_v4_2016_09_09.tar.gz](http://download.tensorflow.org/models/inception_v4_2016_09_09.tar.gz)|80.2|95.2|
-[Inception-ResNet-v2](http://arxiv.org/abs/1602.07261)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/inception_resnet_v2.py)|[inception_resnet_v2.tar.gz](http://download.tensorflow.org/models/inception_resnet_v2_2016_08_30.tar.gz)|80.4|95.3|
-[ResNet 50](https://arxiv.org/abs/1512.03385)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v1.py)|[resnet_v1_50.tar.gz](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz)|75.2|92.2|
-[ResNet 101](https://arxiv.org/abs/1512.03385)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v1.py)|[resnet_v1_101.tar.gz](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz)|76.4|92.9|
-[ResNet 152](https://arxiv.org/abs/1512.03385)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v1.py)|[resnet_v1_152.tar.gz](http://download.tensorflow.org/models/resnet_v1_152_2016_08_28.tar.gz)|76.8|93.2|
-[VGG 16](http://arxiv.org/abs/1409.1556.pdf)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/vgg.py)|[vgg_16.tar.gz](http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz)|71.5|89.8|
-[VGG 19](http://arxiv.org/abs/1409.1556.pdf)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/vgg.py)|[vgg_19.tar.gz](http://download.tensorflow.org/models/vgg_19_2016_08_28.tar.gz)|71.1|89.8|
-
-
-Here is an example of how to download the Inception V3 checkpoint:
-
-```shell
-$ CHECKPOINT_DIR=/tmp/checkpoints
-$ mkdir -p ${CHECKPOINT_DIR}
-$ wget http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz
-$ tar -xvf inception_v3_2016_08_28.tar.gz
-$ mv inception_v3.ckpt ${CHECKPOINT_DIR}
-$ rm inception_v3_2016_08_28.tar.gz
-```
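Before wiring a downloaded checkpoint into training or evaluation, it can help to confirm what it actually contains. Here is a minimal sketch (our illustration, not part of the original scripts; it assumes TF 1.x and the checkpoint path used above):

```python
# Minimal sketch, assuming TF 1.x: list the variables stored in the
# checkpoint downloaded above, together with their shapes.
import tensorflow as tf

reader = tf.train.NewCheckpointReader('/tmp/checkpoints/inception_v3.ckpt')
# get_variable_to_shape_map() returns {variable_name: shape_list}, e.g.
# 'InceptionV3/Conv2d_1a_3x3/weights' -> [3, 3, 3, 32].
for name, shape in sorted(reader.get_variable_to_shape_map().items()):
  print(name, shape)
```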
-
-
-# Training a model from scratch
-
-
-We provide an easy way to train a model from scratch using any TF-Slim dataset.
-The following example demonstrates how to train Inception V3 using the default
-parameters on the ImageNet dataset.
-
-```shell
-DATASET_DIR=/tmp/imagenet
-TRAIN_DIR=/tmp/train_logs
-python train_image_classifier.py \
-    --train_dir=${TRAIN_DIR} \
-    --dataset_name=imagenet \
-    --dataset_split_name=train \
-    --dataset_dir=${DATASET_DIR} \
-    --model_name=inception_v3
-```
-
-This process may take several days, depending on your hardware setup.
-For convenience, we provide a way to train a model on multiple GPUs,
-and/or multiple CPUs, either synchronously or asynchronously.
-See [model_deploy](https://github.com/tensorflow/models/blob/master/slim/deployment/model_deploy.py)
-for details.
-
-
-# Fine-tuning a model from an existing checkpoint
-
-
-Rather than training from scratch, we'll often want to start from a pre-trained
-model and fine-tune it.
-To indicate a checkpoint from which to fine-tune, we'll call training with
-the `--checkpoint_path` flag and assign it an absolute path to a checkpoint
-file.
-
-When fine-tuning a model, we need to be careful about restoring checkpoint
-weights. In particular, when we fine-tune a model on a new task with a different
-number of output labels, we won't be able to restore the final logits (classifier)
-layer. For this, we'll use the `--checkpoint_exclude_scopes` flag. This flag
-prevents certain variables from being loaded. When fine-tuning on a
-classification task using a different number of classes than the trained model,
-the new model will have a final 'logits' layer whose dimensions differ from the
-pre-trained model. For example, if fine-tuning an ImageNet-trained model on
-Flowers, the pre-trained logits layer will have dimensions `[2048 x 1001]` but
-our new logits layer will have dimensions `[2048 x 5]`. Consequently, this
-flag tells TF-Slim to avoid loading these weights from the checkpoint.
-
-Keep in mind that warm-starting from a checkpoint affects the model's weights
-only during the initialization of the model. Once a model has started training,
-a new checkpoint will be created in `${TRAIN_DIR}`. If the fine-tuning
-training is stopped and restarted, this new checkpoint will be the one from
-which weights are restored, not the one given by `--checkpoint_path`. Consequently,
-the flags `--checkpoint_path` and `--checkpoint_exclude_scopes` are only used
-during the 0th global step (model initialization). Typically for fine-tuning
-one only wants to train a subset of layers, so the flag `--trainable_scopes`
-specifies which subset of layers should be trained; the rest remain frozen.
-
-Below we give an example of
-[fine-tuning inception-v3 on flowers](https://github.com/tensorflow/models/blob/master/slim/scripts/finetune_inception_v3_on_flowers.sh).
-Inception V3 was trained on ImageNet with 1000 class labels, but the flowers
-dataset only has 5 classes. Since the dataset is quite small, we will only train
-the new layers.
-
-
-```shell
-$ DATASET_DIR=/tmp/flowers
-$ TRAIN_DIR=/tmp/flowers-models/inception_v3
-$ CHECKPOINT_PATH=/tmp/my_checkpoints/inception_v3.ckpt
-$ python train_image_classifier.py \
-    --train_dir=${TRAIN_DIR} \
-    --dataset_dir=${DATASET_DIR} \
-    --dataset_name=flowers \
-    --dataset_split_name=train \
-    --model_name=inception_v3 \
-    --checkpoint_path=${CHECKPOINT_PATH} \
-    --checkpoint_exclude_scopes=InceptionV3/Logits,InceptionV3/AuxLogits \
-    --trainable_scopes=InceptionV3/Logits,InceptionV3/AuxLogits
-```
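For intuition about what an exclusion list like `--checkpoint_exclude_scopes` does under the hood, here is a minimal sketch (our illustration, not the training script's actual code; it assumes TF 1.x with `tf.contrib.slim` and that the InceptionV3 graph has already been built):

```python
# Minimal sketch, assuming TF 1.x + tf.contrib.slim and an already-built
# InceptionV3 graph: restore everything except the excluded scopes.
import tensorflow as tf
slim = tf.contrib.slim

exclusions = ['InceptionV3/Logits', 'InceptionV3/AuxLogits']
variables_to_restore = slim.get_variables_to_restore(exclude=exclusions)

saver = tf.train.Saver(variables_to_restore)
with tf.Session() as sess:
  # Initialize all variables first; the restore below then overwrites the
  # pre-trained ones, leaving the excluded (new) layers freshly initialized.
  sess.run(tf.global_variables_initializer())
  saver.restore(sess, '/tmp/my_checkpoints/inception_v3.ckpt')
```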
-
-
-# Evaluating performance of a model
-
-
-To evaluate the performance of a model (whether pretrained or your own),
-you can use the eval_image_classifier.py script, as shown below.
-
-Below we give an example of downloading the pretrained Inception V3 model and
-evaluating it on the ImageNet dataset.
-
-```shell
-$ CHECKPOINT_FILE=${CHECKPOINT_DIR}/inception_v3.ckpt  # Example
-$ python eval_image_classifier.py \
-    --alsologtostderr \
-    --checkpoint_path=${CHECKPOINT_FILE} \
-    --dataset_dir=${DATASET_DIR} \
-    --dataset_name=imagenet \
-    --dataset_split_name=validation \
-    --model_name=inception_v3
-```
-
-
-
-# Troubleshooting
-
-#### The model runs out of CPU memory.
-
-See
-[Model Runs out of CPU memory](https://github.com/tensorflow/models/tree/master/inception#the-model-runs-out-of-cpu-memory).
-
-#### The model runs out of GPU memory.
-
-See
-[Adjusting Memory Demands](https://github.com/tensorflow/models/tree/master/inception#adjusting-memory-demands).
-
-#### The model training results in NaNs.
-
-See
-[Model Resulting in NaNs](https://github.com/tensorflow/models/tree/master/inception#the-model-training-results-in-nans).
-
-#### The ResNet and VGG Models have 1000 classes but the ImageNet dataset has 1001
-
-The ImageNet dataset provided has an empty background class which can be used
-to fine-tune the model to other tasks. If you try training or fine-tuning the
-VGG or ResNet models using the ImageNet dataset, you might encounter the
-following error:
-
-```bash
-InvalidArgumentError: Assign requires shapes of both tensors to match. lhs shape= [1001] rhs shape= [1000]
-```
-This is because the VGG and ResNet final layers have only 1000
-outputs rather than 1001.
-
-To fix this issue, you can set the `--labels_offset=1` flag, which shifts
-the ImageNet labels down by one so that the background class is skipped.
-
-#### I wish to train a model with a different image size.
-
-The preprocessing functions all take `height` and `width` as parameters. You
-can change the default values using the following snippet:
-
-```python
-image_preprocessing_fn = preprocessing_factory.get_preprocessing(
-    preprocessing_name,
-    height=MY_NEW_HEIGHT,
-    width=MY_NEW_WIDTH,
-    is_training=True)
-```
-
-#### What hardware specification are these hyper-parameters targeted for?
-
-See
-[Hardware Specifications](https://github.com/tensorflow/models/tree/master/inception#what-hardware-specification-are-these-hyper-parameters-targeted-for).
-
diff --git a/examples/slim/datasets/__init__.py b/examples/slim/datasets/__init__.py
deleted file mode 100644
index 8b137891..00000000
--- a/examples/slim/datasets/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/examples/slim/datasets/cifar10.py b/examples/slim/datasets/cifar10.py
deleted file mode 100644
index 72d58f8d..00000000
--- a/examples/slim/datasets/cifar10.py
+++ /dev/null
@@ -1,98 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Provides data for the Cifar10 dataset.
- -The dataset scripts used to create the dataset can be found at: -tensorflow/models/slim/datasets/download_and_convert_cifar10.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import tensorflow as tf - -from datasets import dataset_utils - -slim = tf.contrib.slim - -_FILE_PATTERN = 'cifar10_%s.tfrecord' - -SPLITS_TO_SIZES = {'train': 50000, 'test': 10000} - -_NUM_CLASSES = 10 - -_ITEMS_TO_DESCRIPTIONS = { - 'image': 'A [32 x 32 x 3] color image.', - 'label': 'A single integer between 0 and 9', -} - - -def get_split(split_name, dataset_dir, file_pattern=None, reader=None): - """Gets a dataset tuple with instructions for reading cifar10. - - Args: - split_name: A train/test split name. - dataset_dir: The base directory of the dataset sources. - file_pattern: The file pattern to use when matching the dataset sources. - It is assumed that the pattern contains a '%s' string so that the split - name can be inserted. - reader: The TensorFlow reader type. - - Returns: - A `Dataset` namedtuple. - - Raises: - ValueError: if `split_name` is not a valid train/test split. - """ - if split_name not in SPLITS_TO_SIZES: - raise ValueError('split name %s was not recognized.' % split_name) - - if not file_pattern: - file_pattern = _FILE_PATTERN - file_pattern = os.path.join(dataset_dir, file_pattern % split_name) - - # Allowing None in the signature so that dataset_factory can use the default. - if not reader: - reader = tf.TFRecordReader - - keys_to_features = { - 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), - 'image/format': tf.FixedLenFeature((), tf.string, default_value='png'), - 'image/class/label': tf.FixedLenFeature( - [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)), - } - - items_to_handlers = { - 'image': slim.tfexample_decoder.Image(shape=[32, 32, 3]), - 'label': slim.tfexample_decoder.Tensor('image/class/label'), - } - - decoder = slim.tfexample_decoder.TFExampleDecoder( - keys_to_features, items_to_handlers) - - labels_to_names = None - if dataset_utils.has_labels(dataset_dir): - labels_to_names = dataset_utils.read_label_file(dataset_dir) - - return slim.dataset.Dataset( - data_sources=file_pattern, - reader=reader, - decoder=decoder, - num_samples=SPLITS_TO_SIZES[split_name], - items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, - num_classes=_NUM_CLASSES, - labels_to_names=labels_to_names) diff --git a/examples/slim/datasets/dataset_factory.py b/examples/slim/datasets/dataset_factory.py deleted file mode 100644 index 141079a6..00000000 --- a/examples/slim/datasets/dataset_factory.py +++ /dev/null @@ -1,57 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""A factory-pattern class which returns classification image/label pairs.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from datasets import cifar10 -from datasets import flowers -from datasets import imagenet -from datasets import mnist - -datasets_map = { - 'cifar10': cifar10, - 'flowers': flowers, - 'imagenet': imagenet, - 'mnist': mnist, -} - - -def get_dataset(name, split_name, dataset_dir, file_pattern=None, reader=None): - """Given a dataset name and a split_name returns a Dataset. - - Args: - name: String, the name of the dataset. - split_name: A train/test split name. - dataset_dir: The directory where the dataset files are stored. - file_pattern: The file pattern to use for matching the dataset source files. - reader: The subclass of tf.ReaderBase. If left as `None`, then the default - reader defined by each dataset is used. - - Returns: - A `Dataset` class. - - Raises: - ValueError: If the dataset `name` is unknown. - """ - if name not in datasets_map: - raise ValueError('Name of dataset unknown %s' % name) - return datasets_map[name].get_split( - split_name, - dataset_dir, - file_pattern, - reader) diff --git a/examples/slim/datasets/dataset_utils.py b/examples/slim/datasets/dataset_utils.py deleted file mode 100644 index d3660190..00000000 --- a/examples/slim/datasets/dataset_utils.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains utilities for downloading and converting datasets.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import sys -import tarfile - -from six.moves import urllib -import tensorflow as tf - -LABELS_FILENAME = 'labels.txt' - - -def int64_feature(values): - """Returns a TF-Feature of int64s. - - Args: - values: A scalar or list of values. - - Returns: - a TF-Feature. - """ - if not isinstance(values, (tuple, list)): - values = [values] - return tf.train.Feature(int64_list=tf.train.Int64List(value=values)) - - -def bytes_feature(values): - """Returns a TF-Feature of bytes. - - Args: - values: A string. - - Returns: - a TF-Feature. - """ - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values])) - - -def image_to_tfexample(image_data, image_format, height, width, class_id): - return tf.train.Example(features=tf.train.Features(feature={ - 'image/encoded': bytes_feature(image_data), - 'image/format': bytes_feature(image_format), - 'image/class/label': int64_feature(class_id), - 'image/height': int64_feature(height), - 'image/width': int64_feature(width), - })) - - -def download_and_uncompress_tarball(tarball_url, dataset_dir): - """Downloads the `tarball_url` and uncompresses it locally. 
- - Args: - tarball_url: The URL of a tarball file. - dataset_dir: The directory where the temporary files are stored. - """ - filename = tarball_url.split('/')[-1] - filepath = os.path.join(dataset_dir, filename) - - def _progress(count, block_size, total_size): - sys.stdout.write('\r>> Downloading %s %.1f%%' % ( - filename, float(count * block_size) / float(total_size) * 100.0)) - sys.stdout.flush() - filepath, _ = urllib.request.urlretrieve(tarball_url, filepath, _progress) - print() - statinfo = os.stat(filepath) - print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') - tarfile.open(filepath, 'r:gz').extractall(dataset_dir) - - -def write_label_file(labels_to_class_names, dataset_dir, - filename=LABELS_FILENAME): - """Writes a file with the list of class names. - - Args: - labels_to_class_names: A map of (integer) labels to class names. - dataset_dir: The directory in which the labels file should be written. - filename: The filename where the class names are written. - """ - labels_filename = os.path.join(dataset_dir, filename) - with tf.gfile.Open(labels_filename, 'w') as f: - for label in labels_to_class_names: - class_name = labels_to_class_names[label] - f.write('%d:%s\n' % (label, class_name)) - - -def has_labels(dataset_dir, filename=LABELS_FILENAME): - """Specifies whether or not the dataset directory contains a label map file. - - Args: - dataset_dir: The directory in which the labels file is found. - filename: The filename where the class names are written. - - Returns: - `True` if the labels file exists and `False` otherwise. - """ - return tf.gfile.Exists(os.path.join(dataset_dir, filename)) - - -def read_label_file(dataset_dir, filename=LABELS_FILENAME): - """Reads the labels file and returns a mapping from ID to class name. - - Args: - dataset_dir: The directory in which the labels file is found. - filename: The filename where the class names are written. - - Returns: - A map from a label (integer) to class name. - """ - labels_filename = os.path.join(dataset_dir, filename) - with tf.gfile.Open(labels_filename, 'r') as f: - lines = f.read() - lines = lines.split('\n') - lines = filter(None, lines) - - labels_to_class_names = {} - for line in lines: - index = line.index(':') - labels_to_class_names[int(line[:index])] = line[index+1:] - return labels_to_class_names diff --git a/examples/slim/datasets/download_and_convert_cifar10.py b/examples/slim/datasets/download_and_convert_cifar10.py deleted file mode 100644 index 2cb787d0..00000000 --- a/examples/slim/datasets/download_and_convert_cifar10.py +++ /dev/null @@ -1,195 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Downloads and converts cifar10 data to TFRecords of TF-Example protos. 
- -This module downloads the cifar10 data, uncompresses it, reads the files -that make up the cifar10 data and creates two TFRecord datasets: one for train -and one for test. Each TFRecord dataset is comprised of a set of TF-Example -protocol buffers, each of which contain a single image and label. - -The script should take several minutes to run. - -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import cPickle -import os -import sys -import tarfile - -import numpy as np -from six.moves import urllib -import tensorflow as tf - -from datasets import dataset_utils - -# The URL where the CIFAR data can be downloaded. -_DATA_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' - -# The number of training files. -_NUM_TRAIN_FILES = 5 - -# The height and width of each image. -_IMAGE_SIZE = 32 - -# The names of the classes. -_CLASS_NAMES = [ - 'airplane', - 'automobile', - 'bird', - 'cat', - 'deer', - 'dog', - 'frog', - 'horse', - 'ship', - 'truck', -] - - -def _add_to_tfrecord(filename, tfrecord_writer, offset=0): - """Loads data from the cifar10 pickle files and writes files to a TFRecord. - - Args: - filename: The filename of the cifar10 pickle file. - tfrecord_writer: The TFRecord writer to use for writing. - offset: An offset into the absolute number of images previously written. - - Returns: - The new offset. - """ - with tf.gfile.Open(filename, 'r') as f: - data = cPickle.load(f) - - images = data['data'] - num_images = images.shape[0] - - images = images.reshape((num_images, 3, 32, 32)) - labels = data['labels'] - - with tf.Graph().as_default(): - image_placeholder = tf.placeholder(dtype=tf.uint8) - encoded_image = tf.image.encode_png(image_placeholder) - - with tf.Session('') as sess: - - for j in range(num_images): - sys.stdout.write('\r>> Reading file [%s] image %d/%d' % ( - filename, offset + j + 1, offset + num_images)) - sys.stdout.flush() - - image = np.squeeze(images[j]).transpose((1, 2, 0)) - label = labels[j] - - png_string = sess.run(encoded_image, - feed_dict={image_placeholder: image}) - - example = dataset_utils.image_to_tfexample( - png_string, 'png', _IMAGE_SIZE, _IMAGE_SIZE, label) - tfrecord_writer.write(example.SerializeToString()) - - return offset + num_images - - -def _get_output_filename(dataset_dir, split_name): - """Creates the output filename. - - Args: - dataset_dir: The dataset directory where the dataset is stored. - split_name: The name of the train/test split. - - Returns: - An absolute file path. - """ - return '%s/cifar10_%s.tfrecord' % (dataset_dir, split_name) - - -def _download_and_uncompress_dataset(dataset_dir): - """Downloads cifar10 and uncompresses it locally. - - Args: - dataset_dir: The directory where the temporary files are stored. - """ - filename = _DATA_URL.split('/')[-1] - filepath = os.path.join(dataset_dir, filename) - - if not os.path.exists(filepath): - def _progress(count, block_size, total_size): - sys.stdout.write('\r>> Downloading %s %.1f%%' % ( - filename, float(count * block_size) / float(total_size) * 100.0)) - sys.stdout.flush() - filepath, _ = urllib.request.urlretrieve(_DATA_URL, filepath, _progress) - print() - statinfo = os.stat(filepath) - print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') - tarfile.open(filepath, 'r:gz').extractall(dataset_dir) - - -def _clean_up_temporary_files(dataset_dir): - """Removes temporary files used to create the dataset. 
- - Args: - dataset_dir: The directory where the temporary files are stored. - """ - filename = _DATA_URL.split('/')[-1] - filepath = os.path.join(dataset_dir, filename) - tf.gfile.Remove(filepath) - - tmp_dir = os.path.join(dataset_dir, 'cifar-10-batches-py') - tf.gfile.DeleteRecursively(tmp_dir) - - -def run(dataset_dir): - """Runs the download and conversion operation. - - Args: - dataset_dir: The dataset directory where the dataset is stored. - """ - if not tf.gfile.Exists(dataset_dir): - tf.gfile.MakeDirs(dataset_dir) - - training_filename = _get_output_filename(dataset_dir, 'train') - testing_filename = _get_output_filename(dataset_dir, 'test') - - if tf.gfile.Exists(training_filename) and tf.gfile.Exists(testing_filename): - print('Dataset files already exist. Exiting without re-creating them.') - return - - dataset_utils.download_and_uncompress_tarball(_DATA_URL, dataset_dir) - - # First, process the training data: - with tf.python_io.TFRecordWriter(training_filename) as tfrecord_writer: - offset = 0 - for i in range(_NUM_TRAIN_FILES): - filename = os.path.join(dataset_dir, - 'cifar-10-batches-py', - 'data_batch_%d' % (i + 1)) # 1-indexed. - offset = _add_to_tfrecord(filename, tfrecord_writer, offset) - - # Next, process the testing data: - with tf.python_io.TFRecordWriter(testing_filename) as tfrecord_writer: - filename = os.path.join(dataset_dir, - 'cifar-10-batches-py', - 'test_batch') - _add_to_tfrecord(filename, tfrecord_writer) - - # Finally, write the labels file: - labels_to_class_names = dict(zip(range(len(_CLASS_NAMES)), _CLASS_NAMES)) - dataset_utils.write_label_file(labels_to_class_names, dataset_dir) - - _clean_up_temporary_files(dataset_dir) - print('\nFinished converting the Cifar10 dataset!') diff --git a/examples/slim/datasets/download_and_convert_flowers.py b/examples/slim/datasets/download_and_convert_flowers.py deleted file mode 100644 index 347a4df2..00000000 --- a/examples/slim/datasets/download_and_convert_flowers.py +++ /dev/null @@ -1,212 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Downloads and converts Flowers data to TFRecords of TF-Example protos. - -This module downloads the Flowers data, uncompresses it, reads the files -that make up the Flowers data and creates two TFRecord datasets: one for train -and one for test. Each TFRecord dataset is comprised of a set of TF-Example -protocol buffers, each of which contain a single image and label. - -The script should take about a minute to run. - -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -import os -import random -import sys - -import tensorflow as tf - -from datasets import dataset_utils - -# The URL where the Flowers data can be downloaded. 
-_DATA_URL = 'http://download.tensorflow.org/example_images/flower_photos.tgz' - -# The number of images in the validation set. -_NUM_VALIDATION = 350 - -# Seed for repeatability. -_RANDOM_SEED = 0 - -# The number of shards per dataset split. -_NUM_SHARDS = 5 - - -class ImageReader(object): - """Helper class that provides TensorFlow image coding utilities.""" - - def __init__(self): - # Initializes function that decodes RGB JPEG data. - self._decode_jpeg_data = tf.placeholder(dtype=tf.string) - self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3) - - def read_image_dims(self, sess, image_data): - image = self.decode_jpeg(sess, image_data) - return image.shape[0], image.shape[1] - - def decode_jpeg(self, sess, image_data): - image = sess.run(self._decode_jpeg, - feed_dict={self._decode_jpeg_data: image_data}) - assert len(image.shape) == 3 - assert image.shape[2] == 3 - return image - - -def _get_filenames_and_classes(dataset_dir): - """Returns a list of filenames and inferred class names. - - Args: - dataset_dir: A directory containing a set of subdirectories representing - class names. Each subdirectory should contain PNG or JPG encoded images. - - Returns: - A list of image file paths, relative to `dataset_dir` and the list of - subdirectories, representing class names. - """ - flower_root = os.path.join(dataset_dir, 'flower_photos') - directories = [] - class_names = [] - for filename in os.listdir(flower_root): - path = os.path.join(flower_root, filename) - if os.path.isdir(path): - directories.append(path) - class_names.append(filename) - - photo_filenames = [] - for directory in directories: - for filename in os.listdir(directory): - path = os.path.join(directory, filename) - photo_filenames.append(path) - - return photo_filenames, sorted(class_names) - - -def _get_dataset_filename(dataset_dir, split_name, shard_id): - output_filename = 'flowers_%s_%05d-of-%05d.tfrecord' % ( - split_name, shard_id, _NUM_SHARDS) - return os.path.join(dataset_dir, output_filename) - - -def _convert_dataset(split_name, filenames, class_names_to_ids, dataset_dir): - """Converts the given filenames to a TFRecord dataset. - - Args: - split_name: The name of the dataset, either 'train' or 'validation'. - filenames: A list of absolute paths to png or jpg images. - class_names_to_ids: A dictionary from class names (strings) to ids - (integers). - dataset_dir: The directory where the converted datasets are stored. 
- """ - assert split_name in ['train', 'validation'] - - num_per_shard = int(math.ceil(len(filenames) / float(_NUM_SHARDS))) - - with tf.Graph().as_default(): - image_reader = ImageReader() - - with tf.Session('') as sess: - - for shard_id in range(_NUM_SHARDS): - output_filename = _get_dataset_filename( - dataset_dir, split_name, shard_id) - - with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer: - start_ndx = shard_id * num_per_shard - end_ndx = min((shard_id+1) * num_per_shard, len(filenames)) - for i in range(start_ndx, end_ndx): - sys.stdout.write('\r>> Converting image %d/%d shard %d' % ( - i+1, len(filenames), shard_id)) - sys.stdout.flush() - - # Read the filename: - image_data = tf.gfile.FastGFile(filenames[i], 'r').read() - height, width = image_reader.read_image_dims(sess, image_data) - - class_name = os.path.basename(os.path.dirname(filenames[i])) - class_id = class_names_to_ids[class_name] - - example = dataset_utils.image_to_tfexample( - image_data, 'jpg', height, width, class_id) - tfrecord_writer.write(example.SerializeToString()) - - sys.stdout.write('\n') - sys.stdout.flush() - - -def _clean_up_temporary_files(dataset_dir): - """Removes temporary files used to create the dataset. - - Args: - dataset_dir: The directory where the temporary files are stored. - """ - filename = _DATA_URL.split('/')[-1] - filepath = os.path.join(dataset_dir, filename) - tf.gfile.Remove(filepath) - - tmp_dir = os.path.join(dataset_dir, 'flower_photos') - tf.gfile.DeleteRecursively(tmp_dir) - - -def _dataset_exists(dataset_dir): - for split_name in ['train', 'validation']: - for shard_id in range(_NUM_SHARDS): - output_filename = _get_dataset_filename( - dataset_dir, split_name, shard_id) - if not tf.gfile.Exists(output_filename): - return False - return True - - -def run(dataset_dir): - """Runs the download and conversion operation. - - Args: - dataset_dir: The dataset directory where the dataset is stored. - """ - if not tf.gfile.Exists(dataset_dir): - tf.gfile.MakeDirs(dataset_dir) - - if _dataset_exists(dataset_dir): - print('Dataset files already exist. Exiting without re-creating them.') - return - - dataset_utils.download_and_uncompress_tarball(_DATA_URL, dataset_dir) - photo_filenames, class_names = _get_filenames_and_classes(dataset_dir) - class_names_to_ids = dict(zip(class_names, range(len(class_names)))) - - # Divide into train and test: - random.seed(_RANDOM_SEED) - random.shuffle(photo_filenames) - training_filenames = photo_filenames[_NUM_VALIDATION:] - validation_filenames = photo_filenames[:_NUM_VALIDATION] - - # First, convert the training and validation sets. - _convert_dataset('train', training_filenames, class_names_to_ids, - dataset_dir) - _convert_dataset('validation', validation_filenames, class_names_to_ids, - dataset_dir) - - # Finally, write the labels file: - labels_to_class_names = dict(zip(range(len(class_names)), class_names)) - dataset_utils.write_label_file(labels_to_class_names, dataset_dir) - - _clean_up_temporary_files(dataset_dir) - print('\nFinished converting the Flowers dataset!') - diff --git a/examples/slim/datasets/download_and_convert_mnist.py b/examples/slim/datasets/download_and_convert_mnist.py deleted file mode 100644 index d6ae8743..00000000 --- a/examples/slim/datasets/download_and_convert_mnist.py +++ /dev/null @@ -1,221 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
diff --git a/examples/slim/datasets/download_and_convert_mnist.py b/examples/slim/datasets/download_and_convert_mnist.py
deleted file mode 100644
index d6ae8743..00000000
--- a/examples/slim/datasets/download_and_convert_mnist.py
+++ /dev/null
@@ -1,221 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-r"""Downloads and converts MNIST data to TFRecords of TF-Example protos.
-
-This module downloads the MNIST data, uncompresses it, reads the files
-that make up the MNIST data and creates two TFRecord datasets: one for train
-and one for test. Each TFRecord dataset is comprised of a set of TF-Example
-protocol buffers, each of which contains a single image and label.
-
-The script should take about a minute to run.
-
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import gzip
-import os
-import sys
-
-import numpy as np
-from six.moves import urllib
-import tensorflow as tf
-
-from datasets import dataset_utils
-
-# The URLs where the MNIST data can be downloaded.
-_DATA_URL = 'http://yann.lecun.com/exdb/mnist/'
-_TRAIN_DATA_FILENAME = 'train-images-idx3-ubyte.gz'
-_TRAIN_LABELS_FILENAME = 'train-labels-idx1-ubyte.gz'
-_TEST_DATA_FILENAME = 't10k-images-idx3-ubyte.gz'
-_TEST_LABELS_FILENAME = 't10k-labels-idx1-ubyte.gz'
-
-_IMAGE_SIZE = 28
-_NUM_CHANNELS = 1
-
-# The names of the classes.
-_CLASS_NAMES = [
-    'zero',
-    'one',
-    'two',
-    'three',
-    'four',
-    'five',
-    'six',
-    'seven',
-    'eight',
-    'nine',
-]
-
-
-def _extract_images(filename, num_images):
-  """Extract the images into a numpy array.
-
-  Args:
-    filename: The path to an MNIST images file.
-    num_images: The number of images in the file.
-
-  Returns:
-    A numpy array of shape [number_of_images, height, width, channels].
-  """
-  print('Extracting images from: ', filename)
-  with gzip.open(filename) as bytestream:
-    bytestream.read(16)
-    buf = bytestream.read(
-        _IMAGE_SIZE * _IMAGE_SIZE * num_images * _NUM_CHANNELS)
-    data = np.frombuffer(buf, dtype=np.uint8)
-    data = data.reshape(num_images, _IMAGE_SIZE, _IMAGE_SIZE, _NUM_CHANNELS)
-  return data
-
-
-def _extract_labels(filename, num_labels):
-  """Extract the labels into a vector of int64 label IDs.
-
-  Args:
-    filename: The path to an MNIST labels file.
-    num_labels: The number of labels in the file.
-
-  Returns:
-    A numpy array of shape [number_of_labels].
-  """
-  print('Extracting labels from: ', filename)
-  with gzip.open(filename) as bytestream:
-    bytestream.read(8)
-    buf = bytestream.read(1 * num_labels)
-    labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int64)
-  return labels
-
-
-def _add_to_tfrecord(data_filename, labels_filename, num_images,
-                     tfrecord_writer):
-  """Loads data from the binary MNIST files and writes files to a TFRecord.
-
-  Args:
-    data_filename: The filename of the MNIST images.
-    labels_filename: The filename of the MNIST labels.
-    num_images: The number of images in the dataset.
-    tfrecord_writer: The TFRecord writer to use for writing.
- """ - images = _extract_images(data_filename, num_images) - labels = _extract_labels(labels_filename, num_images) - - shape = (_IMAGE_SIZE, _IMAGE_SIZE, _NUM_CHANNELS) - with tf.Graph().as_default(): - image = tf.placeholder(dtype=tf.uint8, shape=shape) - encoded_png = tf.image.encode_png(image) - - with tf.Session('') as sess: - for j in range(num_images): - sys.stdout.write('\r>> Converting image %d/%d' % (j + 1, num_images)) - sys.stdout.flush() - - png_string = sess.run(encoded_png, feed_dict={image: images[j]}) - - example = dataset_utils.image_to_tfexample( - png_string, 'png'.encode(), _IMAGE_SIZE, _IMAGE_SIZE, labels[j]) - tfrecord_writer.write(example.SerializeToString()) - - -def _get_output_filename(dataset_dir, split_name): - """Creates the output filename. - - Args: - dataset_dir: The directory where the temporary files are stored. - split_name: The name of the train/test split. - - Returns: - An absolute file path. - """ - return '%s/mnist_%s.tfrecord' % (dataset_dir, split_name) - - -def _download_dataset(dataset_dir): - """Downloads MNIST locally. - - Args: - dataset_dir: The directory where the temporary files are stored. - """ - for filename in [_TRAIN_DATA_FILENAME, - _TRAIN_LABELS_FILENAME, - _TEST_DATA_FILENAME, - _TEST_LABELS_FILENAME]: - filepath = os.path.join(dataset_dir, filename) - - if not os.path.exists(filepath): - print('Downloading file %s...' % filename) - def _progress(count, block_size, total_size): - sys.stdout.write('\r>> Downloading %.1f%%' % ( - float(count * block_size) / float(total_size) * 100.0)) - sys.stdout.flush() - filepath, _ = urllib.request.urlretrieve(_DATA_URL + filename, - filepath, - _progress) - print() - with tf.gfile.GFile(filepath) as f: - size = f.size() - print('Successfully downloaded', filename, size, 'bytes.') - - -def _clean_up_temporary_files(dataset_dir): - """Removes temporary files used to create the dataset. - - Args: - dataset_dir: The directory where the temporary files are stored. - """ - for filename in [_TRAIN_DATA_FILENAME, - _TRAIN_LABELS_FILENAME, - _TEST_DATA_FILENAME, - _TEST_LABELS_FILENAME]: - filepath = os.path.join(dataset_dir, filename) - tf.gfile.Remove(filepath) - - -def run(dataset_dir): - """Runs the download and conversion operation. - - Args: - dataset_dir: The dataset directory where the dataset is stored. - """ - if not tf.gfile.Exists(dataset_dir): - tf.gfile.MakeDirs(dataset_dir) - - training_filename = _get_output_filename(dataset_dir, 'train') - testing_filename = _get_output_filename(dataset_dir, 'test') - - if tf.gfile.Exists(training_filename) and tf.gfile.Exists(testing_filename): - print('Dataset files already exist. 
Exiting without re-creating them.') - return - - _download_dataset(dataset_dir) - - # First, process the training data: - with tf.python_io.TFRecordWriter(training_filename) as tfrecord_writer: - data_filename = os.path.join(dataset_dir, _TRAIN_DATA_FILENAME) - labels_filename = os.path.join(dataset_dir, _TRAIN_LABELS_FILENAME) - _add_to_tfrecord(data_filename, labels_filename, 60000, tfrecord_writer) - - # Next, process the testing data: - with tf.python_io.TFRecordWriter(testing_filename) as tfrecord_writer: - data_filename = os.path.join(dataset_dir, _TEST_DATA_FILENAME) - labels_filename = os.path.join(dataset_dir, _TEST_LABELS_FILENAME) - _add_to_tfrecord(data_filename, labels_filename, 10000, tfrecord_writer) - - # Finally, write the labels file: - labels_to_class_names = dict(zip(range(len(_CLASS_NAMES)), _CLASS_NAMES)) - dataset_utils.write_label_file(labels_to_class_names, dataset_dir) - - _clean_up_temporary_files(dataset_dir) - print('\nFinished converting the MNIST dataset!') diff --git a/examples/slim/datasets/flowers.py b/examples/slim/datasets/flowers.py deleted file mode 100644 index a73e9acd..00000000 --- a/examples/slim/datasets/flowers.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Provides data for the flowers dataset. - -The dataset scripts used to create the dataset can be found at: -tensorflow/models/slim/datasets/download_and_convert_flowers.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import tensorflow as tf - -from datasets import dataset_utils - -slim = tf.contrib.slim - -_FILE_PATTERN = 'flowers_%s_*.tfrecord' - -SPLITS_TO_SIZES = {'train': 3320, 'validation': 350} - -_NUM_CLASSES = 5 - -_ITEMS_TO_DESCRIPTIONS = { - 'image': 'A color image of varying size.', - 'label': 'A single integer between 0 and 4', -} - - -def get_split(split_name, dataset_dir, file_pattern=None, reader=None): - """Gets a dataset tuple with instructions for reading flowers. - - Args: - split_name: A train/validation split name. - dataset_dir: The base directory of the dataset sources. - file_pattern: The file pattern to use when matching the dataset sources. - It is assumed that the pattern contains a '%s' string so that the split - name can be inserted. - reader: The TensorFlow reader type. - - Returns: - A `Dataset` namedtuple. - - Raises: - ValueError: if `split_name` is not a valid train/validation split. - """ - if split_name not in SPLITS_TO_SIZES: - raise ValueError('split name %s was not recognized.' % split_name) - - if not file_pattern: - file_pattern = _FILE_PATTERN - file_pattern = os.path.join(dataset_dir, file_pattern % split_name) - - # Allowing None in the signature so that dataset_factory can use the default. 
- if reader is None: - reader = tf.TFRecordReader - - keys_to_features = { - 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), - 'image/format': tf.FixedLenFeature((), tf.string, default_value='png'), - 'image/class/label': tf.FixedLenFeature( - [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)), - } - - items_to_handlers = { - 'image': slim.tfexample_decoder.Image(), - 'label': slim.tfexample_decoder.Tensor('image/class/label'), - } - - decoder = slim.tfexample_decoder.TFExampleDecoder( - keys_to_features, items_to_handlers) - - labels_to_names = None - if dataset_utils.has_labels(dataset_dir): - labels_to_names = dataset_utils.read_label_file(dataset_dir) - - return slim.dataset.Dataset( - data_sources=file_pattern, - reader=reader, - decoder=decoder, - num_samples=SPLITS_TO_SIZES[split_name], - items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, - num_classes=_NUM_CLASSES, - labels_to_names=labels_to_names) diff --git a/examples/slim/datasets/imagenet.py b/examples/slim/datasets/imagenet.py deleted file mode 100644 index 17bc6f9a..00000000 --- a/examples/slim/datasets/imagenet.py +++ /dev/null @@ -1,193 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Provides data for the ImageNet ILSVRC 2012 Dataset plus some bounding boxes. - -Some images have one or more bounding boxes associated with the label of the -image. See details here: http://image-net.org/download-bboxes - -ImageNet is based upon WordNet 3.0. To uniquely identify a synset, we use -"WordNet ID" (wnid), which is a concatenation of POS ( i.e. part of speech ) -and SYNSET OFFSET of WordNet. For more information, please refer to the -WordNet documentation[http://wordnet.princeton.edu/wordnet/documentation/]. - -"There are bounding boxes for over 3000 popular synsets available. -For each synset, there are on average 150 images with bounding boxes." - -WARNING: Don't use for object detection, in this case all the bounding boxes -of the image belong to just one class. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -from six.moves import urllib -import tensorflow as tf - -from datasets import dataset_utils - -slim = tf.contrib.slim - -# TODO(nsilberman): Add tfrecord file type once the script is updated. -_FILE_PATTERN = '%s-*' - -_SPLITS_TO_SIZES = { - 'train': 1281167, - 'validation': 50000, -} - -_ITEMS_TO_DESCRIPTIONS = { - 'image': 'A color image of varying height and width.', - 'label': 'The label id of the image, integer between 0 and 999', - 'label_text': 'The text of the label.', - 'object/bbox': 'A list of bounding boxes.', - 'object/label': 'A list of labels, one per each object.', -} - -_NUM_CLASSES = 1001 - - -def create_readable_names_for_imagenet_labels(): - """Create a dict mapping label id to human readable string. 
-
-  Returns:
-    labels_to_names: dictionary where keys are integers from 0 to 1000
-    and values are human-readable names.
-
-  We retrieve a synset file, which contains a list of valid synset labels used
-  by the ILSVRC competition. There is one synset per line, e.g.:
-  #   n01440764
-  #   n01443537
-  We also retrieve a synset_to_human_file, which contains a mapping from synsets
-  to human-readable names for every synset in ImageNet. These are stored in a
-  TSV format, as follows:
-  #   n02119247    black fox
-  #   n02119359    silver fox
-  We assign each synset (in alphabetical order) an integer, starting from 1
-  (since 0 is reserved for the background class).
-
-  Code is based on
-  https://github.com/tensorflow/models/blob/master/inception/inception/data/build_imagenet_data.py#L463
-  """
-
-  # pylint: disable=g-line-too-long
-  base_url = 'https://raw.githubusercontent.com/tensorflow/models/master/inception/inception/data/'
-  synset_url = '{}/imagenet_lsvrc_2015_synsets.txt'.format(base_url)
-  synset_to_human_url = '{}/imagenet_metadata.txt'.format(base_url)
-
-  filename, _ = urllib.request.urlretrieve(synset_url)
-  synset_list = [s.strip() for s in open(filename).readlines()]
-  num_synsets_in_ilsvrc = len(synset_list)
-  assert num_synsets_in_ilsvrc == 1000
-
-  filename, _ = urllib.request.urlretrieve(synset_to_human_url)
-  synset_to_human_list = open(filename).readlines()
-  num_synsets_in_all_imagenet = len(synset_to_human_list)
-  assert num_synsets_in_all_imagenet == 21842
-
-  synset_to_human = {}
-  for s in synset_to_human_list:
-    parts = s.strip().split('\t')
-    assert len(parts) == 2
-    synset = parts[0]
-    human = parts[1]
-    synset_to_human[synset] = human
-
-  label_index = 1
-  labels_to_names = {0: 'background'}
-  for synset in synset_list:
-    name = synset_to_human[synset]
-    labels_to_names[label_index] = name
-    label_index += 1
-
-  return labels_to_names
-
-
-def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
-  """Gets a dataset tuple with instructions for reading ImageNet.
-
-  Args:
-    split_name: A train/test split name.
-    dataset_dir: The base directory of the dataset sources.
-    file_pattern: The file pattern to use when matching the dataset sources.
-      It is assumed that the pattern contains a '%s' string so that the split
-      name can be inserted.
-    reader: The TensorFlow reader type.
-
-  Returns:
-    A `Dataset` namedtuple.
-
-  Raises:
-    ValueError: if `split_name` is not a valid train/test split.
-  """
-  if split_name not in _SPLITS_TO_SIZES:
-    raise ValueError('split name %s was not recognized.' % split_name)
-
-  if not file_pattern:
-    file_pattern = _FILE_PATTERN
-  file_pattern = os.path.join(dataset_dir, file_pattern % split_name)
-
-  # Allowing None in the signature so that dataset_factory can use the default.
- if reader is None: - reader = tf.TFRecordReader - - keys_to_features = { - 'image/encoded': tf.FixedLenFeature( - (), tf.string, default_value=''), - 'image/format': tf.FixedLenFeature( - (), tf.string, default_value='jpeg'), - 'image/class/label': tf.FixedLenFeature( - [], dtype=tf.int64, default_value=-1), - 'image/class/text': tf.FixedLenFeature( - [], dtype=tf.string, default_value=''), - 'image/object/bbox/xmin': tf.VarLenFeature( - dtype=tf.float32), - 'image/object/bbox/ymin': tf.VarLenFeature( - dtype=tf.float32), - 'image/object/bbox/xmax': tf.VarLenFeature( - dtype=tf.float32), - 'image/object/bbox/ymax': tf.VarLenFeature( - dtype=tf.float32), - 'image/object/class/label': tf.VarLenFeature( - dtype=tf.int64), - } - - items_to_handlers = { - 'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'), - 'label': slim.tfexample_decoder.Tensor('image/class/label'), - 'label_text': slim.tfexample_decoder.Tensor('image/class/text'), - 'object/bbox': slim.tfexample_decoder.BoundingBox( - ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'), - 'object/label': slim.tfexample_decoder.Tensor('image/object/class/label'), - } - - decoder = slim.tfexample_decoder.TFExampleDecoder( - keys_to_features, items_to_handlers) - - labels_to_names = None - if dataset_utils.has_labels(dataset_dir): - labels_to_names = dataset_utils.read_label_file(dataset_dir) - else: - labels_to_names = create_readable_names_for_imagenet_labels() - dataset_utils.write_label_file(labels_to_names, dataset_dir) - - return slim.dataset.Dataset( - data_sources=file_pattern, - reader=reader, - decoder=decoder, - num_samples=_SPLITS_TO_SIZES[split_name], - items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, - num_classes=_NUM_CLASSES, - labels_to_names=labels_to_names) diff --git a/examples/slim/datasets/mnist.py b/examples/slim/datasets/mnist.py deleted file mode 100644 index 525061c5..00000000 --- a/examples/slim/datasets/mnist.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Provides data for the MNIST dataset. - -The dataset scripts used to create the dataset can be found at: -tensorflow/models/slim/datasets/download_and_convert_mnist.py -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import tensorflow as tf - -from datasets import dataset_utils - -slim = tf.contrib.slim - -_FILE_PATTERN = 'mnist_%s.tfrecord' - -_SPLITS_TO_SIZES = {'train': 60000, 'test': 10000} - -_NUM_CLASSES = 10 - -_ITEMS_TO_DESCRIPTIONS = { - 'image': 'A [28 x 28 x 1] grayscale image.', - 'label': 'A single integer between 0 and 9', -} - - -def get_split(split_name, dataset_dir, file_pattern=None, reader=None): - """Gets a dataset tuple with instructions for reading MNIST. - - Args: - split_name: A train/test split name. - dataset_dir: The base directory of the dataset sources. 
- file_pattern: The file pattern to use when matching the dataset sources. - It is assumed that the pattern contains a '%s' string so that the split - name can be inserted. - reader: The TensorFlow reader type. - - Returns: - A `Dataset` namedtuple. - - Raises: - ValueError: if `split_name` is not a valid train/test split. - """ - if split_name not in _SPLITS_TO_SIZES: - raise ValueError('split name %s was not recognized.' % split_name) - - if not file_pattern: - file_pattern = _FILE_PATTERN - file_pattern = os.path.join(dataset_dir, file_pattern % split_name) - - # Allowing None in the signature so that dataset_factory can use the default. - if reader is None: - reader = tf.TFRecordReader - - keys_to_features = { - 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), - 'image/format': tf.FixedLenFeature((), tf.string, default_value='raw'), - 'image/class/label': tf.FixedLenFeature( - [1], tf.int64, default_value=tf.zeros([1], dtype=tf.int64)), - } - - items_to_handlers = { - 'image': slim.tfexample_decoder.Image(shape=[28, 28, 1], channels=1), - 'label': slim.tfexample_decoder.Tensor('image/class/label', shape=[]), - } - - decoder = slim.tfexample_decoder.TFExampleDecoder( - keys_to_features, items_to_handlers) - - labels_to_names = None - if dataset_utils.has_labels(dataset_dir): - labels_to_names = dataset_utils.read_label_file(dataset_dir) - - return slim.dataset.Dataset( - data_sources=file_pattern, - reader=reader, - decoder=decoder, - num_samples=_SPLITS_TO_SIZES[split_name], - num_classes=_NUM_CLASSES, - items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, - labels_to_names=labels_to_names) diff --git a/examples/slim/deployment/__init__.py b/examples/slim/deployment/__init__.py deleted file mode 100644 index 8b137891..00000000 --- a/examples/slim/deployment/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/examples/slim/deployment/model_deploy.py b/examples/slim/deployment/model_deploy.py deleted file mode 100644 index 8855f2ae..00000000 --- a/examples/slim/deployment/model_deploy.py +++ /dev/null @@ -1,678 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Deploy Slim models across multiple clones and replicas. - -# TODO(sguada) docstring paragraph by (a) motivating the need for the file and -# (b) defining clones. - -# TODO(sguada) describe the high-level components of model deployment. -# E.g. "each model deployment is composed of several parts: a DeploymentConfig, -# which captures A, B and C, an input_fn which loads data.. etc - -To easily train a model on multiple GPUs or across multiple machines this -module provides a set of helper functions: `create_clones`, -`optimize_clones` and `deploy`. - -Usage: - - g = tf.Graph() - - # Set up DeploymentConfig - config = model_deploy.DeploymentConfig(num_clones=2, clone_on_cpu=True) - - # Create the global step on the device storing the variables. 
-    with tf.device(config.variables_device()):
-      global_step = slim.create_global_step()
-
-    # Define the inputs
-    with tf.device(config.inputs_device()):
-      images, labels = LoadData(...)
-      inputs_queue = slim.data.prefetch_queue((images, labels))
-
-    # Define the optimizer.
-    with tf.device(config.optimizer_device()):
-      optimizer = tf.train.MomentumOptimizer(FLAGS.learning_rate, FLAGS.momentum)
-
-    # Define the model including the loss.
-    def model_fn(inputs_queue):
-      images, labels = inputs_queue.dequeue()
-      predictions = CreateNetwork(images)
-      slim.losses.log_loss(predictions, labels)
-
-    model_dp = model_deploy.deploy(config, model_fn, [inputs_queue],
-                                   optimizer=optimizer)
-
-    # Run training.
-    slim.learning.train(model_dp.train_op, my_log_dir,
-                        summary_op=model_dp.summary_op)
-
-The Clone namedtuple holds together the values associated with each call to
-model_fn:
-  * outputs: The return values of the calls to `model_fn()`.
-  * scope: The scope used to create the clone.
-  * device: The device used to create the clone.
-
-The DeployedModel namedtuple holds together the values needed to train multiple
-clones:
-  * train_op: An operation that runs the optimizer training op and includes
-    all the update ops created by `model_fn`. Present only if an optimizer
-    was specified.
-  * summary_op: An operation that runs the summaries created by `model_fn`
-    and process_gradients.
-  * total_loss: A `Tensor` that contains the sum of all losses created by
-    `model_fn` plus the regularization losses.
-  * clones: List of `Clone` tuples returned by `create_clones()`.
-
-DeploymentConfig parameters:
-  * num_clones: Number of model clones to deploy in each replica.
-  * clone_on_cpu: True if clones should be placed on CPU.
-  * replica_id: Integer. Index of the replica for which the model is
-      deployed. Usually 0 for the chief replica.
-  * num_replicas: Number of replicas to use.
-  * num_ps_tasks: Number of tasks for the `ps` job. 0 to not use replicas.
-  * worker_job_name: A name for the worker job.
-  * ps_job_name: A name for the parameter server job.
-
-TODO(sguada):
-  - describe side effect to the graph.
-  - what happens to summaries and update_ops.
-  - which graph collections are altered.
-  - write a tutorial on how to use this.
-  - analyze the possibility of calling deploy more than once.
-
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import collections
-
-import tensorflow as tf
-
-from tensorflow.python.ops import control_flow_ops
-
-slim = tf.contrib.slim
-
-
-__all__ = ['create_clones',
-           'deploy',
-           'optimize_clones',
-           'DeployedModel',
-           'DeploymentConfig',
-           'Clone',
-           ]
-
-
-# Namedtuple used to represent a clone during deployment.
-Clone = collections.namedtuple('Clone',
-                               ['outputs',  # Whatever model_fn() returned.
-                                'scope',  # The scope used to create it.
-                                'device',  # The device used to create it.
-                               ])
-
-# Namedtuple used to represent a DeployedModel, returned by deploy().
-DeployedModel = collections.namedtuple('DeployedModel',
-                                       ['train_op',  # The `train_op`
-                                        'summary_op',  # The `summary_op`
-                                        'total_loss',  # The loss `Tensor`
-                                        'clones',  # A list of `Clone` tuples.
-                                       ])
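For reference, a minimal, self-contained sketch of the deployment pattern the docstring above describes (assuming TF 1.x and tf.contrib.slim; the toy `model_fn` and constant data are illustrative stand-ins, not part of the deleted file):

```python
# Minimal sketch of the model_deploy usage pattern (TF 1.x / tf.contrib.slim);
# the toy model_fn and constant data are illustrative only.
import tensorflow as tf
from deployment import model_deploy

slim = tf.contrib.slim

with tf.Graph().as_default():
  # Two clones on CPU, mimicking a 2-GPU deployment without GPUs.
  config = model_deploy.DeploymentConfig(num_clones=2, clone_on_cpu=True)

  with tf.device(config.variables_device()):
    global_step = slim.create_global_step()

  def model_fn():
    # Stand-in network; its loss is registered in tf.GraphKeys.LOSSES,
    # which is where _gather_clone_loss() later picks it up.
    inputs = tf.constant(1.0, shape=(8, 4))
    labels = tf.constant(1.0, shape=(8, 1))
    predictions = slim.fully_connected(inputs, 1, activation_fn=tf.sigmoid)
    slim.losses.log_loss(predictions, labels)

  with tf.device(config.optimizer_device()):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)

  model_dp = model_deploy.deploy(config, model_fn, optimizer=optimizer)
  # model_dp.train_op runs one optimization step over both clones;
  # model_dp.total_loss is the averaged clone loss (plus regularization).
```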
-
-# Default parameters for DeploymentConfig
-_deployment_params = {'num_clones': 1,
-                      'clone_on_cpu': False,
-                      'replica_id': 0,
-                      'num_replicas': 1,
-                      'num_ps_tasks': 0,
-                      'worker_job_name': 'worker',
-                      'ps_job_name': 'ps'}
-
-
-def create_clones(config, model_fn, args=None, kwargs=None):
-  """Creates multiple clones according to config using a `model_fn`.
-
-  The returned values of `model_fn(*args, **kwargs)` are collected along with
-  the scope and device used to create it in a namedtuple
-  `Clone(outputs, scope, device)`
-
-  Note: it is assumed that any loss created by `model_fn` is collected in
-  the tf.GraphKeys.LOSSES collection.
-
-  To recover the losses, summaries or update_ops created by the clone use:
-  ```python
-    losses = tf.get_collection(tf.GraphKeys.LOSSES, clone.scope)
-    summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, clone.scope)
-    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, clone.scope)
-  ```
-
-  The deployment options are specified by the config object and support
-  deploying one or several clones on different GPUs and one or several replicas
-  of such clones.
-
-  The argument `model_fn` is called `config.num_clones` times to create the
-  model clones as `model_fn(*args, **kwargs)`.
-
-  If `config` specifies deployment on multiple replicas then the default
-  TensorFlow device is set appropriately for each call to `model_fn` and for
-  the slim variable creation functions: model and global variables will be
-  created on the `ps` device, the clone operations will be on the `worker`
-  device.
-
-  Args:
-    config: A DeploymentConfig object.
-    model_fn: A callable. Called as `model_fn(*args, **kwargs)`
-    args: Optional list of arguments to pass to `model_fn`.
-    kwargs: Optional dict of keyword arguments to pass to `model_fn`.
-
-  Returns:
-    A list of namedtuples `Clone`.
-  """
-  clones = []
-  args = args or []
-  kwargs = kwargs or {}
-  with slim.arg_scope([slim.model_variable, slim.variable],
-                      device=config.variables_device()):
-    # Create clones.
-    for i in range(0, config.num_clones):
-      with tf.name_scope(config.clone_scope(i)) as clone_scope:
-        clone_device = config.clone_device(i)
-        with tf.device(clone_device):
-          with tf.variable_scope(tf.get_variable_scope(),
-                                 reuse=True if i > 0 else None):
-            outputs = model_fn(*args, **kwargs)
-          clones.append(Clone(outputs, clone_scope, clone_device))
-  return clones
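The docstring's collection recipe generalizes to all clones; a short hedged sketch, assuming `clones` is the list returned by `create_clones()` above:

```python
# Hedged sketch: recovering per-clone collections after create_clones(),
# following the docstring recipe above. `clones` is assumed to be the list
# returned by create_clones(config, model_fn).
for clone in clones:
  losses = tf.get_collection(tf.GraphKeys.LOSSES, clone.scope)
  summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, clone.scope)
  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, clone.scope)
  # clone.outputs is whatever model_fn() returned for this clone, and
  # clone.device is e.g. '/device:GPU:1' for the second clone on multi-GPU.
```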
-
-
-def _gather_clone_loss(clone, num_clones, regularization_losses):
-  """Gather the loss for a single clone.
-
-  Args:
-    clone: A Clone namedtuple.
-    num_clones: The number of clones being deployed.
-    regularization_losses: Possibly empty list of regularization_losses
-      to add to the clone losses.
-
-  Returns:
-    A tensor for the total loss for the clone.  Can be None.
-  """
-  # The return value.
-  sum_loss = None
-  # Individual components of the loss that will need summaries.
-  clone_loss = None
-  regularization_loss = None
-  # Compute and aggregate losses on the clone device.
-  with tf.device(clone.device):
-    all_losses = []
-    clone_losses = tf.get_collection(tf.GraphKeys.LOSSES, clone.scope)
-    if clone_losses:
-      clone_loss = tf.add_n(clone_losses, name='clone_loss')
-      if num_clones > 1:
-        clone_loss = tf.div(clone_loss, 1.0 * num_clones,
-                            name='scaled_clone_loss')
-      all_losses.append(clone_loss)
-    if regularization_losses:
-      regularization_loss = tf.add_n(regularization_losses,
-                                     name='regularization_loss')
-      all_losses.append(regularization_loss)
-    if all_losses:
-      sum_loss = tf.add_n(all_losses)
-  # Add the summaries out of the clone device block.
-  if clone_loss is not None:
-    tf.summary.scalar(clone.scope + '/clone_loss', clone_loss)
-  if regularization_loss is not None:
-    tf.summary.scalar('regularization_loss', regularization_loss)
-  return sum_loss
-
-
-def _optimize_clone(optimizer, clone, num_clones, regularization_losses,
-                    **kwargs):
-  """Compute losses and gradients for a single clone.
-
-  Args:
-    optimizer: A tf.Optimizer object.
-    clone: A Clone namedtuple.
-    num_clones: The number of clones being deployed.
-    regularization_losses: Possibly empty list of regularization_losses
-      to add to the clone losses.
-    **kwargs: Dict of kwargs to pass to compute_gradients().
-
-  Returns:
-    A tuple (clone_loss, clone_grads_and_vars).
-      - clone_loss: A tensor for the total loss for the clone.  Can be None.
-      - clone_grads_and_vars: List of (gradient, variable) for the clone.
-        Can be empty.
-  """
-  sum_loss = _gather_clone_loss(clone, num_clones, regularization_losses)
-  clone_grad = None
-  if sum_loss is not None:
-    with tf.device(clone.device):
-      clone_grad = optimizer.compute_gradients(sum_loss, **kwargs)
-  return sum_loss, clone_grad
-
-
-def optimize_clones(clones, optimizer,
-                    regularization_losses=None,
-                    **kwargs):
-  """Compute clone losses and gradients for the given list of `Clones`.
-
-  Note: The regularization_losses are added to the first clone losses.
-
-  Args:
-    clones: List of `Clones` created by `create_clones()`.
-    optimizer: An `Optimizer` object.
-    regularization_losses: Optional list of regularization losses. If None it
-      will gather them from tf.GraphKeys.REGULARIZATION_LOSSES. Pass `[]` to
-      exclude them.
-    **kwargs: Optional dict of keyword arguments to pass to
-      `compute_gradients`.
-
-  Returns:
-    A tuple (total_loss, grads_and_vars).
-      - total_loss: A Tensor containing the average of the clone losses
-        including the regularization loss.
-      - grads_and_vars: A List of tuples (gradient, variable) containing the
-        sum of the gradients for each variable.
-  """
-  grads_and_vars = []
-  clones_losses = []
-  num_clones = len(clones)
-  if regularization_losses is None:
-    regularization_losses = tf.get_collection(
-        tf.GraphKeys.REGULARIZATION_LOSSES)
-  for clone in clones:
-    with tf.name_scope(clone.scope):
-      clone_loss, clone_grad = _optimize_clone(
-          optimizer, clone, num_clones, regularization_losses, **kwargs)
-      if clone_loss is not None:
-        clones_losses.append(clone_loss)
-        grads_and_vars.append(clone_grad)
-      # Only use regularization_losses for the first clone
-      regularization_losses = None
-  # Compute the total_loss summing all the clones_losses.
-  total_loss = tf.add_n(clones_losses, name='total_loss')
-  # Sum the gradients across clones.
-  grads_and_vars = _sum_clones_gradients(grads_and_vars)
-  return total_loss, grads_and_vars
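It is worth noting how the 1/num_clones scaling in _gather_clone_loss() interacts with the gradient sum in optimize_clones(); a small numeric check of that invariant (plain Python, values illustrative):

```python
# Numeric check of the scaling convention used above (values illustrative):
# each clone contributes loss / num_clones, so the summed total_loss equals
# the *average* clone loss, and summing per-variable gradients across clones
# matches differentiating that average.
num_clones = 2
clone_losses = [3.0, 5.0]
scaled = [loss / num_clones for loss in clone_losses]   # [1.5, 2.5]
total_loss = sum(scaled)                                # 4.0 == mean(3.0, 5.0)
assert total_loss == sum(clone_losses) / num_clones
```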
-
-
-def deploy(config,
-           model_fn,
-           args=None,
-           kwargs=None,
-           optimizer=None,
-           summarize_gradients=False):
-  """Deploys a Slim-constructed model across multiple clones.
-
-  The deployment options are specified by the config object and support
-  deploying one or several clones on different GPUs and one or several replicas
-  of such clones.
-
-  The argument `model_fn` is called `config.num_clones` times to create the
-  model clones as `model_fn(*args, **kwargs)`.
-
-  The optional argument `optimizer` is an `Optimizer` object.  If not `None`,
-  the deployed model is configured for training with that optimizer.
-
-  If `config` specifies deployment on multiple replicas then the default
-  TensorFlow device is set appropriately for each call to `model_fn` and for
-  the slim variable creation functions: model and global variables will be
-  created on the `ps` device, the clone operations will be on the `worker`
-  device.
-
-  Args:
-    config: A `DeploymentConfig` object.
-    model_fn: A callable. Called as `model_fn(*args, **kwargs)`
-    args: Optional list of arguments to pass to `model_fn`.
-    kwargs: Optional dict of keyword arguments to pass to `model_fn`.
-    optimizer: Optional `Optimizer` object.  If passed the model is deployed
-      for training with that optimizer.
-    summarize_gradients: Whether or not to add summaries to the gradients.
-
-  Returns:
-    A `DeployedModel` namedtuple.
-  """
-  # Gather initial summaries.
-  summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
-
-  # Create Clones.
-  clones = create_clones(config, model_fn, args, kwargs)
-  first_clone = clones[0]
-
-  # Gather update_ops from the first clone. These contain, for example,
-  # the updates for the batch_norm variables created by model_fn.
-  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone.scope)
-
-  train_op = None
-  total_loss = None
-  with tf.device(config.optimizer_device()):
-    if optimizer:
-      # Place the global step on the device storing the variables.
-      with tf.device(config.variables_device()):
-        global_step = slim.get_or_create_global_step()
-
-      # Compute the gradients for the clones.
-      total_loss, clones_gradients = optimize_clones(clones, optimizer)
-
-      if clones_gradients:
-        if summarize_gradients:
-          # Add summaries to the gradients.
-          summaries |= set(_add_gradients_summaries(clones_gradients))
-
-        # Create gradient updates.
-        grad_updates = optimizer.apply_gradients(clones_gradients,
-                                                 global_step=global_step)
-        update_ops.append(grad_updates)
-
-        update_op = tf.group(*update_ops)
-        train_op = control_flow_ops.with_dependencies([update_op], total_loss,
-                                                      name='train_op')
-    else:
-      clones_losses = []
-      regularization_losses = tf.get_collection(
-          tf.GraphKeys.REGULARIZATION_LOSSES)
-      for clone in clones:
-        with tf.name_scope(clone.scope):
-          clone_loss = _gather_clone_loss(clone, len(clones),
-                                          regularization_losses)
-          if clone_loss is not None:
-            clones_losses.append(clone_loss)
-          # Only use regularization_losses for the first clone
-          regularization_losses = None
-      if clones_losses:
-        total_loss = tf.add_n(clones_losses, name='total_loss')
-
-    # Add the summaries from the first clone. These contain the summaries
-    # created by model_fn and either optimize_clones() or _gather_clone_loss().
-    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
-                                       first_clone.scope))
-
-    if total_loss is not None:
-      # Add total_loss to summary.
-      summaries.add(tf.summary.scalar('total_loss', total_loss))
-
-    if summaries:
-      # Merge all summaries together.
-      summary_op = tf.summary.merge(list(summaries), name='summary_op')
-    else:
-      summary_op = None
-
-  return DeployedModel(train_op, summary_op, total_loss, clones)
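A hedged sketch of the train_op wiring used in deploy() above: running the returned op first executes every update op (batch-norm updates plus the apply_gradients op), then yields total_loss. All names below are illustrative:

```python
# Hedged sketch of the with_dependencies pattern from deploy() (TF 1.x);
# the toy loss and counter stand in for total_loss and the update ops.
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

total_loss = tf.constant(1.0, name='toy_total_loss')
counter = tf.Variable(0, name='toy_counter')
update_op = tf.group(tf.assign_add(counter, 1))
train_op = control_flow_ops.with_dependencies([update_op], total_loss,
                                              name='toy_train_op')

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  loss_value = sess.run(train_op)   # returns 1.0, and the counter is now 1
```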
-
-
-def _sum_clones_gradients(clone_grads):
-  """Calculate the sum gradient for each shared variable across all clones.
-
-  This function assumes that the clone_grads has been scaled appropriately by
-  1 / num_clones.
-
-  Args:
-    clone_grads: A List of List of tuples (gradient, variable), one list per
-      `Clone`.
-
-  Returns:
-    List of tuples of (gradient, variable) where the gradient has been summed
-    across all clones.
-  """
-  sum_grads = []
-  for grad_and_vars in zip(*clone_grads):
-    # Note that each grad_and_vars looks like the following:
-    #   ((grad_var0_clone0, var0), ... (grad_varN_cloneN, varN))
-    grads = []
-    var = grad_and_vars[0][1]
-    for g, v in grad_and_vars:
-      assert v == var
-      if g is not None:
-        grads.append(g)
-    if grads:
-      if len(grads) > 1:
-        sum_grad = tf.add_n(grads, name=var.op.name + '/sum_grads')
-      else:
-        sum_grad = grads[0]
-      sum_grads.append((sum_grad, var))
-  return sum_grads
-
-
-def _add_gradients_summaries(grads_and_vars):
-  """Add histogram summaries to gradients.
-
-  Note: The summaries are also added to the SUMMARIES collection.
-
-  Args:
-    grads_and_vars: A list of gradient to variable pairs (tuples).
-
-  Returns:
-    The _list_ of the added summaries for grads_and_vars.
-  """
-  summaries = []
-  for grad, var in grads_and_vars:
-    if grad is not None:
-      if isinstance(grad, tf.IndexedSlices):
-        grad_values = grad.values
-      else:
-        grad_values = grad
-      summaries.append(tf.summary.histogram(var.op.name + ':gradient',
-                                            grad_values))
-      summaries.append(tf.summary.histogram(var.op.name + ':gradient_norm',
-                                            tf.global_norm([grad_values])))
-    else:
-      tf.logging.info('Var %s has no gradient', var.op.name)
-  return summaries
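A hedged sketch of what _add_gradients_summaries() records for a single (grad, var) pair; an IndexedSlices gradient (e.g. from tf.gather on an embedding) contributes its .values tensor rather than a dense tensor. Names are illustrative:

```python
# Illustrative stand-ins for one (grad, var) pair (TF 1.x); the two histogram
# summaries mirror the ':gradient' and ':gradient_norm' tags used above.
import tensorflow as tf

var = tf.Variable(tf.zeros([4]), name='toy_weights')
grad = tf.constant([0.1, -0.2, 0.3, 0.0])
tf.summary.histogram(var.op.name + ':gradient', grad)
tf.summary.histogram(var.op.name + ':gradient_norm', tf.global_norm([grad]))
```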
- """ - if num_replicas > 1: - if num_ps_tasks < 1: - raise ValueError('When using replicas num_ps_tasks must be positive') - if num_replicas > 1 or num_ps_tasks > 0: - if not worker_job_name: - raise ValueError('Must specify worker_job_name when using replicas') - if not ps_job_name: - raise ValueError('Must specify ps_job_name when using parameter server') - if replica_id >= num_replicas: - raise ValueError('replica_id must be less than num_replicas') - self._num_clones = num_clones - self._clone_on_cpu = clone_on_cpu - self._replica_id = replica_id - self._num_replicas = num_replicas - self._num_ps_tasks = num_ps_tasks - self._ps_device = '/job:' + ps_job_name if num_ps_tasks > 0 else '' - self._worker_device = '/job:' + worker_job_name if num_ps_tasks > 0 else '' - - @property - def num_clones(self): - return self._num_clones - - @property - def clone_on_cpu(self): - return self._clone_on_cpu - - @property - def replica_id(self): - return self._replica_id - - @property - def num_replicas(self): - return self._num_replicas - - @property - def num_ps_tasks(self): - return self._num_ps_tasks - - @property - def ps_device(self): - return self._ps_device - - @property - def worker_device(self): - return self._worker_device - - def caching_device(self): - """Returns the device to use for caching variables. - - Variables are cached on the worker CPU when using replicas. - - Returns: - A device string or None if the variables do not need to be cached. - """ - if self._num_ps_tasks > 0: - return lambda op: op.device - else: - return None - - def clone_device(self, clone_index): - """Device used to create the clone and all the ops inside the clone. - - Args: - clone_index: Int, representing the clone_index. - - Returns: - A value suitable for `tf.device()`. - - Raises: - ValueError: if `clone_index` is greater or equal to the number of clones". - """ - if clone_index >= self._num_clones: - raise ValueError('clone_index must be less than num_clones') - device = '' - if self._num_ps_tasks > 0: - device += self._worker_device - if self._clone_on_cpu: - device += '/device:CPU:0' - else: - if self._num_clones > 1: - device += '/device:GPU:%d' % clone_index - return device - - def clone_scope(self, clone_index): - """Name scope to create the clone. - - Args: - clone_index: Int, representing the clone_index. - - Returns: - A name_scope suitable for `tf.name_scope()`. - - Raises: - ValueError: if `clone_index` is greater or equal to the number of clones". - """ - if clone_index >= self._num_clones: - raise ValueError('clone_index must be less than num_clones') - scope = '' - if self._num_clones > 1: - scope = 'clone_%d' % clone_index - return scope - - def optimizer_device(self): - """Device to use with the optimizer. - - Returns: - A value suitable for `tf.device()`. - """ - if self._num_ps_tasks > 0 or self._num_clones > 0: - return self._worker_device + '/device:CPU:0' - else: - return '' - - def inputs_device(self): - """Device to use to build the inputs. - - Returns: - A value suitable for `tf.device()`. - """ - device = '' - if self._num_ps_tasks > 0: - device += self._worker_device - device += '/device:CPU:0' - return device - - def variables_device(self): - """Returns the device to use for variables created inside the clone. - - Returns: - A value suitable for `tf.device()`. 
- """ - device = '' - if self._num_ps_tasks > 0: - device += self._ps_device - device += '/device:CPU:0' - - class _PSDeviceChooser(object): - """Slim device chooser for variables when using PS.""" - - def __init__(self, device, tasks): - self._device = device - self._tasks = tasks - self._task = 0 - - def choose(self, op): - if op.device: - return op.device - node_def = op if isinstance(op, tf.NodeDef) else op.node_def - if node_def.op == 'Variable': - t = self._task - self._task = (self._task + 1) % self._tasks - d = '%s/task:%d' % (self._device, t) - return d - else: - return op.device - - if not self._num_ps_tasks: - return device - else: - chooser = _PSDeviceChooser(device, self._num_ps_tasks) - return chooser.choose diff --git a/examples/slim/deployment/model_deploy_test.py b/examples/slim/deployment/model_deploy_test.py deleted file mode 100644 index 57951db9..00000000 --- a/examples/slim/deployment/model_deploy_test.py +++ /dev/null @@ -1,565 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for model_deploy.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf - -from deployment import model_deploy - -slim = tf.contrib.slim - - -class DeploymentConfigTest(tf.test.TestCase): - - def testDefaults(self): - deploy_config = model_deploy.DeploymentConfig() - - self.assertEqual(slim.get_variables(), []) - self.assertEqual(deploy_config.caching_device(), None) - self.assertDeviceEqual(deploy_config.clone_device(0), '') - self.assertEqual(deploy_config.clone_scope(0), '') - self.assertDeviceEqual(deploy_config.optimizer_device(), 'CPU:0') - self.assertDeviceEqual(deploy_config.inputs_device(), 'CPU:0') - self.assertDeviceEqual(deploy_config.variables_device(), 'CPU:0') - - def testCPUonly(self): - deploy_config = model_deploy.DeploymentConfig(clone_on_cpu=True) - - self.assertEqual(deploy_config.caching_device(), None) - self.assertDeviceEqual(deploy_config.clone_device(0), 'CPU:0') - self.assertEqual(deploy_config.clone_scope(0), '') - self.assertDeviceEqual(deploy_config.optimizer_device(), 'CPU:0') - self.assertDeviceEqual(deploy_config.inputs_device(), 'CPU:0') - self.assertDeviceEqual(deploy_config.variables_device(), 'CPU:0') - - def testMultiGPU(self): - deploy_config = model_deploy.DeploymentConfig(num_clones=2) - - self.assertEqual(deploy_config.caching_device(), None) - self.assertDeviceEqual(deploy_config.clone_device(0), 'GPU:0') - self.assertDeviceEqual(deploy_config.clone_device(1), 'GPU:1') - self.assertEqual(deploy_config.clone_scope(0), 'clone_0') - self.assertEqual(deploy_config.clone_scope(1), 'clone_1') - self.assertDeviceEqual(deploy_config.optimizer_device(), 'CPU:0') - self.assertDeviceEqual(deploy_config.inputs_device(), 'CPU:0') - self.assertDeviceEqual(deploy_config.variables_device(), 'CPU:0') - - 
def testPS(self): - deploy_config = model_deploy.DeploymentConfig(num_clones=1, num_ps_tasks=1) - - self.assertDeviceEqual(deploy_config.clone_device(0), - '/job:worker') - self.assertEqual(deploy_config.clone_scope(0), '') - self.assertDeviceEqual(deploy_config.optimizer_device(), - '/job:worker/device:CPU:0') - self.assertDeviceEqual(deploy_config.inputs_device(), - '/job:worker/device:CPU:0') - with tf.device(deploy_config.variables_device()): - a = tf.Variable(0) - b = tf.Variable(0) - c = tf.no_op() - d = slim.variable('a', [], - caching_device=deploy_config.caching_device()) - self.assertDeviceEqual(a.device, '/job:ps/task:0/device:CPU:0') - self.assertDeviceEqual(a.device, a.value().device) - self.assertDeviceEqual(b.device, '/job:ps/task:0/device:CPU:0') - self.assertDeviceEqual(b.device, b.value().device) - self.assertDeviceEqual(c.device, '') - self.assertDeviceEqual(d.device, '/job:ps/task:0/device:CPU:0') - self.assertDeviceEqual(d.value().device, '') - - def testMultiGPUPS(self): - deploy_config = model_deploy.DeploymentConfig(num_clones=2, num_ps_tasks=1) - - self.assertEqual(deploy_config.caching_device()(tf.no_op()), '') - self.assertDeviceEqual(deploy_config.clone_device(0), - '/job:worker/device:GPU:0') - self.assertDeviceEqual(deploy_config.clone_device(1), - '/job:worker/device:GPU:1') - self.assertEqual(deploy_config.clone_scope(0), 'clone_0') - self.assertEqual(deploy_config.clone_scope(1), 'clone_1') - self.assertDeviceEqual(deploy_config.optimizer_device(), - '/job:worker/device:CPU:0') - self.assertDeviceEqual(deploy_config.inputs_device(), - '/job:worker/device:CPU:0') - - def testReplicasPS(self): - deploy_config = model_deploy.DeploymentConfig(num_replicas=2, - num_ps_tasks=2) - - self.assertDeviceEqual(deploy_config.clone_device(0), - '/job:worker') - self.assertEqual(deploy_config.clone_scope(0), '') - self.assertDeviceEqual(deploy_config.optimizer_device(), - '/job:worker/device:CPU:0') - self.assertDeviceEqual(deploy_config.inputs_device(), - '/job:worker/device:CPU:0') - - def testReplicasMultiGPUPS(self): - deploy_config = model_deploy.DeploymentConfig(num_replicas=2, - num_clones=2, - num_ps_tasks=2) - self.assertDeviceEqual(deploy_config.clone_device(0), - '/job:worker/device:GPU:0') - self.assertDeviceEqual(deploy_config.clone_device(1), - '/job:worker/device:GPU:1') - self.assertEqual(deploy_config.clone_scope(0), 'clone_0') - self.assertEqual(deploy_config.clone_scope(1), 'clone_1') - self.assertDeviceEqual(deploy_config.optimizer_device(), - '/job:worker/device:CPU:0') - self.assertDeviceEqual(deploy_config.inputs_device(), - '/job:worker/device:CPU:0') - - def testVariablesPS(self): - deploy_config = model_deploy.DeploymentConfig(num_ps_tasks=2) - - with tf.device(deploy_config.variables_device()): - a = tf.Variable(0) - b = tf.Variable(0) - c = tf.no_op() - d = slim.variable('a', [], - caching_device=deploy_config.caching_device()) - - self.assertDeviceEqual(a.device, '/job:ps/task:0/device:CPU:0') - self.assertDeviceEqual(a.device, a.value().device) - self.assertDeviceEqual(b.device, '/job:ps/task:1/device:CPU:0') - self.assertDeviceEqual(b.device, b.value().device) - self.assertDeviceEqual(c.device, '') - self.assertDeviceEqual(d.device, '/job:ps/task:0/device:CPU:0') - self.assertDeviceEqual(d.value().device, '') - - -def LogisticClassifier(inputs, labels, scope=None, reuse=None): - with tf.variable_scope(scope, 'LogisticClassifier', [inputs, labels], - reuse=reuse): - predictions = slim.fully_connected(inputs, 1, activation_fn=tf.sigmoid, - 
scope='fully_connected') - slim.losses.log_loss(predictions, labels) - return predictions - - -def BatchNormClassifier(inputs, labels, scope=None, reuse=None): - with tf.variable_scope(scope, 'BatchNormClassifier', [inputs, labels], - reuse=reuse): - inputs = slim.batch_norm(inputs, decay=0.1) - predictions = slim.fully_connected(inputs, 1, - activation_fn=tf.sigmoid, - scope='fully_connected') - slim.losses.log_loss(predictions, labels) - return predictions - - -class CreatecloneTest(tf.test.TestCase): - - def setUp(self): - # Create an easy training set: - np.random.seed(0) - - self._inputs = np.zeros((16, 4)) - self._labels = np.random.randint(0, 2, size=(16, 1)).astype(np.float32) - self._logdir = self.get_temp_dir() - - for i in range(16): - j = int(2 * self._labels[i] + np.random.randint(0, 2)) - self._inputs[i, j] = 1 - - def testCreateLogisticClassifier(self): - g = tf.Graph() - with g.as_default(): - tf.set_random_seed(0) - tf_inputs = tf.constant(self._inputs, dtype=tf.float32) - tf_labels = tf.constant(self._labels, dtype=tf.float32) - - model_fn = LogisticClassifier - clone_args = (tf_inputs, tf_labels) - deploy_config = model_deploy.DeploymentConfig(num_clones=1) - - self.assertEqual(slim.get_variables(), []) - clones = model_deploy.create_clones(deploy_config, model_fn, clone_args) - clone = clones[0] - self.assertEqual(len(slim.get_variables()), 2) - for v in slim.get_variables(): - self.assertDeviceEqual(v.device, 'CPU:0') - self.assertDeviceEqual(v.value().device, 'CPU:0') - self.assertEqual(clone.outputs.op.name, - 'LogisticClassifier/fully_connected/Sigmoid') - self.assertEqual(clone.scope, '') - self.assertDeviceEqual(clone.device, '') - self.assertEqual(len(slim.losses.get_losses()), 1) - update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) - self.assertEqual(update_ops, []) - - def testCreateSingleclone(self): - g = tf.Graph() - with g.as_default(): - tf.set_random_seed(0) - tf_inputs = tf.constant(self._inputs, dtype=tf.float32) - tf_labels = tf.constant(self._labels, dtype=tf.float32) - - model_fn = BatchNormClassifier - clone_args = (tf_inputs, tf_labels) - deploy_config = model_deploy.DeploymentConfig(num_clones=1) - - self.assertEqual(slim.get_variables(), []) - clones = model_deploy.create_clones(deploy_config, model_fn, clone_args) - clone = clones[0] - self.assertEqual(len(slim.get_variables()), 5) - for v in slim.get_variables(): - self.assertDeviceEqual(v.device, 'CPU:0') - self.assertDeviceEqual(v.value().device, 'CPU:0') - self.assertEqual(clone.outputs.op.name, - 'BatchNormClassifier/fully_connected/Sigmoid') - self.assertEqual(clone.scope, '') - self.assertDeviceEqual(clone.device, '') - self.assertEqual(len(slim.losses.get_losses()), 1) - update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) - self.assertEqual(len(update_ops), 2) - - def testCreateMulticlone(self): - g = tf.Graph() - with g.as_default(): - tf.set_random_seed(0) - tf_inputs = tf.constant(self._inputs, dtype=tf.float32) - tf_labels = tf.constant(self._labels, dtype=tf.float32) - - model_fn = BatchNormClassifier - clone_args = (tf_inputs, tf_labels) - num_clones = 4 - deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones) - - self.assertEqual(slim.get_variables(), []) - clones = model_deploy.create_clones(deploy_config, model_fn, clone_args) - self.assertEqual(len(slim.get_variables()), 5) - for v in slim.get_variables(): - self.assertDeviceEqual(v.device, 'CPU:0') - self.assertDeviceEqual(v.value().device, 'CPU:0') - self.assertEqual(len(clones), num_clones) - for i, 
clone in enumerate(clones): - self.assertEqual( - clone.outputs.op.name, - 'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % i) - update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, clone.scope) - self.assertEqual(len(update_ops), 2) - self.assertEqual(clone.scope, 'clone_%d/' % i) - self.assertDeviceEqual(clone.device, 'GPU:%d' % i) - - def testCreateOnecloneWithPS(self): - g = tf.Graph() - with g.as_default(): - tf.set_random_seed(0) - tf_inputs = tf.constant(self._inputs, dtype=tf.float32) - tf_labels = tf.constant(self._labels, dtype=tf.float32) - - model_fn = BatchNormClassifier - clone_args = (tf_inputs, tf_labels) - deploy_config = model_deploy.DeploymentConfig(num_clones=1, - num_ps_tasks=1) - - self.assertEqual(slim.get_variables(), []) - clones = model_deploy.create_clones(deploy_config, model_fn, clone_args) - self.assertEqual(len(clones), 1) - clone = clones[0] - self.assertEqual(clone.outputs.op.name, - 'BatchNormClassifier/fully_connected/Sigmoid') - self.assertDeviceEqual(clone.device, '/job:worker') - self.assertEqual(clone.scope, '') - self.assertEqual(len(slim.get_variables()), 5) - for v in slim.get_variables(): - self.assertDeviceEqual(v.device, '/job:ps/task:0/CPU:0') - self.assertDeviceEqual(v.device, v.value().device) - - def testCreateMulticloneWithPS(self): - g = tf.Graph() - with g.as_default(): - tf.set_random_seed(0) - tf_inputs = tf.constant(self._inputs, dtype=tf.float32) - tf_labels = tf.constant(self._labels, dtype=tf.float32) - - model_fn = BatchNormClassifier - clone_args = (tf_inputs, tf_labels) - deploy_config = model_deploy.DeploymentConfig(num_clones=2, - num_ps_tasks=2) - - self.assertEqual(slim.get_variables(), []) - clones = model_deploy.create_clones(deploy_config, model_fn, clone_args) - self.assertEqual(len(slim.get_variables()), 5) - for i, v in enumerate(slim.get_variables()): - t = i % 2 - self.assertDeviceEqual(v.device, '/job:ps/task:%d/device:CPU:0' % t) - self.assertDeviceEqual(v.device, v.value().device) - self.assertEqual(len(clones), 2) - for i, clone in enumerate(clones): - self.assertEqual( - clone.outputs.op.name, - 'clone_%d/BatchNormClassifier/fully_connected/Sigmoid' % i) - self.assertEqual(clone.scope, 'clone_%d/' % i) - self.assertDeviceEqual(clone.device, '/job:worker/device:GPU:%d' % i) - - -class OptimizeclonesTest(tf.test.TestCase): - - def setUp(self): - # Create an easy training set: - np.random.seed(0) - - self._inputs = np.zeros((16, 4)) - self._labels = np.random.randint(0, 2, size=(16, 1)).astype(np.float32) - self._logdir = self.get_temp_dir() - - for i in range(16): - j = int(2 * self._labels[i] + np.random.randint(0, 2)) - self._inputs[i, j] = 1 - - def testCreateLogisticClassifier(self): - g = tf.Graph() - with g.as_default(): - tf.set_random_seed(0) - tf_inputs = tf.constant(self._inputs, dtype=tf.float32) - tf_labels = tf.constant(self._labels, dtype=tf.float32) - - model_fn = LogisticClassifier - clone_args = (tf_inputs, tf_labels) - deploy_config = model_deploy.DeploymentConfig(num_clones=1) - - self.assertEqual(slim.get_variables(), []) - clones = model_deploy.create_clones(deploy_config, model_fn, clone_args) - self.assertEqual(len(slim.get_variables()), 2) - update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) - self.assertEqual(update_ops, []) - - optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0) - total_loss, grads_and_vars = model_deploy.optimize_clones(clones, - optimizer) - self.assertEqual(len(grads_and_vars), len(tf.trainable_variables())) - 
self.assertEqual(total_loss.op.name, 'total_loss') - for g, v in grads_and_vars: - self.assertDeviceEqual(g.device, '') - self.assertDeviceEqual(v.device, 'CPU:0') - - def testCreateSingleclone(self): - g = tf.Graph() - with g.as_default(): - tf.set_random_seed(0) - tf_inputs = tf.constant(self._inputs, dtype=tf.float32) - tf_labels = tf.constant(self._labels, dtype=tf.float32) - - model_fn = BatchNormClassifier - clone_args = (tf_inputs, tf_labels) - deploy_config = model_deploy.DeploymentConfig(num_clones=1) - - self.assertEqual(slim.get_variables(), []) - clones = model_deploy.create_clones(deploy_config, model_fn, clone_args) - self.assertEqual(len(slim.get_variables()), 5) - update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) - self.assertEqual(len(update_ops), 2) - - optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0) - total_loss, grads_and_vars = model_deploy.optimize_clones(clones, - optimizer) - self.assertEqual(len(grads_and_vars), len(tf.trainable_variables())) - self.assertEqual(total_loss.op.name, 'total_loss') - for g, v in grads_and_vars: - self.assertDeviceEqual(g.device, '') - self.assertDeviceEqual(v.device, 'CPU:0') - - def testCreateMulticlone(self): - g = tf.Graph() - with g.as_default(): - tf.set_random_seed(0) - tf_inputs = tf.constant(self._inputs, dtype=tf.float32) - tf_labels = tf.constant(self._labels, dtype=tf.float32) - - model_fn = BatchNormClassifier - clone_args = (tf_inputs, tf_labels) - num_clones = 4 - deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones) - - self.assertEqual(slim.get_variables(), []) - clones = model_deploy.create_clones(deploy_config, model_fn, clone_args) - self.assertEqual(len(slim.get_variables()), 5) - update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) - self.assertEqual(len(update_ops), num_clones * 2) - - optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0) - total_loss, grads_and_vars = model_deploy.optimize_clones(clones, - optimizer) - self.assertEqual(len(grads_and_vars), len(tf.trainable_variables())) - self.assertEqual(total_loss.op.name, 'total_loss') - for g, v in grads_and_vars: - self.assertDeviceEqual(g.device, '') - self.assertDeviceEqual(v.device, 'CPU:0') - - def testCreateMulticloneCPU(self): - g = tf.Graph() - with g.as_default(): - tf.set_random_seed(0) - tf_inputs = tf.constant(self._inputs, dtype=tf.float32) - tf_labels = tf.constant(self._labels, dtype=tf.float32) - - model_fn = BatchNormClassifier - model_args = (tf_inputs, tf_labels) - num_clones = 4 - deploy_config = model_deploy.DeploymentConfig(num_clones=num_clones, - clone_on_cpu=True) - - self.assertEqual(slim.get_variables(), []) - clones = model_deploy.create_clones(deploy_config, model_fn, model_args) - self.assertEqual(len(slim.get_variables()), 5) - update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) - self.assertEqual(len(update_ops), num_clones * 2) - - optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0) - total_loss, grads_and_vars = model_deploy.optimize_clones(clones, - optimizer) - self.assertEqual(len(grads_and_vars), len(tf.trainable_variables())) - self.assertEqual(total_loss.op.name, 'total_loss') - for g, v in grads_and_vars: - self.assertDeviceEqual(g.device, '') - self.assertDeviceEqual(v.device, 'CPU:0') - - def testCreateOnecloneWithPS(self): - g = tf.Graph() - with g.as_default(): - tf.set_random_seed(0) - tf_inputs = tf.constant(self._inputs, dtype=tf.float32) - tf_labels = tf.constant(self._labels, dtype=tf.float32) - - model_fn = BatchNormClassifier - model_args = 
(tf_inputs, tf_labels) - deploy_config = model_deploy.DeploymentConfig(num_clones=1, - num_ps_tasks=1) - - self.assertEqual(slim.get_variables(), []) - clones = model_deploy.create_clones(deploy_config, model_fn, model_args) - self.assertEqual(len(slim.get_variables()), 5) - update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) - self.assertEqual(len(update_ops), 2) - - optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0) - total_loss, grads_and_vars = model_deploy.optimize_clones(clones, - optimizer) - self.assertEqual(len(grads_and_vars), len(tf.trainable_variables())) - self.assertEqual(total_loss.op.name, 'total_loss') - for g, v in grads_and_vars: - self.assertDeviceEqual(g.device, '/job:worker') - self.assertDeviceEqual(v.device, '/job:ps/task:0/CPU:0') - - -class DeployTest(tf.test.TestCase): - - def setUp(self): - # Create an easy training set: - np.random.seed(0) - - self._inputs = np.zeros((16, 4)) - self._labels = np.random.randint(0, 2, size=(16, 1)).astype(np.float32) - self._logdir = self.get_temp_dir() - - for i in range(16): - j = int(2 * self._labels[i] + np.random.randint(0, 2)) - self._inputs[i, j] = 1 - - def testLocalTrainOp(self): - g = tf.Graph() - with g.as_default(): - tf.set_random_seed(0) - tf_inputs = tf.constant(self._inputs, dtype=tf.float32) - tf_labels = tf.constant(self._labels, dtype=tf.float32) - - model_fn = BatchNormClassifier - model_args = (tf_inputs, tf_labels) - deploy_config = model_deploy.DeploymentConfig(num_clones=2, - clone_on_cpu=True) - - optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0) - - self.assertEqual(slim.get_variables(), []) - model = model_deploy.deploy(deploy_config, model_fn, model_args, - optimizer=optimizer) - - update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) - self.assertEqual(len(update_ops), 4) - self.assertEqual(len(model.clones), 2) - self.assertEqual(model.total_loss.op.name, 'total_loss') - self.assertEqual(model.summary_op.op.name, 'summary_op/summary_op') - self.assertEqual(model.train_op.op.name, 'train_op') - - with tf.Session() as sess: - sess.run(tf.global_variables_initializer()) - moving_mean = tf.contrib.framework.get_variables_by_name( - 'moving_mean')[0] - moving_variance = tf.contrib.framework.get_variables_by_name( - 'moving_variance')[0] - initial_loss = sess.run(model.total_loss) - initial_mean, initial_variance = sess.run([moving_mean, - moving_variance]) - self.assertAllClose(initial_mean, [0.0, 0.0, 0.0, 0.0]) - self.assertAllClose(initial_variance, [1.0, 1.0, 1.0, 1.0]) - for _ in range(10): - sess.run(model.train_op) - final_loss = sess.run(model.total_loss) - self.assertLess(final_loss, initial_loss / 10.0) - - final_mean, final_variance = sess.run([moving_mean, - moving_variance]) - self.assertAllClose(final_mean, [0.125, 0.25, 0.375, 0.25]) - self.assertAllClose(final_variance, [0.109375, 0.1875, - 0.234375, 0.1875]) - - def testNoSummariesOnGPU(self): - with tf.Graph().as_default(): - deploy_config = model_deploy.DeploymentConfig(num_clones=2) - - # clone function creates a fully_connected layer with a regularizer loss. - def ModelFn(): - inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32) - reg = tf.contrib.layers.l2_regularizer(0.001) - tf.contrib.layers.fully_connected(inputs, 30, weights_regularizer=reg) - - model = model_deploy.deploy( - deploy_config, ModelFn, - optimizer=tf.train.GradientDescentOptimizer(1.0)) - # The model summary op should have a few summary inputs and all of them - # should be on the CPU. 
- self.assertTrue(model.summary_op.op.inputs) - for inp in model.summary_op.op.inputs: - self.assertEqual('/device:CPU:0', inp.device) - - def testNoSummariesOnGPUForEvals(self): - with tf.Graph().as_default(): - deploy_config = model_deploy.DeploymentConfig(num_clones=2) - - # clone function creates a fully_connected layer with a regularizer loss. - def ModelFn(): - inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32) - reg = tf.contrib.layers.l2_regularizer(0.001) - tf.contrib.layers.fully_connected(inputs, 30, weights_regularizer=reg) - - # No optimizer here, it's an eval. - model = model_deploy.deploy(deploy_config, ModelFn) - # The model summary op should have a few summary inputs and all of them - # should be on the CPU. - self.assertTrue(model.summary_op.op.inputs) - for inp in model.summary_op.op.inputs: - self.assertEqual('/device:CPU:0', inp.device) - - -if __name__ == '__main__': - tf.test.main() diff --git a/examples/slim/download_and_convert_data.py b/examples/slim/download_and_convert_data.py deleted file mode 100644 index 15dd0a77..00000000 --- a/examples/slim/download_and_convert_data.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -r"""Downloads and converts a particular dataset. - -Usage: -```shell - -$ python download_and_convert_data.py \ - --dataset_name=mnist \ - --dataset_dir=/tmp/mnist - -$ python download_and_convert_data.py \ - --dataset_name=cifar10 \ - --dataset_dir=/tmp/cifar10 - -$ python download_and_convert_data.py \ - --dataset_name=flowers \ - --dataset_dir=/tmp/flowers -``` -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from datasets import download_and_convert_cifar10 -from datasets import download_and_convert_flowers -from datasets import download_and_convert_mnist - -FLAGS = tf.app.flags.FLAGS - -tf.app.flags.DEFINE_string( - 'dataset_name', - None, - 'The name of the dataset to convert, one of "cifar10", "flowers", "mnist".') - -tf.app.flags.DEFINE_string( - 'dataset_dir', - None, - 'The directory where the output TFRecords and temporary files are saved.') - - -def main(_): - if not FLAGS.dataset_name: - raise ValueError('You must supply the dataset name with --dataset_name') - if not FLAGS.dataset_dir: - raise ValueError('You must supply the dataset directory with --dataset_dir') - - if FLAGS.dataset_name == 'cifar10': - download_and_convert_cifar10.run(FLAGS.dataset_dir) - elif FLAGS.dataset_name == 'flowers': - download_and_convert_flowers.run(FLAGS.dataset_dir) - elif FLAGS.dataset_name == 'mnist': - download_and_convert_mnist.run(FLAGS.dataset_dir) - else: - raise ValueError( - 'dataset_name [%s] was not recognized.' 
% FLAGS.dataset_name)
-
-if __name__ == '__main__':
-  tf.app.run()
-
diff --git a/examples/slim/eval_image_classifier.py b/examples/slim/eval_image_classifier.py
deleted file mode 100644
index bf80cd55..00000000
--- a/examples/slim/eval_image_classifier.py
+++ /dev/null
@@ -1,205 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Generic evaluation script that evaluates a model using a given dataset."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from pyspark.context import SparkContext
-from pyspark.conf import SparkConf
-from tensorflowonspark import TFCluster, TFNode
-from datetime import datetime
-import sys
-
-def main_fun(argv, ctx):
-  import math
-  import six
-  import tensorflow as tf
-
-  from datasets import dataset_factory
-  from nets import nets_factory
-  from preprocessing import preprocessing_factory
-
-  sys.argv = argv
-
-  slim = tf.contrib.slim
-
-  tf.app.flags.DEFINE_integer(
-      'batch_size', 100, 'The number of samples in each batch.')
-
-  tf.app.flags.DEFINE_integer(
-      'max_num_batches', None,
-      'Max number of batches to evaluate; by default, use all.')
-
-  tf.app.flags.DEFINE_string(
-      'master', '', 'The address of the TensorFlow master to use.')
-
-  tf.app.flags.DEFINE_string(
-      'checkpoint_path', '/tmp/tfmodel/',
-      'The directory where the model was written to or an absolute path to a '
-      'checkpoint file.')
-
-  tf.app.flags.DEFINE_string(
-      'eval_dir', '/tmp/tfmodel/', 'Directory where the results are saved to.')
-
-  tf.app.flags.DEFINE_integer(
-      'num_preprocessing_threads', 4,
-      'The number of threads used to create the batches.')
-
-  tf.app.flags.DEFINE_string(
-      'dataset_name', 'imagenet', 'The name of the dataset to load.')
-
-  tf.app.flags.DEFINE_string(
-      'dataset_split_name', 'test', 'The name of the train/test split.')
-
-  tf.app.flags.DEFINE_string(
-      'dataset_dir', None, 'The directory where the dataset files are stored.')
-
-  tf.app.flags.DEFINE_integer(
-      'labels_offset', 0,
-      'An offset for the labels in the dataset. This flag is primarily used to '
-      'evaluate the VGG and ResNet architectures which do not use a background '
-      'class for the ImageNet dataset.')
-
-  tf.app.flags.DEFINE_string(
-      'model_name', 'inception_v3', 'The name of the architecture to evaluate.')
-
-  tf.app.flags.DEFINE_string(
-      'preprocessing_name', None, 'The name of the preprocessing to use. If left '
-      'as `None`, then the model_name flag is used.')
-
-  tf.app.flags.DEFINE_float(
-      'moving_average_decay', None,
-      'The decay to use for the moving average.'
- 'If left as None, then moving averages are not used.') - - tf.app.flags.DEFINE_integer( - 'eval_image_size', None, 'Eval image size') - - FLAGS = tf.app.flags.FLAGS - - if not FLAGS.dataset_dir: - raise ValueError('You must supply the dataset directory with --dataset_dir') - - cluster_spec, server = TFNode.start_cluster_server(ctx) - - tf.logging.set_verbosity(tf.logging.INFO) - with tf.Graph().as_default(): - #tf_global_step = slim.get_or_create_global_step() - tf_global_step = tf.Variable(0, name="global_step") - - ###################### - # Select the dataset # - ###################### - dataset = dataset_factory.get_dataset( - FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir) - - #################### - # Select the model # - #################### - network_fn = nets_factory.get_network_fn( - FLAGS.model_name, - num_classes=(dataset.num_classes - FLAGS.labels_offset), - is_training=False) - - ############################################################## - # Create a dataset provider that loads data from the dataset # - ############################################################## - provider = slim.dataset_data_provider.DatasetDataProvider( - dataset, - shuffle=False, - common_queue_capacity=2 * FLAGS.batch_size, - common_queue_min=FLAGS.batch_size) - [image, label] = provider.get(['image', 'label']) - label -= FLAGS.labels_offset - - ##################################### - # Select the preprocessing function # - ##################################### - preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name - image_preprocessing_fn = preprocessing_factory.get_preprocessing( - preprocessing_name, - is_training=False) - - eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size - - image = image_preprocessing_fn(image, eval_image_size, eval_image_size) - - images, labels = tf.train.batch( - [image, label], - batch_size=FLAGS.batch_size, - num_threads=FLAGS.num_preprocessing_threads, - capacity=5 * FLAGS.batch_size) - - #################### - # Define the model # - #################### - logits, _ = network_fn(images) - - if FLAGS.moving_average_decay: - variable_averages = tf.train.ExponentialMovingAverage( - FLAGS.moving_average_decay, tf_global_step) - variables_to_restore = variable_averages.variables_to_restore( - slim.get_model_variables()) - variables_to_restore[tf_global_step.op.name] = tf_global_step - else: - variables_to_restore = slim.get_variables_to_restore() - - predictions = tf.argmax(logits, 1) - labels = tf.squeeze(labels) - - # Define the metrics: - names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({ - 'Accuracy': slim.metrics.streaming_accuracy(predictions, labels), - 'Recall_5': slim.metrics.streaming_recall_at_k( - logits, labels, 5), - }) - - # Print the summaries to screen. - for name, value in six.iteritems(names_to_values): - summary_name = 'eval/%s' % name - op = tf.summary.scalar(summary_name, value, collections=[]) - op = tf.Print(op, [value], summary_name) - tf.add_to_collection(tf.GraphKeys.SUMMARIES, op) - - # TODO(sguada) use num_epochs=1 - if FLAGS.max_num_batches: - num_batches = FLAGS.max_num_batches - else: - # This ensures that we make a single pass over all of the data. 
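# A standalone sketch of the batch-count arithmetic used in the deleted
# evaluation script just below: to make exactly one pass over the eval set,
# divide the sample count by the batch size and round up, so a trailing
# partial batch is still evaluated.
import math

def num_eval_batches(num_samples, batch_size):
  """Batches needed for one full pass, counting a final partial batch."""
  return int(math.ceil(num_samples / float(batch_size)))

assert num_eval_batches(10000, 100) == 100  # exact division
assert num_eval_batches(10000, 128) == 79   # 78 full batches + 1 partial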
- num_batches = math.ceil(dataset.num_samples / float(FLAGS.batch_size)) - - if tf.gfile.IsDirectory(FLAGS.checkpoint_path): - checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path) - else: - checkpoint_path = FLAGS.checkpoint_path - - tf.logging.info('Evaluating %s' % checkpoint_path) - - slim.evaluation.evaluate_once( - master=FLAGS.master, - checkpoint_path=checkpoint_path, - logdir=FLAGS.eval_dir, - num_evals=num_batches, - eval_op=list(names_to_updates.values()), - variables_to_restore=variables_to_restore) - - -if __name__ == '__main__': - sc = SparkContext(conf=SparkConf().setAppName("eval_image_classifier")) - num_executors = int(sc._conf.get("spark.executor.instances")) - cluster = TFCluster.run(sc, main_fun, sys.argv, num_executors, 0, False, TFCluster.InputMode.TENSORFLOW) - cluster.shutdown() diff --git a/examples/slim/nets/__init__.py b/examples/slim/nets/__init__.py deleted file mode 100644 index 8b137891..00000000 --- a/examples/slim/nets/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/examples/slim/nets/alexnet.py b/examples/slim/nets/alexnet.py deleted file mode 100644 index 4e7e563c..00000000 --- a/examples/slim/nets/alexnet.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains a model definition for AlexNet. - -This work was first described in: - ImageNet Classification with Deep Convolutional Neural Networks - Alex Krizhevsky, Ilya Sutskever and Geoffrey E. Hinton - -and later refined in: - One weird trick for parallelizing convolutional neural networks - Alex Krizhevsky, 2014 - -Here we provide the implementation proposed in "One weird trick" and not -"ImageNet Classification", as per the paper, the LRN layers have been removed. - -Usage: - with slim.arg_scope(alexnet.alexnet_v2_arg_scope()): - outputs, end_points = alexnet.alexnet_v2(inputs) - -@@alexnet_v2 -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -slim = tf.contrib.slim -trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) - - -def alexnet_v2_arg_scope(weight_decay=0.0005): - with slim.arg_scope([slim.conv2d, slim.fully_connected], - activation_fn=tf.nn.relu, - biases_initializer=tf.constant_initializer(0.1), - weights_regularizer=slim.l2_regularizer(weight_decay)): - with slim.arg_scope([slim.conv2d], padding='SAME'): - with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc: - return arg_sc - - -def alexnet_v2(inputs, - num_classes=1000, - is_training=True, - dropout_keep_prob=0.5, - spatial_squeeze=True, - scope='alexnet_v2'): - """AlexNet version 2. 
- - Described in: http://arxiv.org/pdf/1404.5997v2.pdf - Parameters from: - github.com/akrizhevsky/cuda-convnet2/blob/master/layers/ - layers-imagenet-1gpu.cfg - - Note: All the fully_connected layers have been transformed to conv2d layers. - To use in classification mode, resize input to 224x224. To use in fully - convolutional mode, set spatial_squeeze to false. - The LRN layers have been removed and change the initializers from - random_normal_initializer to xavier_initializer. - - Args: - inputs: a tensor of size [batch_size, height, width, channels]. - num_classes: number of predicted classes. - is_training: whether or not the model is being trained. - dropout_keep_prob: the probability that activations are kept in the dropout - layers during training. - spatial_squeeze: whether or not should squeeze the spatial dimensions of the - outputs. Useful to remove unnecessary dimensions for classification. - scope: Optional scope for the variables. - - Returns: - the last op containing the log predictions and end_points dict. - """ - with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc: - end_points_collection = sc.name + '_end_points' - # Collect outputs for conv2d, fully_connected and max_pool2d. - with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], - outputs_collections=[end_points_collection]): - net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID', - scope='conv1') - net = slim.max_pool2d(net, [3, 3], 2, scope='pool1') - net = slim.conv2d(net, 192, [5, 5], scope='conv2') - net = slim.max_pool2d(net, [3, 3], 2, scope='pool2') - net = slim.conv2d(net, 384, [3, 3], scope='conv3') - net = slim.conv2d(net, 384, [3, 3], scope='conv4') - net = slim.conv2d(net, 256, [3, 3], scope='conv5') - net = slim.max_pool2d(net, [3, 3], 2, scope='pool5') - - # Use conv2d instead of fully_connected layers. - with slim.arg_scope([slim.conv2d], - weights_initializer=trunc_normal(0.005), - biases_initializer=tf.constant_initializer(0.1)): - net = slim.conv2d(net, 4096, [5, 5], padding='VALID', - scope='fc6') - net = slim.dropout(net, dropout_keep_prob, is_training=is_training, - scope='dropout6') - net = slim.conv2d(net, 4096, [1, 1], scope='fc7') - net = slim.dropout(net, dropout_keep_prob, is_training=is_training, - scope='dropout7') - net = slim.conv2d(net, num_classes, [1, 1], - activation_fn=None, - normalizer_fn=None, - biases_initializer=tf.zeros_initializer(), - scope='fc8') - - # Convert end_points_collection into a end_point dict. - end_points = slim.utils.convert_collection_to_dict(end_points_collection) - if spatial_squeeze: - net = tf.squeeze(net, [1, 2], name='fc8/squeezed') - end_points[sc.name + '/fc8'] = net - return net, end_points -alexnet_v2.default_image_size = 224 diff --git a/examples/slim/nets/alexnet_test.py b/examples/slim/nets/alexnet_test.py deleted file mode 100644 index 0e562fbb..00000000 --- a/examples/slim/nets/alexnet_test.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
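# The deleted alexnet_v2 above implements its fully connected head as conv2d
# layers, so classification mode only needs a final squeeze of the two unit
# spatial axes. A minimal sketch of that squeeze under the [B, 1, 1, C]
# convention (TF 1.x API, hypothetical shapes):
import tensorflow as tf

conv_logits = tf.placeholder(tf.float32, [None, 1, 1, 1000])  # conv head output
logits = tf.squeeze(conv_logits, [1, 2])                      # [B, C] class scores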
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for slim.nets.alexnet.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from nets import alexnet - -slim = tf.contrib.slim - - -class AlexnetV2Test(tf.test.TestCase): - - def testBuild(self): - batch_size = 5 - height, width = 224, 224 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = alexnet.alexnet_v2(inputs, num_classes) - self.assertEquals(logits.op.name, 'alexnet_v2/fc8/squeezed') - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - - def testFullyConvolutional(self): - batch_size = 1 - height, width = 300, 400 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = alexnet.alexnet_v2(inputs, num_classes, spatial_squeeze=False) - self.assertEquals(logits.op.name, 'alexnet_v2/fc8/BiasAdd') - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, 4, 7, num_classes]) - - def testEndPoints(self): - batch_size = 5 - height, width = 224, 224 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - _, end_points = alexnet.alexnet_v2(inputs, num_classes) - expected_names = ['alexnet_v2/conv1', - 'alexnet_v2/pool1', - 'alexnet_v2/conv2', - 'alexnet_v2/pool2', - 'alexnet_v2/conv3', - 'alexnet_v2/conv4', - 'alexnet_v2/conv5', - 'alexnet_v2/pool5', - 'alexnet_v2/fc6', - 'alexnet_v2/fc7', - 'alexnet_v2/fc8' - ] - self.assertSetEqual(set(end_points.keys()), set(expected_names)) - - def testModelVariables(self): - batch_size = 5 - height, width = 224, 224 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - alexnet.alexnet_v2(inputs, num_classes) - expected_names = ['alexnet_v2/conv1/weights', - 'alexnet_v2/conv1/biases', - 'alexnet_v2/conv2/weights', - 'alexnet_v2/conv2/biases', - 'alexnet_v2/conv3/weights', - 'alexnet_v2/conv3/biases', - 'alexnet_v2/conv4/weights', - 'alexnet_v2/conv4/biases', - 'alexnet_v2/conv5/weights', - 'alexnet_v2/conv5/biases', - 'alexnet_v2/fc6/weights', - 'alexnet_v2/fc6/biases', - 'alexnet_v2/fc7/weights', - 'alexnet_v2/fc7/biases', - 'alexnet_v2/fc8/weights', - 'alexnet_v2/fc8/biases', - ] - model_variables = [v.op.name for v in slim.get_model_variables()] - self.assertSetEqual(set(model_variables), set(expected_names)) - - def testEvaluation(self): - batch_size = 2 - height, width = 224, 224 - num_classes = 1000 - with self.test_session(): - eval_inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False) - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - predictions = tf.argmax(logits, 1) - self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) - - def testTrainEvalWithReuse(self): - train_batch_size = 2 - eval_batch_size = 1 - train_height, train_width = 224, 224 - eval_height, eval_width = 300, 400 - num_classes = 1000 - with self.test_session(): - train_inputs = tf.random_uniform( - (train_batch_size, train_height, train_width, 3)) - logits, _ = alexnet.alexnet_v2(train_inputs) - self.assertListEqual(logits.get_shape().as_list(), - [train_batch_size, num_classes]) - 
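# tf.get_variable_scope().reuse_variables() (used on the next line of the
# deleted test) switches the current scope into reuse mode, so rebuilding the
# model binds to the weights created above rather than allocating a second
# copy. A standalone sketch of that behavior (TF 1.x):
import tensorflow as tf

with tf.variable_scope('net'):
  w_train = tf.get_variable('w', shape=[3], initializer=tf.zeros_initializer())
  tf.get_variable_scope().reuse_variables()
  w_eval = tf.get_variable('w')  # returns the existing variable, no new params
assert w_train is w_eval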
tf.get_variable_scope().reuse_variables() - eval_inputs = tf.random_uniform( - (eval_batch_size, eval_height, eval_width, 3)) - logits, _ = alexnet.alexnet_v2(eval_inputs, is_training=False, - spatial_squeeze=False) - self.assertListEqual(logits.get_shape().as_list(), - [eval_batch_size, 4, 7, num_classes]) - logits = tf.reduce_mean(logits, [1, 2]) - predictions = tf.argmax(logits, 1) - self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) - - def testForward(self): - batch_size = 1 - height, width = 224, 224 - with self.test_session() as sess: - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = alexnet.alexnet_v2(inputs) - sess.run(tf.global_variables_initializer()) - output = sess.run(logits) - self.assertTrue(output.any()) - -if __name__ == '__main__': - tf.test.main() diff --git a/examples/slim/nets/cifarnet.py b/examples/slim/nets/cifarnet.py deleted file mode 100644 index 44ca0fed..00000000 --- a/examples/slim/nets/cifarnet.py +++ /dev/null @@ -1,112 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains a variant of the CIFAR-10 model definition.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -slim = tf.contrib.slim - -trunc_normal = lambda stddev: tf.truncated_normal_initializer(stddev=stddev) - - -def cifarnet(images, num_classes=10, is_training=False, - dropout_keep_prob=0.5, - prediction_fn=slim.softmax, - scope='CifarNet'): - """Creates a variant of the CifarNet model. - - Note that since the output is a set of 'logits', the values fall in the - interval of (-infinity, infinity). Consequently, to convert the outputs to a - probability distribution over the characters, one will need to convert them - using the softmax function: - - logits = cifarnet.cifarnet(images, is_training=False) - probabilities = tf.nn.softmax(logits) - predictions = tf.argmax(logits, 1) - - Args: - images: A batch of `Tensors` of size [batch_size, height, width, channels]. - num_classes: the number of classes in the dataset. - is_training: specifies whether or not we're currently training the model. - This variable will determine the behaviour of the dropout layer. - dropout_keep_prob: the percentage of activation values that are retained. - prediction_fn: a function to get predictions out of logits. - scope: Optional variable_scope. - - Returns: - logits: the pre-softmax activations, a tensor of size - [batch_size, `num_classes`] - end_points: a dictionary from components of the network to the corresponding - activation. 
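# The CifarNet docstring above leaves the logits-to-probability conversion to
# the caller. A minimal sketch of that conversion (TF 1.x, hypothetical
# placeholder shape):
import tensorflow as tf

logits = tf.placeholder(tf.float32, [None, 10])  # pre-softmax activations
probabilities = tf.nn.softmax(logits)            # distribution over classes
predictions = tf.argmax(logits, 1)               # hard class id per example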
- """ - end_points = {} - - with tf.variable_scope(scope, 'CifarNet', [images, num_classes]): - net = slim.conv2d(images, 64, [5, 5], scope='conv1') - end_points['conv1'] = net - net = slim.max_pool2d(net, [2, 2], 2, scope='pool1') - end_points['pool1'] = net - net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm1') - net = slim.conv2d(net, 64, [5, 5], scope='conv2') - end_points['conv2'] = net - net = tf.nn.lrn(net, 4, bias=1.0, alpha=0.001/9.0, beta=0.75, name='norm2') - net = slim.max_pool2d(net, [2, 2], 2, scope='pool2') - end_points['pool2'] = net - net = slim.flatten(net) - end_points['Flatten'] = net - net = slim.fully_connected(net, 384, scope='fc3') - end_points['fc3'] = net - net = slim.dropout(net, dropout_keep_prob, is_training=is_training, - scope='dropout3') - net = slim.fully_connected(net, 192, scope='fc4') - end_points['fc4'] = net - logits = slim.fully_connected(net, num_classes, - biases_initializer=tf.zeros_initializer(), - weights_initializer=trunc_normal(1/192.0), - weights_regularizer=None, - activation_fn=None, - scope='logits') - - end_points['Logits'] = logits - end_points['Predictions'] = prediction_fn(logits, scope='Predictions') - - return logits, end_points -cifarnet.default_image_size = 32 - - -def cifarnet_arg_scope(weight_decay=0.004): - """Defines the default cifarnet argument scope. - - Args: - weight_decay: The weight decay to use for regularizing the model. - - Returns: - An `arg_scope` to use for the inception v3 model. - """ - with slim.arg_scope( - [slim.conv2d], - weights_initializer=tf.truncated_normal_initializer(stddev=5e-2), - activation_fn=tf.nn.relu): - with slim.arg_scope( - [slim.fully_connected], - biases_initializer=tf.constant_initializer(0.1), - weights_initializer=trunc_normal(0.04), - weights_regularizer=slim.l2_regularizer(weight_decay), - activation_fn=tf.nn.relu) as sc: - return sc diff --git a/examples/slim/nets/inception.py b/examples/slim/nets/inception.py deleted file mode 100644 index 806c30be..00000000 --- a/examples/slim/nets/inception.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Brings all inception models under one namespace.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# pylint: disable=unused-import -from nets.inception_resnet_v2 import inception_resnet_v2 -from nets.inception_resnet_v2 import inception_resnet_v2_arg_scope -from nets.inception_v1 import inception_v1 -from nets.inception_v1 import inception_v1_arg_scope -from nets.inception_v1 import inception_v1_base -from nets.inception_v2 import inception_v2 -from nets.inception_v2 import inception_v2_arg_scope -from nets.inception_v2 import inception_v2_base -from nets.inception_v3 import inception_v3 -from nets.inception_v3 import inception_v3_arg_scope -from nets.inception_v3 import inception_v3_base -from nets.inception_v4 import inception_v4 -from nets.inception_v4 import inception_v4_arg_scope -from nets.inception_v4 import inception_v4_base -# pylint: enable=unused-import diff --git a/examples/slim/nets/inception_resnet_v2.py b/examples/slim/nets/inception_resnet_v2.py deleted file mode 100644 index b5a54c5b..00000000 --- a/examples/slim/nets/inception_resnet_v2.py +++ /dev/null @@ -1,280 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains the definition of the Inception Resnet V2 architecture. - -As described in http://arxiv.org/abs/1602.07261. 
- - Inception-v4, Inception-ResNet and the Impact of Residual Connections - on Learning - Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import tensorflow as tf - -slim = tf.contrib.slim - - -def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): - """Builds the 35x35 resnet block.""" - with tf.variable_scope(scope, 'Block35', [net], reuse=reuse): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') - tower_conv2_1 = slim.conv2d(tower_conv2_0, 48, 3, scope='Conv2d_0b_3x3') - tower_conv2_2 = slim.conv2d(tower_conv2_1, 64, 3, scope='Conv2d_0c_3x3') - mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_1, tower_conv2_2]) - up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, - activation_fn=None, scope='Conv2d_1x1') - net += scale * up - if activation_fn: - net = activation_fn(net) - return net - - -def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): - """Builds the 17x17 resnet block.""" - with tf.variable_scope(scope, 'Block17', [net], reuse=reuse): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1_0, 160, [1, 7], - scope='Conv2d_0b_1x7') - tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [7, 1], - scope='Conv2d_0c_7x1') - mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2]) - up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, - activation_fn=None, scope='Conv2d_1x1') - net += scale * up - if activation_fn: - net = activation_fn(net) - return net - - -def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): - """Builds the 8x8 resnet block.""" - with tf.variable_scope(scope, 'Block8', [net], reuse=reuse): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1_0, 224, [1, 3], - scope='Conv2d_0b_1x3') - tower_conv1_2 = slim.conv2d(tower_conv1_1, 256, [3, 1], - scope='Conv2d_0c_3x1') - mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2]) - up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, - activation_fn=None, scope='Conv2d_1x1') - net += scale * up - if activation_fn: - net = activation_fn(net) - return net - - -def inception_resnet_v2(inputs, num_classes=1001, is_training=True, - dropout_keep_prob=0.8, - reuse=None, - scope='InceptionResnetV2'): - """Creates the Inception Resnet V2 model. - - Args: - inputs: a 4-D tensor of size [batch_size, height, width, 3]. - num_classes: number of predicted classes. - is_training: whether is training or not. - dropout_keep_prob: float, the fraction to keep before final layer. - reuse: whether or not the network and its variables should be reused. To be - able to reuse 'scope' must be given. - scope: Optional variable_scope. - - Returns: - logits: the logits outputs of the model. 
- end_points: the set of end_points from the inception model. - """ - end_points = {} - - with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse): - with slim.arg_scope([slim.batch_norm, slim.dropout], - is_training=is_training): - with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], - stride=1, padding='SAME'): - - # 149 x 149 x 32 - net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID', - scope='Conv2d_1a_3x3') - end_points['Conv2d_1a_3x3'] = net - # 147 x 147 x 32 - net = slim.conv2d(net, 32, 3, padding='VALID', - scope='Conv2d_2a_3x3') - end_points['Conv2d_2a_3x3'] = net - # 147 x 147 x 64 - net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3') - end_points['Conv2d_2b_3x3'] = net - # 73 x 73 x 64 - net = slim.max_pool2d(net, 3, stride=2, padding='VALID', - scope='MaxPool_3a_3x3') - end_points['MaxPool_3a_3x3'] = net - # 73 x 73 x 80 - net = slim.conv2d(net, 80, 1, padding='VALID', - scope='Conv2d_3b_1x1') - end_points['Conv2d_3b_1x1'] = net - # 71 x 71 x 192 - net = slim.conv2d(net, 192, 3, padding='VALID', - scope='Conv2d_4a_3x3') - end_points['Conv2d_4a_3x3'] = net - # 35 x 35 x 192 - net = slim.max_pool2d(net, 3, stride=2, padding='VALID', - scope='MaxPool_5a_3x3') - end_points['MaxPool_5a_3x3'] = net - - # 35 x 35 x 320 - with tf.variable_scope('Mixed_5b'): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5, - scope='Conv2d_0b_5x5') - with tf.variable_scope('Branch_2'): - tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1') - tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3, - scope='Conv2d_0b_3x3') - tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3, - scope='Conv2d_0c_3x3') - with tf.variable_scope('Branch_3'): - tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME', - scope='AvgPool_0a_3x3') - tower_pool_1 = slim.conv2d(tower_pool, 64, 1, - scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[tower_conv, tower_conv1_1, - tower_conv2_2, tower_pool_1]) - - end_points['Mixed_5b'] = net - net = slim.repeat(net, 10, block35, scale=0.17) - - # 17 x 17 x 1088 - with tf.variable_scope('Mixed_6a'): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 384, 3, stride=2, padding='VALID', - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3, - scope='Conv2d_0b_3x3') - tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3, - stride=2, padding='VALID', - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_2'): - tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID', - scope='MaxPool_1a_3x3') - net = tf.concat(axis=3, values=[tower_conv, tower_conv1_2, tower_pool]) - - end_points['Mixed_6a'] = net - net = slim.repeat(net, 20, block17, scale=0.10) - - # Auxiliary tower - with tf.variable_scope('AuxLogits'): - aux = slim.avg_pool2d(net, 5, stride=3, padding='VALID', - scope='Conv2d_1a_3x3') - aux = slim.conv2d(aux, 128, 1, scope='Conv2d_1b_1x1') - aux = slim.conv2d(aux, 768, aux.get_shape()[1:3], - padding='VALID', scope='Conv2d_2a_5x5') - aux = slim.flatten(aux) - aux = slim.fully_connected(aux, num_classes, activation_fn=None, - scope='Logits') - end_points['AuxLogits'] = aux - - with tf.variable_scope('Mixed_7a'): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 256, 
1, scope='Conv2d_0a_1x1') - tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2, - padding='VALID', scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2, - padding='VALID', scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_2'): - tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') - tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3, - scope='Conv2d_0b_3x3') - tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2, - padding='VALID', scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_3'): - tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID', - scope='MaxPool_1a_3x3') - net = tf.concat(axis=3, values=[tower_conv_1, tower_conv1_1, - tower_conv2_2, tower_pool]) - - end_points['Mixed_7a'] = net - - net = slim.repeat(net, 9, block8, scale=0.20) - net = block8(net, activation_fn=None) - - net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1') - end_points['Conv2d_7b_1x1'] = net - - with tf.variable_scope('Logits'): - end_points['PrePool'] = net - net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', - scope='AvgPool_1a_8x8') - net = slim.flatten(net) - - net = slim.dropout(net, dropout_keep_prob, is_training=is_training, - scope='Dropout') - - end_points['PreLogitsFlatten'] = net - logits = slim.fully_connected(net, num_classes, activation_fn=None, - scope='Logits') - end_points['Logits'] = logits - end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions') - - return logits, end_points -inception_resnet_v2.default_image_size = 299 - - -def inception_resnet_v2_arg_scope(weight_decay=0.00004, - batch_norm_decay=0.9997, - batch_norm_epsilon=0.001): - """Yields the scope with the default parameters for inception_resnet_v2. - - Args: - weight_decay: the weight decay for weights variables. - batch_norm_decay: decay for the moving average of batch_norm momentums. - batch_norm_epsilon: small float added to variance to avoid dividing by zero. - - Returns: - a arg_scope with the parameters needed for inception_resnet_v2. - """ - # Set weight_decay for weights in conv2d and fully_connected layers. - with slim.arg_scope([slim.conv2d, slim.fully_connected], - weights_regularizer=slim.l2_regularizer(weight_decay), - biases_regularizer=slim.l2_regularizer(weight_decay)): - - batch_norm_params = { - 'decay': batch_norm_decay, - 'epsilon': batch_norm_epsilon, - } - # Set activation_fn and parameters for batch_norm. - with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu, - normalizer_fn=slim.batch_norm, - normalizer_params=batch_norm_params) as scope: - return scope diff --git a/examples/slim/nets/inception_resnet_v2_test.py b/examples/slim/nets/inception_resnet_v2_test.py deleted file mode 100644 index b1560fb0..00000000 --- a/examples/slim/nets/inception_resnet_v2_test.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
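# block35, block17 and block8 above share one pattern: project the concatenated
# branch towers back to the input depth with a 1x1 conv, then add a scaled
# residual (net += scale * up). A numpy sketch of just that update, assuming
# matching shapes:
import numpy as np

def residual_update(net, up, scale=0.17):
  """Scaled residual connection as used in the Inception-ResNet blocks."""
  return net + scale * up

x = np.ones((1, 35, 35, 320), dtype=np.float32)
up = np.full_like(x, 2.0)
assert np.allclose(residual_update(x, up), 1.34)  # 1.0 + 0.17 * 2.0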
-# ============================================================================== -"""Tests for slim.inception_resnet_v2.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from nets import inception - - -class InceptionTest(tf.test.TestCase): - - def testBuildLogits(self): - batch_size = 5 - height, width = 299, 299 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = inception.inception_resnet_v2(inputs, num_classes) - self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits')) - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - - def testBuildEndPoints(self): - batch_size = 5 - height, width = 299, 299 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - _, end_points = inception.inception_resnet_v2(inputs, num_classes) - self.assertTrue('Logits' in end_points) - logits = end_points['Logits'] - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - self.assertTrue('AuxLogits' in end_points) - aux_logits = end_points['AuxLogits'] - self.assertListEqual(aux_logits.get_shape().as_list(), - [batch_size, num_classes]) - pre_pool = end_points['PrePool'] - self.assertListEqual(pre_pool.get_shape().as_list(), - [batch_size, 8, 8, 1536]) - - def testVariablesSetDevice(self): - batch_size = 5 - height, width = 299, 299 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - # Force all Variables to reside on the device. - with tf.variable_scope('on_cpu'), tf.device('/cpu:0'): - inception.inception_resnet_v2(inputs, num_classes) - with tf.variable_scope('on_gpu'), tf.device('/gpu:0'): - inception.inception_resnet_v2(inputs, num_classes) - for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='on_cpu'): - self.assertDeviceEqual(v.device, '/cpu:0') - for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='on_gpu'): - self.assertDeviceEqual(v.device, '/gpu:0') - - def testHalfSizeImages(self): - batch_size = 5 - height, width = 150, 150 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, end_points = inception.inception_resnet_v2(inputs, num_classes) - self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits')) - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - pre_pool = end_points['PrePool'] - self.assertListEqual(pre_pool.get_shape().as_list(), - [batch_size, 3, 3, 1536]) - - def testUnknownBatchSize(self): - batch_size = 1 - height, width = 299, 299 - num_classes = 1000 - with self.test_session() as sess: - inputs = tf.placeholder(tf.float32, (None, height, width, 3)) - logits, _ = inception.inception_resnet_v2(inputs, num_classes) - self.assertTrue(logits.op.name.startswith('InceptionResnetV2/Logits')) - self.assertListEqual(logits.get_shape().as_list(), - [None, num_classes]) - images = tf.random_uniform((batch_size, height, width, 3)) - sess.run(tf.global_variables_initializer()) - output = sess.run(logits, {inputs: images.eval()}) - self.assertEquals(output.shape, (batch_size, num_classes)) - - def testEvaluation(self): - batch_size = 2 - height, width = 299, 299 - num_classes = 1000 - with self.test_session() as sess: - eval_inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = 
inception.inception_resnet_v2(eval_inputs, - num_classes, - is_training=False) - predictions = tf.argmax(logits, 1) - sess.run(tf.global_variables_initializer()) - output = sess.run(predictions) - self.assertEquals(output.shape, (batch_size,)) - - def testTrainEvalWithReuse(self): - train_batch_size = 5 - eval_batch_size = 2 - height, width = 150, 150 - num_classes = 1000 - with self.test_session() as sess: - train_inputs = tf.random_uniform((train_batch_size, height, width, 3)) - inception.inception_resnet_v2(train_inputs, num_classes) - eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3)) - logits, _ = inception.inception_resnet_v2(eval_inputs, - num_classes, - is_training=False, - reuse=True) - predictions = tf.argmax(logits, 1) - sess.run(tf.global_variables_initializer()) - output = sess.run(predictions) - self.assertEquals(output.shape, (eval_batch_size,)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/examples/slim/nets/inception_utils.py b/examples/slim/nets/inception_utils.py deleted file mode 100644 index 66ee41fb..00000000 --- a/examples/slim/nets/inception_utils.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains common code shared by all inception models. - -Usage of arg scope: - with slim.arg_scope(inception_arg_scope()): - logits, end_points = inception.inception_v3(images, num_classes, - is_training=is_training) - -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -slim = tf.contrib.slim - - -def inception_arg_scope(weight_decay=0.00004, - use_batch_norm=True, - batch_norm_decay=0.9997, - batch_norm_epsilon=0.001): - """Defines the default arg scope for inception models. - - Args: - weight_decay: The weight decay to use for regularizing the model. - use_batch_norm: "If `True`, batch_norm is applied after each convolution. - batch_norm_decay: Decay for batch norm moving average. - batch_norm_epsilon: Small float added to variance to avoid dividing by zero - in batch norm. - - Returns: - An `arg_scope` to use for the inception models. - """ - batch_norm_params = { - # Decay for the moving averages. - 'decay': batch_norm_decay, - # epsilon to prevent 0s in variance. - 'epsilon': batch_norm_epsilon, - # collection containing update_ops. - 'updates_collections': tf.GraphKeys.UPDATE_OPS, - } - if use_batch_norm: - normalizer_fn = slim.batch_norm - normalizer_params = batch_norm_params - else: - normalizer_fn = None - normalizer_params = {} - # Set weight_decay for weights in Conv and FC layers. 
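# slim.arg_scope (used just below) records default keyword arguments for the
# listed ops; inner scopes extend outer ones, and an explicit keyword at the
# call site always wins. A minimal sketch of that precedence (TF 1.x
# contrib.slim, hypothetical tensors):
import tensorflow as tf
slim = tf.contrib.slim

inputs = tf.placeholder(tf.float32, [None, 28, 28, 16])
with slim.arg_scope([slim.conv2d], padding='VALID', activation_fn=tf.nn.relu):
  a = slim.conv2d(inputs, 32, [3, 3])                  # inherits VALID padding
  b = slim.conv2d(inputs, 32, [3, 3], padding='SAME')  # call site overrides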
- with slim.arg_scope([slim.conv2d, slim.fully_connected], - weights_regularizer=slim.l2_regularizer(weight_decay)): - with slim.arg_scope( - [slim.conv2d], - weights_initializer=slim.variance_scaling_initializer(), - activation_fn=tf.nn.relu, - normalizer_fn=normalizer_fn, - normalizer_params=normalizer_params) as sc: - return sc diff --git a/examples/slim/nets/inception_v1.py b/examples/slim/nets/inception_v1.py deleted file mode 100644 index 4207c2a7..00000000 --- a/examples/slim/nets/inception_v1.py +++ /dev/null @@ -1,305 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains the definition for inception v1 classification network.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from nets import inception_utils - -slim = tf.contrib.slim -trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) - - -def inception_v1_base(inputs, - final_endpoint='Mixed_5c', - scope='InceptionV1'): - """Defines the Inception V1 base architecture. - - This architecture is defined in: - Going deeper with convolutions - Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed, - Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich. - http://arxiv.org/pdf/1409.4842v1.pdf. - - Args: - inputs: a tensor of size [batch_size, height, width, channels]. - final_endpoint: specifies the endpoint to construct the network up to. It - can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', - 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', - 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', - 'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c'] - scope: Optional variable_scope. - - Returns: - A dictionary from components of the network to the corresponding activation. - - Raises: - ValueError: if final_endpoint is not set to one of the predefined values. 
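# The final_endpoint mechanism documented above is an early-return pattern:
# build layers in order, record each activation in end_points, and stop once
# the requested name has been built. A dependency-free sketch of the control
# flow (hypothetical layer names):
def build_up_to(final_endpoint, layers=('Conv2d_1a', 'Mixed_3b', 'Mixed_5c')):
  end_points, net = {}, 0
  for name in layers:
    net += 1  # stand-in for the real layer op
    end_points[name] = net
    if name == final_endpoint:
      return net, end_points
  raise ValueError('Unknown final endpoint %s' % final_endpoint)

net, eps = build_up_to('Mixed_3b')
assert net == 2 and sorted(eps) == ['Conv2d_1a', 'Mixed_3b']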
- """ - end_points = {} - with tf.variable_scope(scope, 'InceptionV1', [inputs]): - with slim.arg_scope( - [slim.conv2d, slim.fully_connected], - weights_initializer=trunc_normal(0.01)): - with slim.arg_scope([slim.conv2d, slim.max_pool2d], - stride=1, padding='SAME'): - end_point = 'Conv2d_1a_7x7' - net = slim.conv2d(inputs, 64, [7, 7], stride=2, scope=end_point) - end_points[end_point] = net - if final_endpoint == end_point: return net, end_points - end_point = 'MaxPool_2a_3x3' - net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point) - end_points[end_point] = net - if final_endpoint == end_point: return net, end_points - end_point = 'Conv2d_2b_1x1' - net = slim.conv2d(net, 64, [1, 1], scope=end_point) - end_points[end_point] = net - if final_endpoint == end_point: return net, end_points - end_point = 'Conv2d_2c_3x3' - net = slim.conv2d(net, 192, [3, 3], scope=end_point) - end_points[end_point] = net - if final_endpoint == end_point: return net, end_points - end_point = 'MaxPool_3a_3x3' - net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point) - end_points[end_point] = net - if final_endpoint == end_point: return net, end_points - - end_point = 'Mixed_3b' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, 96, [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, 128, [3, 3], scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(net, 16, [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, 32, [3, 3], scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') - branch_3 = slim.conv2d(branch_3, 32, [1, 1], scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if final_endpoint == end_point: return net, end_points - - end_point = 'Mixed_3c' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, 192, [3, 3], scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') - branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if final_endpoint == end_point: return net, end_points - - end_point = 'MaxPool_4a_3x3' - net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point) - end_points[end_point] = net - if final_endpoint == end_point: return net, end_points - - end_point = 'Mixed_4b' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, 96, [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, 208, [3, 3], scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(net, 16, [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, 48, [3, 3], 
scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') - branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if final_endpoint == end_point: return net, end_points - - end_point = 'Mixed_4c' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, 112, [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, 224, [3, 3], scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(net, 24, [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') - branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if final_endpoint == end_point: return net, end_points - - end_point = 'Mixed_4d' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, 256, [3, 3], scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(net, 24, [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') - branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if final_endpoint == end_point: return net, end_points - - end_point = 'Mixed_4e' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, 112, [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, 144, [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, 288, [3, 3], scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') - branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if final_endpoint == end_point: return net, end_points - - end_point = 'Mixed_4f' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, 256, [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, 320, [3, 3], scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') - branch_3 = 
slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if final_endpoint == end_point: return net, end_points - - end_point = 'MaxPool_5a_2x2' - net = slim.max_pool2d(net, [2, 2], stride=2, scope=end_point) - end_points[end_point] = net - if final_endpoint == end_point: return net, end_points - - end_point = 'Mixed_5b' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, 256, [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, 320, [3, 3], scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0a_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') - branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if final_endpoint == end_point: return net, end_points - - end_point = 'Mixed_5c' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, 384, [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, 384, [3, 3], scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(net, 48, [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') - branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if final_endpoint == end_point: return net, end_points - raise ValueError('Unknown final endpoint %s' % final_endpoint) - - -def inception_v1(inputs, - num_classes=1000, - is_training=True, - dropout_keep_prob=0.8, - prediction_fn=slim.softmax, - spatial_squeeze=True, - reuse=None, - scope='InceptionV1'): - """Defines the Inception V1 architecture. - - This architecture is defined in: - - Going deeper with convolutions - Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed, - Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich. - http://arxiv.org/pdf/1409.4842v1.pdf. - - The default image size used to train this network is 224x224. - - Args: - inputs: a tensor of size [batch_size, height, width, channels]. - num_classes: number of predicted classes. - is_training: whether is training or not. - dropout_keep_prob: the percentage of activation values that are retained. - prediction_fn: a function to get predictions out of logits. - spatial_squeeze: if True, logits is of shape [B, C], if false logits is - of shape [B, 1, 1, C], where B is batch_size and C is number of classes. - reuse: whether or not the network and its variables should be reused. To be - able to reuse 'scope' must be given. - scope: Optional variable_scope. - - Returns: - logits: the pre-softmax activations, a tensor of size - [batch_size, num_classes] - end_points: a dictionary from components of the network to the corresponding - activation. 
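# The Logits head below collapses the 7x7 Mixed_5c map to 1x1 with a 7x7
# average pool before the 1x1 logits conv. VALID pooling output size follows
# floor((in - kernel) / stride) + 1; a quick check of the sizes involved:
def valid_pool_out(in_size, kernel, stride=1):
  return (in_size - kernel) // stride + 1

assert valid_pool_out(7, 7) == 1    # Mixed_5c head for a 224x224 input
assert valid_pool_out(14, 7) == 8   # hypothetical larger feature map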
- """ - # Final pooling and prediction - with tf.variable_scope(scope, 'InceptionV1', [inputs, num_classes], - reuse=reuse) as scope: - with slim.arg_scope([slim.batch_norm, slim.dropout], - is_training=is_training): - net, end_points = inception_v1_base(inputs, scope=scope) - with tf.variable_scope('Logits'): - net = slim.avg_pool2d(net, [7, 7], stride=1, scope='AvgPool_0a_7x7') - net = slim.dropout(net, - dropout_keep_prob, scope='Dropout_0b') - logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, - normalizer_fn=None, scope='Conv2d_0c_1x1') - if spatial_squeeze: - logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze') - - end_points['Logits'] = logits - end_points['Predictions'] = prediction_fn(logits, scope='Predictions') - return logits, end_points -inception_v1.default_image_size = 224 - -inception_v1_arg_scope = inception_utils.inception_arg_scope diff --git a/examples/slim/nets/inception_v1_test.py b/examples/slim/nets/inception_v1_test.py deleted file mode 100644 index b9271814..00000000 --- a/examples/slim/nets/inception_v1_test.py +++ /dev/null @@ -1,210 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for nets.inception_v1.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf - -from nets import inception - -slim = tf.contrib.slim - - -class InceptionV1Test(tf.test.TestCase): - - def testBuildClassificationNetwork(self): - batch_size = 5 - height, width = 224, 224 - num_classes = 1000 - - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, end_points = inception.inception_v1(inputs, num_classes) - self.assertTrue(logits.op.name.startswith('InceptionV1/Logits')) - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - self.assertTrue('Predictions' in end_points) - self.assertListEqual(end_points['Predictions'].get_shape().as_list(), - [batch_size, num_classes]) - - def testBuildBaseNetwork(self): - batch_size = 5 - height, width = 224, 224 - - inputs = tf.random_uniform((batch_size, height, width, 3)) - mixed_6c, end_points = inception.inception_v1_base(inputs) - self.assertTrue(mixed_6c.op.name.startswith('InceptionV1/Mixed_5c')) - self.assertListEqual(mixed_6c.get_shape().as_list(), - [batch_size, 7, 7, 1024]) - expected_endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', - 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', - 'Mixed_3c', 'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', - 'Mixed_4d', 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2', - 'Mixed_5b', 'Mixed_5c'] - self.assertItemsEqual(end_points.keys(), expected_endpoints) - - def testBuildOnlyUptoFinalEndpoint(self): - batch_size = 5 - height, width = 224, 224 - endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', - 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', - 
'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', - 'Mixed_4e', 'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', - 'Mixed_5c'] - for index, endpoint in enumerate(endpoints): - with tf.Graph().as_default(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - out_tensor, end_points = inception.inception_v1_base( - inputs, final_endpoint=endpoint) - self.assertTrue(out_tensor.op.name.startswith( - 'InceptionV1/' + endpoint)) - self.assertItemsEqual(endpoints[:index+1], end_points) - - def testBuildAndCheckAllEndPointsUptoMixed5c(self): - batch_size = 5 - height, width = 224, 224 - - inputs = tf.random_uniform((batch_size, height, width, 3)) - _, end_points = inception.inception_v1_base(inputs, - final_endpoint='Mixed_5c') - endpoints_shapes = {'Conv2d_1a_7x7': [5, 112, 112, 64], - 'MaxPool_2a_3x3': [5, 56, 56, 64], - 'Conv2d_2b_1x1': [5, 56, 56, 64], - 'Conv2d_2c_3x3': [5, 56, 56, 192], - 'MaxPool_3a_3x3': [5, 28, 28, 192], - 'Mixed_3b': [5, 28, 28, 256], - 'Mixed_3c': [5, 28, 28, 480], - 'MaxPool_4a_3x3': [5, 14, 14, 480], - 'Mixed_4b': [5, 14, 14, 512], - 'Mixed_4c': [5, 14, 14, 512], - 'Mixed_4d': [5, 14, 14, 512], - 'Mixed_4e': [5, 14, 14, 528], - 'Mixed_4f': [5, 14, 14, 832], - 'MaxPool_5a_2x2': [5, 7, 7, 832], - 'Mixed_5b': [5, 7, 7, 832], - 'Mixed_5c': [5, 7, 7, 1024]} - - self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys()) - for endpoint_name in endpoints_shapes: - expected_shape = endpoints_shapes[endpoint_name] - self.assertTrue(endpoint_name in end_points) - self.assertListEqual(end_points[endpoint_name].get_shape().as_list(), - expected_shape) - - def testModelHasExpectedNumberOfParameters(self): - batch_size = 5 - height, width = 224, 224 - inputs = tf.random_uniform((batch_size, height, width, 3)) - with slim.arg_scope(inception.inception_v1_arg_scope()): - inception.inception_v1_base(inputs) - total_params, _ = slim.model_analyzer.analyze_vars( - slim.get_model_variables()) - self.assertAlmostEqual(5607184, total_params) - - def testHalfSizeImages(self): - batch_size = 5 - height, width = 112, 112 - - inputs = tf.random_uniform((batch_size, height, width, 3)) - mixed_5c, _ = inception.inception_v1_base(inputs) - self.assertTrue(mixed_5c.op.name.startswith('InceptionV1/Mixed_5c')) - self.assertListEqual(mixed_5c.get_shape().as_list(), - [batch_size, 4, 4, 1024]) - - def testUnknownImageShape(self): - tf.reset_default_graph() - batch_size = 2 - height, width = 224, 224 - num_classes = 1000 - input_np = np.random.uniform(0, 1, (batch_size, height, width, 3)) - with self.test_session() as sess: - inputs = tf.placeholder(tf.float32, shape=(batch_size, None, None, 3)) - logits, end_points = inception.inception_v1(inputs, num_classes) - self.assertTrue(logits.op.name.startswith('InceptionV1/Logits')) - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - pre_pool = end_points['Mixed_5c'] - feed_dict = {inputs: input_np} - tf.global_variables_initializer().run() - pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict) - self.assertListEqual(list(pre_pool_out.shape), [batch_size, 7, 7, 1024]) - - def testUnknowBatchSize(self): - batch_size = 1 - height, width = 224, 224 - num_classes = 1000 - - inputs = tf.placeholder(tf.float32, (None, height, width, 3)) - logits, _ = inception.inception_v1(inputs, num_classes) - self.assertTrue(logits.op.name.startswith('InceptionV1/Logits')) - self.assertListEqual(logits.get_shape().as_list(), - [None, num_classes]) - images = tf.random_uniform((batch_size, height, width, 3)) - - with 
self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - output = sess.run(logits, {inputs: images.eval()}) - self.assertEquals(output.shape, (batch_size, num_classes)) - - def testEvaluation(self): - batch_size = 2 - height, width = 224, 224 - num_classes = 1000 - - eval_inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = inception.inception_v1(eval_inputs, num_classes, - is_training=False) - predictions = tf.argmax(logits, 1) - - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - output = sess.run(predictions) - self.assertEquals(output.shape, (batch_size,)) - - def testTrainEvalWithReuse(self): - train_batch_size = 5 - eval_batch_size = 2 - height, width = 224, 224 - num_classes = 1000 - - train_inputs = tf.random_uniform((train_batch_size, height, width, 3)) - inception.inception_v1(train_inputs, num_classes) - eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3)) - logits, _ = inception.inception_v1(eval_inputs, num_classes, reuse=True) - predictions = tf.argmax(logits, 1) - - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - output = sess.run(predictions) - self.assertEquals(output.shape, (eval_batch_size,)) - - def testLogitsNotSqueezed(self): - num_classes = 25 - images = tf.random_uniform([1, 224, 224, 3]) - logits, _ = inception.inception_v1(images, - num_classes=num_classes, - spatial_squeeze=False) - - with self.test_session() as sess: - tf.global_variables_initializer().run() - logits_out = sess.run(logits) - self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/examples/slim/nets/inception_v2.py b/examples/slim/nets/inception_v2.py deleted file mode 100644 index 2651f71f..00000000 --- a/examples/slim/nets/inception_v2.py +++ /dev/null @@ -1,520 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains the definition for inception v2 classification network.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from nets import inception_utils - -slim = tf.contrib.slim -trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) - - -def inception_v2_base(inputs, - final_endpoint='Mixed_5c', - min_depth=16, - depth_multiplier=1.0, - scope=None): - """Inception v2 (6a2). - - Constructs an Inception v2 network from inputs to the given final endpoint. - This method can construct the network up to the layer inception(5b) as - described in http://arxiv.org/abs/1502.03167. - - Args: - inputs: a tensor of shape [batch_size, height, width, channels]. - final_endpoint: specifies the endpoint to construct the network up to. 
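# [Editor's aside, not part of this diff] A sketch of the final_endpoint
# contract described above, shared by all the inception_v*_base functions:
# build stages in order, record each in end_points, and return early at the
# requested stage. This is the behaviour the testBuildOnlyUptoFinalEndpoint
# tests exercise.
def tiny_base(inputs, final_endpoint='Stage_b'):
  end_points = {}
  net = inputs
  for name, layer in [('Stage_a', lambda x: x + 1),
                      ('Stage_b', lambda x: x * 2),
                      ('Stage_c', lambda x: x - 3)]:
    net = layer(net)
    end_points[name] = net
    if name == final_endpoint:
      return net, end_points
  raise ValueError('Unknown final endpoint %s' % final_endpoint)

net, end_points = tiny_base(1, final_endpoint='Stage_b')
assert net == 4 and sorted(end_points) == ['Stage_a', 'Stage_b']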
It - can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', - 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', 'Mixed_4a', - 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a', 'Mixed_5b', - 'Mixed_5c']. - min_depth: Minimum depth value (number of channels) for all convolution ops. - Enforced when depth_multiplier < 1, and not an active constraint when - depth_multiplier >= 1. - depth_multiplier: Float multiplier for the depth (number of channels) - for all convolution ops. The value must be greater than zero. Typical - usage will be to set this value in (0, 1) to reduce the number of - parameters or computation cost of the model. - scope: Optional variable_scope. - - Returns: - tensor_out: output tensor corresponding to the final_endpoint. - end_points: a set of activations for external use, for example summaries or - losses. - - Raises: - ValueError: if final_endpoint is not set to one of the predefined values, - or depth_multiplier <= 0 - """ - - # end_points will collect relevant activations for external use, for example - # summaries or losses. - end_points = {} - - # Used to find thinned depths for each layer. - if depth_multiplier <= 0: - raise ValueError('depth_multiplier is not greater than zero.') - depth = lambda d: max(int(d * depth_multiplier), min_depth) - - with tf.variable_scope(scope, 'InceptionV2', [inputs]): - with slim.arg_scope( - [slim.conv2d, slim.max_pool2d, slim.avg_pool2d, slim.separable_conv2d], - stride=1, padding='SAME'): - - # Note that sizes in the comments below assume an input spatial size of - # 224x224, however, the inputs can be of any size greater 32x32. - - # 224 x 224 x 3 - end_point = 'Conv2d_1a_7x7' - # depthwise_multiplier here is different from depth_multiplier. - # depthwise_multiplier determines the output channels of the initial - # depthwise conv (see docs for tf.nn.separable_conv2d), while - # depth_multiplier controls the # channels of the subsequent 1x1 - # convolution. Must have - # in_channels * depthwise_multipler <= out_channels - # so that the separable convolution is not overparameterized. - depthwise_multiplier = min(int(depth(64) / 3), 8) - net = slim.separable_conv2d( - inputs, depth(64), [7, 7], depth_multiplier=depthwise_multiplier, - stride=2, weights_initializer=trunc_normal(1.0), - scope=end_point) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - # 112 x 112 x 64 - end_point = 'MaxPool_2a_3x3' - net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - # 56 x 56 x 64 - end_point = 'Conv2d_2b_1x1' - net = slim.conv2d(net, depth(64), [1, 1], scope=end_point, - weights_initializer=trunc_normal(0.1)) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - # 56 x 56 x 64 - end_point = 'Conv2d_2c_3x3' - net = slim.conv2d(net, depth(192), [3, 3], scope=end_point) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - # 56 x 56 x 192 - end_point = 'MaxPool_3a_3x3' - net = slim.max_pool2d(net, [3, 3], scope=end_point, stride=2) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - # 28 x 28 x 192 - # Inception module. 
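# [Editor's aside, not part of this diff] How the depth_multiplier / min_depth
# arguments documented above thin every convolution, and how the stem derives
# depthwise_multiplier so in_channels * depthwise_multiplier <= out_channels.
def make_depth_fn(depth_multiplier, min_depth=16):
  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')
  return lambda d: max(int(d * depth_multiplier), min_depth)

depth = make_depth_fn(0.5)
assert depth(64) == 32   # thinned by the multiplier
assert depth(16) == 16   # clamped from below by min_depth
assert min(int(depth(64) / 3), 8) == 8   # 32 // 3 = 10, capped at 8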
- end_point = 'Mixed_3b' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d( - net, depth(64), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, depth(64), [3, 3], - scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d( - net, depth(64), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], - scope='Conv2d_0b_3x3') - branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], - scope='Conv2d_0c_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d( - branch_3, depth(32), [1, 1], - weights_initializer=trunc_normal(0.1), - scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - # 28 x 28 x 256 - end_point = 'Mixed_3c' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d( - net, depth(64), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, depth(96), [3, 3], - scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d( - net, depth(64), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], - scope='Conv2d_0b_3x3') - branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], - scope='Conv2d_0c_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d( - branch_3, depth(64), [1, 1], - weights_initializer=trunc_normal(0.1), - scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - # 28 x 28 x 320 - end_point = 'Mixed_4a' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d( - net, depth(128), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_0 = slim.conv2d(branch_0, depth(160), [3, 3], stride=2, - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d( - net, depth(64), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d( - branch_1, depth(96), [3, 3], scope='Conv2d_0b_3x3') - branch_1 = slim.conv2d( - branch_1, depth(96), [3, 3], stride=2, scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.max_pool2d( - net, [3, 3], stride=2, scope='MaxPool_1a_3x3') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2]) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - # 14 x 14 x 576 - end_point = 'Mixed_4b' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(224), [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d( - net, depth(64), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d( - branch_1, 
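# [Editor's aside, not part of this diff] Channel bookkeeping for the Mixed_3b
# concat above: with depth_multiplier=1.0 the four branches contribute
# 64 + 64 + 96 + 32 channels along axis 3.
assert 64 + 64 + 96 + 32 == 256   # matches the "28 x 28 x 256" comment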
depth(96), [3, 3], scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d( - net, depth(96), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, depth(128), [3, 3], - scope='Conv2d_0b_3x3') - branch_2 = slim.conv2d(branch_2, depth(128), [3, 3], - scope='Conv2d_0c_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d( - branch_3, depth(128), [1, 1], - weights_initializer=trunc_normal(0.1), - scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - # 14 x 14 x 576 - end_point = 'Mixed_4c' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d( - net, depth(96), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, depth(128), [3, 3], - scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d( - net, depth(96), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, depth(128), [3, 3], - scope='Conv2d_0b_3x3') - branch_2 = slim.conv2d(branch_2, depth(128), [3, 3], - scope='Conv2d_0c_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d( - branch_3, depth(128), [1, 1], - weights_initializer=trunc_normal(0.1), - scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - # 14 x 14 x 576 - end_point = 'Mixed_4d' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d( - net, depth(128), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, depth(160), [3, 3], - scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d( - net, depth(128), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, depth(160), [3, 3], - scope='Conv2d_0b_3x3') - branch_2 = slim.conv2d(branch_2, depth(160), [3, 3], - scope='Conv2d_0c_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d( - branch_3, depth(96), [1, 1], - weights_initializer=trunc_normal(0.1), - scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - - # 14 x 14 x 576 - end_point = 'Mixed_4e' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(96), [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d( - net, depth(128), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, depth(192), [3, 3], - scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d( - net, depth(160), [1, 1], - 
weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, depth(192), [3, 3], - scope='Conv2d_0b_3x3') - branch_2 = slim.conv2d(branch_2, depth(192), [3, 3], - scope='Conv2d_0c_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d( - branch_3, depth(96), [1, 1], - weights_initializer=trunc_normal(0.1), - scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - # 14 x 14 x 576 - end_point = 'Mixed_5a' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d( - net, depth(128), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_0 = slim.conv2d(branch_0, depth(192), [3, 3], stride=2, - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d( - net, depth(192), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], - scope='Conv2d_0b_3x3') - branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], stride=2, - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.max_pool2d(net, [3, 3], stride=2, - scope='MaxPool_1a_3x3') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2]) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - # 7 x 7 x 1024 - end_point = 'Mixed_5b' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(352), [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d( - net, depth(192), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, depth(320), [3, 3], - scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d( - net, depth(160), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], - scope='Conv2d_0b_3x3') - branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], - scope='Conv2d_0c_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d( - branch_3, depth(128), [1, 1], - weights_initializer=trunc_normal(0.1), - scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - - # 7 x 7 x 1024 - end_point = 'Mixed_5c' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(352), [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d( - net, depth(192), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, depth(320), [3, 3], - scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d( - net, depth(192), [1, 1], - weights_initializer=trunc_normal(0.09), - scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], - scope='Conv2d_0b_3x3') - branch_2 = slim.conv2d(branch_2, depth(224), [3, 3], - scope='Conv2d_0c_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3') - branch_3 = slim.conv2d( - 
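# [Editor's aside, not part of this diff] Mixed_5a above is a grid-reduction
# block: two stride-2 conv branches run in parallel with a stride-2 max pool,
# so the concat halves the 14x14 grid while growing the depth. The pooled
# branch passes the incoming 576 channels through unchanged.
assert 192 + 256 + 576 == 1024   # matches the "7 x 7 x 1024" comment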
branch_3, depth(128), [1, 1], - weights_initializer=trunc_normal(0.1), - scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - raise ValueError('Unknown final endpoint %s' % final_endpoint) - - -def inception_v2(inputs, - num_classes=1000, - is_training=True, - dropout_keep_prob=0.8, - min_depth=16, - depth_multiplier=1.0, - prediction_fn=slim.softmax, - spatial_squeeze=True, - reuse=None, - scope='InceptionV2'): - """Inception v2 model for classification. - - Constructs an Inception v2 network for classification as described in - http://arxiv.org/abs/1502.03167. - - The default image size used to train this network is 224x224. - - Args: - inputs: a tensor of shape [batch_size, height, width, channels]. - num_classes: number of predicted classes. - is_training: whether is training or not. - dropout_keep_prob: the percentage of activation values that are retained. - min_depth: Minimum depth value (number of channels) for all convolution ops. - Enforced when depth_multiplier < 1, and not an active constraint when - depth_multiplier >= 1. - depth_multiplier: Float multiplier for the depth (number of channels) - for all convolution ops. The value must be greater than zero. Typical - usage will be to set this value in (0, 1) to reduce the number of - parameters or computation cost of the model. - prediction_fn: a function to get predictions out of logits. - spatial_squeeze: if True, logits is of shape [B, C], if false logits is - of shape [B, 1, 1, C], where B is batch_size and C is number of classes. - reuse: whether or not the network and its variables should be reused. To be - able to reuse 'scope' must be given. - scope: Optional variable_scope. - - Returns: - logits: the pre-softmax activations, a tensor of size - [batch_size, num_classes] - end_points: a dictionary from components of the network to the corresponding - activation. - - Raises: - ValueError: if final_endpoint is not set to one of the predefined values, - or depth_multiplier <= 0 - """ - if depth_multiplier <= 0: - raise ValueError('depth_multiplier is not greater than zero.') - - # Final pooling and prediction - with tf.variable_scope(scope, 'InceptionV2', [inputs, num_classes], - reuse=reuse) as scope: - with slim.arg_scope([slim.batch_norm, slim.dropout], - is_training=is_training): - net, end_points = inception_v2_base( - inputs, scope=scope, min_depth=min_depth, - depth_multiplier=depth_multiplier) - with tf.variable_scope('Logits'): - kernel_size = _reduced_kernel_size_for_small_input(net, [7, 7]) - net = slim.avg_pool2d(net, kernel_size, padding='VALID', - scope='AvgPool_1a_{}x{}'.format(*kernel_size)) - # 1 x 1 x 1024 - net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b') - logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, - normalizer_fn=None, scope='Conv2d_1c_1x1') - if spatial_squeeze: - logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze') - end_points['Logits'] = logits - end_points['Predictions'] = prediction_fn(logits, scope='Predictions') - return logits, end_points -inception_v2.default_image_size = 224 - - -def _reduced_kernel_size_for_small_input(input_tensor, kernel_size): - """Define kernel size which is automatically reduced for small input. - - If the shape of the input images is unknown at graph construction time this - function assumes that the input images are is large enough. 
- - Args: - input_tensor: input tensor of size [batch_size, height, width, channels]. - kernel_size: desired kernel size of length 2: [kernel_height, kernel_width] - - Returns: - a tensor with the kernel size. - - TODO(jrru): Make this function work with unknown shapes. Theoretically, this - can be done with the code below. Problems are two-fold: (1) If the shape was - known, it will be lost. (2) inception.slim.ops._two_element_tuple cannot - handle tensors that define the kernel size. - shape = tf.shape(input_tensor) - return = tf.pack([tf.minimum(shape[1], kernel_size[0]), - tf.minimum(shape[2], kernel_size[1])]) - - """ - shape = input_tensor.get_shape().as_list() - if shape[1] is None or shape[2] is None: - kernel_size_out = kernel_size - else: - kernel_size_out = [min(shape[1], kernel_size[0]), - min(shape[2], kernel_size[1])] - return kernel_size_out - - -inception_v2_arg_scope = inception_utils.inception_arg_scope diff --git a/examples/slim/nets/inception_v2_test.py b/examples/slim/nets/inception_v2_test.py deleted file mode 100644 index a1318c7c..00000000 --- a/examples/slim/nets/inception_v2_test.py +++ /dev/null @@ -1,262 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for nets.inception_v2.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf - -from nets import inception - -slim = tf.contrib.slim - - -class InceptionV2Test(tf.test.TestCase): - - def testBuildClassificationNetwork(self): - batch_size = 5 - height, width = 224, 224 - num_classes = 1000 - - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, end_points = inception.inception_v2(inputs, num_classes) - self.assertTrue(logits.op.name.startswith('InceptionV2/Logits')) - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - self.assertTrue('Predictions' in end_points) - self.assertListEqual(end_points['Predictions'].get_shape().as_list(), - [batch_size, num_classes]) - - def testBuildBaseNetwork(self): - batch_size = 5 - height, width = 224, 224 - - inputs = tf.random_uniform((batch_size, height, width, 3)) - mixed_5c, end_points = inception.inception_v2_base(inputs) - self.assertTrue(mixed_5c.op.name.startswith('InceptionV2/Mixed_5c')) - self.assertListEqual(mixed_5c.get_shape().as_list(), - [batch_size, 7, 7, 1024]) - expected_endpoints = ['Mixed_3b', 'Mixed_3c', 'Mixed_4a', 'Mixed_4b', - 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a', - 'Mixed_5b', 'Mixed_5c', 'Conv2d_1a_7x7', - 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 'Conv2d_2c_3x3', - 'MaxPool_3a_3x3'] - self.assertItemsEqual(end_points.keys(), expected_endpoints) - - def testBuildOnlyUptoFinalEndpoint(self): - batch_size = 5 - height, width = 224, 224 - endpoints = ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', - 'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 
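# [Editor's aside, not part of this diff] What the
# _reduced_kernel_size_for_small_input helper above computes in practice,
# restated as a standalone function over a static [batch, h, w, c] shape.
def reduced_kernel(static_shape, kernel_size):
  if static_shape[1] is None or static_shape[2] is None:
    return kernel_size   # unknown spatial dims: assume a big enough input
  return [min(static_shape[1], kernel_size[0]),
          min(static_shape[2], kernel_size[1])]

assert reduced_kernel([5, 7, 7, 1024], [7, 7]) == [7, 7]
assert reduced_kernel([5, 4, 4, 1024], [7, 7]) == [4, 4]   # 112x112 input
assert reduced_kernel([5, None, None, 1024], [7, 7]) == [7, 7]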
'Mixed_3b', 'Mixed_3c', - 'Mixed_4a', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', - 'Mixed_5a', 'Mixed_5b', 'Mixed_5c'] - for index, endpoint in enumerate(endpoints): - with tf.Graph().as_default(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - out_tensor, end_points = inception.inception_v2_base( - inputs, final_endpoint=endpoint) - self.assertTrue(out_tensor.op.name.startswith( - 'InceptionV2/' + endpoint)) - self.assertItemsEqual(endpoints[:index+1], end_points) - - def testBuildAndCheckAllEndPointsUptoMixed5c(self): - batch_size = 5 - height, width = 224, 224 - - inputs = tf.random_uniform((batch_size, height, width, 3)) - _, end_points = inception.inception_v2_base(inputs, - final_endpoint='Mixed_5c') - endpoints_shapes = {'Mixed_3b': [batch_size, 28, 28, 256], - 'Mixed_3c': [batch_size, 28, 28, 320], - 'Mixed_4a': [batch_size, 14, 14, 576], - 'Mixed_4b': [batch_size, 14, 14, 576], - 'Mixed_4c': [batch_size, 14, 14, 576], - 'Mixed_4d': [batch_size, 14, 14, 576], - 'Mixed_4e': [batch_size, 14, 14, 576], - 'Mixed_5a': [batch_size, 7, 7, 1024], - 'Mixed_5b': [batch_size, 7, 7, 1024], - 'Mixed_5c': [batch_size, 7, 7, 1024], - 'Conv2d_1a_7x7': [batch_size, 112, 112, 64], - 'MaxPool_2a_3x3': [batch_size, 56, 56, 64], - 'Conv2d_2b_1x1': [batch_size, 56, 56, 64], - 'Conv2d_2c_3x3': [batch_size, 56, 56, 192], - 'MaxPool_3a_3x3': [batch_size, 28, 28, 192]} - self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys()) - for endpoint_name in endpoints_shapes: - expected_shape = endpoints_shapes[endpoint_name] - self.assertTrue(endpoint_name in end_points) - self.assertListEqual(end_points[endpoint_name].get_shape().as_list(), - expected_shape) - - def testModelHasExpectedNumberOfParameters(self): - batch_size = 5 - height, width = 224, 224 - inputs = tf.random_uniform((batch_size, height, width, 3)) - with slim.arg_scope(inception.inception_v2_arg_scope()): - inception.inception_v2_base(inputs) - total_params, _ = slim.model_analyzer.analyze_vars( - slim.get_model_variables()) - self.assertAlmostEqual(10173112, total_params) - - def testBuildEndPointsWithDepthMultiplierLessThanOne(self): - batch_size = 5 - height, width = 224, 224 - num_classes = 1000 - - inputs = tf.random_uniform((batch_size, height, width, 3)) - _, end_points = inception.inception_v2(inputs, num_classes) - - endpoint_keys = [key for key in end_points.keys() - if key.startswith('Mixed') or key.startswith('Conv')] - - _, end_points_with_multiplier = inception.inception_v2( - inputs, num_classes, scope='depth_multiplied_net', - depth_multiplier=0.5) - - for key in endpoint_keys: - original_depth = end_points[key].get_shape().as_list()[3] - new_depth = end_points_with_multiplier[key].get_shape().as_list()[3] - self.assertEqual(0.5 * original_depth, new_depth) - - def testBuildEndPointsWithDepthMultiplierGreaterThanOne(self): - batch_size = 5 - height, width = 224, 224 - num_classes = 1000 - - inputs = tf.random_uniform((batch_size, height, width, 3)) - _, end_points = inception.inception_v2(inputs, num_classes) - - endpoint_keys = [key for key in end_points.keys() - if key.startswith('Mixed') or key.startswith('Conv')] - - _, end_points_with_multiplier = inception.inception_v2( - inputs, num_classes, scope='depth_multiplied_net', - depth_multiplier=2.0) - - for key in endpoint_keys: - original_depth = end_points[key].get_shape().as_list()[3] - new_depth = end_points_with_multiplier[key].get_shape().as_list()[3] - self.assertEqual(2.0 * original_depth, new_depth) - - def 
testRaiseValueErrorWithInvalidDepthMultiplier(self): - batch_size = 5 - height, width = 224, 224 - num_classes = 1000 - - inputs = tf.random_uniform((batch_size, height, width, 3)) - with self.assertRaises(ValueError): - _ = inception.inception_v2(inputs, num_classes, depth_multiplier=-0.1) - with self.assertRaises(ValueError): - _ = inception.inception_v2(inputs, num_classes, depth_multiplier=0.0) - - def testHalfSizeImages(self): - batch_size = 5 - height, width = 112, 112 - num_classes = 1000 - - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, end_points = inception.inception_v2(inputs, num_classes) - self.assertTrue(logits.op.name.startswith('InceptionV2/Logits')) - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - pre_pool = end_points['Mixed_5c'] - self.assertListEqual(pre_pool.get_shape().as_list(), - [batch_size, 4, 4, 1024]) - - def testUnknownImageShape(self): - tf.reset_default_graph() - batch_size = 2 - height, width = 224, 224 - num_classes = 1000 - input_np = np.random.uniform(0, 1, (batch_size, height, width, 3)) - with self.test_session() as sess: - inputs = tf.placeholder(tf.float32, shape=(batch_size, None, None, 3)) - logits, end_points = inception.inception_v2(inputs, num_classes) - self.assertTrue(logits.op.name.startswith('InceptionV2/Logits')) - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - pre_pool = end_points['Mixed_5c'] - feed_dict = {inputs: input_np} - tf.global_variables_initializer().run() - pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict) - self.assertListEqual(list(pre_pool_out.shape), [batch_size, 7, 7, 1024]) - - def testUnknowBatchSize(self): - batch_size = 1 - height, width = 224, 224 - num_classes = 1000 - - inputs = tf.placeholder(tf.float32, (None, height, width, 3)) - logits, _ = inception.inception_v2(inputs, num_classes) - self.assertTrue(logits.op.name.startswith('InceptionV2/Logits')) - self.assertListEqual(logits.get_shape().as_list(), - [None, num_classes]) - images = tf.random_uniform((batch_size, height, width, 3)) - - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - output = sess.run(logits, {inputs: images.eval()}) - self.assertEquals(output.shape, (batch_size, num_classes)) - - def testEvaluation(self): - batch_size = 2 - height, width = 224, 224 - num_classes = 1000 - - eval_inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = inception.inception_v2(eval_inputs, num_classes, - is_training=False) - predictions = tf.argmax(logits, 1) - - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - output = sess.run(predictions) - self.assertEquals(output.shape, (batch_size,)) - - def testTrainEvalWithReuse(self): - train_batch_size = 5 - eval_batch_size = 2 - height, width = 150, 150 - num_classes = 1000 - - train_inputs = tf.random_uniform((train_batch_size, height, width, 3)) - inception.inception_v2(train_inputs, num_classes) - eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3)) - logits, _ = inception.inception_v2(eval_inputs, num_classes, reuse=True) - predictions = tf.argmax(logits, 1) - - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - output = sess.run(predictions) - self.assertEquals(output.shape, (eval_batch_size,)) - - def testLogitsNotSqueezed(self): - num_classes = 25 - images = tf.random_uniform([1, 224, 224, 3]) - logits, _ = inception.inception_v2(images, - num_classes=num_classes, - spatial_squeeze=False) - 
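# [Editor's aside, not part of this diff] Why testHalfSizeImages above expects
# a 4x4 Mixed_5c map: the v2 base (like the v1 base) has five stride-2 stages
# with SAME padding, so each stage halves the spatial size with ceiling
# division.
import math

def after_stride2_stages(size, stages=5):
  for _ in range(stages):
    size = int(math.ceil(size / 2.0))
  return size

assert after_stride2_stages(224) == 7   # [batch, 7, 7, 1024]
assert after_stride2_stages(112) == 4   # [batch, 4, 4, 1024]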
- with self.test_session() as sess: - tf.global_variables_initializer().run() - logits_out = sess.run(logits) - self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/examples/slim/nets/inception_v3.py b/examples/slim/nets/inception_v3.py deleted file mode 100644 index d64bcfd4..00000000 --- a/examples/slim/nets/inception_v3.py +++ /dev/null @@ -1,560 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains the definition for inception v3 classification network.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from nets import inception_utils - -slim = tf.contrib.slim -trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) - - -def inception_v3_base(inputs, - final_endpoint='Mixed_7c', - min_depth=16, - depth_multiplier=1.0, - scope=None): - """Inception model from http://arxiv.org/abs/1512.00567. - - Constructs an Inception v3 network from inputs to the given final endpoint. - This method can construct the network up to the final inception block - Mixed_7c. - - Note that the names of the layers in the paper do not correspond to the names - of the endpoints registered by this function although they build the same - network. - - Here is a mapping from the old_names to the new names: - Old name | New name - ======================================= - conv0 | Conv2d_1a_3x3 - conv1 | Conv2d_2a_3x3 - conv2 | Conv2d_2b_3x3 - pool1 | MaxPool_3a_3x3 - conv3 | Conv2d_3b_1x1 - conv4 | Conv2d_4a_3x3 - pool2 | MaxPool_5a_3x3 - mixed_35x35x256a | Mixed_5b - mixed_35x35x288a | Mixed_5c - mixed_35x35x288b | Mixed_5d - mixed_17x17x768a | Mixed_6a - mixed_17x17x768b | Mixed_6b - mixed_17x17x768c | Mixed_6c - mixed_17x17x768d | Mixed_6d - mixed_17x17x768e | Mixed_6e - mixed_8x8x1280a | Mixed_7a - mixed_8x8x2048a | Mixed_7b - mixed_8x8x2048b | Mixed_7c - - Args: - inputs: a tensor of size [batch_size, height, width, channels]. - final_endpoint: specifies the endpoint to construct the network up to. It - can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', - 'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3', - 'Mixed_5b', 'Mixed_5c', 'Mixed_5d', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', - 'Mixed_6d', 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c']. - min_depth: Minimum depth value (number of channels) for all convolution ops. - Enforced when depth_multiplier < 1, and not an active constraint when - depth_multiplier >= 1. - depth_multiplier: Float multiplier for the depth (number of channels) - for all convolution ops. The value must be greater than zero. Typical - usage will be to set this value in (0, 1) to reduce the number of - parameters or computation cost of the model. - scope: Optional variable_scope. 
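# [Editor's aside, not part of this diff] The old-name -> new-name table above,
# restated as a dict; a hypothetical helper like this can translate variable
# prefixes when porting checkpoints that predate the endpoint renaming.
OLD_TO_NEW_V3_ENDPOINTS = {
    'conv0': 'Conv2d_1a_3x3', 'conv1': 'Conv2d_2a_3x3',
    'conv2': 'Conv2d_2b_3x3', 'pool1': 'MaxPool_3a_3x3',
    'conv3': 'Conv2d_3b_1x1', 'conv4': 'Conv2d_4a_3x3',
    'pool2': 'MaxPool_5a_3x3',
    'mixed_35x35x256a': 'Mixed_5b', 'mixed_35x35x288a': 'Mixed_5c',
    'mixed_35x35x288b': 'Mixed_5d', 'mixed_17x17x768a': 'Mixed_6a',
    'mixed_17x17x768b': 'Mixed_6b', 'mixed_17x17x768c': 'Mixed_6c',
    'mixed_17x17x768d': 'Mixed_6d', 'mixed_17x17x768e': 'Mixed_6e',
    'mixed_8x8x1280a': 'Mixed_7a', 'mixed_8x8x2048a': 'Mixed_7b',
    'mixed_8x8x2048b': 'Mixed_7c',
}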
- - Returns: - tensor_out: output tensor corresponding to the final_endpoint. - end_points: a set of activations for external use, for example summaries or - losses. - - Raises: - ValueError: if final_endpoint is not set to one of the predefined values, - or depth_multiplier <= 0 - """ - # end_points will collect relevant activations for external use, for example - # summaries or losses. - end_points = {} - - if depth_multiplier <= 0: - raise ValueError('depth_multiplier is not greater than zero.') - depth = lambda d: max(int(d * depth_multiplier), min_depth) - - with tf.variable_scope(scope, 'InceptionV3', [inputs]): - with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], - stride=1, padding='VALID'): - # 299 x 299 x 3 - end_point = 'Conv2d_1a_3x3' - net = slim.conv2d(inputs, depth(32), [3, 3], stride=2, scope=end_point) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - # 149 x 149 x 32 - end_point = 'Conv2d_2a_3x3' - net = slim.conv2d(net, depth(32), [3, 3], scope=end_point) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - # 147 x 147 x 32 - end_point = 'Conv2d_2b_3x3' - net = slim.conv2d(net, depth(64), [3, 3], padding='SAME', scope=end_point) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - # 147 x 147 x 64 - end_point = 'MaxPool_3a_3x3' - net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - # 73 x 73 x 64 - end_point = 'Conv2d_3b_1x1' - net = slim.conv2d(net, depth(80), [1, 1], scope=end_point) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - # 73 x 73 x 80. - end_point = 'Conv2d_4a_3x3' - net = slim.conv2d(net, depth(192), [3, 3], scope=end_point) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - # 71 x 71 x 192. - end_point = 'MaxPool_5a_3x3' - net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - # 35 x 35 x 192. - - # Inception blocks - with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], - stride=1, padding='SAME'): - # mixed: 35 x 35 x 256. - end_point = 'Mixed_5b' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, depth(64), [5, 5], - scope='Conv2d_0b_5x5') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], - scope='Conv2d_0b_3x3') - branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], - scope='Conv2d_0c_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d(branch_3, depth(32), [1, 1], - scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - - # mixed_1: 35 x 35 x 288. 
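# [Editor's aside, not part of this diff] A check of the spatial sizes in the
# stem comments above; with VALID padding, out = ceil((in - k + 1) / stride).
import math

def valid_out(size, k, stride):
  return int(math.ceil((size - k + 1) / float(stride)))

size = valid_out(299, 3, 2)    # Conv2d_1a_3x3  -> 149
size = valid_out(size, 3, 1)   # Conv2d_2a_3x3  -> 147
# Conv2d_2b_3x3 uses SAME padding, so the size stays 147.
size = valid_out(size, 3, 2)   # MaxPool_3a_3x3 -> 73
size = valid_out(size, 1, 1)   # Conv2d_3b_1x1  -> 73
size = valid_out(size, 3, 1)   # Conv2d_4a_3x3  -> 71
size = valid_out(size, 3, 2)   # MaxPool_5a_3x3 -> 35
assert size == 35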
- end_point = 'Mixed_5c' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0b_1x1') - branch_1 = slim.conv2d(branch_1, depth(64), [5, 5], - scope='Conv_1_0c_5x5') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(net, depth(64), [1, 1], - scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], - scope='Conv2d_0b_3x3') - branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], - scope='Conv2d_0c_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d(branch_3, depth(64), [1, 1], - scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - - # mixed_2: 35 x 35 x 288. - end_point = 'Mixed_5d' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, depth(64), [5, 5], - scope='Conv2d_0b_5x5') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], - scope='Conv2d_0b_3x3') - branch_2 = slim.conv2d(branch_2, depth(96), [3, 3], - scope='Conv2d_0c_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d(branch_3, depth(64), [1, 1], - scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - - # mixed_3: 17 x 17 x 768. - end_point = 'Mixed_6a' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(384), [3, 3], stride=2, - padding='VALID', scope='Conv2d_1a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, depth(96), [3, 3], - scope='Conv2d_0b_3x3') - branch_1 = slim.conv2d(branch_1, depth(96), [3, 3], stride=2, - padding='VALID', scope='Conv2d_1a_1x1') - with tf.variable_scope('Branch_2'): - branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', - scope='MaxPool_1a_3x3') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2]) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - - # mixed4: 17 x 17 x 768. 
- end_point = 'Mixed_6b' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, depth(128), [1, 7], - scope='Conv2d_0b_1x7') - branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], - scope='Conv2d_0c_7x1') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, depth(128), [7, 1], - scope='Conv2d_0b_7x1') - branch_2 = slim.conv2d(branch_2, depth(128), [1, 7], - scope='Conv2d_0c_1x7') - branch_2 = slim.conv2d(branch_2, depth(128), [7, 1], - scope='Conv2d_0d_7x1') - branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], - scope='Conv2d_0e_1x7') - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], - scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - - # mixed_5: 17 x 17 x 768. - end_point = 'Mixed_6c' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, depth(160), [1, 7], - scope='Conv2d_0b_1x7') - branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], - scope='Conv2d_0c_7x1') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, depth(160), [7, 1], - scope='Conv2d_0b_7x1') - branch_2 = slim.conv2d(branch_2, depth(160), [1, 7], - scope='Conv2d_0c_1x7') - branch_2 = slim.conv2d(branch_2, depth(160), [7, 1], - scope='Conv2d_0d_7x1') - branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], - scope='Conv2d_0e_1x7') - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], - scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - # mixed_6: 17 x 17 x 768. 
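# [Editor's aside, not part of this diff] Why Mixed_6b/6c above factor a 7x7
# convolution into a 1x7 followed by a 7x1: for C input and C output channels
# the weight count drops from 49*C*C to 14*C*C at the same 7x7 receptive field.
C = 192
full = 7 * 7 * C * C                 # one 7x7 conv
factored = (1 * 7 + 7 * 1) * C * C   # a 1x7 conv then a 7x1 conv
assert factored * 49 == full * 14    # factored uses 14/49 of the weights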
- end_point = 'Mixed_6d' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, depth(160), [1, 7], - scope='Conv2d_0b_1x7') - branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], - scope='Conv2d_0c_7x1') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, depth(160), [7, 1], - scope='Conv2d_0b_7x1') - branch_2 = slim.conv2d(branch_2, depth(160), [1, 7], - scope='Conv2d_0c_1x7') - branch_2 = slim.conv2d(branch_2, depth(160), [7, 1], - scope='Conv2d_0d_7x1') - branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], - scope='Conv2d_0e_1x7') - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], - scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - - # mixed_7: 17 x 17 x 768. - end_point = 'Mixed_6e' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, depth(192), [1, 7], - scope='Conv2d_0b_1x7') - branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], - scope='Conv2d_0c_7x1') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, depth(192), [7, 1], - scope='Conv2d_0b_7x1') - branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], - scope='Conv2d_0c_1x7') - branch_2 = slim.conv2d(branch_2, depth(192), [7, 1], - scope='Conv2d_0d_7x1') - branch_2 = slim.conv2d(branch_2, depth(192), [1, 7], - scope='Conv2d_0e_1x7') - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d(branch_3, depth(192), [1, 1], - scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - - # mixed_8: 8 x 8 x 1280. - end_point = 'Mixed_7a' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') - branch_0 = slim.conv2d(branch_0, depth(320), [3, 3], stride=2, - padding='VALID', scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, depth(192), [1, 7], - scope='Conv2d_0b_1x7') - branch_1 = slim.conv2d(branch_1, depth(192), [7, 1], - scope='Conv2d_0c_7x1') - branch_1 = slim.conv2d(branch_1, depth(192), [3, 3], stride=2, - padding='VALID', scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', - scope='MaxPool_1a_3x3') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2]) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - # mixed_9: 8 x 8 x 2048. 
- end_point = 'Mixed_7b' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1') - branch_1 = tf.concat(axis=3, values=[ - slim.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'), - slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0b_3x1')]) - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d( - branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3') - branch_2 = tf.concat(axis=3, values=[ - slim.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'), - slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')]) - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d( - branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - - # mixed_10: 8 x 8 x 2048. - end_point = 'Mixed_7c' - with tf.variable_scope(end_point): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1') - branch_1 = tf.concat(axis=3, values=[ - slim.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'), - slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0c_3x1')]) - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d( - branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3') - branch_2 = tf.concat(axis=3, values=[ - slim.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'), - slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')]) - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d( - branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1') - net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - end_points[end_point] = net - if end_point == final_endpoint: return net, end_points - raise ValueError('Unknown final endpoint %s' % final_endpoint) - - -def inception_v3(inputs, - num_classes=1000, - is_training=True, - dropout_keep_prob=0.8, - min_depth=16, - depth_multiplier=1.0, - prediction_fn=slim.softmax, - spatial_squeeze=True, - reuse=None, - scope='InceptionV3'): - """Inception model from http://arxiv.org/abs/1512.00567. - - "Rethinking the Inception Architecture for Computer Vision" - - Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens, - Zbigniew Wojna. - - With the default arguments this method constructs the exact model defined in - the paper. However, one can experiment with variations of the inception_v3 - network by changing arguments dropout_keep_prob, min_depth and - depth_multiplier. - - The default image size used to train this network is 299x299. - - Args: - inputs: a tensor of size [batch_size, height, width, channels]. - num_classes: number of predicted classes. - is_training: whether is training or not. - dropout_keep_prob: the percentage of activation values that are retained. - min_depth: Minimum depth value (number of channels) for all convolution ops. 
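# [Editor's aside, not part of this diff] Mixed_7b above widens via expanded
# "filter bank" outputs: Branch_1 and Branch_2 each concatenate parallel 1x3
# and 3x1 convolutions before the block-level concat.
assert 320 + (384 + 384) + (384 + 384) + 192 == 2048   # "8 x 8 x 2048"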
- Enforced when depth_multiplier < 1, and not an active constraint when - depth_multiplier >= 1. - depth_multiplier: Float multiplier for the depth (number of channels) - for all convolution ops. The value must be greater than zero. Typical - usage will be to set this value in (0, 1) to reduce the number of - parameters or computation cost of the model. - prediction_fn: a function to get predictions out of logits. - spatial_squeeze: if True, logits is of shape [B, C], if false logits is - of shape [B, 1, 1, C], where B is batch_size and C is number of classes. - reuse: whether or not the network and its variables should be reused. To be - able to reuse 'scope' must be given. - scope: Optional variable_scope. - - Returns: - logits: the pre-softmax activations, a tensor of size - [batch_size, num_classes] - end_points: a dictionary from components of the network to the corresponding - activation. - - Raises: - ValueError: if 'depth_multiplier' is less than or equal to zero. - """ - if depth_multiplier <= 0: - raise ValueError('depth_multiplier is not greater than zero.') - depth = lambda d: max(int(d * depth_multiplier), min_depth) - - with tf.variable_scope(scope, 'InceptionV3', [inputs, num_classes], - reuse=reuse) as scope: - with slim.arg_scope([slim.batch_norm, slim.dropout], - is_training=is_training): - net, end_points = inception_v3_base( - inputs, scope=scope, min_depth=min_depth, - depth_multiplier=depth_multiplier) - - # Auxiliary Head logits - with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], - stride=1, padding='SAME'): - aux_logits = end_points['Mixed_6e'] - with tf.variable_scope('AuxLogits'): - aux_logits = slim.avg_pool2d( - aux_logits, [5, 5], stride=3, padding='VALID', - scope='AvgPool_1a_5x5') - aux_logits = slim.conv2d(aux_logits, depth(128), [1, 1], - scope='Conv2d_1b_1x1') - - # Shape of feature map before the final layer. - kernel_size = _reduced_kernel_size_for_small_input( - aux_logits, [5, 5]) - aux_logits = slim.conv2d( - aux_logits, depth(768), kernel_size, - weights_initializer=trunc_normal(0.01), - padding='VALID', scope='Conv2d_2a_{}x{}'.format(*kernel_size)) - aux_logits = slim.conv2d( - aux_logits, num_classes, [1, 1], activation_fn=None, - normalizer_fn=None, weights_initializer=trunc_normal(0.001), - scope='Conv2d_2b_1x1') - if spatial_squeeze: - aux_logits = tf.squeeze(aux_logits, [1, 2], name='SpatialSqueeze') - end_points['AuxLogits'] = aux_logits - - # Final pooling and prediction - with tf.variable_scope('Logits'): - kernel_size = _reduced_kernel_size_for_small_input(net, [8, 8]) - net = slim.avg_pool2d(net, kernel_size, padding='VALID', - scope='AvgPool_1a_{}x{}'.format(*kernel_size)) - # 1 x 1 x 2048 - net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b') - end_points['PreLogits'] = net - # 2048 - logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, - normalizer_fn=None, scope='Conv2d_1c_1x1') - if spatial_squeeze: - logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze') - # 1000 - end_points['Logits'] = logits - end_points['Predictions'] = prediction_fn(logits, scope='Predictions') - return logits, end_points -inception_v3.default_image_size = 299 - - -def _reduced_kernel_size_for_small_input(input_tensor, kernel_size): - """Define kernel size which is automatically reduced for small input. - - If the shape of the input images is unknown at graph construction time this - function assumes that the input images are is large enough. 
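# [Editor's aside, not part of this diff; the 0.4 weight below is an assumption
# following common practice, not taken from this code] The AuxLogits endpoint
# built above is typically trained as a down-weighted auxiliary classifier and
# ignored at inference time.
import tensorflow as tf

def inception_v3_loss(onehot_labels, end_points, aux_weight=0.4):
  main = tf.losses.softmax_cross_entropy(onehot_labels, end_points['Logits'])
  aux = tf.losses.softmax_cross_entropy(onehot_labels, end_points['AuxLogits'])
  return main + aux_weight * aux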
- - Args: - input_tensor: input tensor of size [batch_size, height, width, channels]. - kernel_size: desired kernel size of length 2: [kernel_height, kernel_width] - - Returns: - a tensor with the kernel size. - - TODO(jrru): Make this function work with unknown shapes. Theoretically, this - can be done with the code below. Problems are two-fold: (1) If the shape was - known, it will be lost. (2) inception.slim.ops._two_element_tuple cannot - handle tensors that define the kernel size. - shape = tf.shape(input_tensor) - return = tf.pack([tf.minimum(shape[1], kernel_size[0]), - tf.minimum(shape[2], kernel_size[1])]) - - """ - shape = input_tensor.get_shape().as_list() - if shape[1] is None or shape[2] is None: - kernel_size_out = kernel_size - else: - kernel_size_out = [min(shape[1], kernel_size[0]), - min(shape[2], kernel_size[1])] - return kernel_size_out - - -inception_v3_arg_scope = inception_utils.inception_arg_scope diff --git a/examples/slim/nets/inception_v3_test.py b/examples/slim/nets/inception_v3_test.py deleted file mode 100644 index a1e870d3..00000000 --- a/examples/slim/nets/inception_v3_test.py +++ /dev/null @@ -1,292 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for nets.inception_v1.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf - -from nets import inception - -slim = tf.contrib.slim - - -class InceptionV3Test(tf.test.TestCase): - - def testBuildClassificationNetwork(self): - batch_size = 5 - height, width = 299, 299 - num_classes = 1000 - - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, end_points = inception.inception_v3(inputs, num_classes) - self.assertTrue(logits.op.name.startswith('InceptionV3/Logits')) - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - self.assertTrue('Predictions' in end_points) - self.assertListEqual(end_points['Predictions'].get_shape().as_list(), - [batch_size, num_classes]) - - def testBuildBaseNetwork(self): - batch_size = 5 - height, width = 299, 299 - - inputs = tf.random_uniform((batch_size, height, width, 3)) - final_endpoint, end_points = inception.inception_v3_base(inputs) - self.assertTrue(final_endpoint.op.name.startswith( - 'InceptionV3/Mixed_7c')) - self.assertListEqual(final_endpoint.get_shape().as_list(), - [batch_size, 8, 8, 2048]) - expected_endpoints = ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', - 'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', - 'MaxPool_5a_3x3', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d', - 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', - 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c'] - self.assertItemsEqual(end_points.keys(), expected_endpoints) - - def testBuildOnlyUptoFinalEndpoint(self): - batch_size = 5 - height, width = 299, 299 - endpoints = ['Conv2d_1a_3x3', 
'Conv2d_2a_3x3', 'Conv2d_2b_3x3', - 'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', - 'MaxPool_5a_3x3', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d', - 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', - 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c'] - - for index, endpoint in enumerate(endpoints): - with tf.Graph().as_default(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - out_tensor, end_points = inception.inception_v3_base( - inputs, final_endpoint=endpoint) - self.assertTrue(out_tensor.op.name.startswith( - 'InceptionV3/' + endpoint)) - self.assertItemsEqual(endpoints[:index+1], end_points) - - def testBuildAndCheckAllEndPointsUptoMixed7c(self): - batch_size = 5 - height, width = 299, 299 - - inputs = tf.random_uniform((batch_size, height, width, 3)) - _, end_points = inception.inception_v3_base( - inputs, final_endpoint='Mixed_7c') - endpoints_shapes = {'Conv2d_1a_3x3': [batch_size, 149, 149, 32], - 'Conv2d_2a_3x3': [batch_size, 147, 147, 32], - 'Conv2d_2b_3x3': [batch_size, 147, 147, 64], - 'MaxPool_3a_3x3': [batch_size, 73, 73, 64], - 'Conv2d_3b_1x1': [batch_size, 73, 73, 80], - 'Conv2d_4a_3x3': [batch_size, 71, 71, 192], - 'MaxPool_5a_3x3': [batch_size, 35, 35, 192], - 'Mixed_5b': [batch_size, 35, 35, 256], - 'Mixed_5c': [batch_size, 35, 35, 288], - 'Mixed_5d': [batch_size, 35, 35, 288], - 'Mixed_6a': [batch_size, 17, 17, 768], - 'Mixed_6b': [batch_size, 17, 17, 768], - 'Mixed_6c': [batch_size, 17, 17, 768], - 'Mixed_6d': [batch_size, 17, 17, 768], - 'Mixed_6e': [batch_size, 17, 17, 768], - 'Mixed_7a': [batch_size, 8, 8, 1280], - 'Mixed_7b': [batch_size, 8, 8, 2048], - 'Mixed_7c': [batch_size, 8, 8, 2048]} - self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys()) - for endpoint_name in endpoints_shapes: - expected_shape = endpoints_shapes[endpoint_name] - self.assertTrue(endpoint_name in end_points) - self.assertListEqual(end_points[endpoint_name].get_shape().as_list(), - expected_shape) - - def testModelHasExpectedNumberOfParameters(self): - batch_size = 5 - height, width = 299, 299 - inputs = tf.random_uniform((batch_size, height, width, 3)) - with slim.arg_scope(inception.inception_v3_arg_scope()): - inception.inception_v3_base(inputs) - total_params, _ = slim.model_analyzer.analyze_vars( - slim.get_model_variables()) - self.assertAlmostEqual(21802784, total_params) - - def testBuildEndPoints(self): - batch_size = 5 - height, width = 299, 299 - num_classes = 1000 - - inputs = tf.random_uniform((batch_size, height, width, 3)) - _, end_points = inception.inception_v3(inputs, num_classes) - self.assertTrue('Logits' in end_points) - logits = end_points['Logits'] - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - self.assertTrue('AuxLogits' in end_points) - aux_logits = end_points['AuxLogits'] - self.assertListEqual(aux_logits.get_shape().as_list(), - [batch_size, num_classes]) - self.assertTrue('Mixed_7c' in end_points) - pre_pool = end_points['Mixed_7c'] - self.assertListEqual(pre_pool.get_shape().as_list(), - [batch_size, 8, 8, 2048]) - self.assertTrue('PreLogits' in end_points) - pre_logits = end_points['PreLogits'] - self.assertListEqual(pre_logits.get_shape().as_list(), - [batch_size, 1, 1, 2048]) - - def testBuildEndPointsWithDepthMultiplierLessThanOne(self): - batch_size = 5 - height, width = 299, 299 - num_classes = 1000 - - inputs = tf.random_uniform((batch_size, height, width, 3)) - _, end_points = inception.inception_v3(inputs, num_classes) - - endpoint_keys = [key for key in end_points.keys() - if 
key.startswith('Mixed') or key.startswith('Conv')] - - _, end_points_with_multiplier = inception.inception_v3( - inputs, num_classes, scope='depth_multiplied_net', - depth_multiplier=0.5) - - for key in endpoint_keys: - original_depth = end_points[key].get_shape().as_list()[3] - new_depth = end_points_with_multiplier[key].get_shape().as_list()[3] - self.assertEqual(0.5 * original_depth, new_depth) - - def testBuildEndPointsWithDepthMultiplierGreaterThanOne(self): - batch_size = 5 - height, width = 299, 299 - num_classes = 1000 - - inputs = tf.random_uniform((batch_size, height, width, 3)) - _, end_points = inception.inception_v3(inputs, num_classes) - - endpoint_keys = [key for key in end_points.keys() - if key.startswith('Mixed') or key.startswith('Conv')] - - _, end_points_with_multiplier = inception.inception_v3( - inputs, num_classes, scope='depth_multiplied_net', - depth_multiplier=2.0) - - for key in endpoint_keys: - original_depth = end_points[key].get_shape().as_list()[3] - new_depth = end_points_with_multiplier[key].get_shape().as_list()[3] - self.assertEqual(2.0 * original_depth, new_depth) - - def testRaiseValueErrorWithInvalidDepthMultiplier(self): - batch_size = 5 - height, width = 299, 299 - num_classes = 1000 - - inputs = tf.random_uniform((batch_size, height, width, 3)) - with self.assertRaises(ValueError): - _ = inception.inception_v3(inputs, num_classes, depth_multiplier=-0.1) - with self.assertRaises(ValueError): - _ = inception.inception_v3(inputs, num_classes, depth_multiplier=0.0) - - def testHalfSizeImages(self): - batch_size = 5 - height, width = 150, 150 - num_classes = 1000 - - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, end_points = inception.inception_v3(inputs, num_classes) - self.assertTrue(logits.op.name.startswith('InceptionV3/Logits')) - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - pre_pool = end_points['Mixed_7c'] - self.assertListEqual(pre_pool.get_shape().as_list(), - [batch_size, 3, 3, 2048]) - - def testUnknownImageShape(self): - tf.reset_default_graph() - batch_size = 2 - height, width = 299, 299 - num_classes = 1000 - input_np = np.random.uniform(0, 1, (batch_size, height, width, 3)) - with self.test_session() as sess: - inputs = tf.placeholder(tf.float32, shape=(batch_size, None, None, 3)) - logits, end_points = inception.inception_v3(inputs, num_classes) - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - pre_pool = end_points['Mixed_7c'] - feed_dict = {inputs: input_np} - tf.global_variables_initializer().run() - pre_pool_out = sess.run(pre_pool, feed_dict=feed_dict) - self.assertListEqual(list(pre_pool_out.shape), [batch_size, 8, 8, 2048]) - - def testUnknowBatchSize(self): - batch_size = 1 - height, width = 299, 299 - num_classes = 1000 - - inputs = tf.placeholder(tf.float32, (None, height, width, 3)) - logits, _ = inception.inception_v3(inputs, num_classes) - self.assertTrue(logits.op.name.startswith('InceptionV3/Logits')) - self.assertListEqual(logits.get_shape().as_list(), - [None, num_classes]) - images = tf.random_uniform((batch_size, height, width, 3)) - - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - output = sess.run(logits, {inputs: images.eval()}) - self.assertEquals(output.shape, (batch_size, num_classes)) - - def testEvaluation(self): - batch_size = 2 - height, width = 299, 299 - num_classes = 1000 - - eval_inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = 
inception.inception_v3(eval_inputs, num_classes, - is_training=False) - predictions = tf.argmax(logits, 1) - - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - output = sess.run(predictions) - self.assertEquals(output.shape, (batch_size,)) - - def testTrainEvalWithReuse(self): - train_batch_size = 5 - eval_batch_size = 2 - height, width = 150, 150 - num_classes = 1000 - - train_inputs = tf.random_uniform((train_batch_size, height, width, 3)) - inception.inception_v3(train_inputs, num_classes) - eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3)) - logits, _ = inception.inception_v3(eval_inputs, num_classes, - is_training=False, reuse=True) - predictions = tf.argmax(logits, 1) - - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - output = sess.run(predictions) - self.assertEquals(output.shape, (eval_batch_size,)) - - def testLogitsNotSqueezed(self): - num_classes = 25 - images = tf.random_uniform([1, 299, 299, 3]) - logits, _ = inception.inception_v3(images, - num_classes=num_classes, - spatial_squeeze=False) - - with self.test_session() as sess: - tf.global_variables_initializer().run() - logits_out = sess.run(logits) - self.assertListEqual(list(logits_out.shape), [1, 1, 1, num_classes]) - - -if __name__ == '__main__': - tf.test.main() diff --git a/examples/slim/nets/inception_v4.py b/examples/slim/nets/inception_v4.py deleted file mode 100644 index a03e4127..00000000 --- a/examples/slim/nets/inception_v4.py +++ /dev/null @@ -1,323 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains the definition of the Inception V4 architecture. - -As described in http://arxiv.org/abs/1602.07261. 
- - Inception-v4, Inception-ResNet and the Impact of Residual Connections - on Learning - Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from nets import inception_utils - -slim = tf.contrib.slim - - -def block_inception_a(inputs, scope=None, reuse=None): - """Builds Inception-A block for Inception v4 network.""" - # By default use stride=1 and SAME padding - with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], - stride=1, padding='SAME'): - with tf.variable_scope(scope, 'BlockInceptionA', [inputs], reuse=reuse): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(inputs, 96, [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, 96, [3, 3], scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3') - branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0c_3x3') - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d(branch_3, 96, [1, 1], scope='Conv2d_0b_1x1') - return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - - -def block_reduction_a(inputs, scope=None, reuse=None): - """Builds Reduction-A block for Inception v4 network.""" - # By default use stride=1 and SAME padding - with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], - stride=1, padding='SAME'): - with tf.variable_scope(scope, 'BlockReductionA', [inputs], reuse=reuse): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(inputs, 384, [3, 3], stride=2, padding='VALID', - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, 224, [3, 3], scope='Conv2d_0b_3x3') - branch_1 = slim.conv2d(branch_1, 256, [3, 3], stride=2, - padding='VALID', scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID', - scope='MaxPool_1a_3x3') - return tf.concat(axis=3, values=[branch_0, branch_1, branch_2]) - - -def block_inception_b(inputs, scope=None, reuse=None): - """Builds Inception-B block for Inception v4 network.""" - # By default use stride=1 and SAME padding - with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], - stride=1, padding='SAME'): - with tf.variable_scope(scope, 'BlockInceptionB', [inputs], reuse=reuse): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, 224, [1, 7], scope='Conv2d_0b_1x7') - branch_1 = slim.conv2d(branch_1, 256, [7, 1], scope='Conv2d_0c_7x1') - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, 192, [7, 1], scope='Conv2d_0b_7x1') - branch_2 = slim.conv2d(branch_2, 224, [1, 7], scope='Conv2d_0c_1x7') - branch_2 = slim.conv2d(branch_2, 224, [7, 1], scope='Conv2d_0d_7x1') - branch_2 = slim.conv2d(branch_2, 256, [1, 7], scope='Conv2d_0e_1x7') - with 
tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1') - return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - - -def block_reduction_b(inputs, scope=None, reuse=None): - """Builds Reduction-B block for Inception v4 network.""" - # By default use stride=1 and SAME padding - with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], - stride=1, padding='SAME'): - with tf.variable_scope(scope, 'BlockReductionB', [inputs], reuse=reuse): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1') - branch_0 = slim.conv2d(branch_0, 192, [3, 3], stride=2, - padding='VALID', scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, 256, [1, 7], scope='Conv2d_0b_1x7') - branch_1 = slim.conv2d(branch_1, 320, [7, 1], scope='Conv2d_0c_7x1') - branch_1 = slim.conv2d(branch_1, 320, [3, 3], stride=2, - padding='VALID', scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_2'): - branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID', - scope='MaxPool_1a_3x3') - return tf.concat(axis=3, values=[branch_0, branch_1, branch_2]) - - -def block_inception_c(inputs, scope=None, reuse=None): - """Builds Inception-C block for Inception v4 network.""" - # By default use stride=1 and SAME padding - with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d], - stride=1, padding='SAME'): - with tf.variable_scope(scope, 'BlockInceptionC', [inputs], reuse=reuse): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1') - branch_1 = tf.concat(axis=3, values=[ - slim.conv2d(branch_1, 256, [1, 3], scope='Conv2d_0b_1x3'), - slim.conv2d(branch_1, 256, [3, 1], scope='Conv2d_0c_3x1')]) - with tf.variable_scope('Branch_2'): - branch_2 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1') - branch_2 = slim.conv2d(branch_2, 448, [3, 1], scope='Conv2d_0b_3x1') - branch_2 = slim.conv2d(branch_2, 512, [1, 3], scope='Conv2d_0c_1x3') - branch_2 = tf.concat(axis=3, values=[ - slim.conv2d(branch_2, 256, [1, 3], scope='Conv2d_0d_1x3'), - slim.conv2d(branch_2, 256, [3, 1], scope='Conv2d_0e_3x1')]) - with tf.variable_scope('Branch_3'): - branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3') - branch_3 = slim.conv2d(branch_3, 256, [1, 1], scope='Conv2d_0b_1x1') - return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3]) - - -def inception_v4_base(inputs, final_endpoint='Mixed_7d', scope=None): - """Creates the Inception V4 network up to the given final endpoint. - - Args: - inputs: a 4-D tensor of size [batch_size, height, width, 3]. - final_endpoint: specifies the endpoint to construct the network up to. - It can be one of [ 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', - 'Mixed_3a', 'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d', - 'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e', - 'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c', - 'Mixed_7d'] - scope: Optional variable_scope. - - Returns: - logits: the logits outputs of the model. - end_points: the set of end_points from the inception model. 
- - Raises: - ValueError: if final_endpoint is not set to one of the predefined values, - """ - end_points = {} - - def add_and_check_final(name, net): - end_points[name] = net - return name == final_endpoint - - with tf.variable_scope(scope, 'InceptionV4', [inputs]): - with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], - stride=1, padding='SAME'): - # 299 x 299 x 3 - net = slim.conv2d(inputs, 32, [3, 3], stride=2, - padding='VALID', scope='Conv2d_1a_3x3') - if add_and_check_final('Conv2d_1a_3x3', net): return net, end_points - # 149 x 149 x 32 - net = slim.conv2d(net, 32, [3, 3], padding='VALID', - scope='Conv2d_2a_3x3') - if add_and_check_final('Conv2d_2a_3x3', net): return net, end_points - # 147 x 147 x 32 - net = slim.conv2d(net, 64, [3, 3], scope='Conv2d_2b_3x3') - if add_and_check_final('Conv2d_2b_3x3', net): return net, end_points - # 147 x 147 x 64 - with tf.variable_scope('Mixed_3a'): - with tf.variable_scope('Branch_0'): - branch_0 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', - scope='MaxPool_0a_3x3') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, 96, [3, 3], stride=2, padding='VALID', - scope='Conv2d_0a_3x3') - net = tf.concat(axis=3, values=[branch_0, branch_1]) - if add_and_check_final('Mixed_3a', net): return net, end_points - - # 73 x 73 x 160 - with tf.variable_scope('Mixed_4a'): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1') - branch_0 = slim.conv2d(branch_0, 96, [3, 3], padding='VALID', - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - branch_1 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1') - branch_1 = slim.conv2d(branch_1, 64, [1, 7], scope='Conv2d_0b_1x7') - branch_1 = slim.conv2d(branch_1, 64, [7, 1], scope='Conv2d_0c_7x1') - branch_1 = slim.conv2d(branch_1, 96, [3, 3], padding='VALID', - scope='Conv2d_1a_3x3') - net = tf.concat(axis=3, values=[branch_0, branch_1]) - if add_and_check_final('Mixed_4a', net): return net, end_points - - # 71 x 71 x 192 - with tf.variable_scope('Mixed_5a'): - with tf.variable_scope('Branch_0'): - branch_0 = slim.conv2d(net, 192, [3, 3], stride=2, padding='VALID', - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - branch_1 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID', - scope='MaxPool_1a_3x3') - net = tf.concat(axis=3, values=[branch_0, branch_1]) - if add_and_check_final('Mixed_5a', net): return net, end_points - - # 35 x 35 x 384 - # 4 x Inception-A blocks - for idx in xrange(4): - block_scope = 'Mixed_5' + chr(ord('b') + idx) - net = block_inception_a(net, block_scope) - if add_and_check_final(block_scope, net): return net, end_points - - # 35 x 35 x 384 - # Reduction-A block - net = block_reduction_a(net, 'Mixed_6a') - if add_and_check_final('Mixed_6a', net): return net, end_points - - # 17 x 17 x 1024 - # 7 x Inception-B blocks - for idx in xrange(7): - block_scope = 'Mixed_6' + chr(ord('b') + idx) - net = block_inception_b(net, block_scope) - if add_and_check_final(block_scope, net): return net, end_points - - # 17 x 17 x 1024 - # Reduction-B block - net = block_reduction_b(net, 'Mixed_7a') - if add_and_check_final('Mixed_7a', net): return net, end_points - - # 8 x 8 x 1536 - # 3 x Inception-C blocks - for idx in xrange(3): - block_scope = 'Mixed_7' + chr(ord('b') + idx) - net = block_inception_c(net, block_scope) - if add_and_check_final(block_scope, net): return net, end_points - raise ValueError('Unknown final endpoint %s' % final_endpoint) - - -def inception_v4(inputs, 
num_classes=1001, is_training=True, - dropout_keep_prob=0.8, - reuse=None, - scope='InceptionV4', - create_aux_logits=True): - """Creates the Inception V4 model. - - Args: - inputs: a 4-D tensor of size [batch_size, height, width, 3]. - num_classes: number of predicted classes. - is_training: whether is training or not. - dropout_keep_prob: float, the fraction to keep before final layer. - reuse: whether or not the network and its variables should be reused. To be - able to reuse 'scope' must be given. - scope: Optional variable_scope. - create_aux_logits: Whether to include the auxiliary logits. - - Returns: - logits: the logits outputs of the model. - end_points: the set of end_points from the inception model. - """ - end_points = {} - with tf.variable_scope(scope, 'InceptionV4', [inputs], reuse=reuse) as scope: - with slim.arg_scope([slim.batch_norm, slim.dropout], - is_training=is_training): - net, end_points = inception_v4_base(inputs, scope=scope) - - with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], - stride=1, padding='SAME'): - # Auxiliary Head logits - if create_aux_logits: - with tf.variable_scope('AuxLogits'): - # 17 x 17 x 1024 - aux_logits = end_points['Mixed_6h'] - aux_logits = slim.avg_pool2d(aux_logits, [5, 5], stride=3, - padding='VALID', - scope='AvgPool_1a_5x5') - aux_logits = slim.conv2d(aux_logits, 128, [1, 1], - scope='Conv2d_1b_1x1') - aux_logits = slim.conv2d(aux_logits, 768, - aux_logits.get_shape()[1:3], - padding='VALID', scope='Conv2d_2a') - aux_logits = slim.flatten(aux_logits) - aux_logits = slim.fully_connected(aux_logits, num_classes, - activation_fn=None, - scope='Aux_logits') - end_points['AuxLogits'] = aux_logits - - # Final pooling and prediction - with tf.variable_scope('Logits'): - # 8 x 8 x 1536 - net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', - scope='AvgPool_1a') - # 1 x 1 x 1536 - net = slim.dropout(net, dropout_keep_prob, scope='Dropout_1b') - net = slim.flatten(net, scope='PreLogitsFlatten') - end_points['PreLogitsFlatten'] = net - # 1536 - logits = slim.fully_connected(net, num_classes, activation_fn=None, - scope='Logits') - end_points['Logits'] = logits - end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions') - return logits, end_points -inception_v4.default_image_size = 299 - - -inception_v4_arg_scope = inception_utils.inception_arg_scope diff --git a/examples/slim/nets/inception_v4_test.py b/examples/slim/nets/inception_v4_test.py deleted file mode 100644 index 11cffb63..00000000 --- a/examples/slim/nets/inception_v4_test.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for slim.inception_v4.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from nets import inception - - -class InceptionTest(tf.test.TestCase): - - def testBuildLogits(self): - batch_size = 5 - height, width = 299, 299 - num_classes = 1000 - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, end_points = inception.inception_v4(inputs, num_classes) - auxlogits = end_points['AuxLogits'] - predictions = end_points['Predictions'] - self.assertTrue(auxlogits.op.name.startswith('InceptionV4/AuxLogits')) - self.assertListEqual(auxlogits.get_shape().as_list(), - [batch_size, num_classes]) - self.assertTrue(logits.op.name.startswith('InceptionV4/Logits')) - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - self.assertTrue(predictions.op.name.startswith( - 'InceptionV4/Logits/Predictions')) - self.assertListEqual(predictions.get_shape().as_list(), - [batch_size, num_classes]) - - def testBuildWithoutAuxLogits(self): - batch_size = 5 - height, width = 299, 299 - num_classes = 1000 - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, endpoints = inception.inception_v4(inputs, num_classes, - create_aux_logits=False) - self.assertFalse('AuxLogits' in endpoints) - self.assertTrue(logits.op.name.startswith('InceptionV4/Logits')) - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - - def testAllEndPointsShapes(self): - batch_size = 5 - height, width = 299, 299 - num_classes = 1000 - inputs = tf.random_uniform((batch_size, height, width, 3)) - _, end_points = inception.inception_v4(inputs, num_classes) - endpoints_shapes = {'Conv2d_1a_3x3': [batch_size, 149, 149, 32], - 'Conv2d_2a_3x3': [batch_size, 147, 147, 32], - 'Conv2d_2b_3x3': [batch_size, 147, 147, 64], - 'Mixed_3a': [batch_size, 73, 73, 160], - 'Mixed_4a': [batch_size, 71, 71, 192], - 'Mixed_5a': [batch_size, 35, 35, 384], - # 4 x Inception-A blocks - 'Mixed_5b': [batch_size, 35, 35, 384], - 'Mixed_5c': [batch_size, 35, 35, 384], - 'Mixed_5d': [batch_size, 35, 35, 384], - 'Mixed_5e': [batch_size, 35, 35, 384], - # Reduction-A block - 'Mixed_6a': [batch_size, 17, 17, 1024], - # 7 x Inception-B blocks - 'Mixed_6b': [batch_size, 17, 17, 1024], - 'Mixed_6c': [batch_size, 17, 17, 1024], - 'Mixed_6d': [batch_size, 17, 17, 1024], - 'Mixed_6e': [batch_size, 17, 17, 1024], - 'Mixed_6f': [batch_size, 17, 17, 1024], - 'Mixed_6g': [batch_size, 17, 17, 1024], - 'Mixed_6h': [batch_size, 17, 17, 1024], - # Reduction-A block - 'Mixed_7a': [batch_size, 8, 8, 1536], - # 3 x Inception-C blocks - 'Mixed_7b': [batch_size, 8, 8, 1536], - 'Mixed_7c': [batch_size, 8, 8, 1536], - 'Mixed_7d': [batch_size, 8, 8, 1536], - # Logits and predictions - 'AuxLogits': [batch_size, num_classes], - 'PreLogitsFlatten': [batch_size, 1536], - 'Logits': [batch_size, num_classes], - 'Predictions': [batch_size, num_classes]} - self.assertItemsEqual(endpoints_shapes.keys(), end_points.keys()) - for endpoint_name in endpoints_shapes: - expected_shape = endpoints_shapes[endpoint_name] - self.assertTrue(endpoint_name in end_points) - self.assertListEqual(end_points[endpoint_name].get_shape().as_list(), - expected_shape) - - def testBuildBaseNetwork(self): - batch_size = 5 - height, width = 299, 299 - inputs = tf.random_uniform((batch_size, height, width, 3)) - net, end_points = inception.inception_v4_base(inputs) - 
self.assertTrue(net.op.name.startswith( - 'InceptionV4/Mixed_7d')) - self.assertListEqual(net.get_shape().as_list(), [batch_size, 8, 8, 1536]) - expected_endpoints = [ - 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'Mixed_3a', - 'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d', - 'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', - 'Mixed_6e', 'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a', - 'Mixed_7b', 'Mixed_7c', 'Mixed_7d'] - self.assertItemsEqual(end_points.keys(), expected_endpoints) - for name, op in end_points.iteritems(): - self.assertTrue(op.name.startswith('InceptionV4/' + name)) - - def testBuildOnlyUpToFinalEndpoint(self): - batch_size = 5 - height, width = 299, 299 - all_endpoints = [ - 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'Mixed_3a', - 'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d', - 'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', - 'Mixed_6e', 'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a', - 'Mixed_7b', 'Mixed_7c', 'Mixed_7d'] - for index, endpoint in enumerate(all_endpoints): - with tf.Graph().as_default(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - out_tensor, end_points = inception.inception_v4_base( - inputs, final_endpoint=endpoint) - self.assertTrue(out_tensor.op.name.startswith( - 'InceptionV4/' + endpoint)) - self.assertItemsEqual(all_endpoints[:index+1], end_points) - - def testVariablesSetDevice(self): - batch_size = 5 - height, width = 299, 299 - num_classes = 1000 - inputs = tf.random_uniform((batch_size, height, width, 3)) - # Force all Variables to reside on the device. - with tf.variable_scope('on_cpu'), tf.device('/cpu:0'): - inception.inception_v4(inputs, num_classes) - with tf.variable_scope('on_gpu'), tf.device('/gpu:0'): - inception.inception_v4(inputs, num_classes) - for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='on_cpu'): - self.assertDeviceEqual(v.device, '/cpu:0') - for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='on_gpu'): - self.assertDeviceEqual(v.device, '/gpu:0') - - def testHalfSizeImages(self): - batch_size = 5 - height, width = 150, 150 - num_classes = 1000 - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, end_points = inception.inception_v4(inputs, num_classes) - self.assertTrue(logits.op.name.startswith('InceptionV4/Logits')) - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - pre_pool = end_points['Mixed_7d'] - self.assertListEqual(pre_pool.get_shape().as_list(), - [batch_size, 3, 3, 1536]) - - def testUnknownBatchSize(self): - batch_size = 1 - height, width = 299, 299 - num_classes = 1000 - with self.test_session() as sess: - inputs = tf.placeholder(tf.float32, (None, height, width, 3)) - logits, _ = inception.inception_v4(inputs, num_classes) - self.assertTrue(logits.op.name.startswith('InceptionV4/Logits')) - self.assertListEqual(logits.get_shape().as_list(), - [None, num_classes]) - images = tf.random_uniform((batch_size, height, width, 3)) - sess.run(tf.global_variables_initializer()) - output = sess.run(logits, {inputs: images.eval()}) - self.assertEquals(output.shape, (batch_size, num_classes)) - - def testEvaluation(self): - batch_size = 2 - height, width = 299, 299 - num_classes = 1000 - with self.test_session() as sess: - eval_inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = inception.inception_v4(eval_inputs, - num_classes, - is_training=False) - predictions = tf.argmax(logits, 1) - sess.run(tf.global_variables_initializer()) - output = 
sess.run(predictions) - self.assertEquals(output.shape, (batch_size,)) - - def testTrainEvalWithReuse(self): - train_batch_size = 5 - eval_batch_size = 2 - height, width = 150, 150 - num_classes = 1000 - with self.test_session() as sess: - train_inputs = tf.random_uniform((train_batch_size, height, width, 3)) - inception.inception_v4(train_inputs, num_classes) - eval_inputs = tf.random_uniform((eval_batch_size, height, width, 3)) - logits, _ = inception.inception_v4(eval_inputs, - num_classes, - is_training=False, - reuse=True) - predictions = tf.argmax(logits, 1) - sess.run(tf.global_variables_initializer()) - output = sess.run(predictions) - self.assertEquals(output.shape, (eval_batch_size,)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/examples/slim/nets/lenet.py b/examples/slim/nets/lenet.py deleted file mode 100644 index 789d2bdc..00000000 --- a/examples/slim/nets/lenet.py +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains a variant of the LeNet model definition.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -slim = tf.contrib.slim - - -def lenet(images, num_classes=10, is_training=False, - dropout_keep_prob=0.5, - prediction_fn=slim.softmax, - scope='LeNet'): - """Creates a variant of the LeNet model. - - Note that since the output is a set of 'logits', the values fall in the - interval of (-infinity, infinity). Consequently, to convert the outputs to a - probability distribution over the characters, one will need to convert them - using the softmax function: - - logits = lenet.lenet(images, is_training=False) - probabilities = tf.nn.softmax(logits) - predictions = tf.argmax(logits, 1) - - Args: - images: A batch of `Tensors` of size [batch_size, height, width, channels]. - num_classes: the number of classes in the dataset. - is_training: specifies whether or not we're currently training the model. - This variable will determine the behaviour of the dropout layer. - dropout_keep_prob: the percentage of activation values that are retained. - prediction_fn: a function to get predictions out of logits. - scope: Optional variable_scope. - - Returns: - logits: the pre-softmax activations, a tensor of size - [batch_size, `num_classes`] - end_points: a dictionary from components of the network to the corresponding - activation. 
-  """
-  end_points = {}
-
-  with tf.variable_scope(scope, 'LeNet', [images, num_classes]):
-    net = slim.conv2d(images, 32, [5, 5], scope='conv1')
-    net = slim.max_pool2d(net, [2, 2], 2, scope='pool1')
-    net = slim.conv2d(net, 64, [5, 5], scope='conv2')
-    net = slim.max_pool2d(net, [2, 2], 2, scope='pool2')
-    net = slim.flatten(net)
-    end_points['Flatten'] = net
-
-    net = slim.fully_connected(net, 1024, scope='fc3')
-    net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
-                       scope='dropout3')
-    logits = slim.fully_connected(net, num_classes, activation_fn=None,
-                                  scope='fc4')
-
-  end_points['Logits'] = logits
-  end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
-
-  return logits, end_points
-lenet.default_image_size = 28
-
-
-def lenet_arg_scope(weight_decay=0.0):
-  """Defines the default lenet argument scope.
-
-  Args:
-    weight_decay: The weight decay to use for regularizing the model.
-
-  Returns:
-    An `arg_scope` to use for the lenet model.
-  """
-  with slim.arg_scope(
-      [slim.conv2d, slim.fully_connected],
-      weights_regularizer=slim.l2_regularizer(weight_decay),
-      weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
-      activation_fn=tf.nn.relu) as sc:
-    return sc
diff --git a/examples/slim/nets/nets_factory.py b/examples/slim/nets/nets_factory.py
deleted file mode 100644
index bd8d7127..00000000
--- a/examples/slim/nets/nets_factory.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================== -"""Contains a factory for building various models.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import functools - -import tensorflow as tf - -from nets import alexnet -from nets import cifarnet -from nets import inception -from nets import lenet -from nets import overfeat -from nets import resnet_v1 -from nets import resnet_v2 -from nets import vgg - -slim = tf.contrib.slim - -networks_map = {'alexnet_v2': alexnet.alexnet_v2, - 'cifarnet': cifarnet.cifarnet, - 'overfeat': overfeat.overfeat, - 'vgg_a': vgg.vgg_a, - 'vgg_16': vgg.vgg_16, - 'vgg_19': vgg.vgg_19, - 'inception_v1': inception.inception_v1, - 'inception_v2': inception.inception_v2, - 'inception_v3': inception.inception_v3, - 'inception_v4': inception.inception_v4, - 'inception_resnet_v2': inception.inception_resnet_v2, - 'lenet': lenet.lenet, - 'resnet_v1_50': resnet_v1.resnet_v1_50, - 'resnet_v1_101': resnet_v1.resnet_v1_101, - 'resnet_v1_152': resnet_v1.resnet_v1_152, - 'resnet_v1_200': resnet_v1.resnet_v1_200, - 'resnet_v2_50': resnet_v2.resnet_v2_50, - 'resnet_v2_101': resnet_v2.resnet_v2_101, - 'resnet_v2_152': resnet_v2.resnet_v2_152, - 'resnet_v2_200': resnet_v2.resnet_v2_200, - } - -arg_scopes_map = {'alexnet_v2': alexnet.alexnet_v2_arg_scope, - 'cifarnet': cifarnet.cifarnet_arg_scope, - 'overfeat': overfeat.overfeat_arg_scope, - 'vgg_a': vgg.vgg_arg_scope, - 'vgg_16': vgg.vgg_arg_scope, - 'vgg_19': vgg.vgg_arg_scope, - 'inception_v1': inception.inception_v3_arg_scope, - 'inception_v2': inception.inception_v3_arg_scope, - 'inception_v3': inception.inception_v3_arg_scope, - 'inception_v4': inception.inception_v4_arg_scope, - 'inception_resnet_v2': - inception.inception_resnet_v2_arg_scope, - 'lenet': lenet.lenet_arg_scope, - 'resnet_v1_50': resnet_v1.resnet_arg_scope, - 'resnet_v1_101': resnet_v1.resnet_arg_scope, - 'resnet_v1_152': resnet_v1.resnet_arg_scope, - 'resnet_v1_200': resnet_v1.resnet_arg_scope, - 'resnet_v2_50': resnet_v2.resnet_arg_scope, - 'resnet_v2_101': resnet_v2.resnet_arg_scope, - 'resnet_v2_152': resnet_v2.resnet_arg_scope, - 'resnet_v2_200': resnet_v2.resnet_arg_scope, - } - - -def get_network_fn(name, num_classes, weight_decay=0.0, is_training=False): - """Returns a network_fn such as `logits, end_points = network_fn(images)`. - - Args: - name: The name of the network. - num_classes: The number of classes to use for classification. - weight_decay: The l2 coefficient for the model weights. - is_training: `True` if the model is being used for training and `False` - otherwise. - - Returns: - network_fn: A function that applies the model to a batch of images. It has - the following signature: - logits, end_points = network_fn(images) - Raises: - ValueError: If network `name` is not recognized. 
-  """
-  if name not in networks_map:
-    raise ValueError('Name of network unknown %s' % name)
-  func = networks_map[name]
-  @functools.wraps(func)
-  def network_fn(images):
-    arg_scope = arg_scopes_map[name](weight_decay=weight_decay)
-    with slim.arg_scope(arg_scope):
-      return func(images, num_classes, is_training=is_training)
-  if hasattr(func, 'default_image_size'):
-    network_fn.default_image_size = func.default_image_size
-
-  return network_fn
diff --git a/examples/slim/nets/nets_factory_test.py b/examples/slim/nets/nets_factory_test.py
deleted file mode 100644
index b4ab1f82..00000000
--- a/examples/slim/nets/nets_factory_test.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# Copyright 2016 Google Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""Tests for slim.nets_factory."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow as tf
-
-from nets import nets_factory
-
-slim = tf.contrib.slim
-
-
-class NetworksTest(tf.test.TestCase):
-
-  def testGetNetworkFn(self):
-    batch_size = 5
-    num_classes = 1000
-    for net in nets_factory.networks_map:
-      with self.test_session():
-        net_fn = nets_factory.get_network_fn(net, num_classes)
-        # Most networks use 224 as their default_image_size
-        image_size = getattr(net_fn, 'default_image_size', 224)
-        inputs = tf.random_uniform((batch_size, image_size, image_size, 3))
-        logits, end_points = net_fn(inputs)
-        self.assertTrue(isinstance(logits, tf.Tensor))
-        self.assertTrue(isinstance(end_points, dict))
-        self.assertEqual(logits.get_shape().as_list()[0], batch_size)
-        self.assertEqual(logits.get_shape().as_list()[-1], num_classes)
-
-  def testGetNetworkFnArgScope(self):
-    batch_size = 5
-    num_classes = 10
-    net = 'cifarnet'
-    with self.test_session(use_gpu=True):
-      net_fn = nets_factory.get_network_fn(net, num_classes)
-      image_size = getattr(net_fn, 'default_image_size', 224)
-      with slim.arg_scope([slim.model_variable, slim.variable],
-                          device='/CPU:0'):
-        inputs = tf.random_uniform((batch_size, image_size, image_size, 3))
-        net_fn(inputs)
-      weights = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, 'CifarNet/conv1')[0]
-      self.assertDeviceEqual('/CPU:0', weights.device)
-
-if __name__ == '__main__':
-  tf.test.main()
diff --git a/examples/slim/nets/overfeat.py b/examples/slim/nets/overfeat.py
deleted file mode 100644
index 64a54252..00000000
--- a/examples/slim/nets/overfeat.py
+++ /dev/null
@@ -1,118 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains the model definition for the OverFeat network. - -The definition for the network was obtained from: - OverFeat: Integrated Recognition, Localization and Detection using - Convolutional Networks - Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and - Yann LeCun, 2014 - http://arxiv.org/abs/1312.6229 - -Usage: - with slim.arg_scope(overfeat.overfeat_arg_scope()): - outputs, end_points = overfeat.overfeat(inputs) - -@@overfeat -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -slim = tf.contrib.slim -trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev) - - -def overfeat_arg_scope(weight_decay=0.0005): - with slim.arg_scope([slim.conv2d, slim.fully_connected], - activation_fn=tf.nn.relu, - weights_regularizer=slim.l2_regularizer(weight_decay), - biases_initializer=tf.zeros_initializer()): - with slim.arg_scope([slim.conv2d], padding='SAME'): - with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc: - return arg_sc - - -def overfeat(inputs, - num_classes=1000, - is_training=True, - dropout_keep_prob=0.5, - spatial_squeeze=True, - scope='overfeat'): - """Contains the model definition for the OverFeat network. - - The definition for the network was obtained from: - OverFeat: Integrated Recognition, Localization and Detection using - Convolutional Networks - Pierre Sermanet, David Eigen, Xiang Zhang, Michael Mathieu, Rob Fergus and - Yann LeCun, 2014 - http://arxiv.org/abs/1312.6229 - - Note: All the fully_connected layers have been transformed to conv2d layers. - To use in classification mode, resize input to 231x231. To use in fully - convolutional mode, set spatial_squeeze to false. - - Args: - inputs: a tensor of size [batch_size, height, width, channels]. - num_classes: number of predicted classes. - is_training: whether or not the model is being trained. - dropout_keep_prob: the probability that activations are kept in the dropout - layers during training. - spatial_squeeze: whether or not should squeeze the spatial dimensions of the - outputs. Useful to remove unnecessary dimensions for classification. - scope: Optional scope for the variables. - - Returns: - the last op containing the log predictions and end_points dict. 
- - """ - with tf.variable_scope(scope, 'overfeat', [inputs]) as sc: - end_points_collection = sc.name + '_end_points' - # Collect outputs for conv2d, fully_connected and max_pool2d - with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], - outputs_collections=end_points_collection): - net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID', - scope='conv1') - net = slim.max_pool2d(net, [2, 2], scope='pool1') - net = slim.conv2d(net, 256, [5, 5], padding='VALID', scope='conv2') - net = slim.max_pool2d(net, [2, 2], scope='pool2') - net = slim.conv2d(net, 512, [3, 3], scope='conv3') - net = slim.conv2d(net, 1024, [3, 3], scope='conv4') - net = slim.conv2d(net, 1024, [3, 3], scope='conv5') - net = slim.max_pool2d(net, [2, 2], scope='pool5') - with slim.arg_scope([slim.conv2d], - weights_initializer=trunc_normal(0.005), - biases_initializer=tf.constant_initializer(0.1)): - # Use conv2d instead of fully_connected layers. - net = slim.conv2d(net, 3072, [6, 6], padding='VALID', scope='fc6') - net = slim.dropout(net, dropout_keep_prob, is_training=is_training, - scope='dropout6') - net = slim.conv2d(net, 4096, [1, 1], scope='fc7') - net = slim.dropout(net, dropout_keep_prob, is_training=is_training, - scope='dropout7') - net = slim.conv2d(net, num_classes, [1, 1], - activation_fn=None, - normalizer_fn=None, - biases_initializer=tf.zeros_initializer(), - scope='fc8') - # Convert end_points_collection into a end_point dict. - end_points = slim.utils.convert_collection_to_dict(end_points_collection) - if spatial_squeeze: - net = tf.squeeze(net, [1, 2], name='fc8/squeezed') - end_points[sc.name + '/fc8'] = net - return net, end_points -overfeat.default_image_size = 231 diff --git a/examples/slim/nets/overfeat_test.py b/examples/slim/nets/overfeat_test.py deleted file mode 100644 index c6314e3f..00000000 --- a/examples/slim/nets/overfeat_test.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for slim.nets.overfeat.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from nets import overfeat - -slim = tf.contrib.slim - - -class OverFeatTest(tf.test.TestCase): - - def testBuild(self): - batch_size = 5 - height, width = 231, 231 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = overfeat.overfeat(inputs, num_classes) - self.assertEquals(logits.op.name, 'overfeat/fc8/squeezed') - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - - def testFullyConvolutional(self): - batch_size = 1 - height, width = 281, 281 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = overfeat.overfeat(inputs, num_classes, spatial_squeeze=False) - self.assertEquals(logits.op.name, 'overfeat/fc8/BiasAdd') - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, 2, 2, num_classes]) - - def testEndPoints(self): - batch_size = 5 - height, width = 231, 231 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - _, end_points = overfeat.overfeat(inputs, num_classes) - expected_names = ['overfeat/conv1', - 'overfeat/pool1', - 'overfeat/conv2', - 'overfeat/pool2', - 'overfeat/conv3', - 'overfeat/conv4', - 'overfeat/conv5', - 'overfeat/pool5', - 'overfeat/fc6', - 'overfeat/fc7', - 'overfeat/fc8' - ] - self.assertSetEqual(set(end_points.keys()), set(expected_names)) - - def testModelVariables(self): - batch_size = 5 - height, width = 231, 231 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - overfeat.overfeat(inputs, num_classes) - expected_names = ['overfeat/conv1/weights', - 'overfeat/conv1/biases', - 'overfeat/conv2/weights', - 'overfeat/conv2/biases', - 'overfeat/conv3/weights', - 'overfeat/conv3/biases', - 'overfeat/conv4/weights', - 'overfeat/conv4/biases', - 'overfeat/conv5/weights', - 'overfeat/conv5/biases', - 'overfeat/fc6/weights', - 'overfeat/fc6/biases', - 'overfeat/fc7/weights', - 'overfeat/fc7/biases', - 'overfeat/fc8/weights', - 'overfeat/fc8/biases', - ] - model_variables = [v.op.name for v in slim.get_model_variables()] - self.assertSetEqual(set(model_variables), set(expected_names)) - - def testEvaluation(self): - batch_size = 2 - height, width = 231, 231 - num_classes = 1000 - with self.test_session(): - eval_inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = overfeat.overfeat(eval_inputs, is_training=False) - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - predictions = tf.argmax(logits, 1) - self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) - - def testTrainEvalWithReuse(self): - train_batch_size = 2 - eval_batch_size = 1 - train_height, train_width = 231, 231 - eval_height, eval_width = 281, 281 - num_classes = 1000 - with self.test_session(): - train_inputs = tf.random_uniform( - (train_batch_size, train_height, train_width, 3)) - logits, _ = overfeat.overfeat(train_inputs) - self.assertListEqual(logits.get_shape().as_list(), - [train_batch_size, num_classes]) - tf.get_variable_scope().reuse_variables() - eval_inputs = tf.random_uniform( - (eval_batch_size, eval_height, eval_width, 3)) - logits, _ = overfeat.overfeat(eval_inputs, 
is_training=False, - spatial_squeeze=False) - self.assertListEqual(logits.get_shape().as_list(), - [eval_batch_size, 2, 2, num_classes]) - logits = tf.reduce_mean(logits, [1, 2]) - predictions = tf.argmax(logits, 1) - self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) - - def testForward(self): - batch_size = 1 - height, width = 231, 231 - with self.test_session() as sess: - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = overfeat.overfeat(inputs) - sess.run(tf.global_variables_initializer()) - output = sess.run(logits) - self.assertTrue(output.any()) - -if __name__ == '__main__': - tf.test.main() diff --git a/examples/slim/nets/resnet_utils.py b/examples/slim/nets/resnet_utils.py deleted file mode 100644 index 1e1dd829..00000000 --- a/examples/slim/nets/resnet_utils.py +++ /dev/null @@ -1,254 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains building blocks for various versions of Residual Networks. - -Residual networks (ResNets) were proposed in: - Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun - Deep Residual Learning for Image Recognition. arXiv:1512.03385, 2015 - -More variants were introduced in: - Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun - Identity Mappings in Deep Residual Networks. arXiv: 1603.05027, 2016 - -We can obtain different ResNet variants by changing the network depth, width, -and form of residual unit. This module implements the infrastructure for -building them. Concrete ResNet units and full ResNet networks are implemented in -the accompanying resnet_v1.py and resnet_v2.py modules. - -Compared to https://github.com/KaimingHe/deep-residual-networks, in the current -implementation we subsample the output activations in the last residual unit of -each block, instead of subsampling the input activations in the first residual -unit of each block. The two implementations give identical results but our -implementation is more memory efficient. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import tensorflow as tf - -slim = tf.contrib.slim - - -class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])): - """A named tuple describing a ResNet block. - - Its parts are: - scope: The scope of the `Block`. - unit_fn: The ResNet unit function which takes as input a `Tensor` and - returns another `Tensor` with the output of the ResNet unit. - args: A list of length equal to the number of units in the `Block`. The list - contains one (depth, depth_bottleneck, stride) tuple for each unit in the - block to serve as argument to unit_fn. - """ - - -def subsample(inputs, factor, scope=None): - """Subsamples the input along the spatial dimensions. - - Args: - inputs: A `Tensor` of size [batch, height_in, width_in, channels]. - factor: The subsampling factor. 
-    scope: Optional variable_scope.
-
-  Returns:
-    output: A `Tensor` of size [batch, height_out, width_out, channels] with the
-      input, either intact (if factor == 1) or subsampled (if factor > 1).
-  """
-  if factor == 1:
-    return inputs
-  else:
-    return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope)
-
-
-def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None):
-  """Strided 2-D convolution with 'SAME' padding.
-
-  When stride > 1, then we do explicit zero-padding, followed by conv2d with
-  'VALID' padding.
-
-  Note that
-
-     net = conv2d_same(inputs, num_outputs, 3, stride=stride)
-
-  is equivalent to
-
-     net = slim.conv2d(inputs, num_outputs, 3, stride=1, padding='SAME')
-     net = subsample(net, factor=stride)
-
-  whereas
-
-     net = slim.conv2d(inputs, num_outputs, 3, stride=stride, padding='SAME')
-
-  is different when the input's height or width is even, which is why we add the
-  current function. For more details, see ResnetUtilsTest.testConv2DSameEven().
-
-  Args:
-    inputs: A 4-D tensor of size [batch, height_in, width_in, channels].
-    num_outputs: An integer, the number of output filters.
-    kernel_size: An int with the kernel_size of the filters.
-    stride: An integer, the output stride.
-    rate: An integer, rate for atrous convolution.
-    scope: Scope.
-
-  Returns:
-    output: A 4-D tensor of size [batch, height_out, width_out, channels] with
-      the convolution output.
-  """
-  if stride == 1:
-    return slim.conv2d(inputs, num_outputs, kernel_size, stride=1, rate=rate,
-                       padding='SAME', scope=scope)
-  else:
-    kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
-    pad_total = kernel_size_effective - 1
-    pad_beg = pad_total // 2
-    pad_end = pad_total - pad_beg
-    inputs = tf.pad(inputs,
-                    [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])
-    return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride,
-                       rate=rate, padding='VALID', scope=scope)
-
-
-@slim.add_arg_scope
-def stack_blocks_dense(net, blocks, output_stride=None,
-                       outputs_collections=None):
-  """Stacks ResNet `Blocks` and controls output feature density.
-
-  First, this function creates scopes for the ResNet in the form of
-  'block_name/unit_1', 'block_name/unit_2', etc.
-
-  Second, this function allows the user to explicitly control the ResNet
-  output_stride, which is the ratio of the input to output spatial resolution.
-  This is useful for dense prediction tasks such as semantic segmentation or
-  object detection.
-
-  Most ResNets consist of 4 ResNet blocks and subsample the activations by a
-  factor of 2 when transitioning between consecutive ResNet blocks. This results
-  in a nominal ResNet output_stride equal to 8. If we set the output_stride to
-  half the nominal network stride (e.g., output_stride=4), then we compute
-  responses twice.
-
-  Control of the output feature density is implemented by atrous convolution.
-
-  Args:
-    net: A `Tensor` of size [batch, height, width, channels].
-    blocks: A list of length equal to the number of ResNet `Blocks`. Each
-      element is a ResNet `Block` object describing the units in the `Block`.
-    output_stride: If `None`, then the output will be computed at the nominal
-      network stride. If output_stride is not `None`, it specifies the requested
-      ratio of input to output spatial resolution, which needs to be equal to
-      the product of unit strides from the start up to some level of the ResNet.
- For example, if the ResNet employs units with strides 1, 2, 1, 3, 4, 1, - then valid values for the output_stride are 1, 2, 6, 24 or None (which - is equivalent to output_stride=24). - outputs_collections: Collection to add the ResNet block outputs. - - Returns: - net: Output tensor with stride equal to the specified output_stride. - - Raises: - ValueError: If the target output_stride is not valid. - """ - # The current_stride variable keeps track of the effective stride of the - # activations. This allows us to invoke atrous convolution whenever applying - # the next residual unit would result in the activations having stride larger - # than the target output_stride. - current_stride = 1 - - # The atrous convolution rate parameter. - rate = 1 - - for block in blocks: - with tf.variable_scope(block.scope, 'block', [net]) as sc: - for i, unit in enumerate(block.args): - if output_stride is not None and current_stride > output_stride: - raise ValueError('The target output_stride cannot be reached.') - - with tf.variable_scope('unit_%d' % (i + 1), values=[net]): - unit_depth, unit_depth_bottleneck, unit_stride = unit - - # If we have reached the target output_stride, then we need to employ - # atrous convolution with stride=1 and multiply the atrous rate by the - # current unit's stride for use in subsequent layers. - if output_stride is not None and current_stride == output_stride: - net = block.unit_fn(net, - depth=unit_depth, - depth_bottleneck=unit_depth_bottleneck, - stride=1, - rate=rate) - rate *= unit_stride - - else: - net = block.unit_fn(net, - depth=unit_depth, - depth_bottleneck=unit_depth_bottleneck, - stride=unit_stride, - rate=1) - current_stride *= unit_stride - net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net) - - if output_stride is not None and current_stride != output_stride: - raise ValueError('The target output_stride cannot be reached.') - - return net - - -def resnet_arg_scope(weight_decay=0.0001, - batch_norm_decay=0.997, - batch_norm_epsilon=1e-5, - batch_norm_scale=True): - """Defines the default ResNet arg scope. - - TODO(gpapan): The batch-normalization related default values above are - appropriate for use in conjunction with the reference ResNet models - released at https://github.com/KaimingHe/deep-residual-networks. When - training ResNets from scratch, they might need to be tuned. - - Args: - weight_decay: The weight decay to use for regularizing the model. - batch_norm_decay: The moving average decay when estimating layer activation - statistics in batch normalization. - batch_norm_epsilon: Small constant to prevent division by zero when - normalizing activations by their variance in batch normalization. - batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the - activations in the batch normalization layer. - - Returns: - An `arg_scope` to use for the resnet models. - """ - batch_norm_params = { - 'decay': batch_norm_decay, - 'epsilon': batch_norm_epsilon, - 'scale': batch_norm_scale, - 'updates_collections': tf.GraphKeys.UPDATE_OPS, - } - - with slim.arg_scope( - [slim.conv2d], - weights_regularizer=slim.l2_regularizer(weight_decay), - weights_initializer=slim.variance_scaling_initializer(), - activation_fn=tf.nn.relu, - normalizer_fn=slim.batch_norm, - normalizer_params=batch_norm_params): - with slim.arg_scope([slim.batch_norm], **batch_norm_params): - # The following implies padding='SAME' for pool1, which makes feature - # alignment easier for dense prediction tasks. 
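The stride and rate bookkeeping that stack_blocks_dense() implements above can be traced with plain integers. An illustrative sketch only; the stride list mimics a nominal-stride-8 network similar to the _resnet_small() used by the tests later in this diff:

# Unit strides of four blocks, each ending in a stride-2 unit except the last.
unit_strides = [1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1]
output_stride = 4

current_stride, rate = 1, 1
for s in unit_strides:
    if current_stride == output_stride:
        # Target density reached: the real code now runs units with
        # stride=1 and folds their nominal stride into the atrous rate.
        rate *= s
    else:
        current_stride *= s

print(current_stride, rate)  # -> 4 2

From the point where the target is reached, activations keep spatial stride 4 and subsequent 3x3 convolutions run with atrous rate 2, preserving the field of view of the nominal-stride network.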
This is also used in - # https://github.com/facebook/fb.resnet.torch. However the accompanying - # code of 'Deep Residual Learning for Image Recognition' uses - # padding='VALID' for pool1. You can switch to that choice by setting - # slim.arg_scope([slim.max_pool2d], padding='VALID'). - with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc: - return arg_sc diff --git a/examples/slim/nets/resnet_v1.py b/examples/slim/nets/resnet_v1.py deleted file mode 100644 index 7e46fd2e..00000000 --- a/examples/slim/nets/resnet_v1.py +++ /dev/null @@ -1,304 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains definitions for the original form of Residual Networks. - -The 'v1' residual networks (ResNets) implemented in this module were proposed -by: -[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun - Deep Residual Learning for Image Recognition. arXiv:1512.03385 - -Other variants were introduced in: -[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun - Identity Mappings in Deep Residual Networks. arXiv: 1603.05027 - -The networks defined in this module utilize the bottleneck building block of -[1] with projection shortcuts only for increasing depths. They employ batch -normalization *after* every weight layer. This is the architecture used by -MSRA in the Imagenet and MSCOCO 2016 competition models ResNet-101 and -ResNet-152. See [2; Fig. 1a] for a comparison between the current 'v1' -architecture and the alternative 'v2' architecture of [2] which uses batch -normalization *before* every weight layer in the so-called full pre-activation -units. - -Typical use: - - from tensorflow.contrib.slim.nets import resnet_v1 - -ResNet-101 for image classification into 1000 classes: - - # inputs has shape [batch, 224, 224, 3] - with slim.arg_scope(resnet_v1.resnet_arg_scope()): - net, end_points = resnet_v1.resnet_v1_101(inputs, 1000, is_training=False) - -ResNet-101 for semantic segmentation into 21 classes: - - # inputs has shape [batch, 513, 513, 3] - with slim.arg_scope(resnet_v1.resnet_arg_scope()): - net, end_points = resnet_v1.resnet_v1_101(inputs, - 21, - is_training=False, - global_pool=False, - output_stride=16) -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from nets import resnet_utils - - -resnet_arg_scope = resnet_utils.resnet_arg_scope -slim = tf.contrib.slim - - -@slim.add_arg_scope -def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1, - outputs_collections=None, scope=None): - """Bottleneck residual unit variant with BN after convolutions. - - This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for - its definition. Note that we use here the bottleneck variant which has an - extra bottleneck layer. 
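As a reading aid for the unit functions, here is how a Block's (depth, depth_bottleneck, stride) tuples decode in practice. The example block below is copied from resnet_v1_50() further down in this same file:

# Three bottleneck units: 1x1 conv down to 64, 3x3 conv at 64, 1x1 conv
# back up to 256. Only the last unit strides, downsampling at the block
# boundary, matching the convention noted in the surrounding docstring.
block1_args = [(256, 64, 1)] * 2 + [(256, 64, 2)]
for depth, depth_bottleneck, stride in block1_args:
    print(depth, depth_bottleneck, stride)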
- - When putting together two consecutive ResNet blocks that use this unit, one - should use stride = 2 in the last unit of the first block. - - Args: - inputs: A tensor of size [batch, height, width, channels]. - depth: The depth of the ResNet unit output. - depth_bottleneck: The depth of the bottleneck layers. - stride: The ResNet unit's stride. Determines the amount of downsampling of - the units output compared to its input. - rate: An integer, rate for atrous convolution. - outputs_collections: Collection to add the ResNet unit output. - scope: Optional variable_scope. - - Returns: - The ResNet unit's output. - """ - with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc: - depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4) - if depth == depth_in: - shortcut = resnet_utils.subsample(inputs, stride, 'shortcut') - else: - shortcut = slim.conv2d(inputs, depth, [1, 1], stride=stride, - activation_fn=None, scope='shortcut') - - residual = slim.conv2d(inputs, depth_bottleneck, [1, 1], stride=1, - scope='conv1') - residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride, - rate=rate, scope='conv2') - residual = slim.conv2d(residual, depth, [1, 1], stride=1, - activation_fn=None, scope='conv3') - - output = tf.nn.relu(shortcut + residual) - - return slim.utils.collect_named_outputs(outputs_collections, - sc.original_name_scope, - output) - - -def resnet_v1(inputs, - blocks, - num_classes=None, - is_training=True, - global_pool=True, - output_stride=None, - include_root_block=True, - spatial_squeeze=True, - reuse=None, - scope=None): - """Generator for v1 ResNet models. - - This function generates a family of ResNet v1 models. See the resnet_v1_*() - methods for specific model instantiations, obtained by selecting different - block instantiations that produce ResNets of various depths. - - Training for image classification on Imagenet is usually done with [224, 224] - inputs, resulting in [7, 7] feature maps at the output of the last ResNet - block for the ResNets defined in [1] that have nominal stride equal to 32. - However, for dense prediction tasks we advise that one uses inputs with - spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In - this case the feature maps at the ResNet output will have spatial shape - [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1] - and corners exactly aligned with the input image corners, which greatly - facilitates alignment of the features to the image. Using as input [225, 225] - images results in [8, 8] feature maps at the output of the last ResNet block. - - For dense prediction tasks, the ResNet needs to run in fully-convolutional - (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all - have nominal stride equal to 32 and a good choice in FCN mode is to use - output_stride=16 in order to increase the density of the computed features at - small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915. - - Args: - inputs: A tensor of size [batch, height_in, width_in, channels]. - blocks: A list of length equal to the number of ResNet blocks. Each element - is a resnet_utils.Block object describing the units in the block. - num_classes: Number of predicted classes for classification tasks. If None - we return the features before the logit layer. - is_training: whether is training or not. - global_pool: If True, we perform global average pooling before computing the - logits. 
Set to True for image classification, False for dense prediction. - output_stride: If None, then the output will be computed at the nominal - network stride. If output_stride is not None, it specifies the requested - ratio of input to output spatial resolution. - include_root_block: If True, include the initial convolution followed by - max-pooling, if False excludes it. - spatial_squeeze: if True, logits is of shape [B, C], if false logits is - of shape [B, 1, 1, C], where B is batch_size and C is number of classes. - reuse: whether or not the network and its variables should be reused. To be - able to reuse 'scope' must be given. - scope: Optional variable_scope. - - Returns: - net: A rank-4 tensor of size [batch, height_out, width_out, channels_out]. - If global_pool is False, then height_out and width_out are reduced by a - factor of output_stride compared to the respective height_in and width_in, - else both height_out and width_out equal one. If num_classes is None, then - net is the output of the last ResNet block, potentially after global - average pooling. If num_classes is not None, net contains the pre-softmax - activations. - end_points: A dictionary from components of the network to the corresponding - activation. - - Raises: - ValueError: If the target output_stride is not valid. - """ - with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc: - end_points_collection = sc.name + '_end_points' - with slim.arg_scope([slim.conv2d, bottleneck, - resnet_utils.stack_blocks_dense], - outputs_collections=end_points_collection): - with slim.arg_scope([slim.batch_norm], is_training=is_training): - net = inputs - if include_root_block: - if output_stride is not None: - if output_stride % 4 != 0: - raise ValueError('The output_stride needs to be a multiple of 4.') - output_stride /= 4 - net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1') - net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1') - net = resnet_utils.stack_blocks_dense(net, blocks, output_stride) - if global_pool: - # Global average pooling. - net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True) - if num_classes is not None: - net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, - normalizer_fn=None, scope='logits') - if spatial_squeeze: - logits = tf.squeeze(net, [1, 2], name='SpatialSqueeze') - # Convert end_points_collection into a dictionary of end_points. - end_points = slim.utils.convert_collection_to_dict(end_points_collection) - if num_classes is not None: - end_points['predictions'] = slim.softmax(logits, scope='predictions') - return logits, end_points -resnet_v1.default_image_size = 224 - - -def resnet_v1_50(inputs, - num_classes=None, - is_training=True, - global_pool=True, - output_stride=None, - reuse=None, - scope='resnet_v1_50'): - """ResNet-50 model of [1]. 
See resnet_v1() for arg and return description.""" - blocks = [ - resnet_utils.Block( - 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), - resnet_utils.Block( - 'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]), - resnet_utils.Block( - 'block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]), - resnet_utils.Block( - 'block4', bottleneck, [(2048, 512, 1)] * 3) - ] - return resnet_v1(inputs, blocks, num_classes, is_training, - global_pool=global_pool, output_stride=output_stride, - include_root_block=True, reuse=reuse, scope=scope) -resnet_v1_50.default_image_size = resnet_v1.default_image_size - - -def resnet_v1_101(inputs, - num_classes=None, - is_training=True, - global_pool=True, - output_stride=None, - reuse=None, - scope='resnet_v1_101'): - """ResNet-101 model of [1]. See resnet_v1() for arg and return description.""" - blocks = [ - resnet_utils.Block( - 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), - resnet_utils.Block( - 'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]), - resnet_utils.Block( - 'block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 2)]), - resnet_utils.Block( - 'block4', bottleneck, [(2048, 512, 1)] * 3) - ] - return resnet_v1(inputs, blocks, num_classes, is_training, - global_pool=global_pool, output_stride=output_stride, - include_root_block=True, reuse=reuse, scope=scope) -resnet_v1_101.default_image_size = resnet_v1.default_image_size - - -def resnet_v1_152(inputs, - num_classes=None, - is_training=True, - global_pool=True, - output_stride=None, - reuse=None, - scope='resnet_v1_152'): - """ResNet-152 model of [1]. See resnet_v1() for arg and return description.""" - blocks = [ - resnet_utils.Block( - 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), - resnet_utils.Block( - 'block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]), - resnet_utils.Block( - 'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]), - resnet_utils.Block( - 'block4', bottleneck, [(2048, 512, 1)] * 3)] - return resnet_v1(inputs, blocks, num_classes, is_training, - global_pool=global_pool, output_stride=output_stride, - include_root_block=True, reuse=reuse, scope=scope) -resnet_v1_152.default_image_size = resnet_v1.default_image_size - - -def resnet_v1_200(inputs, - num_classes=None, - is_training=True, - global_pool=True, - output_stride=None, - reuse=None, - scope='resnet_v1_200'): - """ResNet-200 model of [2]. See resnet_v1() for arg and return description.""" - blocks = [ - resnet_utils.Block( - 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), - resnet_utils.Block( - 'block2', bottleneck, [(512, 128, 1)] * 23 + [(512, 128, 2)]), - resnet_utils.Block( - 'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]), - resnet_utils.Block( - 'block4', bottleneck, [(2048, 512, 1)] * 3)] - return resnet_v1(inputs, blocks, num_classes, is_training, - global_pool=global_pool, output_stride=output_stride, - include_root_block=True, reuse=reuse, scope=scope) -resnet_v1_200.default_image_size = resnet_v1.default_image_size diff --git a/examples/slim/nets/resnet_v1_test.py b/examples/slim/nets/resnet_v1_test.py deleted file mode 100644 index 5c229a51..00000000 --- a/examples/slim/nets/resnet_v1_test.py +++ /dev/null @@ -1,450 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
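The depth suffixes of the factory functions above follow directly from their unit counts: every bottleneck unit contributes three convolutions, and the root conv1 plus the logits layer add two more. A quick sanity check; the dictionary is assembled here purely for illustration:

units_per_block = {
    'resnet_v1_50':  [3, 4, 6, 3],
    'resnet_v1_101': [3, 4, 23, 3],
    'resnet_v1_152': [3, 8, 36, 3],
    'resnet_v1_200': [3, 24, 36, 3],
}
for name, units in units_per_block.items():
    print(name, 3 * sum(units) + 2)  # -> 50, 101, 152, 200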
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for slim.nets.resnet_v1.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf - -from nets import resnet_utils -from nets import resnet_v1 - -slim = tf.contrib.slim - - -def create_test_input(batch_size, height, width, channels): - """Create test input tensor. - - Args: - batch_size: The number of images per batch or `None` if unknown. - height: The height of each image or `None` if unknown. - width: The width of each image or `None` if unknown. - channels: The number of channels per image or `None` if unknown. - - Returns: - Either a placeholder `Tensor` of dimension - [batch_size, height, width, channels] if any of the inputs are `None` or a - constant `Tensor` with the mesh grid values along the spatial dimensions. - """ - if None in [batch_size, height, width, channels]: - return tf.placeholder(tf.float32, (batch_size, height, width, channels)) - else: - return tf.to_float( - np.tile( - np.reshape( - np.reshape(np.arange(height), [height, 1]) + - np.reshape(np.arange(width), [1, width]), - [1, height, width, 1]), - [batch_size, 1, 1, channels])) - - -class ResnetUtilsTest(tf.test.TestCase): - - def testSubsampleThreeByThree(self): - x = tf.reshape(tf.to_float(tf.range(9)), [1, 3, 3, 1]) - x = resnet_utils.subsample(x, 2) - expected = tf.reshape(tf.constant([0, 2, 6, 8]), [1, 2, 2, 1]) - with self.test_session(): - self.assertAllClose(x.eval(), expected.eval()) - - def testSubsampleFourByFour(self): - x = tf.reshape(tf.to_float(tf.range(16)), [1, 4, 4, 1]) - x = resnet_utils.subsample(x, 2) - expected = tf.reshape(tf.constant([0, 2, 8, 10]), [1, 2, 2, 1]) - with self.test_session(): - self.assertAllClose(x.eval(), expected.eval()) - - def testConv2DSameEven(self): - n, n2 = 4, 2 - - # Input image. - x = create_test_input(1, n, n, 1) - - # Convolution kernel. 
- w = create_test_input(1, 3, 3, 1) - w = tf.reshape(w, [3, 3, 1, 1]) - - tf.get_variable('Conv/weights', initializer=w) - tf.get_variable('Conv/biases', initializer=tf.zeros([1])) - tf.get_variable_scope().reuse_variables() - - y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv') - y1_expected = tf.to_float([[14, 28, 43, 26], - [28, 48, 66, 37], - [43, 66, 84, 46], - [26, 37, 46, 22]]) - y1_expected = tf.reshape(y1_expected, [1, n, n, 1]) - - y2 = resnet_utils.subsample(y1, 2) - y2_expected = tf.to_float([[14, 43], - [43, 84]]) - y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1]) - - y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv') - y3_expected = y2_expected - - y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv') - y4_expected = tf.to_float([[48, 37], - [37, 22]]) - y4_expected = tf.reshape(y4_expected, [1, n2, n2, 1]) - - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - self.assertAllClose(y1.eval(), y1_expected.eval()) - self.assertAllClose(y2.eval(), y2_expected.eval()) - self.assertAllClose(y3.eval(), y3_expected.eval()) - self.assertAllClose(y4.eval(), y4_expected.eval()) - - def testConv2DSameOdd(self): - n, n2 = 5, 3 - - # Input image. - x = create_test_input(1, n, n, 1) - - # Convolution kernel. - w = create_test_input(1, 3, 3, 1) - w = tf.reshape(w, [3, 3, 1, 1]) - - tf.get_variable('Conv/weights', initializer=w) - tf.get_variable('Conv/biases', initializer=tf.zeros([1])) - tf.get_variable_scope().reuse_variables() - - y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv') - y1_expected = tf.to_float([[14, 28, 43, 58, 34], - [28, 48, 66, 84, 46], - [43, 66, 84, 102, 55], - [58, 84, 102, 120, 64], - [34, 46, 55, 64, 30]]) - y1_expected = tf.reshape(y1_expected, [1, n, n, 1]) - - y2 = resnet_utils.subsample(y1, 2) - y2_expected = tf.to_float([[14, 43, 34], - [43, 84, 55], - [34, 55, 30]]) - y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1]) - - y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv') - y3_expected = y2_expected - - y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv') - y4_expected = y2_expected - - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - self.assertAllClose(y1.eval(), y1_expected.eval()) - self.assertAllClose(y2.eval(), y2_expected.eval()) - self.assertAllClose(y3.eval(), y3_expected.eval()) - self.assertAllClose(y4.eval(), y4_expected.eval()) - - def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None): - """A plain ResNet without extra layers before or after the ResNet blocks.""" - with tf.variable_scope(scope, values=[inputs]): - with slim.arg_scope([slim.conv2d], outputs_collections='end_points'): - net = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride) - end_points = dict(tf.get_collection('end_points')) - return net, end_points - - def testEndPointsV1(self): - """Test the end points of a tiny v1 bottleneck network.""" - bottleneck = resnet_v1.bottleneck - blocks = [resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]), - resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 1)])] - inputs = create_test_input(2, 32, 16, 3) - with slim.arg_scope(resnet_utils.resnet_arg_scope()): - _, end_points = self._resnet_plain(inputs, blocks, scope='tiny') - expected = [ - 'tiny/block1/unit_1/bottleneck_v1/shortcut', - 'tiny/block1/unit_1/bottleneck_v1/conv1', - 'tiny/block1/unit_1/bottleneck_v1/conv2', - 'tiny/block1/unit_1/bottleneck_v1/conv3', - 'tiny/block1/unit_2/bottleneck_v1/conv1', - 
'tiny/block1/unit_2/bottleneck_v1/conv2', - 'tiny/block1/unit_2/bottleneck_v1/conv3', - 'tiny/block2/unit_1/bottleneck_v1/shortcut', - 'tiny/block2/unit_1/bottleneck_v1/conv1', - 'tiny/block2/unit_1/bottleneck_v1/conv2', - 'tiny/block2/unit_1/bottleneck_v1/conv3', - 'tiny/block2/unit_2/bottleneck_v1/conv1', - 'tiny/block2/unit_2/bottleneck_v1/conv2', - 'tiny/block2/unit_2/bottleneck_v1/conv3'] - self.assertItemsEqual(expected, end_points) - - def _stack_blocks_nondense(self, net, blocks): - """A simplified ResNet Block stacker without output stride control.""" - for block in blocks: - with tf.variable_scope(block.scope, 'block', [net]): - for i, unit in enumerate(block.args): - depth, depth_bottleneck, stride = unit - with tf.variable_scope('unit_%d' % (i + 1), values=[net]): - net = block.unit_fn(net, - depth=depth, - depth_bottleneck=depth_bottleneck, - stride=stride, - rate=1) - return net - - def _atrousValues(self, bottleneck): - """Verify the values of dense feature extraction by atrous convolution. - - Make sure that dense feature extraction by stack_blocks_dense() followed by - subsampling gives identical results to feature extraction at the nominal - network output stride using the simple self._stack_blocks_nondense() above. - - Args: - bottleneck: The bottleneck function. - """ - blocks = [ - resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]), - resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 2)]), - resnet_utils.Block('block3', bottleneck, [(16, 4, 1), (16, 4, 2)]), - resnet_utils.Block('block4', bottleneck, [(32, 8, 1), (32, 8, 1)]) - ] - nominal_stride = 8 - - # Test both odd and even input dimensions. - height = 30 - width = 31 - with slim.arg_scope(resnet_utils.resnet_arg_scope()): - with slim.arg_scope([slim.batch_norm], is_training=False): - for output_stride in [1, 2, 4, 8, None]: - with tf.Graph().as_default(): - with self.test_session() as sess: - tf.set_random_seed(0) - inputs = create_test_input(1, height, width, 3) - # Dense feature extraction followed by subsampling. - output = resnet_utils.stack_blocks_dense(inputs, - blocks, - output_stride) - if output_stride is None: - factor = 1 - else: - factor = nominal_stride // output_stride - - output = resnet_utils.subsample(output, factor) - # Make the two networks use the same weights. - tf.get_variable_scope().reuse_variables() - # Feature extraction at the nominal network rate. 
- expected = self._stack_blocks_nondense(inputs, blocks) - sess.run(tf.global_variables_initializer()) - output, expected = sess.run([output, expected]) - self.assertAllClose(output, expected, atol=1e-4, rtol=1e-4) - - def testAtrousValuesBottleneck(self): - self._atrousValues(resnet_v1.bottleneck) - - -class ResnetCompleteNetworkTest(tf.test.TestCase): - """Tests with complete small ResNet v1 networks.""" - - def _resnet_small(self, - inputs, - num_classes=None, - is_training=True, - global_pool=True, - output_stride=None, - include_root_block=True, - reuse=None, - scope='resnet_v1_small'): - """A shallow and thin ResNet v1 for faster tests.""" - bottleneck = resnet_v1.bottleneck - blocks = [ - resnet_utils.Block( - 'block1', bottleneck, [(4, 1, 1)] * 2 + [(4, 1, 2)]), - resnet_utils.Block( - 'block2', bottleneck, [(8, 2, 1)] * 2 + [(8, 2, 2)]), - resnet_utils.Block( - 'block3', bottleneck, [(16, 4, 1)] * 2 + [(16, 4, 2)]), - resnet_utils.Block( - 'block4', bottleneck, [(32, 8, 1)] * 2)] - return resnet_v1.resnet_v1(inputs, blocks, num_classes, - is_training=is_training, - global_pool=global_pool, - output_stride=output_stride, - include_root_block=include_root_block, - reuse=reuse, - scope=scope) - - def testClassificationEndPoints(self): - global_pool = True - num_classes = 10 - inputs = create_test_input(2, 224, 224, 3) - with slim.arg_scope(resnet_utils.resnet_arg_scope()): - logits, end_points = self._resnet_small(inputs, num_classes, - global_pool=global_pool, - scope='resnet') - self.assertTrue(logits.op.name.startswith('resnet/logits')) - self.assertListEqual(logits.get_shape().as_list(), [2, 1, 1, num_classes]) - self.assertTrue('predictions' in end_points) - self.assertListEqual(end_points['predictions'].get_shape().as_list(), - [2, 1, 1, num_classes]) - - def testClassificationShapes(self): - global_pool = True - num_classes = 10 - inputs = create_test_input(2, 224, 224, 3) - with slim.arg_scope(resnet_utils.resnet_arg_scope()): - _, end_points = self._resnet_small(inputs, num_classes, - global_pool=global_pool, - scope='resnet') - endpoint_to_shape = { - 'resnet/block1': [2, 28, 28, 4], - 'resnet/block2': [2, 14, 14, 8], - 'resnet/block3': [2, 7, 7, 16], - 'resnet/block4': [2, 7, 7, 32]} - for endpoint in endpoint_to_shape: - shape = endpoint_to_shape[endpoint] - self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) - - def testFullyConvolutionalEndpointShapes(self): - global_pool = False - num_classes = 10 - inputs = create_test_input(2, 321, 321, 3) - with slim.arg_scope(resnet_utils.resnet_arg_scope()): - _, end_points = self._resnet_small(inputs, num_classes, - global_pool=global_pool, - scope='resnet') - endpoint_to_shape = { - 'resnet/block1': [2, 41, 41, 4], - 'resnet/block2': [2, 21, 21, 8], - 'resnet/block3': [2, 11, 11, 16], - 'resnet/block4': [2, 11, 11, 32]} - for endpoint in endpoint_to_shape: - shape = endpoint_to_shape[endpoint] - self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) - - def testRootlessFullyConvolutionalEndpointShapes(self): - global_pool = False - num_classes = 10 - inputs = create_test_input(2, 128, 128, 3) - with slim.arg_scope(resnet_utils.resnet_arg_scope()): - _, end_points = self._resnet_small(inputs, num_classes, - global_pool=global_pool, - include_root_block=False, - scope='resnet') - endpoint_to_shape = { - 'resnet/block1': [2, 64, 64, 4], - 'resnet/block2': [2, 32, 32, 8], - 'resnet/block3': [2, 16, 16, 16], - 'resnet/block4': [2, 16, 16, 32]} - for endpoint in endpoint_to_shape: - shape = 
endpoint_to_shape[endpoint] - self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) - - def testAtrousFullyConvolutionalEndpointShapes(self): - global_pool = False - num_classes = 10 - output_stride = 8 - inputs = create_test_input(2, 321, 321, 3) - with slim.arg_scope(resnet_utils.resnet_arg_scope()): - _, end_points = self._resnet_small(inputs, - num_classes, - global_pool=global_pool, - output_stride=output_stride, - scope='resnet') - endpoint_to_shape = { - 'resnet/block1': [2, 41, 41, 4], - 'resnet/block2': [2, 41, 41, 8], - 'resnet/block3': [2, 41, 41, 16], - 'resnet/block4': [2, 41, 41, 32]} - for endpoint in endpoint_to_shape: - shape = endpoint_to_shape[endpoint] - self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) - - def testAtrousFullyConvolutionalValues(self): - """Verify dense feature extraction with atrous convolution.""" - nominal_stride = 32 - for output_stride in [4, 8, 16, 32, None]: - with slim.arg_scope(resnet_utils.resnet_arg_scope()): - with tf.Graph().as_default(): - with self.test_session() as sess: - tf.set_random_seed(0) - inputs = create_test_input(2, 81, 81, 3) - # Dense feature extraction followed by subsampling. - output, _ = self._resnet_small(inputs, None, is_training=False, - global_pool=False, - output_stride=output_stride) - if output_stride is None: - factor = 1 - else: - factor = nominal_stride // output_stride - output = resnet_utils.subsample(output, factor) - # Make the two networks use the same weights. - tf.get_variable_scope().reuse_variables() - # Feature extraction at the nominal network rate. - expected, _ = self._resnet_small(inputs, None, is_training=False, - global_pool=False) - sess.run(tf.global_variables_initializer()) - self.assertAllClose(output.eval(), expected.eval(), - atol=1e-4, rtol=1e-4) - - def testUnknownBatchSize(self): - batch = 2 - height, width = 65, 65 - global_pool = True - num_classes = 10 - inputs = create_test_input(None, height, width, 3) - with slim.arg_scope(resnet_utils.resnet_arg_scope()): - logits, _ = self._resnet_small(inputs, num_classes, - global_pool=global_pool, - scope='resnet') - self.assertTrue(logits.op.name.startswith('resnet/logits')) - self.assertListEqual(logits.get_shape().as_list(), - [None, 1, 1, num_classes]) - images = create_test_input(batch, height, width, 3) - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - output = sess.run(logits, {inputs: images.eval()}) - self.assertEqual(output.shape, (batch, 1, 1, num_classes)) - - def testFullyConvolutionalUnknownHeightWidth(self): - batch = 2 - height, width = 65, 65 - global_pool = False - inputs = create_test_input(batch, None, None, 3) - with slim.arg_scope(resnet_utils.resnet_arg_scope()): - output, _ = self._resnet_small(inputs, None, global_pool=global_pool) - self.assertListEqual(output.get_shape().as_list(), - [batch, None, None, 32]) - images = create_test_input(batch, height, width, 3) - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - output = sess.run(output, {inputs: images.eval()}) - self.assertEqual(output.shape, (batch, 3, 3, 32)) - - def testAtrousFullyConvolutionalUnknownHeightWidth(self): - batch = 2 - height, width = 65, 65 - global_pool = False - output_stride = 8 - inputs = create_test_input(batch, None, None, 3) - with slim.arg_scope(resnet_utils.resnet_arg_scope()): - output, _ = self._resnet_small(inputs, - None, - global_pool=global_pool, - output_stride=output_stride) - self.assertListEqual(output.get_shape().as_list(), 
- [batch, None, None, 32]) - images = create_test_input(batch, height, width, 3) - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - output = sess.run(output, {inputs: images.eval()}) - self.assertEqual(output.shape, (batch, 9, 9, 32)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/examples/slim/nets/resnet_v2.py b/examples/slim/nets/resnet_v2.py deleted file mode 100644 index a05eb3e3..00000000 --- a/examples/slim/nets/resnet_v2.py +++ /dev/null @@ -1,311 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains definitions for the preactivation form of Residual Networks. - -Residual networks (ResNets) were originally proposed in: -[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun - Deep Residual Learning for Image Recognition. arXiv:1512.03385 - -The full preactivation 'v2' ResNet variant implemented in this module was -introduced by: -[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun - Identity Mappings in Deep Residual Networks. arXiv: 1603.05027 - -The key difference of the full preactivation 'v2' variant compared to the -'v1' variant in [1] is the use of batch normalization before every weight layer. -Another difference is that 'v2' ResNets do not include an activation function in -the main pathway. Also see [2; Fig. 4e]. - -Typical use: - - from tensorflow.contrib.slim.nets import resnet_v2 - -ResNet-101 for image classification into 1000 classes: - - # inputs has shape [batch, 224, 224, 3] - with slim.arg_scope(resnet_v2.resnet_arg_scope()): - net, end_points = resnet_v2.resnet_v2_101(inputs, 1000, is_training=False) - -ResNet-101 for semantic segmentation into 21 classes: - - # inputs has shape [batch, 513, 513, 3] - with slim.arg_scope(resnet_v2.resnet_arg_scope(is_training)): - net, end_points = resnet_v2.resnet_v2_101(inputs, - 21, - is_training=False, - global_pool=False, - output_stride=16) -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from nets import resnet_utils - -slim = tf.contrib.slim -resnet_arg_scope = resnet_utils.resnet_arg_scope - - -@slim.add_arg_scope -def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1, - outputs_collections=None, scope=None): - """Bottleneck residual unit variant with BN before convolutions. - - This is the full preactivation residual unit variant proposed in [2]. See - Fig. 1(b) of [2] for its definition. Note that we use here the bottleneck - variant which has an extra bottleneck layer. - - When putting together two consecutive ResNet blocks that use this unit, one - should use stride = 2 in the last unit of the first block. - - Args: - inputs: A tensor of size [batch, height, width, channels]. - depth: The depth of the ResNet unit output. - depth_bottleneck: The depth of the bottleneck layers. - stride: The ResNet unit's stride. 
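The ordering difference between the 'v1' unit earlier in this diff and the 'v2' unit defined here is easier to see side by side. A schematic sketch only: unit_v1, unit_v2, and the conv_branch/shortcut_branch callables are hypothetical stand-ins, not functions from these modules:

import tensorflow as tf

def unit_v1(x, conv_branch, shortcut_branch):
    # resnet_v1.bottleneck: batch norm and ReLU follow each weight layer
    # inside the branch, and the summed output gets one final ReLU.
    return tf.nn.relu(shortcut_branch(x) + conv_branch(x))

def unit_v2(x, conv_branch, shortcut_branch):
    # resnet_v2.bottleneck: BN+ReLU preactivation precedes the weight
    # layers inside the branch, and the sum is left un-activated, so the
    # main pathway carries no nonlinearity between units.
    return shortcut_branch(x) + conv_branch(x)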
Determines the amount of downsampling of - the units output compared to its input. - rate: An integer, rate for atrous convolution. - outputs_collections: Collection to add the ResNet unit output. - scope: Optional variable_scope. - - Returns: - The ResNet unit's output. - """ - with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc: - depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4) - preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact') - if depth == depth_in: - shortcut = resnet_utils.subsample(inputs, stride, 'shortcut') - else: - shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride, - normalizer_fn=None, activation_fn=None, - scope='shortcut') - - residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1, - scope='conv1') - residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride, - rate=rate, scope='conv2') - residual = slim.conv2d(residual, depth, [1, 1], stride=1, - normalizer_fn=None, activation_fn=None, - scope='conv3') - - output = shortcut + residual - - return slim.utils.collect_named_outputs(outputs_collections, - sc.original_name_scope, - output) - - -def resnet_v2(inputs, - blocks, - num_classes=None, - is_training=True, - global_pool=True, - output_stride=None, - include_root_block=True, - spatial_squeeze=True, - reuse=None, - scope=None): - """Generator for v2 (preactivation) ResNet models. - - This function generates a family of ResNet v2 models. See the resnet_v2_*() - methods for specific model instantiations, obtained by selecting different - block instantiations that produce ResNets of various depths. - - Training for image classification on Imagenet is usually done with [224, 224] - inputs, resulting in [7, 7] feature maps at the output of the last ResNet - block for the ResNets defined in [1] that have nominal stride equal to 32. - However, for dense prediction tasks we advise that one uses inputs with - spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In - this case the feature maps at the ResNet output will have spatial shape - [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1] - and corners exactly aligned with the input image corners, which greatly - facilitates alignment of the features to the image. Using as input [225, 225] - images results in [8, 8] feature maps at the output of the last ResNet block. - - For dense prediction tasks, the ResNet needs to run in fully-convolutional - (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all - have nominal stride equal to 32 and a good choice in FCN mode is to use - output_stride=16 in order to increase the density of the computed features at - small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915. - - Args: - inputs: A tensor of size [batch, height_in, width_in, channels]. - blocks: A list of length equal to the number of ResNet blocks. Each element - is a resnet_utils.Block object describing the units in the block. - num_classes: Number of predicted classes for classification tasks. If None - we return the features before the logit layer. - is_training: whether is training or not. - global_pool: If True, we perform global average pooling before computing the - logits. Set to True for image classification, False for dense prediction. - output_stride: If None, then the output will be computed at the nominal - network stride. If output_stride is not None, it specifies the requested - ratio of input to output spatial resolution. 
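The spatial-shape rule quoted in the generator docstring above, [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1] for inputs that are a multiple of 32 plus 1, is the same rule the endpoint-shape tests in this diff assert. A short check with a hypothetical helper:

def out_size(dim, output_stride):
    return (dim - 1) // output_stride + 1

print(out_size(321, 8))   # -> 41, the [2, 41, 41, C] endpoints in the tests
print(out_size(321, 16))  # -> 21
print(out_size(225, 32))  # -> 8, the [8, 8] feature maps mentioned above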
- include_root_block: If True, include the initial convolution followed by - max-pooling, if False excludes it. If excluded, `inputs` should be the - results of an activation-less convolution. - spatial_squeeze: if True, logits is of shape [B, C], if false logits is - of shape [B, 1, 1, C], where B is batch_size and C is number of classes. - reuse: whether or not the network and its variables should be reused. To be - able to reuse 'scope' must be given. - scope: Optional variable_scope. - - - Returns: - net: A rank-4 tensor of size [batch, height_out, width_out, channels_out]. - If global_pool is False, then height_out and width_out are reduced by a - factor of output_stride compared to the respective height_in and width_in, - else both height_out and width_out equal one. If num_classes is None, then - net is the output of the last ResNet block, potentially after global - average pooling. If num_classes is not None, net contains the pre-softmax - activations. - end_points: A dictionary from components of the network to the corresponding - activation. - - Raises: - ValueError: If the target output_stride is not valid. - """ - with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc: - end_points_collection = sc.name + '_end_points' - with slim.arg_scope([slim.conv2d, bottleneck, - resnet_utils.stack_blocks_dense], - outputs_collections=end_points_collection): - with slim.arg_scope([slim.batch_norm], is_training=is_training): - net = inputs - if include_root_block: - if output_stride is not None: - if output_stride % 4 != 0: - raise ValueError('The output_stride needs to be a multiple of 4.') - output_stride /= 4 - # We do not include batch normalization or activation functions in - # conv1 because the first ResNet unit will perform these. Cf. - # Appendix of [2]. - with slim.arg_scope([slim.conv2d], - activation_fn=None, normalizer_fn=None): - net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1') - net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1') - net = resnet_utils.stack_blocks_dense(net, blocks, output_stride) - # This is needed because the pre-activation variant does not have batch - # normalization or activation functions in the residual unit output. See - # Appendix of [2]. - net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm') - if global_pool: - # Global average pooling. - net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True) - if num_classes is not None: - net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, - normalizer_fn=None, scope='logits') - if spatial_squeeze: - logits = tf.squeeze(net, [1, 2], name='SpatialSqueeze') - # Convert end_points_collection into a dictionary of end_points. - end_points = slim.utils.convert_collection_to_dict(end_points_collection) - if num_classes is not None: - end_points['predictions'] = slim.softmax(logits, scope='predictions') - return logits, end_points -resnet_v2.default_image_size = 224 - - -def resnet_v2_50(inputs, - num_classes=None, - is_training=True, - global_pool=True, - output_stride=None, - reuse=None, - scope='resnet_v2_50'): - """ResNet-50 model of [1]. 
See resnet_v2() for arg and return description.""" - blocks = [ - resnet_utils.Block( - 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), - resnet_utils.Block( - 'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]), - resnet_utils.Block( - 'block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 2)]), - resnet_utils.Block( - 'block4', bottleneck, [(2048, 512, 1)] * 3)] - return resnet_v2(inputs, blocks, num_classes, is_training=is_training, - global_pool=global_pool, output_stride=output_stride, - include_root_block=True, reuse=reuse, scope=scope) -resnet_v2_50.default_image_size = resnet_v2.default_image_size - - -def resnet_v2_101(inputs, - num_classes=None, - is_training=True, - global_pool=True, - output_stride=None, - reuse=None, - scope='resnet_v2_101'): - """ResNet-101 model of [1]. See resnet_v2() for arg and return description.""" - blocks = [ - resnet_utils.Block( - 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), - resnet_utils.Block( - 'block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]), - resnet_utils.Block( - 'block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 2)]), - resnet_utils.Block( - 'block4', bottleneck, [(2048, 512, 1)] * 3)] - return resnet_v2(inputs, blocks, num_classes, is_training=is_training, - global_pool=global_pool, output_stride=output_stride, - include_root_block=True, reuse=reuse, scope=scope) -resnet_v2_101.default_image_size = resnet_v2.default_image_size - - -def resnet_v2_152(inputs, - num_classes=None, - is_training=True, - global_pool=True, - output_stride=None, - reuse=None, - scope='resnet_v2_152'): - """ResNet-152 model of [1]. See resnet_v2() for arg and return description.""" - blocks = [ - resnet_utils.Block( - 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), - resnet_utils.Block( - 'block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]), - resnet_utils.Block( - 'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]), - resnet_utils.Block( - 'block4', bottleneck, [(2048, 512, 1)] * 3)] - return resnet_v2(inputs, blocks, num_classes, is_training=is_training, - global_pool=global_pool, output_stride=output_stride, - include_root_block=True, reuse=reuse, scope=scope) -resnet_v2_152.default_image_size = resnet_v2.default_image_size - - -def resnet_v2_200(inputs, - num_classes=None, - is_training=True, - global_pool=True, - output_stride=None, - reuse=None, - scope='resnet_v2_200'): - """ResNet-200 model of [2]. See resnet_v2() for arg and return description.""" - blocks = [ - resnet_utils.Block( - 'block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]), - resnet_utils.Block( - 'block2', bottleneck, [(512, 128, 1)] * 23 + [(512, 128, 2)]), - resnet_utils.Block( - 'block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 2)]), - resnet_utils.Block( - 'block4', bottleneck, [(2048, 512, 1)] * 3)] - return resnet_v2(inputs, blocks, num_classes, is_training=is_training, - global_pool=global_pool, output_stride=output_stride, - include_root_block=True, reuse=reuse, scope=scope) -resnet_v2_200.default_image_size = resnet_v2.default_image_size diff --git a/examples/slim/nets/resnet_v2_test.py b/examples/slim/nets/resnet_v2_test.py deleted file mode 100644 index 141937d1..00000000 --- a/examples/slim/nets/resnet_v2_test.py +++ /dev/null @@ -1,453 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for slim.nets.resnet_v2.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf - -from nets import resnet_utils -from nets import resnet_v2 - -slim = tf.contrib.slim - - -def create_test_input(batch_size, height, width, channels): - """Create test input tensor. - - Args: - batch_size: The number of images per batch or `None` if unknown. - height: The height of each image or `None` if unknown. - width: The width of each image or `None` if unknown. - channels: The number of channels per image or `None` if unknown. - - Returns: - Either a placeholder `Tensor` of dimension - [batch_size, height, width, channels] if any of the inputs are `None` or a - constant `Tensor` with the mesh grid values along the spatial dimensions. - """ - if None in [batch_size, height, width, channels]: - return tf.placeholder(tf.float32, (batch_size, height, width, channels)) - else: - return tf.to_float( - np.tile( - np.reshape( - np.reshape(np.arange(height), [height, 1]) + - np.reshape(np.arange(width), [1, width]), - [1, height, width, 1]), - [batch_size, 1, 1, channels])) - - -class ResnetUtilsTest(tf.test.TestCase): - - def testSubsampleThreeByThree(self): - x = tf.reshape(tf.to_float(tf.range(9)), [1, 3, 3, 1]) - x = resnet_utils.subsample(x, 2) - expected = tf.reshape(tf.constant([0, 2, 6, 8]), [1, 2, 2, 1]) - with self.test_session(): - self.assertAllClose(x.eval(), expected.eval()) - - def testSubsampleFourByFour(self): - x = tf.reshape(tf.to_float(tf.range(16)), [1, 4, 4, 1]) - x = resnet_utils.subsample(x, 2) - expected = tf.reshape(tf.constant([0, 2, 8, 10]), [1, 2, 2, 1]) - with self.test_session(): - self.assertAllClose(x.eval(), expected.eval()) - - def testConv2DSameEven(self): - n, n2 = 4, 2 - - # Input image. - x = create_test_input(1, n, n, 1) - - # Convolution kernel. 
- w = create_test_input(1, 3, 3, 1) - w = tf.reshape(w, [3, 3, 1, 1]) - - tf.get_variable('Conv/weights', initializer=w) - tf.get_variable('Conv/biases', initializer=tf.zeros([1])) - tf.get_variable_scope().reuse_variables() - - y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv') - y1_expected = tf.to_float([[14, 28, 43, 26], - [28, 48, 66, 37], - [43, 66, 84, 46], - [26, 37, 46, 22]]) - y1_expected = tf.reshape(y1_expected, [1, n, n, 1]) - - y2 = resnet_utils.subsample(y1, 2) - y2_expected = tf.to_float([[14, 43], - [43, 84]]) - y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1]) - - y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv') - y3_expected = y2_expected - - y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv') - y4_expected = tf.to_float([[48, 37], - [37, 22]]) - y4_expected = tf.reshape(y4_expected, [1, n2, n2, 1]) - - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - self.assertAllClose(y1.eval(), y1_expected.eval()) - self.assertAllClose(y2.eval(), y2_expected.eval()) - self.assertAllClose(y3.eval(), y3_expected.eval()) - self.assertAllClose(y4.eval(), y4_expected.eval()) - - def testConv2DSameOdd(self): - n, n2 = 5, 3 - - # Input image. - x = create_test_input(1, n, n, 1) - - # Convolution kernel. - w = create_test_input(1, 3, 3, 1) - w = tf.reshape(w, [3, 3, 1, 1]) - - tf.get_variable('Conv/weights', initializer=w) - tf.get_variable('Conv/biases', initializer=tf.zeros([1])) - tf.get_variable_scope().reuse_variables() - - y1 = slim.conv2d(x, 1, [3, 3], stride=1, scope='Conv') - y1_expected = tf.to_float([[14, 28, 43, 58, 34], - [28, 48, 66, 84, 46], - [43, 66, 84, 102, 55], - [58, 84, 102, 120, 64], - [34, 46, 55, 64, 30]]) - y1_expected = tf.reshape(y1_expected, [1, n, n, 1]) - - y2 = resnet_utils.subsample(y1, 2) - y2_expected = tf.to_float([[14, 43, 34], - [43, 84, 55], - [34, 55, 30]]) - y2_expected = tf.reshape(y2_expected, [1, n2, n2, 1]) - - y3 = resnet_utils.conv2d_same(x, 1, 3, stride=2, scope='Conv') - y3_expected = y2_expected - - y4 = slim.conv2d(x, 1, [3, 3], stride=2, scope='Conv') - y4_expected = y2_expected - - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - self.assertAllClose(y1.eval(), y1_expected.eval()) - self.assertAllClose(y2.eval(), y2_expected.eval()) - self.assertAllClose(y3.eval(), y3_expected.eval()) - self.assertAllClose(y4.eval(), y4_expected.eval()) - - def _resnet_plain(self, inputs, blocks, output_stride=None, scope=None): - """A plain ResNet without extra layers before or after the ResNet blocks.""" - with tf.variable_scope(scope, values=[inputs]): - with slim.arg_scope([slim.conv2d], outputs_collections='end_points'): - net = resnet_utils.stack_blocks_dense(inputs, blocks, output_stride) - end_points = dict(tf.get_collection('end_points')) - return net, end_points - - def testEndPointsV2(self): - """Test the end points of a tiny v2 bottleneck network.""" - bottleneck = resnet_v2.bottleneck - blocks = [resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]), - resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 1)])] - inputs = create_test_input(2, 32, 16, 3) - with slim.arg_scope(resnet_utils.resnet_arg_scope()): - _, end_points = self._resnet_plain(inputs, blocks, scope='tiny') - expected = [ - 'tiny/block1/unit_1/bottleneck_v2/shortcut', - 'tiny/block1/unit_1/bottleneck_v2/conv1', - 'tiny/block1/unit_1/bottleneck_v2/conv2', - 'tiny/block1/unit_1/bottleneck_v2/conv3', - 'tiny/block1/unit_2/bottleneck_v2/conv1', - 
'tiny/block1/unit_2/bottleneck_v2/conv2', - 'tiny/block1/unit_2/bottleneck_v2/conv3', - 'tiny/block2/unit_1/bottleneck_v2/shortcut', - 'tiny/block2/unit_1/bottleneck_v2/conv1', - 'tiny/block2/unit_1/bottleneck_v2/conv2', - 'tiny/block2/unit_1/bottleneck_v2/conv3', - 'tiny/block2/unit_2/bottleneck_v2/conv1', - 'tiny/block2/unit_2/bottleneck_v2/conv2', - 'tiny/block2/unit_2/bottleneck_v2/conv3'] - self.assertItemsEqual(expected, end_points) - - def _stack_blocks_nondense(self, net, blocks): - """A simplified ResNet Block stacker without output stride control.""" - for block in blocks: - with tf.variable_scope(block.scope, 'block', [net]): - for i, unit in enumerate(block.args): - depth, depth_bottleneck, stride = unit - with tf.variable_scope('unit_%d' % (i + 1), values=[net]): - net = block.unit_fn(net, - depth=depth, - depth_bottleneck=depth_bottleneck, - stride=stride, - rate=1) - return net - - def _atrousValues(self, bottleneck): - """Verify the values of dense feature extraction by atrous convolution. - - Make sure that dense feature extraction by stack_blocks_dense() followed by - subsampling gives identical results to feature extraction at the nominal - network output stride using the simple self._stack_blocks_nondense() above. - - Args: - bottleneck: The bottleneck function. - """ - blocks = [ - resnet_utils.Block('block1', bottleneck, [(4, 1, 1), (4, 1, 2)]), - resnet_utils.Block('block2', bottleneck, [(8, 2, 1), (8, 2, 2)]), - resnet_utils.Block('block3', bottleneck, [(16, 4, 1), (16, 4, 2)]), - resnet_utils.Block('block4', bottleneck, [(32, 8, 1), (32, 8, 1)]) - ] - nominal_stride = 8 - - # Test both odd and even input dimensions. - height = 30 - width = 31 - with slim.arg_scope(resnet_utils.resnet_arg_scope()): - with slim.arg_scope([slim.batch_norm], is_training=False): - for output_stride in [1, 2, 4, 8, None]: - with tf.Graph().as_default(): - with self.test_session() as sess: - tf.set_random_seed(0) - inputs = create_test_input(1, height, width, 3) - # Dense feature extraction followed by subsampling. - output = resnet_utils.stack_blocks_dense(inputs, - blocks, - output_stride) - if output_stride is None: - factor = 1 - else: - factor = nominal_stride // output_stride - - output = resnet_utils.subsample(output, factor) - # Make the two networks use the same weights. - tf.get_variable_scope().reuse_variables() - # Feature extraction at the nominal network rate. 
- expected = self._stack_blocks_nondense(inputs, blocks) - sess.run(tf.global_variables_initializer()) - output, expected = sess.run([output, expected]) - self.assertAllClose(output, expected, atol=1e-4, rtol=1e-4) - - def testAtrousValuesBottleneck(self): - self._atrousValues(resnet_v2.bottleneck) - - -class ResnetCompleteNetworkTest(tf.test.TestCase): - """Tests with complete small ResNet v2 networks.""" - - def _resnet_small(self, - inputs, - num_classes=None, - is_training=True, - global_pool=True, - output_stride=None, - include_root_block=True, - reuse=None, - scope='resnet_v2_small'): - """A shallow and thin ResNet v2 for faster tests.""" - bottleneck = resnet_v2.bottleneck - blocks = [ - resnet_utils.Block( - 'block1', bottleneck, [(4, 1, 1)] * 2 + [(4, 1, 2)]), - resnet_utils.Block( - 'block2', bottleneck, [(8, 2, 1)] * 2 + [(8, 2, 2)]), - resnet_utils.Block( - 'block3', bottleneck, [(16, 4, 1)] * 2 + [(16, 4, 2)]), - resnet_utils.Block( - 'block4', bottleneck, [(32, 8, 1)] * 2)] - return resnet_v2.resnet_v2(inputs, blocks, num_classes, - is_training=is_training, - global_pool=global_pool, - output_stride=output_stride, - include_root_block=include_root_block, - reuse=reuse, - scope=scope) - - def testClassificationEndPoints(self): - global_pool = True - num_classes = 10 - inputs = create_test_input(2, 224, 224, 3) - with slim.arg_scope(resnet_utils.resnet_arg_scope()): - logits, end_points = self._resnet_small(inputs, num_classes, - global_pool=global_pool, - scope='resnet') - self.assertTrue(logits.op.name.startswith('resnet/logits')) - self.assertListEqual(logits.get_shape().as_list(), [2, 1, 1, num_classes]) - self.assertTrue('predictions' in end_points) - self.assertListEqual(end_points['predictions'].get_shape().as_list(), - [2, 1, 1, num_classes]) - - def testClassificationShapes(self): - global_pool = True - num_classes = 10 - inputs = create_test_input(2, 224, 224, 3) - with slim.arg_scope(resnet_utils.resnet_arg_scope()): - _, end_points = self._resnet_small(inputs, num_classes, - global_pool=global_pool, - scope='resnet') - endpoint_to_shape = { - 'resnet/block1': [2, 28, 28, 4], - 'resnet/block2': [2, 14, 14, 8], - 'resnet/block3': [2, 7, 7, 16], - 'resnet/block4': [2, 7, 7, 32]} - for endpoint in endpoint_to_shape: - shape = endpoint_to_shape[endpoint] - self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) - - def testFullyConvolutionalEndpointShapes(self): - global_pool = False - num_classes = 10 - inputs = create_test_input(2, 321, 321, 3) - with slim.arg_scope(resnet_utils.resnet_arg_scope()): - _, end_points = self._resnet_small(inputs, num_classes, - global_pool=global_pool, - scope='resnet') - endpoint_to_shape = { - 'resnet/block1': [2, 41, 41, 4], - 'resnet/block2': [2, 21, 21, 8], - 'resnet/block3': [2, 11, 11, 16], - 'resnet/block4': [2, 11, 11, 32]} - for endpoint in endpoint_to_shape: - shape = endpoint_to_shape[endpoint] - self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) - - def testRootlessFullyConvolutionalEndpointShapes(self): - global_pool = False - num_classes = 10 - inputs = create_test_input(2, 128, 128, 3) - with slim.arg_scope(resnet_utils.resnet_arg_scope()): - _, end_points = self._resnet_small(inputs, num_classes, - global_pool=global_pool, - include_root_block=False, - scope='resnet') - endpoint_to_shape = { - 'resnet/block1': [2, 64, 64, 4], - 'resnet/block2': [2, 32, 32, 8], - 'resnet/block3': [2, 16, 16, 16], - 'resnet/block4': [2, 16, 16, 32]} - for endpoint in endpoint_to_shape: - shape = 
endpoint_to_shape[endpoint] - self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) - - def testAtrousFullyConvolutionalEndpointShapes(self): - global_pool = False - num_classes = 10 - output_stride = 8 - inputs = create_test_input(2, 321, 321, 3) - with slim.arg_scope(resnet_utils.resnet_arg_scope()): - _, end_points = self._resnet_small(inputs, - num_classes, - global_pool=global_pool, - output_stride=output_stride, - scope='resnet') - endpoint_to_shape = { - 'resnet/block1': [2, 41, 41, 4], - 'resnet/block2': [2, 41, 41, 8], - 'resnet/block3': [2, 41, 41, 16], - 'resnet/block4': [2, 41, 41, 32]} - for endpoint in endpoint_to_shape: - shape = endpoint_to_shape[endpoint] - self.assertListEqual(end_points[endpoint].get_shape().as_list(), shape) - - def testAtrousFullyConvolutionalValues(self): - """Verify dense feature extraction with atrous convolution.""" - nominal_stride = 32 - for output_stride in [4, 8, 16, 32, None]: - with slim.arg_scope(resnet_utils.resnet_arg_scope()): - with tf.Graph().as_default(): - with self.test_session() as sess: - tf.set_random_seed(0) - inputs = create_test_input(2, 81, 81, 3) - # Dense feature extraction followed by subsampling. - output, _ = self._resnet_small(inputs, None, - is_training=False, - global_pool=False, - output_stride=output_stride) - if output_stride is None: - factor = 1 - else: - factor = nominal_stride // output_stride - output = resnet_utils.subsample(output, factor) - # Make the two networks use the same weights. - tf.get_variable_scope().reuse_variables() - # Feature extraction at the nominal network rate. - expected, _ = self._resnet_small(inputs, None, - is_training=False, - global_pool=False) - sess.run(tf.global_variables_initializer()) - self.assertAllClose(output.eval(), expected.eval(), - atol=1e-4, rtol=1e-4) - - def testUnknownBatchSize(self): - batch = 2 - height, width = 65, 65 - global_pool = True - num_classes = 10 - inputs = create_test_input(None, height, width, 3) - with slim.arg_scope(resnet_utils.resnet_arg_scope()): - logits, _ = self._resnet_small(inputs, num_classes, - global_pool=global_pool, - scope='resnet') - self.assertTrue(logits.op.name.startswith('resnet/logits')) - self.assertListEqual(logits.get_shape().as_list(), - [None, 1, 1, num_classes]) - images = create_test_input(batch, height, width, 3) - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - output = sess.run(logits, {inputs: images.eval()}) - self.assertEqual(output.shape, (batch, 1, 1, num_classes)) - - def testFullyConvolutionalUnknownHeightWidth(self): - batch = 2 - height, width = 65, 65 - global_pool = False - inputs = create_test_input(batch, None, None, 3) - with slim.arg_scope(resnet_utils.resnet_arg_scope()): - output, _ = self._resnet_small(inputs, None, - global_pool=global_pool) - self.assertListEqual(output.get_shape().as_list(), - [batch, None, None, 32]) - images = create_test_input(batch, height, width, 3) - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - output = sess.run(output, {inputs: images.eval()}) - self.assertEqual(output.shape, (batch, 3, 3, 32)) - - def testAtrousFullyConvolutionalUnknownHeightWidth(self): - batch = 2 - height, width = 65, 65 - global_pool = False - output_stride = 8 - inputs = create_test_input(batch, None, None, 3) - with slim.arg_scope(resnet_utils.resnet_arg_scope()): - output, _ = self._resnet_small(inputs, - None, - global_pool=global_pool, - output_stride=output_stride) - 
self.assertListEqual(output.get_shape().as_list(), - [batch, None, None, 32]) - images = create_test_input(batch, height, width, 3) - with self.test_session() as sess: - sess.run(tf.global_variables_initializer()) - output = sess.run(output, {inputs: images.eval()}) - self.assertEqual(output.shape, (batch, 9, 9, 32)) - - -if __name__ == '__main__': - tf.test.main() diff --git a/examples/slim/nets/vgg.py b/examples/slim/nets/vgg.py deleted file mode 100644 index 79680702..00000000 --- a/examples/slim/nets/vgg.py +++ /dev/null @@ -1,262 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains model definitions for versions of the Oxford VGG network. - -These model definitions were introduced in the following technical report: - - Very Deep Convolutional Networks For Large-Scale Image Recognition - Karen Simonyan and Andrew Zisserman - arXiv technical report, 2015 - PDF: http://arxiv.org/pdf/1409.1556.pdf - ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf - CC-BY-4.0 - -More information can be obtained from the VGG website: -www.robots.ox.ac.uk/~vgg/research/very_deep/ - -Usage: - with slim.arg_scope(vgg.vgg_arg_scope()): - outputs, end_points = vgg.vgg_a(inputs) - - with slim.arg_scope(vgg.vgg_arg_scope()): - outputs, end_points = vgg.vgg_16(inputs) - -@@vgg_a -@@vgg_16 -@@vgg_19 -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -slim = tf.contrib.slim - - -def vgg_arg_scope(weight_decay=0.0005): - """Defines the VGG arg scope. - - Args: - weight_decay: The l2 regularization coefficient. - - Returns: - An arg_scope. - """ - with slim.arg_scope([slim.conv2d, slim.fully_connected], - activation_fn=tf.nn.relu, - weights_regularizer=slim.l2_regularizer(weight_decay), - biases_initializer=tf.zeros_initializer()): - with slim.arg_scope([slim.conv2d], padding='SAME') as arg_sc: - return arg_sc - - -def vgg_a(inputs, - num_classes=1000, - is_training=True, - dropout_keep_prob=0.5, - spatial_squeeze=True, - scope='vgg_a', - fc_conv_padding='VALID'): - """Oxford Net VGG 11-Layers version A Example. - - Note: All the fully_connected layers have been transformed to conv2d layers. - To use in classification mode, resize input to 224x224. - - Args: - inputs: a tensor of size [batch_size, height, width, channels]. - num_classes: number of predicted classes. - is_training: whether or not the model is being trained. - dropout_keep_prob: the probability that activations are kept in the dropout - layers during training. - spatial_squeeze: whether or not the spatial dimensions of the - outputs should be squeezed. Useful to remove unnecessary dimensions for classification. - scope: Optional scope for the variables. - fc_conv_padding: the type of padding to use for the fully connected layer - that is implemented as a convolutional layer.
Use 'SAME' padding if you - are applying the network in a fully convolutional manner and want to - get a prediction map downsampled by a factor of 32 as an output. Otherwise, - the output prediction map will be (input / 32) - 6 in case of 'VALID' padding. - - Returns: - the last op containing the log predictions and end_points dict. - """ - with tf.variable_scope(scope, 'vgg_a', [inputs]) as sc: - end_points_collection = sc.name + '_end_points' - # Collect outputs for conv2d, fully_connected and max_pool2d. - with slim.arg_scope([slim.conv2d, slim.max_pool2d], - outputs_collections=end_points_collection): - net = slim.repeat(inputs, 1, slim.conv2d, 64, [3, 3], scope='conv1') - net = slim.max_pool2d(net, [2, 2], scope='pool1') - net = slim.repeat(net, 1, slim.conv2d, 128, [3, 3], scope='conv2') - net = slim.max_pool2d(net, [2, 2], scope='pool2') - net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3], scope='conv3') - net = slim.max_pool2d(net, [2, 2], scope='pool3') - net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv4') - net = slim.max_pool2d(net, [2, 2], scope='pool4') - net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5') - net = slim.max_pool2d(net, [2, 2], scope='pool5') - # Use conv2d instead of fully_connected layers. - net = slim.conv2d(net, 4096, [7, 7], padding=fc_conv_padding, scope='fc6') - net = slim.dropout(net, dropout_keep_prob, is_training=is_training, - scope='dropout6') - net = slim.conv2d(net, 4096, [1, 1], scope='fc7') - net = slim.dropout(net, dropout_keep_prob, is_training=is_training, - scope='dropout7') - net = slim.conv2d(net, num_classes, [1, 1], - activation_fn=None, - normalizer_fn=None, - scope='fc8') - # Convert end_points_collection into an end_point dict. - end_points = slim.utils.convert_collection_to_dict(end_points_collection) - if spatial_squeeze: - net = tf.squeeze(net, [1, 2], name='fc8/squeezed') - end_points[sc.name + '/fc8'] = net - return net, end_points -vgg_a.default_image_size = 224 - - -def vgg_16(inputs, - num_classes=1000, - is_training=True, - dropout_keep_prob=0.5, - spatial_squeeze=True, - scope='vgg_16', - fc_conv_padding='VALID'): - """Oxford Net VGG 16-Layers version D Example. - - Note: All the fully_connected layers have been transformed to conv2d layers. - To use in classification mode, resize input to 224x224. - - Args: - inputs: a tensor of size [batch_size, height, width, channels]. - num_classes: number of predicted classes. - is_training: whether or not the model is being trained. - dropout_keep_prob: the probability that activations are kept in the dropout - layers during training. - spatial_squeeze: whether or not the spatial dimensions of the - outputs should be squeezed. Useful to remove unnecessary dimensions for classification. - scope: Optional scope for the variables. - fc_conv_padding: the type of padding to use for the fully connected layer - that is implemented as a convolutional layer. Use 'SAME' padding if you - are applying the network in a fully convolutional manner and want to - get a prediction map downsampled by a factor of 32 as an output. Otherwise, - the output prediction map will be (input / 32) - 6 in case of 'VALID' padding. - - Returns: - the last op containing the log predictions and end_points dict. - """ - with tf.variable_scope(scope, 'vgg_16', [inputs]) as sc: - end_points_collection = sc.name + '_end_points' - # Collect outputs for conv2d, fully_connected and max_pool2d.
- with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], - outputs_collections=end_points_collection): - net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') - net = slim.max_pool2d(net, [2, 2], scope='pool1') - net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2') - net = slim.max_pool2d(net, [2, 2], scope='pool2') - net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3') - net = slim.max_pool2d(net, [2, 2], scope='pool3') - net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4') - net = slim.max_pool2d(net, [2, 2], scope='pool4') - net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5') - net = slim.max_pool2d(net, [2, 2], scope='pool5') - # Use conv2d instead of fully_connected layers. - net = slim.conv2d(net, 4096, [7, 7], padding=fc_conv_padding, scope='fc6') - net = slim.dropout(net, dropout_keep_prob, is_training=is_training, - scope='dropout6') - net = slim.conv2d(net, 4096, [1, 1], scope='fc7') - net = slim.dropout(net, dropout_keep_prob, is_training=is_training, - scope='dropout7') - net = slim.conv2d(net, num_classes, [1, 1], - activation_fn=None, - normalizer_fn=None, - scope='fc8') - # Convert end_points_collection into an end_point dict. - end_points = slim.utils.convert_collection_to_dict(end_points_collection) - if spatial_squeeze: - net = tf.squeeze(net, [1, 2], name='fc8/squeezed') - end_points[sc.name + '/fc8'] = net - return net, end_points -vgg_16.default_image_size = 224 - - -def vgg_19(inputs, - num_classes=1000, - is_training=True, - dropout_keep_prob=0.5, - spatial_squeeze=True, - scope='vgg_19', - fc_conv_padding='VALID'): - """Oxford Net VGG 19-Layers version E Example. - - Note: All the fully_connected layers have been transformed to conv2d layers. - To use in classification mode, resize input to 224x224. - - Args: - inputs: a tensor of size [batch_size, height, width, channels]. - num_classes: number of predicted classes. - is_training: whether or not the model is being trained. - dropout_keep_prob: the probability that activations are kept in the dropout - layers during training. - spatial_squeeze: whether or not the spatial dimensions of the - outputs should be squeezed. Useful to remove unnecessary dimensions for classification. - scope: Optional scope for the variables. - fc_conv_padding: the type of padding to use for the fully connected layer - that is implemented as a convolutional layer. Use 'SAME' padding if you - are applying the network in a fully convolutional manner and want to - get a prediction map downsampled by a factor of 32 as an output. Otherwise, - the output prediction map will be (input / 32) - 6 in case of 'VALID' padding. - - Returns: - the last op containing the log predictions and end_points dict. - """ - with tf.variable_scope(scope, 'vgg_19', [inputs]) as sc: - end_points_collection = sc.name + '_end_points' - # Collect outputs for conv2d, fully_connected and max_pool2d.
- with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d], - outputs_collections=end_points_collection): - net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1') - net = slim.max_pool2d(net, [2, 2], scope='pool1') - net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2') - net = slim.max_pool2d(net, [2, 2], scope='pool2') - net = slim.repeat(net, 4, slim.conv2d, 256, [3, 3], scope='conv3') - net = slim.max_pool2d(net, [2, 2], scope='pool3') - net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv4') - net = slim.max_pool2d(net, [2, 2], scope='pool4') - net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv5') - net = slim.max_pool2d(net, [2, 2], scope='pool5') - # Use conv2d instead of fully_connected layers. - net = slim.conv2d(net, 4096, [7, 7], padding=fc_conv_padding, scope='fc6') - net = slim.dropout(net, dropout_keep_prob, is_training=is_training, - scope='dropout6') - net = slim.conv2d(net, 4096, [1, 1], scope='fc7') - net = slim.dropout(net, dropout_keep_prob, is_training=is_training, - scope='dropout7') - net = slim.conv2d(net, num_classes, [1, 1], - activation_fn=None, - normalizer_fn=None, - scope='fc8') - # Convert end_points_collection into an end_point dict. - end_points = slim.utils.convert_collection_to_dict(end_points_collection) - if spatial_squeeze: - net = tf.squeeze(net, [1, 2], name='fc8/squeezed') - end_points[sc.name + '/fc8'] = net - return net, end_points -vgg_19.default_image_size = 224 - -# Alias -vgg_d = vgg_16 -vgg_e = vgg_19 diff --git a/examples/slim/nets/vgg_test.py b/examples/slim/nets/vgg_test.py deleted file mode 100644 index e4ff8def..00000000 --- a/examples/slim/nets/vgg_test.py +++ /dev/null @@ -1,455 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.
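For orientation before the tests below, a minimal usage sketch of the slim-style VGG definitions removed above; this assumes TF 1.x with tf.contrib.slim available, and the placeholder shape and class count are illustrative:

import tensorflow as tf
from nets import vgg

slim = tf.contrib.slim

images = tf.placeholder(tf.float32, [None, 224, 224, 3])  # vgg_16.default_image_size
with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=0.0005)):
    logits, end_points = vgg.vgg_16(images, num_classes=1000, is_training=False)
# With spatial_squeeze=True (the default), logits has shape [batch, 1000];
# end_points maps scope names such as 'vgg_16/conv1/conv1_1' to activations.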
-# ============================================================================== -"""Tests for slim.nets.vgg.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from nets import vgg - -slim = tf.contrib.slim - - -class VGGATest(tf.test.TestCase): - - def testBuild(self): - batch_size = 5 - height, width = 224, 224 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = vgg.vgg_a(inputs, num_classes) - self.assertEquals(logits.op.name, 'vgg_a/fc8/squeezed') - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - - def testFullyConvolutional(self): - batch_size = 1 - height, width = 256, 256 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = vgg.vgg_a(inputs, num_classes, spatial_squeeze=False) - self.assertEquals(logits.op.name, 'vgg_a/fc8/BiasAdd') - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, 2, 2, num_classes]) - - def testEndPoints(self): - batch_size = 5 - height, width = 224, 224 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - _, end_points = vgg.vgg_a(inputs, num_classes) - expected_names = ['vgg_a/conv1/conv1_1', - 'vgg_a/pool1', - 'vgg_a/conv2/conv2_1', - 'vgg_a/pool2', - 'vgg_a/conv3/conv3_1', - 'vgg_a/conv3/conv3_2', - 'vgg_a/pool3', - 'vgg_a/conv4/conv4_1', - 'vgg_a/conv4/conv4_2', - 'vgg_a/pool4', - 'vgg_a/conv5/conv5_1', - 'vgg_a/conv5/conv5_2', - 'vgg_a/pool5', - 'vgg_a/fc6', - 'vgg_a/fc7', - 'vgg_a/fc8' - ] - self.assertSetEqual(set(end_points.keys()), set(expected_names)) - - def testModelVariables(self): - batch_size = 5 - height, width = 224, 224 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - vgg.vgg_a(inputs, num_classes) - expected_names = ['vgg_a/conv1/conv1_1/weights', - 'vgg_a/conv1/conv1_1/biases', - 'vgg_a/conv2/conv2_1/weights', - 'vgg_a/conv2/conv2_1/biases', - 'vgg_a/conv3/conv3_1/weights', - 'vgg_a/conv3/conv3_1/biases', - 'vgg_a/conv3/conv3_2/weights', - 'vgg_a/conv3/conv3_2/biases', - 'vgg_a/conv4/conv4_1/weights', - 'vgg_a/conv4/conv4_1/biases', - 'vgg_a/conv4/conv4_2/weights', - 'vgg_a/conv4/conv4_2/biases', - 'vgg_a/conv5/conv5_1/weights', - 'vgg_a/conv5/conv5_1/biases', - 'vgg_a/conv5/conv5_2/weights', - 'vgg_a/conv5/conv5_2/biases', - 'vgg_a/fc6/weights', - 'vgg_a/fc6/biases', - 'vgg_a/fc7/weights', - 'vgg_a/fc7/biases', - 'vgg_a/fc8/weights', - 'vgg_a/fc8/biases', - ] - model_variables = [v.op.name for v in slim.get_model_variables()] - self.assertSetEqual(set(model_variables), set(expected_names)) - - def testEvaluation(self): - batch_size = 2 - height, width = 224, 224 - num_classes = 1000 - with self.test_session(): - eval_inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = vgg.vgg_a(eval_inputs, is_training=False) - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - predictions = tf.argmax(logits, 1) - self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) - - def testTrainEvalWithReuse(self): - train_batch_size = 2 - eval_batch_size = 1 - train_height, train_width = 224, 224 - eval_height, eval_width = 256, 256 - num_classes = 1000 - with self.test_session(): - train_inputs = tf.random_uniform( - (train_batch_size, train_height, train_width, 3)) - logits, _ = 
vgg.vgg_a(train_inputs) - self.assertListEqual(logits.get_shape().as_list(), - [train_batch_size, num_classes]) - tf.get_variable_scope().reuse_variables() - eval_inputs = tf.random_uniform( - (eval_batch_size, eval_height, eval_width, 3)) - logits, _ = vgg.vgg_a(eval_inputs, is_training=False, - spatial_squeeze=False) - self.assertListEqual(logits.get_shape().as_list(), - [eval_batch_size, 2, 2, num_classes]) - logits = tf.reduce_mean(logits, [1, 2]) - predictions = tf.argmax(logits, 1) - self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) - - def testForward(self): - batch_size = 1 - height, width = 224, 224 - with self.test_session() as sess: - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = vgg.vgg_a(inputs) - sess.run(tf.global_variables_initializer()) - output = sess.run(logits) - self.assertTrue(output.any()) - - -class VGG16Test(tf.test.TestCase): - - def testBuild(self): - batch_size = 5 - height, width = 224, 224 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = vgg.vgg_16(inputs, num_classes) - self.assertEquals(logits.op.name, 'vgg_16/fc8/squeezed') - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - - def testFullyConvolutional(self): - batch_size = 1 - height, width = 256, 256 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = vgg.vgg_16(inputs, num_classes, spatial_squeeze=False) - self.assertEquals(logits.op.name, 'vgg_16/fc8/BiasAdd') - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, 2, 2, num_classes]) - - def testEndPoints(self): - batch_size = 5 - height, width = 224, 224 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - _, end_points = vgg.vgg_16(inputs, num_classes) - expected_names = ['vgg_16/conv1/conv1_1', - 'vgg_16/conv1/conv1_2', - 'vgg_16/pool1', - 'vgg_16/conv2/conv2_1', - 'vgg_16/conv2/conv2_2', - 'vgg_16/pool2', - 'vgg_16/conv3/conv3_1', - 'vgg_16/conv3/conv3_2', - 'vgg_16/conv3/conv3_3', - 'vgg_16/pool3', - 'vgg_16/conv4/conv4_1', - 'vgg_16/conv4/conv4_2', - 'vgg_16/conv4/conv4_3', - 'vgg_16/pool4', - 'vgg_16/conv5/conv5_1', - 'vgg_16/conv5/conv5_2', - 'vgg_16/conv5/conv5_3', - 'vgg_16/pool5', - 'vgg_16/fc6', - 'vgg_16/fc7', - 'vgg_16/fc8' - ] - self.assertSetEqual(set(end_points.keys()), set(expected_names)) - - def testModelVariables(self): - batch_size = 5 - height, width = 224, 224 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - vgg.vgg_16(inputs, num_classes) - expected_names = ['vgg_16/conv1/conv1_1/weights', - 'vgg_16/conv1/conv1_1/biases', - 'vgg_16/conv1/conv1_2/weights', - 'vgg_16/conv1/conv1_2/biases', - 'vgg_16/conv2/conv2_1/weights', - 'vgg_16/conv2/conv2_1/biases', - 'vgg_16/conv2/conv2_2/weights', - 'vgg_16/conv2/conv2_2/biases', - 'vgg_16/conv3/conv3_1/weights', - 'vgg_16/conv3/conv3_1/biases', - 'vgg_16/conv3/conv3_2/weights', - 'vgg_16/conv3/conv3_2/biases', - 'vgg_16/conv3/conv3_3/weights', - 'vgg_16/conv3/conv3_3/biases', - 'vgg_16/conv4/conv4_1/weights', - 'vgg_16/conv4/conv4_1/biases', - 'vgg_16/conv4/conv4_2/weights', - 'vgg_16/conv4/conv4_2/biases', - 'vgg_16/conv4/conv4_3/weights', - 'vgg_16/conv4/conv4_3/biases', - 'vgg_16/conv5/conv5_1/weights', - 'vgg_16/conv5/conv5_1/biases', - 'vgg_16/conv5/conv5_2/weights', - 'vgg_16/conv5/conv5_2/biases', - 
'vgg_16/conv5/conv5_3/weights', - 'vgg_16/conv5/conv5_3/biases', - 'vgg_16/fc6/weights', - 'vgg_16/fc6/biases', - 'vgg_16/fc7/weights', - 'vgg_16/fc7/biases', - 'vgg_16/fc8/weights', - 'vgg_16/fc8/biases', - ] - model_variables = [v.op.name for v in slim.get_model_variables()] - self.assertSetEqual(set(model_variables), set(expected_names)) - - def testEvaluation(self): - batch_size = 2 - height, width = 224, 224 - num_classes = 1000 - with self.test_session(): - eval_inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = vgg.vgg_16(eval_inputs, is_training=False) - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - predictions = tf.argmax(logits, 1) - self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) - - def testTrainEvalWithReuse(self): - train_batch_size = 2 - eval_batch_size = 1 - train_height, train_width = 224, 224 - eval_height, eval_width = 256, 256 - num_classes = 1000 - with self.test_session(): - train_inputs = tf.random_uniform( - (train_batch_size, train_height, train_width, 3)) - logits, _ = vgg.vgg_16(train_inputs) - self.assertListEqual(logits.get_shape().as_list(), - [train_batch_size, num_classes]) - tf.get_variable_scope().reuse_variables() - eval_inputs = tf.random_uniform( - (eval_batch_size, eval_height, eval_width, 3)) - logits, _ = vgg.vgg_16(eval_inputs, is_training=False, - spatial_squeeze=False) - self.assertListEqual(logits.get_shape().as_list(), - [eval_batch_size, 2, 2, num_classes]) - logits = tf.reduce_mean(logits, [1, 2]) - predictions = tf.argmax(logits, 1) - self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) - - def testForward(self): - batch_size = 1 - height, width = 224, 224 - with self.test_session() as sess: - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = vgg.vgg_16(inputs) - sess.run(tf.global_variables_initializer()) - output = sess.run(logits) - self.assertTrue(output.any()) - - -class VGG19Test(tf.test.TestCase): - - def testBuild(self): - batch_size = 5 - height, width = 224, 224 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = vgg.vgg_19(inputs, num_classes) - self.assertEquals(logits.op.name, 'vgg_19/fc8/squeezed') - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - - def testFullyConvolutional(self): - batch_size = 1 - height, width = 256, 256 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = vgg.vgg_19(inputs, num_classes, spatial_squeeze=False) - self.assertEquals(logits.op.name, 'vgg_19/fc8/BiasAdd') - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, 2, 2, num_classes]) - - def testEndPoints(self): - batch_size = 5 - height, width = 224, 224 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - _, end_points = vgg.vgg_19(inputs, num_classes) - expected_names = [ - 'vgg_19/conv1/conv1_1', - 'vgg_19/conv1/conv1_2', - 'vgg_19/pool1', - 'vgg_19/conv2/conv2_1', - 'vgg_19/conv2/conv2_2', - 'vgg_19/pool2', - 'vgg_19/conv3/conv3_1', - 'vgg_19/conv3/conv3_2', - 'vgg_19/conv3/conv3_3', - 'vgg_19/conv3/conv3_4', - 'vgg_19/pool3', - 'vgg_19/conv4/conv4_1', - 'vgg_19/conv4/conv4_2', - 'vgg_19/conv4/conv4_3', - 'vgg_19/conv4/conv4_4', - 'vgg_19/pool4', - 'vgg_19/conv5/conv5_1', - 'vgg_19/conv5/conv5_2', - 'vgg_19/conv5/conv5_3', - 'vgg_19/conv5/conv5_4', - 'vgg_19/pool5', - 'vgg_19/fc6', 
- 'vgg_19/fc7', - 'vgg_19/fc8' - ] - self.assertSetEqual(set(end_points.keys()), set(expected_names)) - - def testModelVariables(self): - batch_size = 5 - height, width = 224, 224 - num_classes = 1000 - with self.test_session(): - inputs = tf.random_uniform((batch_size, height, width, 3)) - vgg.vgg_19(inputs, num_classes) - expected_names = [ - 'vgg_19/conv1/conv1_1/weights', - 'vgg_19/conv1/conv1_1/biases', - 'vgg_19/conv1/conv1_2/weights', - 'vgg_19/conv1/conv1_2/biases', - 'vgg_19/conv2/conv2_1/weights', - 'vgg_19/conv2/conv2_1/biases', - 'vgg_19/conv2/conv2_2/weights', - 'vgg_19/conv2/conv2_2/biases', - 'vgg_19/conv3/conv3_1/weights', - 'vgg_19/conv3/conv3_1/biases', - 'vgg_19/conv3/conv3_2/weights', - 'vgg_19/conv3/conv3_2/biases', - 'vgg_19/conv3/conv3_3/weights', - 'vgg_19/conv3/conv3_3/biases', - 'vgg_19/conv3/conv3_4/weights', - 'vgg_19/conv3/conv3_4/biases', - 'vgg_19/conv4/conv4_1/weights', - 'vgg_19/conv4/conv4_1/biases', - 'vgg_19/conv4/conv4_2/weights', - 'vgg_19/conv4/conv4_2/biases', - 'vgg_19/conv4/conv4_3/weights', - 'vgg_19/conv4/conv4_3/biases', - 'vgg_19/conv4/conv4_4/weights', - 'vgg_19/conv4/conv4_4/biases', - 'vgg_19/conv5/conv5_1/weights', - 'vgg_19/conv5/conv5_1/biases', - 'vgg_19/conv5/conv5_2/weights', - 'vgg_19/conv5/conv5_2/biases', - 'vgg_19/conv5/conv5_3/weights', - 'vgg_19/conv5/conv5_3/biases', - 'vgg_19/conv5/conv5_4/weights', - 'vgg_19/conv5/conv5_4/biases', - 'vgg_19/fc6/weights', - 'vgg_19/fc6/biases', - 'vgg_19/fc7/weights', - 'vgg_19/fc7/biases', - 'vgg_19/fc8/weights', - 'vgg_19/fc8/biases', - ] - model_variables = [v.op.name for v in slim.get_model_variables()] - self.assertSetEqual(set(model_variables), set(expected_names)) - - def testEvaluation(self): - batch_size = 2 - height, width = 224, 224 - num_classes = 1000 - with self.test_session(): - eval_inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = vgg.vgg_19(eval_inputs, is_training=False) - self.assertListEqual(logits.get_shape().as_list(), - [batch_size, num_classes]) - predictions = tf.argmax(logits, 1) - self.assertListEqual(predictions.get_shape().as_list(), [batch_size]) - - def testTrainEvalWithReuse(self): - train_batch_size = 2 - eval_batch_size = 1 - train_height, train_width = 224, 224 - eval_height, eval_width = 256, 256 - num_classes = 1000 - with self.test_session(): - train_inputs = tf.random_uniform( - (train_batch_size, train_height, train_width, 3)) - logits, _ = vgg.vgg_19(train_inputs) - self.assertListEqual(logits.get_shape().as_list(), - [train_batch_size, num_classes]) - tf.get_variable_scope().reuse_variables() - eval_inputs = tf.random_uniform( - (eval_batch_size, eval_height, eval_width, 3)) - logits, _ = vgg.vgg_19(eval_inputs, is_training=False, - spatial_squeeze=False) - self.assertListEqual(logits.get_shape().as_list(), - [eval_batch_size, 2, 2, num_classes]) - logits = tf.reduce_mean(logits, [1, 2]) - predictions = tf.argmax(logits, 1) - self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size]) - - def testForward(self): - batch_size = 1 - height, width = 224, 224 - with self.test_session() as sess: - inputs = tf.random_uniform((batch_size, height, width, 3)) - logits, _ = vgg.vgg_19(inputs) - sess.run(tf.global_variables_initializer()) - output = sess.run(logits) - self.assertTrue(output.any()) - -if __name__ == '__main__': - tf.test.main() diff --git a/examples/slim/preprocessing/__init__.py b/examples/slim/preprocessing/__init__.py deleted file mode 100644 index 8b137891..00000000 --- 
a/examples/slim/preprocessing/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/examples/slim/preprocessing/cifarnet_preprocessing.py b/examples/slim/preprocessing/cifarnet_preprocessing.py deleted file mode 100644 index 195a5c7d..00000000 --- a/examples/slim/preprocessing/cifarnet_preprocessing.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Provides utilities to preprocess images in CIFAR-10. - -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -_PADDING = 4 - -slim = tf.contrib.slim - - -def preprocess_for_train(image, - output_height, - output_width, - padding=_PADDING): - """Preprocesses the given image for training. - - Note that the image is zero-padded by `padding` pixels on each side and - then randomly cropped to the output size; no resizing is performed. - - Args: - image: A `Tensor` representing an image of arbitrary size. - output_height: The height of the image after preprocessing. - output_width: The width of the image after preprocessing. - padding: The amount of padding before and after each dimension of the image. - - Returns: - A preprocessed image. - """ - tf.summary.image('image', tf.expand_dims(image, 0)) - - # Transform the image to floats. - image = tf.to_float(image) - if padding > 0: - image = tf.pad(image, [[padding, padding], [padding, padding], [0, 0]]) - # Randomly crop a [height, width] section of the image. - distorted_image = tf.random_crop(image, - [output_height, output_width, 3]) - - # Randomly flip the image horizontally. - distorted_image = tf.image.random_flip_left_right(distorted_image) - - tf.summary.image('distorted_image', tf.expand_dims(distorted_image, 0)) - - # Because these operations are not commutative, consider randomizing - # the order of their operations. - distorted_image = tf.image.random_brightness(distorted_image, - max_delta=63) - distorted_image = tf.image.random_contrast(distorted_image, - lower=0.2, upper=1.8) - # Subtract off the mean and divide by the variance of the pixels. - return tf.image.per_image_standardization(distorted_image) - - -def preprocess_for_eval(image, output_height, output_width): - """Preprocesses the given image for evaluation. - - Args: - image: A `Tensor` representing an image of arbitrary size. - output_height: The height of the image after preprocessing. - output_width: The width of the image after preprocessing. - - Returns: - A preprocessed image. - """ - tf.summary.image('image', tf.expand_dims(image, 0)) - # Transform the image to floats. - image = tf.to_float(image) - - # Resize and crop if needed. - resized_image = tf.image.resize_image_with_crop_or_pad(image, - output_width, - output_height) - tf.summary.image('resized_image', tf.expand_dims(resized_image, 0)) - - # Subtract off the mean and divide by the variance of the pixels.
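# (Per tf.image.per_image_standardization in TF 1.x: each image is mapped to
# (x - mean) / adjusted_stddev, with adjusted_stddev = max(stddev,
# 1.0 / sqrt(num_elements)) to guard against division by zero.)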
- return tf.image.per_image_standardization(resized_image) - - -def preprocess_image(image, output_height, output_width, is_training=False): - """Preprocesses the given image. - - Args: - image: A `Tensor` representing an image of arbitrary size. - output_height: The height of the image after preprocessing. - output_width: The width of the image after preprocessing. - is_training: `True` if we're preprocessing the image for training and - `False` otherwise. - - Returns: - A preprocessed image. - """ - if is_training: - return preprocess_for_train(image, output_height, output_width) - else: - return preprocess_for_eval(image, output_height, output_width) diff --git a/examples/slim/preprocessing/inception_preprocessing.py b/examples/slim/preprocessing/inception_preprocessing.py deleted file mode 100644 index ca3eba0b..00000000 --- a/examples/slim/preprocessing/inception_preprocessing.py +++ /dev/null @@ -1,304 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Provides utilities to preprocess images for the Inception networks.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from tensorflow.python.ops import control_flow_ops - - -def apply_with_random_selector(x, func, num_cases): - """Computes func(x, sel), with sel sampled from [0...num_cases-1]. - - Args: - x: input Tensor. - func: Python function to apply. - num_cases: Python int32, number of cases to sample sel from. - - Returns: - The result of func(x, sel), where func receives the value of the - selector as a python integer, but sel is sampled dynamically. - """ - sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32) - # Pass the real x only to one of the func calls. - return control_flow_ops.merge([ - func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case) - for case in range(num_cases)])[0] - - -def distort_color(image, color_ordering=0, fast_mode=True, scope=None): - """Distort the color of a Tensor image. - - Each color distortion is non-commutative and thus ordering of the color ops - matters. Ideally we would randomly permute the ordering of the color ops. - Rather than adding that level of complication, we select a distinct ordering - of color ops for each preprocessing thread. - - Args: - image: 3-D Tensor containing single image in [0, 1]. - color_ordering: Python int, a type of distortion (valid values: 0-3). - fast_mode: Avoids slower ops (random_hue and random_contrast) - scope: Optional scope for name_scope. - Returns: - 3-D Tensor color-distorted image on range [0, 1] - Raises: - ValueError: if color_ordering not in [0, 3] - """ - with tf.name_scope(scope, 'distort_color', [image]): - if fast_mode: - if color_ordering == 0: - image = tf.image.random_brightness(image, max_delta=32. / 255.)
- image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - else: - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_brightness(image, max_delta=32. / 255.) - else: - if color_ordering == 0: - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - elif color_ordering == 1: - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - elif color_ordering == 2: - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_brightness(image, max_delta=32. / 255.) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - elif color_ordering == 3: - image = tf.image.random_hue(image, max_delta=0.2) - image = tf.image.random_saturation(image, lower=0.5, upper=1.5) - image = tf.image.random_contrast(image, lower=0.5, upper=1.5) - image = tf.image.random_brightness(image, max_delta=32. / 255.) - else: - raise ValueError('color_ordering must be in [0, 3]') - - # The random_* ops do not necessarily clamp. - return tf.clip_by_value(image, 0.0, 1.0) - - -def distorted_bounding_box_crop(image, - bbox, - min_object_covered=0.1, - aspect_ratio_range=(0.75, 1.33), - area_range=(0.05, 1.0), - max_attempts=100, - scope=None): - """Generates cropped_image using one of the bboxes, randomly distorted. - - See `tf.image.sample_distorted_bounding_box` for more documentation. - - Args: - image: 3-D Tensor of image (it will be converted to floats in [0, 1]). - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged - as [ymin, xmin, ymax, xmax]. If num_boxes is 0, the whole - image is used. - min_object_covered: An optional `float`. Defaults to `0.1`. The cropped - area of the image must contain at least this fraction of any bounding box - supplied. - aspect_ratio_range: An optional list of `floats`. The cropped area of the - image must have an aspect ratio = width / height within this range. - area_range: An optional list of `floats`. The cropped area of the image - must contain a fraction of the supplied image within this range. - max_attempts: An optional `int`. Number of attempts at generating a cropped - region of the image that meets the specified constraints. After `max_attempts` - failures, return the entire image. - scope: Optional scope for name_scope. - Returns: - A tuple, a 3-D Tensor cropped_image and the distorted bbox - """ - with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]): - # Each bounding box has shape [1, num_boxes, box coords] and - # the coordinates are ordered [ymin, xmin, ymax, xmax]. - - # A large fraction of image datasets contain a human-annotated bounding - # box delineating the region of the image containing the object of interest. - # We choose to create a new bounding box for the object which is a randomly - # distorted version of the human-annotated bounding box that obeys an - # allowed range of aspect ratios, sizes and overlap with the human-annotated - # bounding box. If no box is supplied, then we assume the bounding box is - # the entire image.
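# For example, a whole-image box would be
#   bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4]),
# which is the same default that preprocess_for_train below substitutes when
# bbox is None.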
- sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box( - tf.shape(image), - bounding_boxes=bbox, - min_object_covered=min_object_covered, - aspect_ratio_range=aspect_ratio_range, - area_range=area_range, - max_attempts=max_attempts, - use_image_if_no_bounding_boxes=True) - bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box - - # Crop the image to the specified bounding box. - cropped_image = tf.slice(image, bbox_begin, bbox_size) - return cropped_image, distort_bbox - - -def preprocess_for_train(image, height, width, bbox, - fast_mode=True, - scope=None): - """Distort one image for training a network. - - Distorting images provides a useful technique for augmenting the data - set during training in order to make the network invariant to aspects - of the image that do not affect the label. - - Additionally, it creates image summaries to display the different - transformations applied to the image. - - Args: - image: 3-D Tensor of image. If dtype is tf.float32 then the range should be - [0, 1], otherwise it is converted to tf.float32, assuming that the range - is [0, MAX], where MAX is the largest positive representable number for - the int(8/16/32) data type (see `tf.image.convert_image_dtype` for details). - height: integer - width: integer - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged - as [ymin, xmin, ymax, xmax]. - fast_mode: Optional boolean, if True avoids slower transformations (i.e. - bi-cubic resizing, random_hue or random_contrast). - scope: Optional scope for name_scope. - Returns: - 3-D float Tensor of distorted image used for training with range [-1, 1]. - """ - with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]): - if bbox is None: - bbox = tf.constant([0.0, 0.0, 1.0, 1.0], - dtype=tf.float32, - shape=[1, 1, 4]) - if image.dtype != tf.float32: - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - # Each bounding box has shape [1, num_boxes, box coords] and - # the coordinates are ordered [ymin, xmin, ymax, xmax]. - image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), - bbox) - tf.summary.image('image_with_bounding_boxes', image_with_box) - - distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox) - # Restore the shape since the dynamic slice based upon the bbox_size loses - # the third dimension. - distorted_image.set_shape([None, None, 3]) - image_with_distorted_box = tf.image.draw_bounding_boxes( - tf.expand_dims(image, 0), distorted_bbox) - tf.summary.image('images_with_distorted_bounding_box', - image_with_distorted_box) - - # This resizing operation may distort the images because the aspect - # ratio is not respected. We select a resize method in a round robin - # fashion based on the thread number. - # Note that ResizeMethod contains 4 enumerated resizing methods. - - # We select only 1 case for fast_mode bilinear. - num_resize_cases = 1 if fast_mode else 4 - distorted_image = apply_with_random_selector( - distorted_image, - lambda x, method: tf.image.resize_images(x, [height, width], method=method), - num_cases=num_resize_cases) - - tf.summary.image('cropped_resized_image', - tf.expand_dims(distorted_image, 0)) - - # Randomly flip the image horizontally. - distorted_image = tf.image.random_flip_left_right(distorted_image) - - # Randomly distort the colors. There are 4 ways to do it.
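# Each ordering is a different permutation of brightness, saturation, hue and
# contrast (only brightness and saturation in fast_mode); see distort_color
# above. The ordering index is drawn at graph eval time by
# apply_with_random_selector.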
- distorted_image = apply_with_random_selector( - distorted_image, - lambda x, ordering: distort_color(x, ordering, fast_mode), - num_cases=4) - - tf.summary.image('final_distorted_image', - tf.expand_dims(distorted_image, 0)) - distorted_image = tf.subtract(distorted_image, 0.5) - distorted_image = tf.multiply(distorted_image, 2.0) - return distorted_image - - -def preprocess_for_eval(image, height, width, - central_fraction=0.875, scope=None): - """Prepare one image for evaluation. - - If height and width are specified, it outputs an image of that size by - applying resize_bilinear. - - If central_fraction is specified, it crops the central fraction of the - input image. - - Args: - image: 3-D Tensor of image. If dtype is tf.float32 then the range should be - [0, 1], otherwise it is converted to tf.float32, assuming that the range - is [0, MAX], where MAX is the largest positive representable number for - the int(8/16/32) data type (see `tf.image.convert_image_dtype` for details) - height: integer - width: integer - central_fraction: Optional Float, fraction of the image to crop. - scope: Optional scope for name_scope. - Returns: - 3-D float Tensor of prepared image. - """ - with tf.name_scope(scope, 'eval_image', [image, height, width]): - if image.dtype != tf.float32: - image = tf.image.convert_image_dtype(image, dtype=tf.float32) - # Crop the central region of the image with an area containing 87.5% of - # the original image. - if central_fraction: - image = tf.image.central_crop(image, central_fraction=central_fraction) - - if height and width: - # Resize the image to the specified height and width. - image = tf.expand_dims(image, 0) - image = tf.image.resize_bilinear(image, [height, width], - align_corners=False) - image = tf.squeeze(image, [0]) - image = tf.subtract(image, 0.5) - image = tf.multiply(image, 2.0) - return image - - -def preprocess_image(image, height, width, - is_training=False, - bbox=None, - fast_mode=True): - """Pre-process one image for training or evaluation. - - Args: - image: 3-D Tensor [height, width, channels] with the image. - height: integer, image expected height. - width: integer, image expected width. - is_training: Boolean. If true, the image is transformed for training; - otherwise, it is transformed for evaluation. - bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords] - where each coordinate is [0, 1) and the coordinates are arranged as - [ymin, xmin, ymax, xmax]. - fast_mode: Optional boolean, if True avoids slower transformations. - - Returns: - 3-D float Tensor containing an appropriately scaled image - - Raises: - ValueError: if the user does not provide a bounding box - """ - if is_training: - return preprocess_for_train(image, height, width, bbox, fast_mode) - else: - return preprocess_for_eval(image, height, width) diff --git a/examples/slim/preprocessing/lenet_preprocessing.py b/examples/slim/preprocessing/lenet_preprocessing.py deleted file mode 100644 index ac5e71af..00000000 --- a/examples/slim/preprocessing/lenet_preprocessing.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Provides utilities for preprocessing.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -slim = tf.contrib.slim - - -def preprocess_image(image, output_height, output_width, is_training): - """Preprocesses the given image. - - Args: - image: A `Tensor` representing an image of arbitrary size. - output_height: The height of the image after preprocessing. - output_width: The width of the image after preprocessing. - is_training: `True` if we're preprocessing the image for training and - `False` otherwise. - - Returns: - A preprocessed image. - """ - image = tf.to_float(image) - image = tf.image.resize_image_with_crop_or_pad( - image, output_width, output_height) - image = tf.subtract(image, 128.0) - image = tf.div(image, 128.0) - return image diff --git a/examples/slim/preprocessing/preprocessing_factory.py b/examples/slim/preprocessing/preprocessing_factory.py deleted file mode 100644 index 35f8645e..00000000 --- a/examples/slim/preprocessing/preprocessing_factory.py +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains a factory for building various preprocessing functions.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from preprocessing import cifarnet_preprocessing -from preprocessing import inception_preprocessing -from preprocessing import lenet_preprocessing -from preprocessing import vgg_preprocessing - -slim = tf.contrib.slim - - -def get_preprocessing(name, is_training=False): - """Returns preprocessing_fn(image, height, width, **kwargs). - - Args: - name: The name of the preprocessing function. - is_training: `True` if the model is being used for training and `False` - otherwise. - - Returns: - preprocessing_fn: A function that preprocesses a single image (pre-batch). - It has the following signature: - image = preprocessing_fn(image, output_height, output_width, ...). - - Raises: - ValueError: If Preprocessing `name` is not recognized.
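  Example (illustrative; raw_image is any decoded 3-D image tensor):
    preprocessing_fn = get_preprocessing('vgg_16', is_training=True)
    image = preprocessing_fn(raw_image, 224, 224)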
- """ - preprocessing_fn_map = { - 'cifarnet': cifarnet_preprocessing, - 'inception': inception_preprocessing, - 'inception_v1': inception_preprocessing, - 'inception_v2': inception_preprocessing, - 'inception_v3': inception_preprocessing, - 'inception_v4': inception_preprocessing, - 'inception_resnet_v2': inception_preprocessing, - 'lenet': lenet_preprocessing, - 'resnet_v1_50': vgg_preprocessing, - 'resnet_v1_101': vgg_preprocessing, - 'resnet_v1_152': vgg_preprocessing, - 'resnet_v2_50': vgg_preprocessing, - 'resnet_v2_101': vgg_preprocessing, - 'resnet_v2_152': vgg_preprocessing, - 'vgg': vgg_preprocessing, - 'vgg_a': vgg_preprocessing, - 'vgg_16': vgg_preprocessing, - 'vgg_19': vgg_preprocessing, - } - - if name not in preprocessing_fn_map: - raise ValueError('Preprocessing name [%s] was not recognized' % name) - - def preprocessing_fn(image, output_height, output_width, **kwargs): - return preprocessing_fn_map[name].preprocess_image( - image, output_height, output_width, is_training=is_training, **kwargs) - - return preprocessing_fn diff --git a/examples/slim/preprocessing/vgg_preprocessing.py b/examples/slim/preprocessing/vgg_preprocessing.py deleted file mode 100644 index 1900cae2..00000000 --- a/examples/slim/preprocessing/vgg_preprocessing.py +++ /dev/null @@ -1,370 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Provides utilities to preprocess images. - -The preprocessing steps for VGG were introduced in the following technical -report: - - Very Deep Convolutional Networks For Large-Scale Image Recognition - Karen Simonyan and Andrew Zisserman - arXiv technical report, 2015 - PDF: http://arxiv.org/pdf/1409.1556.pdf - ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf - CC-BY-4.0 - -More information can be obtained from the VGG website: -www.robots.ox.ac.uk/~vgg/research/very_deep/ -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from tensorflow.python.ops import control_flow_ops - -slim = tf.contrib.slim - -_R_MEAN = 123.68 -_G_MEAN = 116.78 -_B_MEAN = 103.94 - -_RESIZE_SIDE_MIN = 256 -_RESIZE_SIDE_MAX = 512 - - -def _crop(image, offset_height, offset_width, crop_height, crop_width): - """Crops the given image using the provided offsets and sizes. - - Note that the method doesn't assume we know the input image size but it does - assume we know the input image rank. - - Args: - image: an image of shape [height, width, channels]. - offset_height: a scalar tensor indicating the height offset. - offset_width: a scalar tensor indicating the width offset. - crop_height: the height of the cropped image. - crop_width: the width of the cropped image. - - Returns: - the cropped (and resized) image. - - Raises: - InvalidArgumentError: if the rank is not 3 or if the image dimensions are - less than the crop size. 
- """ - original_shape = tf.shape(image) - - rank_assertion = tf.Assert( - tf.equal(tf.rank(image), 3), - ['Rank of image must be equal to 3.']) - cropped_shape = control_flow_ops.with_dependencies( - [rank_assertion], - tf.stack([crop_height, crop_width, original_shape[2]])) - - size_assertion = tf.Assert( - tf.logical_and( - tf.greater_equal(original_shape[0], crop_height), - tf.greater_equal(original_shape[1], crop_width)), - ['Crop size greater than the image size.']) - - offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0])) - - # Use tf.slice instead of crop_to_bounding box as it accepts tensors to - # define the crop size. - image = control_flow_ops.with_dependencies( - [size_assertion], - tf.slice(image, offsets, cropped_shape)) - return tf.reshape(image, cropped_shape) - - -def _random_crop(image_list, crop_height, crop_width): - """Crops the given list of images. - - The function applies the same crop to each image in the list. This can be - effectively applied when there are multiple image inputs of the same - dimension such as: - - image, depths, normals = _random_crop([image, depths, normals], 120, 150) - - Args: - image_list: a list of image tensors of the same dimension but possibly - varying channel. - crop_height: the new height. - crop_width: the new width. - - Returns: - the image_list with cropped images. - - Raises: - ValueError: if there are multiple image inputs provided with different size - or the images are smaller than the crop dimensions. - """ - if not image_list: - raise ValueError('Empty image_list.') - - # Compute the rank assertions. - rank_assertions = [] - for i in range(len(image_list)): - image_rank = tf.rank(image_list[i]) - rank_assert = tf.Assert( - tf.equal(image_rank, 3), - ['Wrong rank for tensor %s [expected] [actual]', - image_list[i].name, 3, image_rank]) - rank_assertions.append(rank_assert) - - image_shape = control_flow_ops.with_dependencies( - [rank_assertions[0]], - tf.shape(image_list[0])) - image_height = image_shape[0] - image_width = image_shape[1] - crop_size_assert = tf.Assert( - tf.logical_and( - tf.greater_equal(image_height, crop_height), - tf.greater_equal(image_width, crop_width)), - ['Crop size greater than the image size.']) - - asserts = [rank_assertions[0], crop_size_assert] - - for i in range(1, len(image_list)): - image = image_list[i] - asserts.append(rank_assertions[i]) - shape = control_flow_ops.with_dependencies([rank_assertions[i]], - tf.shape(image)) - height = shape[0] - width = shape[1] - - height_assert = tf.Assert( - tf.equal(height, image_height), - ['Wrong height for tensor %s [expected][actual]', - image.name, height, image_height]) - width_assert = tf.Assert( - tf.equal(width, image_width), - ['Wrong width for tensor %s [expected][actual]', - image.name, width, image_width]) - asserts.extend([height_assert, width_assert]) - - # Create a random bounding box. - # - # Use tf.random_uniform and not numpy.random.rand as doing the former would - # generate random numbers at graph eval time, unlike the latter which - # generates random numbers at graph definition time. 
- max_offset_height = control_flow_ops.with_dependencies( - asserts, tf.reshape(image_height - crop_height + 1, [])) - max_offset_width = control_flow_ops.with_dependencies( - asserts, tf.reshape(image_width - crop_width + 1, [])) - offset_height = tf.random_uniform( - [], maxval=max_offset_height, dtype=tf.int32) - offset_width = tf.random_uniform( - [], maxval=max_offset_width, dtype=tf.int32) - - return [_crop(image, offset_height, offset_width, - crop_height, crop_width) for image in image_list] - - -def _central_crop(image_list, crop_height, crop_width): - """Performs central crops of the given image list. - - Args: - image_list: a list of image tensors of the same dimension but possibly - varying channel. - crop_height: the height of the image following the crop. - crop_width: the width of the image following the crop. - - Returns: - the list of cropped images. - """ - outputs = [] - for image in image_list: - image_height = tf.shape(image)[0] - image_width = tf.shape(image)[1] - - offset_height = (image_height - crop_height) / 2 - offset_width = (image_width - crop_width) / 2 - - outputs.append(_crop(image, offset_height, offset_width, - crop_height, crop_width)) - return outputs - - -def _mean_image_subtraction(image, means): - """Subtracts the given means from each image channel. - - For example: - means = [123.68, 116.779, 103.939] - image = _mean_image_subtraction(image, means) - - Note that the rank of `image` must be known. - - Args: - image: a tensor of size [height, width, C]. - means: a C-vector of values to subtract from each channel. - - Returns: - the centered image. - - Raises: - ValueError: If the rank of `image` is unknown, if `image` has a rank other - than three or if the number of channels in `image` doesn't match the - number of values in `means`. - """ - if image.get_shape().ndims != 3: - raise ValueError('Input must be of size [height, width, C>0]') - num_channels = image.get_shape().as_list()[-1] - if len(means) != num_channels: - raise ValueError('len(means) must match the number of channels') - - channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image) - for i in range(num_channels): - channels[i] -= means[i] - return tf.concat(axis=2, values=channels) - - -def _smallest_size_at_least(height, width, smallest_side): - """Computes new shape with the smallest side equal to `smallest_side`. - - Computes new shape with the smallest side equal to `smallest_side` while - preserving the original aspect ratio. - - Args: - height: an int32 scalar tensor indicating the current height. - width: an int32 scalar tensor indicating the current width. - smallest_side: A python integer or scalar `Tensor` indicating the size of - the smallest side after resize. - - Returns: - new_height: an int32 scalar tensor indicating the new height. - new_width: and int32 scalar tensor indicating the new width. - """ - smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) - - height = tf.to_float(height) - width = tf.to_float(width) - smallest_side = tf.to_float(smallest_side) - - scale = tf.cond(tf.greater(height, width), - lambda: smallest_side / width, - lambda: smallest_side / height) - new_height = tf.to_int32(height * scale) - new_width = tf.to_int32(width * scale) - return new_height, new_width - - -def _aspect_preserving_resize(image, smallest_side): - """Resize images preserving the original aspect ratio. - - Args: - image: A 3-D image `Tensor`. 
- smallest_side: A python integer or scalar `Tensor` indicating the size of - the smallest side after resize. - - Returns: - resized_image: A 3-D tensor containing the resized image. - """ - smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) - - shape = tf.shape(image) - height = shape[0] - width = shape[1] - new_height, new_width = _smallest_size_at_least(height, width, smallest_side) - image = tf.expand_dims(image, 0) - resized_image = tf.image.resize_bilinear(image, [new_height, new_width], - align_corners=False) - resized_image = tf.squeeze(resized_image) - resized_image.set_shape([None, None, 3]) - return resized_image - - -def preprocess_for_train(image, - output_height, - output_width, - resize_side_min=_RESIZE_SIDE_MIN, - resize_side_max=_RESIZE_SIDE_MAX): - """Preprocesses the given image for training. - - Note that the actual resizing scale is sampled from - [`resize_size_min`, `resize_size_max`]. - - Args: - image: A `Tensor` representing an image of arbitrary size. - output_height: The height of the image after preprocessing. - output_width: The width of the image after preprocessing. - resize_side_min: The lower bound for the smallest side of the image for - aspect-preserving resizing. - resize_side_max: The upper bound for the smallest side of the image for - aspect-preserving resizing. - - Returns: - A preprocessed image. - """ - resize_side = tf.random_uniform( - [], minval=resize_side_min, maxval=resize_side_max+1, dtype=tf.int32) - - image = _aspect_preserving_resize(image, resize_side) - image = _random_crop([image], output_height, output_width)[0] - image.set_shape([output_height, output_width, 3]) - image = tf.to_float(image) - image = tf.image.random_flip_left_right(image) - return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) - - -def preprocess_for_eval(image, output_height, output_width, resize_side): - """Preprocesses the given image for evaluation. - - Args: - image: A `Tensor` representing an image of arbitrary size. - output_height: The height of the image after preprocessing. - output_width: The width of the image after preprocessing. - resize_side: The smallest side of the image for aspect-preserving resizing. - - Returns: - A preprocessed image. - """ - image = _aspect_preserving_resize(image, resize_side) - image = _central_crop([image], output_height, output_width)[0] - image.set_shape([output_height, output_width, 3]) - image = tf.to_float(image) - return _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN]) - - -def preprocess_image(image, output_height, output_width, is_training=False, - resize_side_min=_RESIZE_SIDE_MIN, - resize_side_max=_RESIZE_SIDE_MAX): - """Preprocesses the given image. - - Args: - image: A `Tensor` representing an image of arbitrary size. - output_height: The height of the image after preprocessing. - output_width: The width of the image after preprocessing. - is_training: `True` if we're preprocessing the image for training and - `False` otherwise. - resize_side_min: The lower bound for the smallest side of the image for - aspect-preserving resizing. If `is_training` is `False`, then this value - is used for rescaling. - resize_side_max: The upper bound for the smallest side of the image for - aspect-preserving resizing. If `is_training` is `False`, this value is - ignored. Otherwise, the resize side is sampled from - [resize_size_min, resize_size_max]. - - Returns: - A preprocessed image. 
- """ - if is_training: - return preprocess_for_train(image, output_height, output_width, - resize_side_min, resize_side_max) - else: - return preprocess_for_eval(image, output_height, output_width, - resize_side_min) diff --git a/examples/slim/scripts/finetune_inception_v1_on_flowers.sh b/examples/slim/scripts/finetune_inception_v1_on_flowers.sh deleted file mode 100644 index 480b46c0..00000000 --- a/examples/slim/scripts/finetune_inception_v1_on_flowers.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/bin/bash -# -# This script performs the following operations: -# 1. Downloads the Flowers dataset -# 2. Fine-tunes an InceptionV1 model on the Flowers training set. -# 3. Evaluates the model on the Flowers validation set. -# -# Usage: -# cd slim -# ./slim/scripts/finetune_inception_v1_on_flowers.sh - -# Where the pre-trained InceptionV1 checkpoint is saved to. -PRETRAINED_CHECKPOINT_DIR=/tmp/checkpoints - -# Where the training (fine-tuned) checkpoint and logs will be saved to. -TRAIN_DIR=/tmp/flowers-models/inception_v1 - -# Where the dataset is saved to. -DATASET_DIR=/tmp/flowers - -# Download the pre-trained checkpoint. -if [ ! -d "$PRETRAINED_CHECKPOINT_DIR" ]; then - mkdir ${PRETRAINED_CHECKPOINT_DIR} -fi -if [ ! -f ${PRETRAINED_CHECKPOINT_DIR}/inception_v1.ckpt ]; then - wget http://download.tensorflow.org/models/inception_v1_2016_08_28.tar.gz - tar -xvf inception_v1_2016_08_28.tar.gz - mv inception_v1.ckpt ${PRETRAINED_CHECKPOINT_DIR}/inception_v1.ckpt - rm inception_v1_2016_08_28.tar.gz -fi - -# Download the dataset -python download_and_convert_data.py \ - --dataset_name=flowers \ - --dataset_dir=${DATASET_DIR} - -# Fine-tune only the new layers for 2000 steps. -python train_image_classifier.py \ - --train_dir=${TRAIN_DIR} \ - --dataset_name=flowers \ - --dataset_split_name=train \ - --dataset_dir=${DATASET_DIR} \ - --model_name=inception_v1 \ - --checkpoint_path=${PRETRAINED_CHECKPOINT_DIR}/inception_v1.ckpt \ - --checkpoint_exclude_scopes=InceptionV1/Logits \ - --trainable_scopes=InceptionV1/Logits \ - --max_number_of_steps=3000 \ - --batch_size=32 \ - --learning_rate=0.01 \ - --save_interval_secs=60 \ - --save_summaries_secs=60 \ - --log_every_n_steps=100 \ - --optimizer=rmsprop \ - --weight_decay=0.00004 - -# Run evaluation. -python eval_image_classifier.py \ - --checkpoint_path=${TRAIN_DIR} \ - --eval_dir=${TRAIN_DIR} \ - --dataset_name=flowers \ - --dataset_split_name=validation \ - --dataset_dir=${DATASET_DIR} \ - --model_name=inception_v1 - -# Fine-tune all the new layers for 1000 steps. -python train_image_classifier.py \ - --train_dir=${TRAIN_DIR}/all \ - --dataset_name=flowers \ - --dataset_split_name=train \ - --dataset_dir=${DATASET_DIR} \ - --checkpoint_path=${TRAIN_DIR} \ - --model_name=inception_v1 \ - --max_number_of_steps=1000 \ - --batch_size=32 \ - --learning_rate=0.001 \ - --save_interval_secs=60 \ - --save_summaries_secs=60 \ - --log_every_n_steps=100 \ - --optimizer=rmsprop \ - --weight_decay=0.00004 - -# Run evaluation. 
-python eval_image_classifier.py \
-  --checkpoint_path=${TRAIN_DIR}/all \
-  --eval_dir=${TRAIN_DIR}/all \
-  --dataset_name=flowers \
-  --dataset_split_name=validation \
-  --dataset_dir=${DATASET_DIR} \
-  --model_name=inception_v1
diff --git a/examples/slim/scripts/finetune_inception_v3_on_flowers.sh b/examples/slim/scripts/finetune_inception_v3_on_flowers.sh
deleted file mode 100644
index dfcc87ac..00000000
--- a/examples/slim/scripts/finetune_inception_v3_on_flowers.sh
+++ /dev/null
@@ -1,91 +0,0 @@
-#!/bin/bash
-#
-# This script performs the following operations:
-# 1. Downloads the Flowers dataset
-# 2. Fine-tunes an InceptionV3 model on the Flowers training set.
-# 3. Evaluates the model on the Flowers validation set.
-#
-# Usage:
-# cd slim
-# ./scripts/finetune_inception_v3_on_flowers.sh
-
-# Where the pre-trained InceptionV3 checkpoint is saved to.
-PRETRAINED_CHECKPOINT_DIR=/tmp/checkpoints
-
-# Where the training (fine-tuned) checkpoint and logs will be saved to.
-TRAIN_DIR=/tmp/flowers-models/inception_v3
-
-# Where the dataset is saved to.
-DATASET_DIR=/tmp/flowers
-
-# Download the pre-trained checkpoint.
-if [ ! -d "$PRETRAINED_CHECKPOINT_DIR" ]; then
-  mkdir ${PRETRAINED_CHECKPOINT_DIR}
-fi
-if [ ! -f ${PRETRAINED_CHECKPOINT_DIR}/inception_v3.ckpt ]; then
-  wget http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz
-  tar -xvf inception_v3_2016_08_28.tar.gz
-  mv inception_v3.ckpt ${PRETRAINED_CHECKPOINT_DIR}/inception_v3.ckpt
-  rm inception_v3_2016_08_28.tar.gz
-fi
-
-# Download the dataset
-python download_and_convert_data.py \
-  --dataset_name=flowers \
-  --dataset_dir=${DATASET_DIR}
-
-# Fine-tune only the new layers for 1000 steps.
-python train_image_classifier.py \
-  --train_dir=${TRAIN_DIR} \
-  --dataset_name=flowers \
-  --dataset_split_name=train \
-  --dataset_dir=${DATASET_DIR} \
-  --model_name=inception_v3 \
-  --checkpoint_path=${PRETRAINED_CHECKPOINT_DIR}/inception_v3.ckpt \
-  --checkpoint_exclude_scopes=InceptionV3/Logits,InceptionV3/AuxLogits \
-  --trainable_scopes=InceptionV3/Logits,InceptionV3/AuxLogits \
-  --max_number_of_steps=1000 \
-  --batch_size=32 \
-  --learning_rate=0.01 \
-  --learning_rate_decay_type=fixed \
-  --save_interval_secs=60 \
-  --save_summaries_secs=60 \
-  --log_every_n_steps=100 \
-  --optimizer=rmsprop \
-  --weight_decay=0.00004
-
-# Run evaluation.
-python eval_image_classifier.py \
-  --checkpoint_path=${TRAIN_DIR} \
-  --eval_dir=${TRAIN_DIR} \
-  --dataset_name=flowers \
-  --dataset_split_name=validation \
-  --dataset_dir=${DATASET_DIR} \
-  --model_name=inception_v3
-
-# Fine-tune all layers for 500 steps.
-python train_image_classifier.py \
-  --train_dir=${TRAIN_DIR}/all \
-  --dataset_name=flowers \
-  --dataset_split_name=train \
-  --dataset_dir=${DATASET_DIR} \
-  --model_name=inception_v3 \
-  --checkpoint_path=${TRAIN_DIR} \
-  --max_number_of_steps=500 \
-  --batch_size=32 \
-  --learning_rate=0.0001 \
-  --learning_rate_decay_type=fixed \
-  --save_interval_secs=60 \
-  --save_summaries_secs=60 \
-  --log_every_n_steps=10 \
-  --optimizer=rmsprop \
-  --weight_decay=0.00004
-
-# Run evaluation.
-python eval_image_classifier.py \
-  --checkpoint_path=${TRAIN_DIR}/all \
-  --eval_dir=${TRAIN_DIR}/all \
-  --dataset_name=flowers \
-  --dataset_split_name=validation \
-  --dataset_dir=${DATASET_DIR} \
-  --model_name=inception_v3
diff --git a/examples/slim/scripts/train_cifarnet_on_cifar10.sh b/examples/slim/scripts/train_cifarnet_on_cifar10.sh
deleted file mode 100644
index daefb22e..00000000
--- a/examples/slim/scripts/train_cifarnet_on_cifar10.sh
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/bin/bash
-#
-# This script performs the following operations:
-# 1. Downloads the Cifar10 dataset
-# 2. Trains a CifarNet model on the Cifar10 training set.
-# 3. Evaluates the model on the Cifar10 testing set.
-#
-# Usage:
-# cd slim
-# ./scripts/train_cifarnet_on_cifar10.sh
-
-# Where the checkpoint and logs will be saved to.
-TRAIN_DIR=/tmp/cifarnet-model
-
-# Where the dataset is saved to.
-DATASET_DIR=/tmp/cifar10
-
-# Download the dataset
-python download_and_convert_data.py \
-  --dataset_name=cifar10 \
-  --dataset_dir=${DATASET_DIR}
-
-# Run training.
-python train_image_classifier.py \
-  --train_dir=${TRAIN_DIR} \
-  --dataset_name=cifar10 \
-  --dataset_split_name=train \
-  --dataset_dir=${DATASET_DIR} \
-  --model_name=cifarnet \
-  --preprocessing_name=cifarnet \
-  --max_number_of_steps=100000 \
-  --batch_size=128 \
-  --save_interval_secs=120 \
-  --save_summaries_secs=120 \
-  --log_every_n_steps=100 \
-  --optimizer=sgd \
-  --learning_rate=0.1 \
-  --learning_rate_decay_factor=0.1 \
-  --num_epochs_per_decay=200 \
-  --weight_decay=0.004
-
-# Run evaluation.
-python eval_image_classifier.py \
-  --checkpoint_path=${TRAIN_DIR} \
-  --eval_dir=${TRAIN_DIR} \
-  --dataset_name=cifar10 \
-  --dataset_split_name=test \
-  --dataset_dir=${DATASET_DIR} \
-  --model_name=cifarnet
diff --git a/examples/slim/scripts/train_lenet_on_mnist.sh b/examples/slim/scripts/train_lenet_on_mnist.sh
deleted file mode 100644
index 8dbeff2a..00000000
--- a/examples/slim/scripts/train_lenet_on_mnist.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-#
-# This script performs the following operations:
-# 1. Downloads the MNIST dataset
-# 2. Trains a LeNet model on the MNIST training set.
-# 3. Evaluates the model on the MNIST testing set.
-#
-# Usage:
-# cd slim
-# ./scripts/train_lenet_on_mnist.sh
-
-# Where the checkpoint and logs will be saved to.
-TRAIN_DIR=/tmp/lenet-model
-
-# Where the dataset is saved to.
-DATASET_DIR=/tmp/mnist
-
-# Download the dataset
-python download_and_convert_data.py \
-  --dataset_name=mnist \
-  --dataset_dir=${DATASET_DIR}
-
-# Run training.
-python train_image_classifier.py \
-  --train_dir=${TRAIN_DIR} \
-  --dataset_name=mnist \
-  --dataset_split_name=train \
-  --dataset_dir=${DATASET_DIR} \
-  --model_name=lenet \
-  --preprocessing_name=lenet \
-  --max_number_of_steps=20000 \
-  --batch_size=50 \
-  --learning_rate=0.01 \
-  --save_interval_secs=60 \
-  --save_summaries_secs=60 \
-  --log_every_n_steps=100 \
-  --optimizer=sgd \
-  --learning_rate_decay_type=fixed \
-  --weight_decay=0
-
-# Run evaluation.
-python eval_image_classifier.py \
-  --checkpoint_path=${TRAIN_DIR} \
-  --eval_dir=${TRAIN_DIR} \
-  --dataset_name=mnist \
-  --dataset_split_name=test \
-  --dataset_dir=${DATASET_DIR} \
-  --model_name=lenet
diff --git a/examples/slim/slim_walkthough.ipynb b/examples/slim/slim_walkthough.ipynb
deleted file mode 100644
index 5804c7b2..00000000
--- a/examples/slim/slim_walkthough.ipynb
+++ /dev/null
@@ -1,1023 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# TF-Slim Walkthrough\n",
-    "\n",
-    "This notebook will walk you through the basics of using TF-Slim to define, train and evaluate neural networks on various tasks. It assumes a basic knowledge of neural networks. "
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Table of contents\n",
-    "\n",
-    "Installation and setup
\n", - "Creating your first neural network with TF-Slim
\n", - "Reading Data with TF-Slim
\n", - "Training a convolutional neural network (CNN)
\n", - "Using pre-trained models
\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Installation and setup\n", - "\n", - "\n", - "As of 8/28/16, the latest stable release of TF is r0.10, which does not contain the latest version of slim.\n", - "To obtain the latest version of TF-Slim, please install the most recent nightly build of TF\n", - "as explained [here](https://github.com/tensorflow/models/tree/master/slim#installing-latest-version-of-tf-slim).\n", - "\n", - "To use TF-Slim for image classification (as we do in this notebook), you also have to install the TF-Slim image models library from [here](https://github.com/tensorflow/models/tree/master/slim). Let's suppose you install this into a directory called TF_MODELS. Then you should change directory to TF_MODELS/slim **before** running this notebook, so that these files are in your python path.\n", - "\n", - "To check you've got these two steps to work, just execute the cell below. If it complains about unknown modules, restart the notebook after moving to the TF-Slim models directory.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import matplotlib\n", - "%matplotlib inline\n", - "import matplotlib.pyplot as plt\n", - "import math\n", - "import numpy as np\n", - "import tensorflow as tf\n", - "import time\n", - "\n", - "from datasets import dataset_utils\n", - "\n", - "# Main slim library\n", - "slim = tf.contrib.slim" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Creating your first neural network with TF-Slim\n", - "\n", - "\n", - "Below we give some code to create a simple multilayer perceptron (MLP) which can be used\n", - "for regression problems. The model has 2 hidden layers.\n", - "The output is a single node. \n", - "When this function is called, it will create various nodes, and silently add them to whichever global TF graph is currently in scope. When a node which corresponds to a layer with adjustable parameters (eg., a fully connected layer) is created, additional parameter variable nodes are silently created, and added to the graph. (We will discuss how to train the parameters later.)\n", - "\n", - "We use variable scope to put all the nodes under a common name,\n", - "so that the graph has some hierarchical structure.\n", - "This is useful when we want to visualize the TF graph in tensorboard, or if we want to query related\n", - "variables. \n", - "The fully connected layers all use the same L2 weight decay and ReLu activations, as specified by **arg_scope**. (However, the final layer overrides these defaults, and uses an identity activation function.)\n", - "\n", - "We also illustrate how to add a dropout layer after the first fully connected layer (FC1). Note that at test time, \n", - "we do not drop out nodes, but instead use the average activations; hence we need to know whether the model is being\n", - "constructed for training or testing, since the computational graph will be different in the two cases\n", - "(although the variables, storing the model parameters, will be shared, since they have the same name/scope)." 
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "def regression_model(inputs, is_training=True, scope=\"deep_regression\"):\n",
-    "    \"\"\"Creates the regression model.\n",
-    "\n",
-    "    Args:\n",
-    "        inputs: A node that yields a `Tensor` of size [batch_size, dimensions].\n",
-    "        is_training: Whether or not we're currently training the model.\n",
-    "        scope: An optional variable_op scope for the model.\n",
-    "\n",
-    "    Returns:\n",
-    "        predictions: 1-D `Tensor` of shape [batch_size] of responses.\n",
-    "        end_points: A dict of end points representing the hidden layers.\n",
-    "    \"\"\"\n",
-    "    with tf.variable_scope(scope, 'deep_regression', [inputs]):\n",
-    "        end_points = {}\n",
-    "        # Set the default weight regularizer and activation for each fully_connected layer.\n",
-    "        with slim.arg_scope([slim.fully_connected],\n",
-    "                            activation_fn=tf.nn.relu,\n",
-    "                            weights_regularizer=slim.l2_regularizer(0.01)):\n",
-    "\n",
-    "            # Creates a fully connected layer from the inputs with 32 hidden units.\n",
-    "            net = slim.fully_connected(inputs, 32, scope='fc1')\n",
-    "            end_points['fc1'] = net\n",
-    "\n",
-    "            # Adds a dropout layer to prevent over-fitting.\n",
-    "            net = slim.dropout(net, 0.8, is_training=is_training)\n",
-    "\n",
-    "            # Adds another fully connected layer with 16 hidden units.\n",
-    "            net = slim.fully_connected(net, 16, scope='fc2')\n",
-    "            end_points['fc2'] = net\n",
-    "\n",
-    "            # Creates a fully-connected layer with a single hidden unit. Note that the\n",
-    "            # layer is made linear by setting activation_fn=None.\n",
-    "            predictions = slim.fully_connected(net, 1, activation_fn=None, scope='prediction')\n",
-    "            end_points['out'] = predictions\n",
-    "\n",
-    "            return predictions, end_points"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Let's create the model and examine its structure.\n",
-    "\n",
-    "We create a TF graph and call regression_model(), which adds nodes (tensors) to the graph. We then examine their shape, and print the names of all the model variables which have been implicitly created inside of each layer. We see that the names of the variables follow the scopes that we specified."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "with tf.Graph().as_default():\n",
-    "    # Dummy placeholders for arbitrary number of 1d inputs and outputs\n",
-    "    inputs = tf.placeholder(tf.float32, shape=(None, 1))\n",
-    "    outputs = tf.placeholder(tf.float32, shape=(None, 1))\n",
-    "\n",
-    "    # Build model\n",
-    "    predictions, end_points = regression_model(inputs)\n",
-    "\n",
-    "    # Print name and shape of each tensor.\n",
-    "    print \"Layers\"\n",
-    "    for k, v in end_points.iteritems():\n",
-    "        print 'name = {}, shape = {}'.format(v.name, v.get_shape())\n",
-    "\n",
-    "    # Print name and shape of parameter nodes (values not yet initialized)\n",
-    "    print \"\\n\"\n",
-    "    print \"Parameters\"\n",
-    "    for v in slim.get_model_variables():\n",
-    "        print 'name = {}, shape = {}'.format(v.name, v.get_shape())\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Let's create some 1d regression data.\n",
-    "\n",
-    "We will train and test the model on some noisy observations of a nonlinear function.\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "def produce_batch(batch_size, noise=0.3):\n",
-    "    xs = np.random.random(size=[batch_size, 1]) * 10\n",
-    "    ys = np.sin(xs) + 5 + np.random.normal(size=[batch_size, 1], scale=noise)\n",
-    "    return [xs.astype(np.float32), ys.astype(np.float32)]\n",
-    "\n",
-    "x_train, y_train = produce_batch(200)\n",
-    "x_test, y_test = produce_batch(200)\n",
-    "plt.scatter(x_train, y_train)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Let's fit the model to the data\n",
-    "\n",
-    "The user has to specify the loss function and the optimizer, and slim does the rest.\n",
-    "In particular, the slim.learning.train function does the following:\n",
-    "\n",
-    "- For each iteration, evaluate the train_op, which updates the parameters using the optimizer applied to the current minibatch. Also, update the global_step.\n",
-    "- Occasionally store the model checkpoint in the specified directory. This is useful in case your machine crashes - then you can simply restart from the specified checkpoint."
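
Roughly, those two bullets correspond to a loop of this shape. A simplified sketch only (assuming the era-appropriate TF1 session API; the real `slim.learning.train` also manages supervisors, summaries, and crash recovery):

```python
import tensorflow as tf

def sketch_train_loop(train_op, logdir, number_of_steps, save_every=1000):
    # train_op comes from slim.learning.create_train_op: running it applies
    # the optimizer to one minibatch, increments global_step, and returns loss.
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        for step in range(number_of_steps):
            loss = sess.run(train_op)
            if step % save_every == 0:
                saver.save(sess, logdir + 'model.ckpt', global_step=step)
    return loss
```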
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "def convert_data_to_tensors(x, y):\n",
-    "    inputs = tf.constant(x)\n",
-    "    inputs.set_shape([None, 1])\n",
-    "    \n",
-    "    outputs = tf.constant(y)\n",
-    "    outputs.set_shape([None, 1])\n",
-    "    return inputs, outputs"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "# The following snippet trains the regression model using a sum_of_squares loss.\n",
-    "ckpt_dir = '/tmp/regression_model/'\n",
-    "\n",
-    "with tf.Graph().as_default():\n",
-    "    tf.logging.set_verbosity(tf.logging.INFO)\n",
-    "    \n",
-    "    inputs, targets = convert_data_to_tensors(x_train, y_train)\n",
-    "\n",
-    "    # Make the model.\n",
-    "    predictions, nodes = regression_model(inputs, is_training=True)\n",
-    "\n",
-    "    # Add the loss function to the graph.\n",
-    "    loss = slim.losses.sum_of_squares(predictions, targets)\n",
-    "    \n",
-    "    # The total loss is the user's loss plus any regularization losses.\n",
-    "    total_loss = slim.losses.get_total_loss()\n",
-    "\n",
-    "    # Specify the optimizer and create the train op:\n",
-    "    optimizer = tf.train.AdamOptimizer(learning_rate=0.005)\n",
-    "    train_op = slim.learning.create_train_op(total_loss, optimizer) \n",
-    "\n",
-    "    # Run the training inside a session.\n",
-    "    final_loss = slim.learning.train(\n",
-    "        train_op,\n",
-    "        logdir=ckpt_dir,\n",
-    "        number_of_steps=5000,\n",
-    "        save_summaries_secs=5,\n",
-    "        log_every_n_steps=500)\n",
-    "    \n",
-    "print(\"Finished training. Last batch loss:\", final_loss)\n",
-    "print(\"Checkpoint saved in %s\" % ckpt_dir)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Training with multiple loss functions.\n",
-    "\n",
-    "Sometimes we have multiple objectives we want to simultaneously optimize.\n",
-    "In slim, it is easy to add more losses, as we show below. 
(We do not optimize the total loss in this example,\n",
-    "but we show how to compute it.)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "with tf.Graph().as_default():\n",
-    "    inputs, targets = convert_data_to_tensors(x_train, y_train)\n",
-    "    predictions, end_points = regression_model(inputs, is_training=True)\n",
-    "\n",
-    "    # Add multiple loss nodes.\n",
-    "    sum_of_squares_loss = slim.losses.sum_of_squares(predictions, targets)\n",
-    "    absolute_difference_loss = slim.losses.absolute_difference(predictions, targets)\n",
-    "\n",
-    "    # The following two ways to compute the total loss are equivalent\n",
-    "    regularization_loss = tf.add_n(slim.losses.get_regularization_losses())\n",
-    "    total_loss1 = sum_of_squares_loss + absolute_difference_loss + regularization_loss\n",
-    "\n",
-    "    # Regularization Loss is included in the total loss by default.\n",
-    "    # This is good for training, but not for testing.\n",
-    "    total_loss2 = slim.losses.get_total_loss(add_regularization_losses=True)\n",
-    "    \n",
-    "    init_op = tf.initialize_all_variables()\n",
-    "    \n",
-    "    with tf.Session() as sess:\n",
-    "        sess.run(init_op) # Will initialize the parameters with random weights.\n",
-    "        \n",
-    "        total_loss1, total_loss2 = sess.run([total_loss1, total_loss2])\n",
-    "        \n",
-    "        print('Total Loss1: %f' % total_loss1)\n",
-    "        print('Total Loss2: %f' % total_loss2)\n",
-    "\n",
-    "        print('Regularization Losses:')\n",
-    "        for loss in slim.losses.get_regularization_losses():\n",
-    "            print(loss)\n",
-    "\n",
-    "        print('Loss Functions:')\n",
-    "        for loss in slim.losses.get_losses():\n",
-    "            print(loss)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Let's load the saved model and use it for prediction."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "with tf.Graph().as_default():\n",
-    "    inputs, targets = convert_data_to_tensors(x_test, y_test)\n",
-    "    \n",
-    "    # Create the model structure. (Parameters will be loaded below.)\n",
-    "    predictions, end_points = regression_model(inputs, is_training=False)\n",
-    "\n",
-    "    # Make a session which restores the old parameters from a checkpoint.\n",
-    "    sv = tf.train.Supervisor(logdir=ckpt_dir)\n",
-    "    with sv.managed_session() as sess:\n",
-    "        inputs, predictions, targets = sess.run([inputs, predictions, targets])\n",
-    "\n",
-    "plt.scatter(inputs, targets, c='r');\n",
-    "plt.scatter(inputs, predictions, c='b');\n",
-    "plt.title('red=true, blue=predicted')"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Let's compute various evaluation metrics on the test set.\n",
-    "\n",
-    "In TF-Slim terminology, losses are optimized, but metrics (which may not be differentiable, e.g., precision and recall) are just measured. As an illustration, the code below computes mean squared error and mean absolute error metrics on the test set.\n",
-    "\n",
-    "Each metric declaration creates several local variables (which must be initialized via tf.initialize_local_variables()) and returns both a value_op and an update_op. When evaluated, the value_op returns the current value of the metric. The update_op loads a new batch of data, runs the model, obtains the predictions and accumulates the metric statistics appropriately before returning the current value of the metric. 
We store these value nodes and update nodes in 2 dictionaries.\n",
-    "\n",
-    "After creating the metric nodes, we can pass them to slim.evaluation.evaluation, which repeatedly evaluates these nodes the specified number of times. (This allows us to compute the evaluation in a streaming fashion across minibatches, which is useful for large datasets.) Finally, we print the final value of each metric.\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "with tf.Graph().as_default():\n",
-    "    inputs, targets = convert_data_to_tensors(x_test, y_test)\n",
-    "    predictions, end_points = regression_model(inputs, is_training=False)\n",
-    "\n",
-    "    # Specify metrics to evaluate:\n",
-    "    names_to_value_nodes, names_to_update_nodes = slim.metrics.aggregate_metric_map({\n",
-    "      'Mean Squared Error': slim.metrics.streaming_mean_squared_error(predictions, targets),\n",
-    "      'Mean Absolute Error': slim.metrics.streaming_mean_absolute_error(predictions, targets)\n",
-    "    })\n",
-    "\n",
-    "    # Make a session which restores the old graph parameters, and then run eval.\n",
-    "    sv = tf.train.Supervisor(logdir=ckpt_dir)\n",
-    "    with sv.managed_session() as sess:\n",
-    "        metric_values = slim.evaluation.evaluation(\n",
-    "            sess,\n",
-    "            num_evals=1, # Single pass over data\n",
-    "            eval_op=names_to_update_nodes.values(),\n",
-    "            final_op=names_to_value_nodes.values())\n",
-    "\n",
-    "    names_to_values = dict(zip(names_to_value_nodes.keys(), metric_values))\n",
-    "    for key, value in names_to_values.iteritems():\n",
-    "      print('%s: %f' % (key, value))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Reading Data with TF-Slim\n",
-    "\n",
-    "\n",
-    "Reading data with TF-Slim has two main components: A\n",
-    "[Dataset](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/slim/python/slim/data/dataset.py) and a \n",
-    "[DatasetDataProvider](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py). The former is a descriptor of a dataset, while the latter performs the actions necessary for actually reading the data. Let's look at each one in detail:\n",
-    "\n",
-    "\n",
-    "## Dataset\n",
-    "A TF-Slim\n",
-    "[Dataset](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/slim/python/slim/data/dataset.py)\n",
-    "contains descriptive information about a dataset necessary for reading it, such as the list of data files and how to decode them. It also contains metadata including class labels, the size of the train/test splits and descriptions of the tensors that the dataset provides. For example, some datasets contain images with labels. Others augment this data with bounding box annotations, etc. The Dataset object allows us to write generic code using the same API, regardless of the data content and encoding type.\n",
-    "\n",
-    "TF-Slim's Dataset works especially well when the data is stored as a (possibly sharded)\n",
-    "[TFRecords file](https://www.tensorflow.org/versions/r0.10/how_tos/reading_data/index.html#file-formats), where each record contains a [tf.train.Example protocol buffer](https://github.com/tensorflow/tensorflow/blob/r0.10/tensorflow/core/example/example.proto).\n",
-    "TF-Slim uses a consistent convention for naming the keys and values inside each Example record. 
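
To make the Dataset half concrete, here is a hedged sketch of a descriptor for such TFRecords. The feature keys follow the naming convention just mentioned; the file pattern and sample count are placeholders, not values taken from this repo:

```python
import tensorflow as tf

slim = tf.contrib.slim

# Feature keys follow the tf.train.Example naming convention noted above.
keys_to_features = {
    'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
    'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
    'image/class/label': tf.FixedLenFeature([], tf.int64),
}
items_to_handlers = {
    'image': slim.tfexample_decoder.Image(),
    'label': slim.tfexample_decoder.Tensor('image/class/label'),
}

dataset = slim.dataset.Dataset(
    data_sources='/tmp/flowers/flowers_train-*.tfrecord',  # placeholder pattern
    reader=tf.TFRecordReader,
    decoder=slim.tfexample_decoder.TFExampleDecoder(
        keys_to_features, items_to_handlers),
    num_samples=3320,  # placeholder count; a real descriptor supplies this
    items_to_descriptions={'image': 'A color image.',
                           'label': 'An integer class id.'})
```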
\n", - "\n", - "## DatasetDataProvider\n", - "\n", - "A\n", - "[DatasetDataProvider](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py) is a class which actually reads the data from a dataset. It is highly configurable to read the data in various ways that may make a big impact on the efficiency of your training process. For example, it can be single or multi-threaded. If your data is sharded across many files, it can read each files serially, or from every file simultaneously.\n", - "\n", - "## Demo: The Flowers Dataset\n", - "\n", - "For convenience, we've include scripts to convert several common image datasets into TFRecord format and have provided\n", - "the Dataset descriptor files necessary for reading them. We demonstrate how easy it is to use these dataset via the Flowers dataset below." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Download the Flowers Dataset\n", - "\n", - "\n", - "We've made available a tarball of the Flowers dataset which has already been converted to TFRecord format." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import tensorflow as tf\n", - "from datasets import dataset_utils\n", - "\n", - "url = \"http://download.tensorflow.org/data/flowers.tar.gz\"\n", - "flowers_data_dir = '/tmp/flowers'\n", - "\n", - "if not tf.gfile.Exists(flowers_data_dir):\n", - " tf.gfile.MakeDirs(flowers_data_dir)\n", - "\n", - "dataset_utils.download_and_uncompress_tarball(url, flowers_data_dir) " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Display some of the data." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "from datasets import flowers\n", - "import tensorflow as tf\n", - "\n", - "slim = tf.contrib.slim\n", - "\n", - "with tf.Graph().as_default(): \n", - " dataset = flowers.get_split('train', flowers_data_dir)\n", - " data_provider = slim.dataset_data_provider.DatasetDataProvider(\n", - " dataset, common_queue_capacity=32, common_queue_min=1)\n", - " image, label = data_provider.get(['image', 'label'])\n", - " \n", - " with tf.Session() as sess: \n", - " with slim.queues.QueueRunners(sess):\n", - " for i in xrange(4):\n", - " np_image, np_label = sess.run([image, label])\n", - " height, width, _ = np_image.shape\n", - " class_name = name = dataset.labels_to_names[np_label]\n", - " \n", - " plt.figure()\n", - " plt.imshow(np_image)\n", - " plt.title('%s, %d x %d' % (name, height, width))\n", - " plt.axis('off')\n", - " plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Convolutional neural nets (CNNs).\n", - "\n", - "\n", - "In this section, we show how to train an image classifier using a simple CNN.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Define the model.\n", - "\n", - "Below we define a simple CNN. Note that the output layer is linear function - we will apply softmax transformation externally to the model, either in the loss function (for training), or in the prediction function (during testing)." 
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "def my_cnn(images, num_classes, is_training): # is_training is not used...\n",
-    "    with slim.arg_scope([slim.max_pool2d], kernel_size=[3, 3], stride=2):\n",
-    "        net = slim.conv2d(images, 64, [5, 5])\n",
-    "        net = slim.max_pool2d(net)\n",
-    "        net = slim.conv2d(net, 64, [5, 5])\n",
-    "        net = slim.max_pool2d(net)\n",
-    "        net = slim.flatten(net)\n",
-    "        net = slim.fully_connected(net, 192)\n",
-    "        net = slim.fully_connected(net, num_classes, activation_fn=None) \n",
-    "        return net"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Apply the model to some randomly generated images."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "import tensorflow as tf\n",
-    "\n",
-    "with tf.Graph().as_default():\n",
-    "    # The model can handle any input size because the first layer is convolutional.\n",
-    "    # The size of the model is determined when image_node is first passed into the my_cnn function.\n",
-    "    # Once the variables are initialized, the size of all the weight matrices is fixed.\n",
-    "    # Because of the fully connected layers, this means that all subsequent images must have the same\n",
-    "    # input size as the first image.\n",
-    "    batch_size, height, width, channels = 3, 28, 28, 3\n",
-    "    images = tf.random_uniform([batch_size, height, width, channels], maxval=1)\n",
-    "    \n",
-    "    # Create the model.\n",
-    "    num_classes = 10\n",
-    "    logits = my_cnn(images, num_classes, is_training=True)\n",
-    "    probabilities = tf.nn.softmax(logits)\n",
-    "    \n",
-    "    # Initialize all the variables (including parameters) randomly.\n",
-    "    init_op = tf.initialize_all_variables()\n",
-    "    \n",
-    "    with tf.Session() as sess:\n",
-    "        # Run the init_op, evaluate the model outputs and print the results:\n",
-    "        sess.run(init_op)\n",
-    "        probabilities = sess.run(probabilities)\n",
-    "    \n",
-    "print('Probabilities Shape:')\n",
-    "print(probabilities.shape) # batch_size x num_classes \n",
-    "\n",
-    "print('\\nProbabilities:')\n",
-    "print(probabilities)\n",
-    "\n",
-    "print('\\nSumming across all classes (Should equal 1):')\n",
-    "print(np.sum(probabilities, 1)) # Each row sums to 1"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Train the model on the Flowers dataset.\n",
-    "\n",
-    "Before starting, make sure you've run the code to Download the Flowers dataset. Now, we'll get a sense of what it looks like to use TF-Slim's training functions found in\n",
-    "[learning.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/slim/python/slim/learning.py). First, we'll create a function, `load_batch`, that loads batches of data from a dataset. Next, we'll train a model for a single step (just to demonstrate the API), and evaluate the results."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "from preprocessing import inception_preprocessing\n", - "import tensorflow as tf\n", - "\n", - "slim = tf.contrib.slim\n", - "\n", - "\n", - "def load_batch(dataset, batch_size=32, height=299, width=299, is_training=False):\n", - " \"\"\"Loads a single batch of data.\n", - " \n", - " Args:\n", - " dataset: The dataset to load.\n", - " batch_size: The number of images in the batch.\n", - " height: The size of each image after preprocessing.\n", - " width: The size of each image after preprocessing.\n", - " is_training: Whether or not we're currently training or evaluating.\n", - " \n", - " Returns:\n", - " images: A Tensor of size [batch_size, height, width, 3], image samples that have been preprocessed.\n", - " images_raw: A Tensor of size [batch_size, height, width, 3], image samples that can be used for visualization.\n", - " labels: A Tensor of size [batch_size], whose values range between 0 and dataset.num_classes.\n", - " \"\"\"\n", - " data_provider = slim.dataset_data_provider.DatasetDataProvider(\n", - " dataset, common_queue_capacity=32,\n", - " common_queue_min=8)\n", - " image_raw, label = data_provider.get(['image', 'label'])\n", - " \n", - " # Preprocess image for usage by Inception.\n", - " image = inception_preprocessing.preprocess_image(image_raw, height, width, is_training=is_training)\n", - " \n", - " # Preprocess the image for display purposes.\n", - " image_raw = tf.expand_dims(image_raw, 0)\n", - " image_raw = tf.image.resize_images(image_raw, [height, width])\n", - " image_raw = tf.squeeze(image_raw)\n", - "\n", - " # Batch it up.\n", - " images, images_raw, labels = tf.train.batch(\n", - " [image, image_raw, label],\n", - " batch_size=batch_size,\n", - " num_threads=1,\n", - " capacity=2 * batch_size)\n", - " \n", - " return images, images_raw, labels" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "from datasets import flowers\n", - "\n", - "# This might take a few minutes.\n", - "train_dir = '/tmp/tfslim_model/'\n", - "print('Will save model to %s' % train_dir)\n", - "\n", - "with tf.Graph().as_default():\n", - " tf.logging.set_verbosity(tf.logging.INFO)\n", - "\n", - " dataset = flowers.get_split('train', flowers_data_dir)\n", - " images, _, labels = load_batch(dataset)\n", - " \n", - " # Create the model:\n", - " logits = my_cnn(images, num_classes=dataset.num_classes, is_training=True)\n", - " \n", - " # Specify the loss function:\n", - " one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)\n", - " slim.losses.softmax_cross_entropy(logits, one_hot_labels)\n", - " total_loss = slim.losses.get_total_loss()\n", - "\n", - " # Create some summaries to visualize the training process:\n", - " tf.scalar_summary('losses/Total Loss', total_loss)\n", - " \n", - " # Specify the optimizer and create the train op:\n", - " optimizer = tf.train.AdamOptimizer(learning_rate=0.01)\n", - " train_op = slim.learning.create_train_op(total_loss, optimizer)\n", - "\n", - " # Run the training:\n", - " final_loss = slim.learning.train(\n", - " train_op,\n", - " logdir=train_dir,\n", - " number_of_steps=1, # For speed, we just do 1 epoch\n", - " save_summaries_secs=1)\n", - " \n", - " print('Finished training. 
Final batch loss %f' % final_loss)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Evaluate some metrics.\n",
-    "\n",
-    "As we discussed above, we can compute various metrics besides the loss.\n",
-    "Below we show how to compute prediction accuracy of the trained model, as well as top-5 classification accuracy. (The difference between evaluation and evaluation_loop is that the latter writes the results to a log directory, so they can be viewed in tensorboard.)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "from datasets import flowers\n",
-    "\n",
-    "# This might take a few minutes.\n",
-    "with tf.Graph().as_default():\n",
-    "    tf.logging.set_verbosity(tf.logging.DEBUG)\n",
-    "    \n",
-    "    dataset = flowers.get_split('train', flowers_data_dir)\n",
-    "    images, _, labels = load_batch(dataset)\n",
-    "    \n",
-    "    logits = my_cnn(images, num_classes=dataset.num_classes, is_training=False)\n",
-    "    predictions = tf.argmax(logits, 1)\n",
-    "    \n",
-    "    # Define the metrics:\n",
-    "    names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({\n",
-    "        'eval/Accuracy': slim.metrics.streaming_accuracy(predictions, labels),\n",
-    "        'eval/Recall@5': slim.metrics.streaming_recall_at_k(logits, labels, 5),\n",
-    "    })\n",
-    "\n",
-    "    print('Running evaluation Loop...')\n",
-    "    checkpoint_path = tf.train.latest_checkpoint(train_dir)\n",
-    "    metric_values = slim.evaluation.evaluate_once(\n",
-    "        master='',\n",
-    "        checkpoint_path=checkpoint_path,\n",
-    "        logdir=train_dir,\n",
-    "        eval_op=names_to_updates.values(),\n",
-    "        final_op=names_to_values.values())\n",
-    "\n",
-    "    names_to_values = dict(zip(names_to_values.keys(), metric_values))\n",
-    "    for name in names_to_values:\n",
-    "        print('%s: %f' % (name, names_to_values[name]))\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Using pre-trained models\n",
-    "\n",
-    "\n",
-    "Neural nets work best when they have many parameters, making them very flexible function approximators.\n",
-    "However, this means they must be trained on big datasets. Since this process is slow, we provide various pre-trained models - see the list [here](https://github.com/tensorflow/models/tree/master/slim#pre-trained-models).\n",
-    "\n",
-    "\n",
-    "You can either use these models as-is, or you can perform \"surgery\" on them, to modify them for some other task. For example, it is common to \"chop off\" the final pre-softmax layer, and replace it with a new set of weights corresponding to some new set of labels. You can then quickly fine-tune the new model on a small new dataset. We illustrate this below, using inception-v1 as the base model. 
While models like Inception V3 are more powerful, Inception V1 is used for speed purposes.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Download the Inception V1 checkpoint\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "from datasets import dataset_utils\n", - "\n", - "url = \"http://download.tensorflow.org/models/inception_v1_2016_08_28.tar.gz\"\n", - "checkpoints_dir = '/tmp/checkpoints'\n", - "\n", - "if not tf.gfile.Exists(checkpoints_dir):\n", - " tf.gfile.MakeDirs(checkpoints_dir)\n", - "\n", - "dataset_utils.download_and_uncompress_tarball(url, checkpoints_dir)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "### Apply Pre-trained model to Images.\n", - "\n", - "We have to convert each image to the size expected by the model checkpoint.\n", - "There is no easy way to determine this size from the checkpoint itself.\n", - "So we use a preprocessor to enforce this." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import os\n", - "import tensorflow as tf\n", - "import urllib2\n", - "\n", - "from datasets import imagenet\n", - "from nets import inception\n", - "from preprocessing import inception_preprocessing\n", - "\n", - "slim = tf.contrib.slim\n", - "\n", - "batch_size = 3\n", - "image_size = inception.inception_v1.default_image_size\n", - "\n", - "with tf.Graph().as_default():\n", - " url = 'https://upload.wikimedia.org/wikipedia/commons/7/70/EnglishCockerSpaniel_simon.jpg'\n", - " image_string = urllib2.urlopen(url).read()\n", - " image = tf.image.decode_jpeg(image_string, channels=3)\n", - " processed_image = inception_preprocessing.preprocess_image(image, image_size, image_size, is_training=False)\n", - " processed_images = tf.expand_dims(processed_image, 0)\n", - " \n", - " # Create the model, use the default arg scope to configure the batch norm parameters.\n", - " with slim.arg_scope(inception.inception_v1_arg_scope()):\n", - " logits, _ = inception.inception_v1(processed_images, num_classes=1001, is_training=False)\n", - " probabilities = tf.nn.softmax(logits)\n", - " \n", - " init_fn = slim.assign_from_checkpoint_fn(\n", - " os.path.join(checkpoints_dir, 'inception_v1.ckpt'),\n", - " slim.get_model_variables('InceptionV1'))\n", - " \n", - " with tf.Session() as sess:\n", - " init_fn(sess)\n", - " np_image, probabilities = sess.run([image, probabilities])\n", - " probabilities = probabilities[0, 0:]\n", - " sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x:x[1])]\n", - " \n", - " plt.figure()\n", - " plt.imshow(np_image.astype(np.uint8))\n", - " plt.axis('off')\n", - " plt.show()\n", - "\n", - " names = imagenet.create_readable_names_for_imagenet_labels()\n", - " for i in range(5):\n", - " index = sorted_inds[i]\n", - " print('Probability %0.2f%% => [%s]' % (probabilities[index], names[index]))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Fine-tune the model on a different set of labels.\n", - "\n", - "We will fine tune the inception model on the Flowers dataset." 
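
The next cell builds the list of variables to restore by hand. For reference, `slim.get_variables_to_restore` can express the same exclusion more compactly; a sketch under the same scope names (the checkpoint directory mirrors the earlier download cell):

```python
import os
import tensorflow as tf

slim = tf.contrib.slim
checkpoints_dir = '/tmp/checkpoints'  # as set in the download cell above

# Sketch: restore everything except the logits scopes being replaced,
# using slim's built-in filter instead of the manual loop in the next cell.
variables_to_restore = slim.get_variables_to_restore(
    exclude=['InceptionV1/Logits', 'InceptionV1/AuxLogits'])
init_fn = slim.assign_from_checkpoint_fn(
    os.path.join(checkpoints_dir, 'inception_v1.ckpt'),
    variables_to_restore)
```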
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# Note that this may take several minutes.\n", - "\n", - "import os\n", - "\n", - "from datasets import flowers\n", - "from nets import inception\n", - "from preprocessing import inception_preprocessing\n", - "\n", - "slim = tf.contrib.slim\n", - "image_size = inception.inception_v1.default_image_size\n", - "\n", - "\n", - "def get_init_fn():\n", - " \"\"\"Returns a function run by the chief worker to warm-start the training.\"\"\"\n", - " checkpoint_exclude_scopes=[\"InceptionV1/Logits\", \"InceptionV1/AuxLogits\"]\n", - " \n", - " exclusions = [scope.strip() for scope in checkpoint_exclude_scopes]\n", - "\n", - " variables_to_restore = []\n", - " for var in slim.get_model_variables():\n", - " excluded = False\n", - " for exclusion in exclusions:\n", - " if var.op.name.startswith(exclusion):\n", - " excluded = True\n", - " break\n", - " if not excluded:\n", - " variables_to_restore.append(var)\n", - "\n", - " return slim.assign_from_checkpoint_fn(\n", - " os.path.join(checkpoints_dir, 'inception_v1.ckpt'),\n", - " variables_to_restore)\n", - "\n", - "\n", - "train_dir = '/tmp/inception_finetuned/'\n", - "\n", - "with tf.Graph().as_default():\n", - " tf.logging.set_verbosity(tf.logging.INFO)\n", - " \n", - " dataset = flowers.get_split('train', flowers_data_dir)\n", - " images, _, labels = load_batch(dataset, height=image_size, width=image_size)\n", - " \n", - " # Create the model, use the default arg scope to configure the batch norm parameters.\n", - " with slim.arg_scope(inception.inception_v1_arg_scope()):\n", - " logits, _ = inception.inception_v1(images, num_classes=dataset.num_classes, is_training=True)\n", - " \n", - " # Specify the loss function:\n", - " one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)\n", - " slim.losses.softmax_cross_entropy(logits, one_hot_labels)\n", - " total_loss = slim.losses.get_total_loss()\n", - "\n", - " # Create some summaries to visualize the training process:\n", - " tf.scalar_summary('losses/Total Loss', total_loss)\n", - " \n", - " # Specify the optimizer and create the train op:\n", - " optimizer = tf.train.AdamOptimizer(learning_rate=0.01)\n", - " train_op = slim.learning.create_train_op(total_loss, optimizer)\n", - " \n", - " # Run the training:\n", - " final_loss = slim.learning.train(\n", - " train_op,\n", - " logdir=train_dir,\n", - " init_fn=get_init_fn(),\n", - " number_of_steps=2)\n", - " \n", - " \n", - "print('Finished training. Last batch loss %f' % final_loss)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Apply fine tuned model to some images." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import tensorflow as tf\n", - "from datasets import flowers\n", - "from nets import inception\n", - "\n", - "slim = tf.contrib.slim\n", - "\n", - "image_size = inception.inception_v1.default_image_size\n", - "batch_size = 3\n", - "\n", - "with tf.Graph().as_default():\n", - " tf.logging.set_verbosity(tf.logging.INFO)\n", - " \n", - " dataset = flowers.get_split('train', flowers_data_dir)\n", - " images, images_raw, labels = load_batch(dataset, height=image_size, width=image_size)\n", - " \n", - " # Create the model, use the default arg scope to configure the batch norm parameters.\n", - " with slim.arg_scope(inception.inception_v1_arg_scope()):\n", - " logits, _ = inception.inception_v1(images, num_classes=dataset.num_classes, is_training=True)\n", - "\n", - " probabilities = tf.nn.softmax(logits)\n", - " \n", - " checkpoint_path = tf.train.latest_checkpoint(train_dir)\n", - " init_fn = slim.assign_from_checkpoint_fn(\n", - " checkpoint_path,\n", - " slim.get_variables_to_restore())\n", - " \n", - " with tf.Session() as sess:\n", - " with slim.queues.QueueRunners(sess):\n", - " sess.run(tf.initialize_local_variables())\n", - " init_fn(sess)\n", - " np_probabilities, np_images_raw, np_labels = sess.run([probabilities, images_raw, labels])\n", - " \n", - " for i in xrange(batch_size): \n", - " image = np_images_raw[i, :, :, :]\n", - " true_label = np_labels[i]\n", - " predicted_label = np.argmax(np_probabilities[i, :])\n", - " predicted_name = dataset.labels_to_names[predicted_label]\n", - " true_name = dataset.labels_to_names[true_label]\n", - " \n", - " plt.figure()\n", - " plt.imshow(image.astype(np.uint8))\n", - " plt.title('Ground Truth: [%s], Prediction [%s]' % (true_name, predicted_name))\n", - " plt.axis('off')\n", - " plt.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/examples/slim/train_image_classifier.py b/examples/slim/train_image_classifier.py deleted file mode 100644 index b1cb0b12..00000000 --- a/examples/slim/train_image_classifier.py +++ /dev/null @@ -1,622 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Generic training script that trains a model using a given dataset.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from pyspark.context import SparkContext -from pyspark.conf import SparkConf -from tensorflowonspark import TFCluster, TFNode -from datetime import datetime -import sys - -def main_fun(argv, ctx): - import tensorflow as tf - from tensorflow.python.ops import control_flow_ops - from datasets import dataset_factory - from deployment import model_deploy - from nets import nets_factory - from preprocessing import preprocessing_factory - - sys.argv = argv - - slim = tf.contrib.slim - - tf.app.flags.DEFINE_integer( - 'num_gpus', '1', 'The number of GPUs to use per node') - - tf.app.flags.DEFINE_boolean('rdma', False, 'Whether to use rdma.') - - tf.app.flags.DEFINE_string( - 'master', '', 'The address of the TensorFlow master to use.') - - tf.app.flags.DEFINE_string( - 'train_dir', '/tmp/tfmodel/', - 'Directory where checkpoints and event logs are written to.') - - tf.app.flags.DEFINE_integer('num_clones', 1, - 'Number of model clones to deploy.') - - tf.app.flags.DEFINE_boolean('clone_on_cpu', False, - 'Use CPUs to deploy clones.') - - tf.app.flags.DEFINE_integer('worker_replicas', 1, 'Number of worker replicas.') - - tf.app.flags.DEFINE_integer( - 'num_ps_tasks', 0, - 'The number of parameter servers. If the value is 0, then the parameters ' - 'are handled locally by the worker.') - - tf.app.flags.DEFINE_integer( - 'num_readers', 4, - 'The number of parallel readers that read data from the dataset.') - - tf.app.flags.DEFINE_integer( - 'num_preprocessing_threads', 4, - 'The number of threads used to create the batches.') - - tf.app.flags.DEFINE_integer( - 'log_every_n_steps', 10, - 'The frequency with which logs are print.') - - tf.app.flags.DEFINE_integer( - 'save_summaries_secs', 600, - 'The frequency with which summaries are saved, in seconds.') - - tf.app.flags.DEFINE_integer( - 'save_interval_secs', 600, - 'The frequency with which the model is saved, in seconds.') - - tf.app.flags.DEFINE_integer( - 'task', 0, 'Task id of the replica running the training.') - - ###################### - # Optimization Flags # - ###################### - - tf.app.flags.DEFINE_float( - 'weight_decay', 0.00004, 'The weight decay on the model weights.') - - tf.app.flags.DEFINE_string( - 'optimizer', 'rmsprop', - 'The name of the optimizer, one of "adadelta", "adagrad", "adam",' - '"ftrl", "momentum", "sgd" or "rmsprop".') - - tf.app.flags.DEFINE_float( - 'adadelta_rho', 0.95, - 'The decay rate for adadelta.') - - tf.app.flags.DEFINE_float( - 'adagrad_initial_accumulator_value', 0.1, - 'Starting value for the AdaGrad accumulators.') - - tf.app.flags.DEFINE_float( - 'adam_beta1', 0.9, - 'The exponential decay rate for the 1st moment estimates.') - - tf.app.flags.DEFINE_float( - 'adam_beta2', 0.999, - 'The exponential decay rate for the 2nd moment estimates.') - - tf.app.flags.DEFINE_float('opt_epsilon', 1.0, 'Epsilon term for the optimizer.') - - tf.app.flags.DEFINE_float('ftrl_learning_rate_power', -0.5, - 'The learning rate power.') - - tf.app.flags.DEFINE_float( - 'ftrl_initial_accumulator_value', 0.1, - 'Starting value for the FTRL accumulators.') - - tf.app.flags.DEFINE_float( - 'ftrl_l1', 0.0, 'The FTRL l1 regularization strength.') - - tf.app.flags.DEFINE_float( - 'ftrl_l2', 0.0, 'The FTRL l2 regularization strength.') - - 
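
For orientation, the optimizer flags above feed a dispatch of roughly this shape. This is an abridged sketch of the `_configure_optimizer` helper defined further down in this file, not its verbatim body:

```python
import tensorflow as tf

def configure_optimizer_sketch(FLAGS, learning_rate):
    # Abridged mapping from the --optimizer flag to a tf.train optimizer;
    # the real helper also covers adadelta, adagrad, adam, and ftrl.
    if FLAGS.optimizer == 'sgd':
        return tf.train.GradientDescentOptimizer(learning_rate)
    elif FLAGS.optimizer == 'momentum':
        return tf.train.MomentumOptimizer(learning_rate, momentum=FLAGS.momentum)
    elif FLAGS.optimizer == 'rmsprop':
        return tf.train.RMSPropOptimizer(
            learning_rate,
            decay=FLAGS.rmsprop_decay,
            momentum=FLAGS.momentum,
            epsilon=FLAGS.opt_epsilon)
    raise ValueError('Optimizer [%s] was not recognized' % FLAGS.optimizer)
```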
tf.app.flags.DEFINE_float( - 'momentum', 0.9, - 'The momentum for the MomentumOptimizer and RMSPropOptimizer.') - - tf.app.flags.DEFINE_float('rmsprop_decay', 0.9, 'Decay term for RMSProp.') - - ####################### - # Learning Rate Flags # - ####################### - - tf.app.flags.DEFINE_string( - 'learning_rate_decay_type', - 'exponential', - 'Specifies how the learning rate is decayed. One of "fixed", "exponential",' - ' or "polynomial"') - - tf.app.flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.') - - tf.app.flags.DEFINE_float( - 'end_learning_rate', 0.0001, - 'The minimal end learning rate used by a polynomial decay learning rate.') - - tf.app.flags.DEFINE_float( - 'label_smoothing', 0.0, 'The amount of label smoothing.') - - tf.app.flags.DEFINE_float( - 'learning_rate_decay_factor', 0.94, 'Learning rate decay factor.') - - tf.app.flags.DEFINE_float( - 'num_epochs_per_decay', 2.0, - 'Number of epochs after which learning rate decays.') - - tf.app.flags.DEFINE_bool( - 'sync_replicas', False, - 'Whether or not to synchronize the replicas during training.') - - tf.app.flags.DEFINE_integer( - 'replicas_to_aggregate', 1, - 'The Number of gradients to collect before updating params.') - - tf.app.flags.DEFINE_float( - 'moving_average_decay', None, - 'The decay to use for the moving average.' - 'If left as None, then moving averages are not used.') - - ####################### - # Dataset Flags # - ####################### - - tf.app.flags.DEFINE_string( - 'dataset_name', 'imagenet', 'The name of the dataset to load.') - - tf.app.flags.DEFINE_string( - 'dataset_split_name', 'train', 'The name of the train/test split.') - - tf.app.flags.DEFINE_string( - 'dataset_dir', None, 'The directory where the dataset files are stored.') - - tf.app.flags.DEFINE_integer( - 'labels_offset', 0, - 'An offset for the labels in the dataset. This flag is primarily used to ' - 'evaluate the VGG and ResNet architectures which do not use a background ' - 'class for the ImageNet dataset.') - - tf.app.flags.DEFINE_string( - 'model_name', 'inception_v3', 'The name of the architecture to train.') - - tf.app.flags.DEFINE_string( - 'preprocessing_name', None, 'The name of the preprocessing to use. If left ' - 'as `None`, then the model_name flag is used.') - - tf.app.flags.DEFINE_integer( - 'batch_size', 32, 'The number of samples in each batch.') - - tf.app.flags.DEFINE_integer( - 'train_image_size', None, 'Train image size') - - tf.app.flags.DEFINE_integer('max_number_of_steps', None, - 'The maximum number of training steps.') - - ##################### - # Fine-Tuning Flags # - ##################### - - tf.app.flags.DEFINE_string( - 'checkpoint_path', None, - 'The path to a checkpoint from which to fine-tune.') - - tf.app.flags.DEFINE_string( - 'checkpoint_exclude_scopes', None, - 'Comma-separated list of scopes of variables to exclude when restoring ' - 'from a checkpoint.') - - tf.app.flags.DEFINE_string( - 'trainable_scopes', None, - 'Comma-separated list of scopes to filter the set of variables to train.' 
- 'By default, None would train all the variables.') - - tf.app.flags.DEFINE_boolean( - 'ignore_missing_vars', False, - 'When restoring a checkpoint would ignore missing variables.') - - FLAGS = tf.app.flags.FLAGS - FLAGS.job_name = ctx.job_name - FLAGS.task = ctx.task_index - FLAGS.num_clones = FLAGS.num_gpus - FLAGS.worker_replicas = len(ctx.cluster_spec['worker']) - assert(FLAGS.num_ps_tasks == (len(ctx.cluster_spec['ps']) if 'ps' in ctx.cluster_spec else 0)) - - def _configure_learning_rate(num_samples_per_epoch, global_step): - """Configures the learning rate. - - Args: - num_samples_per_epoch: The number of samples in each epoch of training. - global_step: The global_step tensor. - - Returns: - A `Tensor` representing the learning rate. - - Raises: - ValueError: if - """ - decay_steps = int(num_samples_per_epoch / FLAGS.batch_size * - FLAGS.num_epochs_per_decay) - if FLAGS.sync_replicas: - decay_steps /= FLAGS.replicas_to_aggregate - - if FLAGS.learning_rate_decay_type == 'exponential': - return tf.train.exponential_decay(FLAGS.learning_rate, - global_step, - decay_steps, - FLAGS.learning_rate_decay_factor, - staircase=True, - name='exponential_decay_learning_rate') - elif FLAGS.learning_rate_decay_type == 'fixed': - return tf.constant(FLAGS.learning_rate, name='fixed_learning_rate') - elif FLAGS.learning_rate_decay_type == 'polynomial': - return tf.train.polynomial_decay(FLAGS.learning_rate, - global_step, - decay_steps, - FLAGS.end_learning_rate, - power=1.0, - cycle=False, - name='polynomial_decay_learning_rate') - else: - raise ValueError('learning_rate_decay_type [%s] was not recognized', - FLAGS.learning_rate_decay_type) - - - def _configure_optimizer(learning_rate): - """Configures the optimizer used for training. - - Args: - learning_rate: A scalar or `Tensor` learning rate. - - Returns: - An instance of an optimizer. - - Raises: - ValueError: if FLAGS.optimizer is not recognized. 
- """ - if FLAGS.optimizer == 'adadelta': - optimizer = tf.train.AdadeltaOptimizer( - learning_rate, - rho=FLAGS.adadelta_rho, - epsilon=FLAGS.opt_epsilon) - elif FLAGS.optimizer == 'adagrad': - optimizer = tf.train.AdagradOptimizer( - learning_rate, - initial_accumulator_value=FLAGS.adagrad_initial_accumulator_value) - elif FLAGS.optimizer == 'adam': - optimizer = tf.train.AdamOptimizer( - learning_rate, - beta1=FLAGS.adam_beta1, - beta2=FLAGS.adam_beta2, - epsilon=FLAGS.opt_epsilon) - elif FLAGS.optimizer == 'ftrl': - optimizer = tf.train.FtrlOptimizer( - learning_rate, - learning_rate_power=FLAGS.ftrl_learning_rate_power, - initial_accumulator_value=FLAGS.ftrl_initial_accumulator_value, - l1_regularization_strength=FLAGS.ftrl_l1, - l2_regularization_strength=FLAGS.ftrl_l2) - elif FLAGS.optimizer == 'momentum': - optimizer = tf.train.MomentumOptimizer( - learning_rate, - momentum=FLAGS.momentum, - name='Momentum') - elif FLAGS.optimizer == 'rmsprop': - optimizer = tf.train.RMSPropOptimizer( - learning_rate, - decay=FLAGS.rmsprop_decay, - momentum=FLAGS.momentum, - epsilon=FLAGS.opt_epsilon) - elif FLAGS.optimizer == 'sgd': - optimizer = tf.train.GradientDescentOptimizer(learning_rate) - else: - raise ValueError('Optimizer [%s] was not recognized', FLAGS.optimizer) - return optimizer - - - def _add_variables_summaries(learning_rate): - summaries = [] - for variable in slim.get_model_variables(): - summaries.append(tf.summary.histogram(variable.op.name, variable)) - summaries.append(tf.summary.scalar('training/Learning Rate', learning_rate)) - return summaries - - - def _get_init_fn(): - """Returns a function run by the chief worker to warm-start the training. - - Note that the init_fn is only run when initializing the model during the very - first global step. - - Returns: - An init function run by the supervisor. - """ - if FLAGS.checkpoint_path is None: - return None - - # Warn the user if a checkpoint exists in the train_dir. Then we'll be - # ignoring the checkpoint anyway. - if tf.train.latest_checkpoint(FLAGS.train_dir): - tf.logging.info( - 'Ignoring --checkpoint_path because a checkpoint already exists in %s' - % FLAGS.train_dir) - return None - - exclusions = [] - if FLAGS.checkpoint_exclude_scopes: - exclusions = [scope.strip() - for scope in FLAGS.checkpoint_exclude_scopes.split(',')] - - # TODO(sguada) variables.filter_variables() - variables_to_restore = [] - for var in slim.get_model_variables(): - excluded = False - for exclusion in exclusions: - if var.op.name.startswith(exclusion): - excluded = True - break - if not excluded: - variables_to_restore.append(var) - - if tf.gfile.IsDirectory(FLAGS.checkpoint_path): - checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path) - else: - checkpoint_path = FLAGS.checkpoint_path - - tf.logging.info('Fine-tuning from %s' % checkpoint_path) - - return slim.assign_from_checkpoint_fn( - checkpoint_path, - variables_to_restore, - ignore_missing_vars=FLAGS.ignore_missing_vars) - - - def _get_variables_to_train(): - """Returns a list of variables to train. - - Returns: - A list of variables to train by the optimizer. 
- """ - if FLAGS.trainable_scopes is None: - return tf.trainable_variables() - else: - scopes = [scope.strip() for scope in FLAGS.trainable_scopes.split(',')] - - variables_to_train = [] - for scope in scopes: - variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope) - variables_to_train.extend(variables) - return variables_to_train - - # main - cluster_spec, server = TFNode.start_cluster_server(ctx=ctx, num_gpus=FLAGS.num_gpus, rdma=FLAGS.rdma) - if ctx.job_name == 'ps': - # `ps` jobs wait for incoming connections from the workers. - server.join() - else: - # `worker` jobs will actually do the work. - if not FLAGS.dataset_dir: - raise ValueError('You must supply the dataset directory with --dataset_dir') - - tf.logging.set_verbosity(tf.logging.INFO) - with tf.Graph().as_default(): - ####################### - # Config model_deploy # - ####################### - deploy_config = model_deploy.DeploymentConfig( - num_clones=FLAGS.num_clones, - clone_on_cpu=FLAGS.clone_on_cpu, - replica_id=FLAGS.task, - num_replicas=FLAGS.worker_replicas, - num_ps_tasks=FLAGS.num_ps_tasks) - - # Create global_step - #with tf.device(deploy_config.variables_device()): - # global_step = slim.create_global_step() - with tf.device("/job:ps/task:0"): - global_step = tf.Variable(0, name="global_step") - - ###################### - # Select the dataset # - ###################### - dataset = dataset_factory.get_dataset( - FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir) - - ###################### - # Select the network # - ###################### - network_fn = nets_factory.get_network_fn( - FLAGS.model_name, - num_classes=(dataset.num_classes - FLAGS.labels_offset), - weight_decay=FLAGS.weight_decay, - is_training=True) - - ##################################### - # Select the preprocessing function # - ##################################### - preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name - image_preprocessing_fn = preprocessing_factory.get_preprocessing( - preprocessing_name, - is_training=True) - - ############################################################## - # Create a dataset provider that loads data from the dataset # - ############################################################## - with tf.device(deploy_config.inputs_device()): - provider = slim.dataset_data_provider.DatasetDataProvider( - dataset, - num_readers=FLAGS.num_readers, - common_queue_capacity=20 * FLAGS.batch_size, - common_queue_min=10 * FLAGS.batch_size) - [image, label] = provider.get(['image', 'label']) - label -= FLAGS.labels_offset - - train_image_size = FLAGS.train_image_size or network_fn.default_image_size - - image = image_preprocessing_fn(image, train_image_size, train_image_size) - - images, labels = tf.train.batch( - [image, label], - batch_size=FLAGS.batch_size, - num_threads=FLAGS.num_preprocessing_threads, - capacity=5 * FLAGS.batch_size) - labels = slim.one_hot_encoding( - labels, dataset.num_classes - FLAGS.labels_offset) - batch_queue = slim.prefetch_queue.prefetch_queue( - [images, labels], capacity=2 * deploy_config.num_clones) - - #################### - # Define the model # - #################### - def clone_fn(batch_queue): - """Allows data parallelism by creating multiple clones of network_fn.""" - images, labels = batch_queue.dequeue() - logits, end_points = network_fn(images) - - ############################# - # Specify the loss function # - ############################# - if 'AuxLogits' in end_points: - tf.losses.softmax_cross_entropy( - logits=end_points['AuxLogits'], 
onehot_labels=labels, - label_smoothing=FLAGS.label_smoothing, weights=0.4, scope='aux_loss') - tf.losses.softmax_cross_entropy( - logits=logits, onehot_labels=labels, - label_smoothing=FLAGS.label_smoothing, weights=1.0) - return end_points - - # Gather initial summaries. - summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES)) - - clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue]) - first_clone_scope = deploy_config.clone_scope(0) - # Gather update_ops from the first clone. These contain, for example, - # the updates for the batch_norm variables created by network_fn. - update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope) - - # Add summaries for end_points. - end_points = clones[0].outputs - for end_point in end_points: - x = end_points[end_point] - summaries.add(tf.summary.histogram('activations/' + end_point, x)) - summaries.add(tf.summary.scalar('sparsity/' + end_point, - tf.nn.zero_fraction(x))) - - # Add summaries for losses. - for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope): - summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss)) - - # Add summaries for variables. - for variable in slim.get_model_variables(): - summaries.add(tf.summary.histogram(variable.op.name, variable)) - - ################################# - # Configure the moving averages # - ################################# - if FLAGS.moving_average_decay: - moving_average_variables = slim.get_model_variables() - variable_averages = tf.train.ExponentialMovingAverage( - FLAGS.moving_average_decay, global_step) - else: - moving_average_variables, variable_averages = None, None - - ######################################### - # Configure the optimization procedure. # - ######################################### - with tf.device(deploy_config.optimizer_device()): - learning_rate = _configure_learning_rate(dataset.num_samples, global_step) - optimizer = _configure_optimizer(learning_rate) - summaries.add(tf.summary.scalar('learning_rate', learning_rate)) - - if FLAGS.sync_replicas: - # If sync_replicas is enabled, the averaging will be done in the chief - # queue runner. - optimizer = tf.train.SyncReplicasOptimizer( - opt=optimizer, - replicas_to_aggregate=FLAGS.replicas_to_aggregate, - variable_averages=variable_averages, - variables_to_average=moving_average_variables, - replica_id=tf.constant(FLAGS.task, tf.int32, shape=()), - total_num_replicas=FLAGS.worker_replicas) - elif FLAGS.moving_average_decay: - # Update ops executed locally by trainer. - update_ops.append(variable_averages.apply(moving_average_variables)) - - # Variables to train. - variables_to_train = _get_variables_to_train() - - # and returns a train_tensor and summary_op - total_loss, clones_gradients = model_deploy.optimize_clones( - clones, - optimizer, - var_list=variables_to_train) - # Add total_loss to summary. - summaries.add(tf.summary.scalar('total_loss', total_loss)) - - # Create gradient updates. - grad_updates = optimizer.apply_gradients(clones_gradients, - global_step=global_step) - update_ops.append(grad_updates) - - update_op = tf.group(*update_ops) - train_tensor = control_flow_ops.with_dependencies([update_op], total_loss, - name='train_op') - - # Add the summaries from the first clone. These contain the summaries - # created by model_fn and either optimize_clones() or _gather_clone_loss(). - summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES, - first_clone_scope)) - - # Merge all summaries together. 
- summary_op = tf.summary.merge(list(summaries), name='summary_op') - - - ########################### - # Kicks off the training. # - ########################### - summary_writer = tf.summary.FileWriter("tensorboard_%d" %(ctx.worker_num), graph=tf.get_default_graph()) - slim.learning.train( - train_tensor, - logdir=FLAGS.train_dir, - master=server.target, - is_chief=(FLAGS.task == 0), - init_fn=_get_init_fn(), - summary_op=summary_op, - number_of_steps=FLAGS.max_number_of_steps, - log_every_n_steps=FLAGS.log_every_n_steps, - save_summaries_secs=FLAGS.save_summaries_secs, - save_interval_secs=FLAGS.save_interval_secs, - summary_writer=summary_writer, - sync_optimizer=optimizer if FLAGS.sync_replicas else None) - - -if __name__ == '__main__': - import argparse - - sc = SparkContext(conf=SparkConf().setAppName("train_image_classifier")) - executors = sc._conf.get("spark.executor.instances") - num_executors = int(executors) if executors is not None else 1 - - parser = argparse.ArgumentParser() - parser.add_argument("--num_ps_tasks", help="number of PS nodes", type=int, default=0) - parser.add_argument("--tensorboard", help="launch tensorboard process", action="store_true") - parser.add_argument("--cluster_size", help="number of nodes in the cluster", type=int, default=num_executors) - (args,rem) = parser.parse_known_args() - - assert(num_executors > args.num_ps_tasks) - cluster = TFCluster.run(sc, main_fun, sys.argv, args.cluster_size, args.num_ps_tasks, args.tensorboard, TFCluster.InputMode.TENSORFLOW) - cluster.shutdown() diff --git a/tensorflowonspark/TFNode.py b/tensorflowonspark/TFNode.py index 96aadc81..889ff52e 100755 --- a/tensorflowonspark/TFNode.py +++ b/tensorflowonspark/TFNode.py @@ -34,6 +34,7 @@ def hdfs_path(ctx, path): """ # All Hadoop-Compatible File System Schemes (as of Hadoop 3.0.x): HADOOP_SCHEMES = ['adl://', + 'file://', 'hdfs://', 'oss://', 's3://', @@ -42,8 +43,7 @@ def hdfs_path(ctx, path): 'swift://', 'viewfs://', 'wasb://'] - if (any(path.startswith(scheme) for scheme in HADOOP_SCHEMES) - or path.startswith('file://')): + if (any(path.startswith(scheme) for scheme in HADOOP_SCHEMES)): # absolute path w/ scheme, just return as-is return path elif path.startswith("/"): diff --git a/tensorflowonspark/TFSparkNode.py b/tensorflowonspark/TFSparkNode.py index 50523f72..f1d6ebf4 100644 --- a/tensorflowonspark/TFSparkNode.py +++ b/tensorflowonspark/TFSparkNode.py @@ -388,9 +388,7 @@ def _train(iter): terminating = state == "'terminating'" if terminating: logging.info("mgr is terminating, skipping partition") - count = 0 - for item in iter: - count += 1 + count = sum(1 for item in iter) logging.info("Skipped {0} items from partition".format(count)) else: logging.info("Feeding partition {0} into {1} queue {2}".format(iter, qname, queue)) @@ -416,17 +414,19 @@ def _train(iter): logging.info("Processed {0} items in partition".format(count)) # check if TF is terminating feed after this partition - state = str(mgr.get('state')) - terminating = state == "'terminating'" - if terminating: - try: - logging.info("TFSparkNode: requesting stop") - client = reservation.Client(cluster_meta['server_addr']) - client.request_stop() - client.close() - except Exception as e: - # ignore any errors while requesting stop - logging.debug("Error while requesting stop: {0}".format(e)) + if not terminating: + state = str(mgr.get('state')) + terminating = state == "'terminating'" + if terminating: + try: + logging.info("TFSparkNode: requesting stop") + client = 
reservation.Client(cluster_meta['server_addr']) + client.request_stop() + client.close() + except Exception as e: + # ignore any errors while requesting stop + logging.debug("Error while requesting stop: {0}".format(e)) + return [terminating] return _train
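
Note on the TFSparkNode.py hunks above: they make three changes to the `_train` closure — the item-skipping loop collapses into a generator expression, the post-partition state lookup is guarded so it only runs when the partition was actually fed, and the closure now returns its terminating flag as a one-element list. A condensed sketch of the resulting control flow, assuming a simplified setup where the state manager, the queue-feeding loop, and the reservation stop request are injected as `mgr`, `feed_partition`, and `request_stop` (hypothetical stand-ins, not the real internals):

    import logging

    def make_train(mgr, feed_partition, request_stop):
        # Condensed sketch, not the TFSparkNode source: the real closure also
        # manages multiprocessing queues and cluster reservations.
        def _train(iterator):
            terminating = str(mgr.get('state')) == "'terminating'"
            if terminating:
                # Manager is already shutting down: drain the partition unfed.
                count = sum(1 for _ in iterator)
                logging.info("Skipped {0} items from partition".format(count))
            else:
                feed_partition(iterator)  # push items into the executor's input queue
                # Re-check state only when this partition was actually fed.
                terminating = str(mgr.get('state')) == "'terminating'"
                if terminating:
                    try:
                        request_stop()  # best-effort; errors are only logged
                    except Exception as e:
                        logging.debug("Error while requesting stop: {0}".format(e))
            return [terminating]  # Spark collects one flag per partition
        return _train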
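
Similarly, the TFNode.py hunk folds the previously special-cased 'file://' prefix into HADOOP_SCHEMES, so a single any() scan now decides whether a path already carries a supported filesystem scheme. A minimal sketch of that check (the scheme list here is abridged to the entries visible in the hunk; the list in TFNode.py is longer):

    # Sketch only -- scheme list abridged to the entries shown in the hunk.
    HADOOP_SCHEMES = ['adl://', 'file://', 'hdfs://', 'oss://', 's3://',
                      'swift://', 'viewfs://', 'wasb://']

    def has_supported_scheme(path):
        """True if `path` is already absolute with a recognized scheme."""
        return any(path.startswith(scheme) for scheme in HADOOP_SCHEMES)

    assert has_supported_scheme('file:///tmp/mnist/csv/train/images')
    assert has_supported_scheme('hdfs://namenode:8020/user/mnist')
    assert not has_supported_scheme('mnist/csv/train/images')  # relative path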