From b3f0c8396c74146644e5fdcc445652f99ddbe5bf Mon Sep 17 00:00:00 2001
From: Yilei Cai
Date: Tue, 3 Jul 2018 15:07:00 -0700
Subject: [PATCH 1/2] add README for running the Dataset and Pipeline examples

---
 examples/mnist/tf/README.md | 106 ++++++++++++++++++++++++++++++++++++
 1 file changed, 106 insertions(+)
 create mode 100644 examples/mnist/tf/README.md

diff --git a/examples/mnist/tf/README.md b/examples/mnist/tf/README.md
new file mode 100644
index 00000000..354848e3
--- /dev/null
+++ b/examples/mnist/tf/README.md
@@ -0,0 +1,106 @@
+## Running distributed MNIST training / inference
+
+### _using Dataset_
+```bash
+# for CPU mode:
+# export QUEUE=default
+# remove references to $LIB_CUDA
+
+# hdfs dfs -rm -r mnist_model
+# hdfs dfs -rm -r predictions
+
+${SPARK_HOME}/bin/spark-submit \
+--master yarn \
+--deploy-mode cluster \
+--queue ${QUEUE} \
+--num-executors 4 \
+--executor-memory 27G \
+--py-files TensorFlowOnSpark/tfspark.zip,TensorFlowOnSpark/examples/mnist/tf/mnist_dist_dataset.py \
+--conf spark.dynamicAllocation.enabled=false \
+--conf spark.yarn.maxAppAttempts=1 \
+--archives hdfs:///user/${USER}/Python.zip#Python \
+--conf spark.executorEnv.LD_LIBRARY_PATH=$LIB_CUDA:$LIB_JVM:$LIB_HDFS \
+--driver-library-path=$LIB_CUDA \
+TensorFlowOnSpark/examples/mnist/tf/mnist_spark_dataset.py \
+--images_labels mnist/csv2/train \
+--format csv2 \
+--mode train \
+--model mnist_model
+
+# to use inference mode, change `--mode train` to `--mode inference` and add `--output predictions`
+# one item in csv2 format is `image | label`; to use input data in TFRecord format, change `--format csv2` to `--format tfr`
+# to use infiniband, add `--rdma`
+```
+
+### _using QueueRunners_
+```bash
+# for CPU mode:
+# export QUEUE=default
+# remove references to $LIB_CUDA
+
+# hdfs dfs -rm -r mnist_model
+# hdfs dfs -rm -r predictions
+
+${SPARK_HOME}/bin/spark-submit \
+--master yarn \
+--deploy-mode cluster \
+--queue ${QUEUE} \
+--num-executors 4 \
+--executor-memory 27G \
+--py-files TensorFlowOnSpark/tfspark.zip,TensorFlowOnSpark/examples/mnist/tf/mnist_dist.py \
+--conf spark.dynamicAllocation.enabled=false \
+--conf spark.yarn.maxAppAttempts=1 \
+--archives hdfs:///user/${USER}/Python.zip#Python \
+--conf spark.executorEnv.LD_LIBRARY_PATH=$LIB_CUDA:$LIB_JVM:$LIB_HDFS \
+--driver-library-path=$LIB_CUDA \
+TensorFlowOnSpark/examples/mnist/tf/mnist_spark.py \
+--images mnist/csv/train/images \
+--labels mnist/csv/train/labels \
+--format csv \
+--mode train \
+--model mnist_model
+
+# to use inference mode, change `--mode train` to `--mode inference` and add `--output predictions`
+# to use input data in TFRecord format, change `--format csv` to `--format tfr`
+# to use infiniband, add `--rdma`
+```
+
+## Running distributed MNIST training and inference utilizing Spark ML Pipeline
+```bash
+# for CPU mode:
+# export QUEUE=default
+# remove references to $LIB_CUDA
+
+# hdfs dfs -rm -r mnist_model
+# hdfs dfs -rm -r mnist_export
+# hdfs dfs -rm -r tfrecords
+# hdfs dfs -rm -r predictions
+
+${SPARK_HOME}/bin/spark-submit \
+--master yarn \
+--deploy-mode cluster \
+--queue ${QUEUE} \
+--num-executors 4 \
+--executor-memory 27G \
+--jars hdfs:///user/${USER}/tensorflow-hadoop-1.0-SNAPSHOT.jar \
+--py-files TensorFlowOnSpark/tfspark.zip,TensorFlowOnSpark/examples/mnist/tf/mnist_dist_pipeline.py \
+--conf spark.dynamicAllocation.enabled=false \
+--conf spark.yarn.maxAppAttempts=1 \
+--archives hdfs:///user/${USER}/Python.zip#Python \
+--conf spark.executorEnv.LD_LIBRARY_PATH=$LIB_CUDA:$LIB_JVM:$LIB_HDFS \
+--driver-library-path=$LIB_CUDA \
+TensorFlowOnSpark/examples/mnist/tf/mnist_spark_pipeline.py \
+--images mnist/csv/train/images \
+--labels mnist/csv/train/labels \
+--tfrecord_dir tfrecords \
+--format csv \
+--model_dir mnist_model \
+--export_dir mnist_export \
+--train \
+--inference_mode signature \
+--inference_output predictions
+
+# to use input data in TFRecord format, change `--format csv` to `--format tfr`
+# tensorflow-hadoop-1.0-SNAPSHOT.jar is needed to convert the csv input into TFRecords
+# `--tfrecord_dir` is needed to temporarily save the DataFrame as TFRecords on HDFS
+```

From ce9b3ea2de2e1b5f22b5f544208f208d6f189ec3 Mon Sep 17 00:00:00 2001
From: yileic
Date: Thu, 5 Jul 2018 13:48:51 -0700
Subject: [PATCH 2/2] update heading level

---
 examples/mnist/tf/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/mnist/tf/README.md b/examples/mnist/tf/README.md
index 354848e3..39833696 100644
--- a/examples/mnist/tf/README.md
+++ b/examples/mnist/tf/README.md
@@ -65,7 +65,7 @@ TensorFlowOnSpark/examples/mnist/tf/mnist_spark.py \
 # to use infiniband, add `--rdma`
 ```
 
-## Running distributed MNIST training and inference utilizing Spark ML Pipeline
+### _using Spark ML Pipeline_
 ```bash
 # for CPU mode:
 # export QUEUE=default
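All of the `spark-submit` commands in the README above assume that `SPARK_HOME`, `QUEUE`, `LIB_CUDA`, `LIB_JVM`, and `LIB_HDFS` are already exported, as in the TensorFlowOnSpark YARN setup instructions. A minimal sketch of that setup follows; every path shown is an example and will differ per cluster.

```bash
# example environment for the commands above; all paths are illustrative and cluster-specific
export SPARK_HOME=/opt/spark                       # Spark installation directory
export QUEUE=gpu                                   # YARN queue (use "default" for CPU mode)
export LIB_CUDA=/usr/local/cuda/lib64              # CUDA libraries, GPU mode only
export LIB_JVM=${JAVA_HOME}/jre/lib/amd64/server   # directory containing libjvm.so
export LIB_HDFS=/opt/cloudera/parcels/CDH/lib64    # directory containing libhdfs.so
```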