diff --git a/.travis.settings.xml b/.travis.settings.xml
new file mode 100644
index 00000000..eccc2b50
--- /dev/null
+++ b/.travis.settings.xml
@@ -0,0 +1,9 @@
+<settings>
+  <servers>
+    <server>
+      <id>bintray-tensorflowonspark-repo</id>
+      <username>${env.BINTRAY_USER}</username>
+      <password>${env.BINTRAY_API_KEY}</password>
+    </server>
+  </servers>
+</settings>
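Maven resolves `${env.BINTRAY_USER}` and `${env.BINTRAY_API_KEY}` from the build environment at deploy time, so the Bintray credentials live in the Travis repository settings rather than in the repo. The interpolation behaves roughly like this sketch (a Python stand-in for Maven's resolver; the helper name is hypothetical):

```python
import os
import re

def interpolate(value, env=os.environ):
    # Replace each ${env.NAME} token with the environment variable NAME.
    return re.sub(r"\$\{env\.([A-Za-z_]+)\}", lambda m: env.get(m.group(1), ""), value)

assert interpolate("${env.BINTRAY_USER}", {"BINTRAY_USER": "alice"}) == "alice"
```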
diff --git a/.travis.yml b/.travis.yml
index 54969c89..b7248a54 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,39 +1,65 @@
-language: python
-python:
-  - 2.7
-  - 3.6
-cache: pip
-before_install:
-  - curl -LO http://www-us.apache.org/dist/spark/spark-2.3.1/spark-2.3.1-bin-hadoop2.7.tgz
-  - export SPARK_HOME=./spark
-  - mkdir $SPARK_HOME
-  - tar -xf spark-2.3.1-bin-hadoop2.7.tgz -C $SPARK_HOME --strip-components=1
-  - export PATH=$SPARK_HOME/bin:$PATH
-  - export SPARK_LOCAL_IP=127.0.0.1
-  - export SPARK_CLASSPATH=./lib/tensorflow-hadoop-1.0-SNAPSHOT.jar
-  - export PYTHONPATH=$(pwd)
-install:
-  - pip install -r requirements.txt
-script:
-  - sphinx-build -b html docs/source docs/build/html
-  - test/run_tests.sh
+matrix:
+  include:
+    - language: python
+      python: 2.7
+      before_install:
+        - curl -LO http://www-us.apache.org/dist/spark/spark-2.3.1/spark-2.3.1-bin-hadoop2.7.tgz
+        - export SPARK_HOME=./spark
+        - mkdir $SPARK_HOME
+        - tar -xf spark-2.3.1-bin-hadoop2.7.tgz -C $SPARK_HOME --strip-components=1
+        - export PATH=$SPARK_HOME/bin:$PATH
+        - export SPARK_LOCAL_IP=127.0.0.1
+        - export SPARK_CLASSPATH=./lib/tensorflow-hadoop-1.0-SNAPSHOT.jar
+        - export PYTHONPATH=$(pwd)
+      install:
+        - pip install -r requirements.txt
+      script:
+        - test/run_tests.sh
+    - language: python
+      python: 3.6
+      before_install:
+        - curl -LO http://www-us.apache.org/dist/spark/spark-2.3.1/spark-2.3.1-bin-hadoop2.7.tgz
+        - export SPARK_HOME=./spark
+        - mkdir $SPARK_HOME
+        - tar -xf spark-2.3.1-bin-hadoop2.7.tgz -C $SPARK_HOME --strip-components=1
+        - export PATH=$SPARK_HOME/bin:$PATH
+        - export SPARK_LOCAL_IP=127.0.0.1
+        - export SPARK_CLASSPATH=./lib/tensorflow-hadoop-1.0-SNAPSHOT.jar
+        - export PYTHONPATH=$(pwd)
+      install:
+        - pip install -r requirements.txt
+      script:
+        - sphinx-build -b html docs/source docs/build/html
+        - test/run_tests.sh
+    - language: java
+      jdk: oraclejdk8
 notifications:
   email: false
 deploy:
-  - provider: pages
-    skip_cleanup: true
-    github_token: $GITHUB_TOKEN
-    local_dir: docs/build/html
-    on:
-      branch: master
-      python: 3.6
-      tags: true
-  - provider: pypi
-    user: leewyang
-    password:
-      secure: T2Q8VM6SgcMtJDO2kJbaELE/5ICR5mx8pkM6TyNAJZ2Mr3fLIy6iDfPKunBAYVljl+SDEWmuoPTWqJdqMyo47LBKPKtBHbGzATqGSRTLvxLOYNSXUX+uCpPtr7CMp1eP3xpZ3YbAJZvoEFlWnBQKeBtX/PjNCpmKdp7ir+46CvR/pR1tcM5cFnSgU+uCPAMUt8KTZIxeRo+oJtaE0DM2RxLJ9nGnaRNz9fdXxwhViNj/bMnDRUI0G6k+Iy4sO2669si8nhTDr+Oq66ONUcJtAQymNUM/hzBTCkrJvuIq1TqTlKkA39UrtD5/wCkCqPUbCLVuIfNwkYfW2C8AlXcbphBKN4PhwaoL5XECr3/AOsgNpnPWhCF1Z1uLi58FhIlSyp+5c/x2wVJLZi2IE+c996An7UO3t16ZFpFEgzS6m9PVbi6Qil6Tl4AhV5QLKb0Qn0hLe2l0WixzK9KLMHfkqX8h5ZGC7i0TvCNcU2uIFjY8we91GORZKZhwUVDKbPqiUZIKn64Qq8EwJIsk/S344OrUTzm7z0lFCqtPphg1duU42QOFmaYWi6hgsbtDxN6+CubLw23G3PtKjOpNt8hHnrjZsz9H1MKbSAoYQ4fo+Iwb3owTjXnSTBr94StW7qysggWH6xQimFDh/SKOE9MfroMGt5YTXfduTbqyeameYqE=
-    distributions: sdist bdist_wheel
-    on:
-      branch: master
-      python: 3.6
-      tags: true
+- provider: pages
+  skip_cleanup: true
+  github_token: "$GITHUB_TOKEN"
+  local_dir: docs/build/html
+  on:
+    branch: master
+    python: 3.6
+    tags: true
+    condition: "$TRAVIS_TAG =~ ^v.*$"
+- provider: pypi
+  user: leewyang
+  password:
+    secure: T2Q8VM6SgcMtJDO2kJbaELE/5ICR5mx8pkM6TyNAJZ2Mr3fLIy6iDfPKunBAYVljl+SDEWmuoPTWqJdqMyo47LBKPKtBHbGzATqGSRTLvxLOYNSXUX+uCpPtr7CMp1eP3xpZ3YbAJZvoEFlWnBQKeBtX/PjNCpmKdp7ir+46CvR/pR1tcM5cFnSgU+uCPAMUt8KTZIxeRo+oJtaE0DM2RxLJ9nGnaRNz9fdXxwhViNj/bMnDRUI0G6k+Iy4sO2669si8nhTDr+Oq66ONUcJtAQymNUM/hzBTCkrJvuIq1TqTlKkA39UrtD5/wCkCqPUbCLVuIfNwkYfW2C8AlXcbphBKN4PhwaoL5XECr3/AOsgNpnPWhCF1Z1uLi58FhIlSyp+5c/x2wVJLZi2IE+c996An7UO3t16ZFpFEgzS6m9PVbi6Qil6Tl4AhV5QLKb0Qn0hLe2l0WixzK9KLMHfkqX8h5ZGC7i0TvCNcU2uIFjY8we91GORZKZhwUVDKbPqiUZIKn64Qq8EwJIsk/S344OrUTzm7z0lFCqtPphg1duU42QOFmaYWi6hgsbtDxN6+CubLw23G3PtKjOpNt8hHnrjZsz9H1MKbSAoYQ4fo+Iwb3owTjXnSTBr94StW7qysggWH6xQimFDh/SKOE9MfroMGt5YTXfduTbqyeameYqE=
+  distributions: sdist bdist_wheel
+  on:
+    branch: master
+    python: 3.6
+    tags: true
+    condition: "$TRAVIS_TAG =~ ^v.*$"
+- provider: script
+  script: mvn deploy -DskipTests --settings .travis.settings.xml
+  skip_cleanup: true
+  on:
+    branch: master
+    jdk: oraclejdk8
+    tags: true
+    condition: "$TRAVIS_TAG =~ ^scala_.*$"
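The three deploy providers are gated by mutually exclusive tag conditions: tags like `v1.3.0` trigger the GitHub Pages and PyPI deploys, while tags like `scala_1.0` trigger the Maven deploy, which reads the Bintray credentials from `.travis.settings.xml`. A minimal Python sketch of the routing logic (helper name and tag values are illustrative only):

```python
import re

# Illustrative only: mirrors the Travis deploy conditions above.
def deploy_targets(tag):
    targets = []
    if re.match(r"^v.*$", tag):        # condition: "$TRAVIS_TAG =~ ^v.*$"
        targets += ["pages", "pypi"]
    if re.match(r"^scala_.*$", tag):   # condition: "$TRAVIS_TAG =~ ^scala_.*$"
        targets.append("bintray")
    return targets

assert deploy_targets("v1.3.0") == ["pages", "pypi"]
assert deploy_targets("scala_1.0") == ["bintray"]
assert deploy_targets("untagged-build") == []
```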
diff --git a/examples/mnist/spark/mnist_dist.py b/examples/mnist/spark/mnist_dist.py
index 85dd51cf..d94930f2 100755
--- a/examples/mnist/spark/mnist_dist.py
+++ b/examples/mnist/spark/mnist_dist.py
@@ -138,6 +138,7 @@ def feed_dict(batch):
     with tf.train.MonitoredTrainingSession(master=server.target,
                                            is_chief=(task_index == 0),
                                            checkpoint_dir=logdir,
+                                           save_checkpoint_secs=10,
                                            hooks=[tf.train.StopAtStepHook(last_step=args.steps)],
                                            chief_only_hooks=[ExportHook(ctx.absolute_path(args.export_dir), x, prediction)]) as mon_sess:
       step = 0
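`save_checkpoint_secs=10` shortens the checkpoint interval from the TF 1.x default of 600 seconds, so export and inference can pick up a recent checkpoint even on short training runs. A minimal single-process sketch of the same API (checkpoint path and step count are placeholders, not values from the example):

```python
import tensorflow as tf  # TF 1.x API, as used by the example

global_step = tf.train.get_or_create_global_step()
train_op = tf.assign_add(global_step, 1)  # stand-in for a real training op

# Checkpoints are written to checkpoint_dir every 10 seconds instead of
# the default 600; StopAtStepHook ends the session after 100 steps.
with tf.train.MonitoredTrainingSession(checkpoint_dir="/tmp/ckpt_demo",
                                       save_checkpoint_secs=10,
                                       hooks=[tf.train.StopAtStepHook(last_step=100)]) as sess:
    while not sess.should_stop():
        sess.run(train_op)
```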
diff --git a/examples/mnist/spark/mnist_spark.py b/examples/mnist/spark/mnist_spark.py
index 9c6a4415..c2757e1f 100755
--- a/examples/mnist/spark/mnist_spark.py
+++ b/examples/mnist/spark/mnist_spark.py
@@ -26,7 +26,7 @@
parser.add_argument("--batch_size", help="number of records per batch", type=int, default=100)
parser.add_argument("--epochs", help="number of epochs", type=int, default=1)
parser.add_argument("--export_dir", help="HDFS path to export saved_model", default="mnist_export")
-parser.add_argument("--format", help="example format: (csv|pickle|tfr)", choices=["csv", "pickle", "tfr"], default="csv")
+parser.add_argument("--format", help="example format: (csv|tfr)", choices=["csv", "tfr"], default="csv")
parser.add_argument("--images", help="HDFS path to MNIST images in parallelized format")
parser.add_argument("--labels", help="HDFS path to MNIST labels in parallelized format")
parser.add_argument("--model", help="HDFS path to save/load model during train/inference", default="mnist_model")
@@ -56,22 +56,22 @@ def toNumpy(bytestr):
     return (image, label)
   dataRDD = images.map(lambda x: toNumpy(bytes(x[0])))
-else:
-  if args.format == "csv":
-    images = sc.textFile(args.images).map(lambda ln: [int(x) for x in ln.split(',')])
-    labels = sc.textFile(args.labels).map(lambda ln: [float(x) for x in ln.split(',')])
-  else:  # args.format == "pickle":
-    images = sc.pickleFile(args.images)
-    labels = sc.pickleFile(args.labels)
+else:  # "csv"
   print("zipping images and labels")
+  # If partitions of images/labels don't match, you can use the following code:
+  #   images = sc.textFile(args.images).map(lambda ln: [int(x) for x in ln.split(',')]).zipWithIndex().map(lambda x: (x[1], x[0]))
+  #   labels = sc.textFile(args.labels).map(lambda ln: [float(x) for x in ln.split(',')]).zipWithIndex().map(lambda x: (x[1], x[0]))
+  #   dataRDD = images.join(labels).map(lambda x: (x[1][0], x[1][1]))
+  images = sc.textFile(args.images).map(lambda ln: [int(x) for x in ln.split(',')])
+  labels = sc.textFile(args.labels).map(lambda ln: [float(x) for x in ln.split(',')])
   dataRDD = images.zip(labels)
 
 cluster = TFCluster.run(sc, mnist_dist.map_fun, args, args.cluster_size, num_ps, args.tensorboard, TFCluster.InputMode.SPARK, log_dir=args.model)
 if args.mode == "train":
   cluster.train(dataRDD, args.epochs)
-else:
-  labelRDD = cluster.inference(dataRDD)
-  labelRDD.saveAsTextFile(args.output)
+else:  # inference
+  predRDD = cluster.inference(dataRDD)
+  predRDD.saveAsTextFile(args.output)
 
 cluster.shutdown(grace_secs=30)
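`RDD.zip` requires both RDDs to have the same number of partitions and the same number of elements per partition, which holds when the images and labels were written side by side. The commented-out `zipWithIndex`/`join` variant handles the general case; a self-contained sketch with toy data and a local master (both assumptions, not values from the example):

```python
from pyspark import SparkContext

sc = SparkContext("local", "zip_demo")

# Deliberately mismatched partition counts: images.zip(labels) would fail here.
images = sc.parallelize([[0, 1], [2, 3], [4, 5]], 2)
labels = sc.parallelize([[1.0], [0.0], [1.0]], 3)

# Key each record by its global index, join on the index, then drop the index.
keyed_images = images.zipWithIndex().map(lambda x: (x[1], x[0]))
keyed_labels = labels.zipWithIndex().map(lambda x: (x[1], x[0]))
dataRDD = keyed_images.join(keyed_labels).map(lambda x: (x[1][0], x[1][1]))

print(sorted(dataRDD.collect(), key=str))  # three aligned (image, label) pairs
sc.stop()
```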
diff --git a/pom.xml b/pom.xml
index fb291d55..5bcf35e8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -5,11 +5,18 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>com.yahoo.ml</groupId>
   <artifactId>tensorflowonspark</artifactId>
-  <version>1.0-SNAPSHOT</version>
+  <version>1.0</version>
   <packaging>jar</packaging>
   <name>tensorflowonspark</name>
   <description>Spark Scala inferencing for TensorFlowOnSpark</description>
+  <distributionManagement>
+    <repository>
+      <id>bintray-tensorflowonspark-repo</id>
+      <url>https://api.bintray.com/maven/yahoo/maven/tensorflowonspark</url>
+    </repository>
+  </distributionManagement>
+
   <properties>
     <maven.compiler.source>1.8</maven.compiler.source>
     <maven.compiler.target>1.8</maven.compiler.target>
@@ -22,11 +29,11 @@
     <scala.version>2.11.8</scala.version>
     <scala-maven-plugin.version>3.2.1</scala-maven-plugin.version>
     1.1.0
-    <scalatest.version>3.0.3</scalatest.version>
+    <scalatest.version>3.0.5</scalatest.version>
     1.0
     3.7.0
-    <tensorflow.version>1.8.0</tensorflow.version>
-    <tensorflow-hadoop.version>1.0-SNAPSHOT</tensorflow-hadoop.version>
+    <tensorflow.version>1.9.0</tensorflow.version>
+    <tensorflow-hadoop.version>1.9.0</tensorflow-hadoop.version>
   </properties>
@@ -67,6 +74,11 @@
       <artifactId>hadoop</artifactId>
       <version>${tensorflow-hadoop.version}</version>
     </dependency>
+    <dependency>
+      <groupId>com.google.protobuf</groupId>
+      <artifactId>protobuf-java</artifactId>
+      <version>3.5.1</version>
+    </dependency>
     <dependency>
       <groupId>org.scalatest</groupId>
       <artifactId>scalatest_2.11</artifactId>
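Pinning `protobuf-java` to 3.5.1 keeps the Scala inference path on a protobuf runtime compatible with the TensorFlow 1.9 Java artifacts. On the Python side, the analogous sanity check is the version of the installed protobuf wheel (a quick check, not part of this change; the expected version is an assumption):

```python
import google.protobuf

# A 3.5.x wheel is expected alongside the TF 1.9 upgrade above.
print(google.protobuf.__version__)
```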