In [1]:
import re
from bigdl.util.common import *
from pyspark.sql.functions import col, udf
from pyspark.sql.types import DoubleType, StringType
from bigdl.nn.layer import *
from bigdl.nn.criterion import *
from pyspark import SparkConf
from pyspark.ml import Pipeline
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

from zoo.common.nncontext import *
from zoo.pipeline.nnframes.nn_classifier import *
from zoo.pipeline.nnframes.nn_image_reader import *
from zoo.feature.common import *
from zoo.feature.image.imagePreprocessing import *

creating: createDefault
creating: createSGD


In [2]:
from zoo.pipeline.api.net import *

In [3]:
# Configure the Spark application name, then obtain the context through
# get_nncontext; `sc` is handed to the image reader in a later cell.
sparkConf = SparkConf()
sparkConf.setAppName("ImageTransferLearningExample")
sc = get_nncontext(sparkConf)

In [4]:
# Glob pattern selecting the input images; it is passed to
# NNImageReader.readImages in the next cell.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
image_path = "/home/yang/sources/datasets/cat_dog/demo/*/*"

In [5]:
# Read the images matched by image_path, using the context `sc`.
# The result is used as a DataFrame below (withColumn on its "image" column).
imageDF = NNImageReader.readImages(image_path, sc)

In [6]:
# Extract the file name (e.g. "cat.123.jpg") from the first field of the
# "image" column value. The match is case-insensitive.
# NOTE(review): row[0] appears to be the image's file URI (see the
# predictionDF.show() output) -- confirm against the reader's schema.
# A row whose row[0] does not match the pattern makes re.search return None
# and the UDF fail with AttributeError.
getName = udf(lambda row:
                  re.search(r'(cat|dog)\.([\d]*)\.jpg', row[0], re.IGNORECASE).group(0),
                  StringType())

# Label 1.0 for cats, 2.0 for everything else (dogs).
# The name is lower-cased first: the regex above ignores case, so a file
# such as "Cat.1.jpg" must still be labelled as a cat rather than falling
# through to the dog label.
getLabel = udf(lambda name: 1.0 if name.lower().startswith('cat') else 2.0, DoubleType())

# Add the derived "name" and "label" columns to the image DataFrame.
labelDF = imageDF.withColumn("name", getName(col("image"))) \
        .withColumn("label", getLabel(col('name')))

# 90% / 10% train / validation split. A fixed seed keeps the split
# repeatable across runs for the same input DataFrame.
(trainingDF, validationDF) = labelDF.randomSplit([0.9, 0.1], seed=42)

In [7]:
# Preprocessing steps handed to ChainedPreprocessing, one per line.
# NOTE(review): presumably applied in list order, with the ChannelNormalize
# arguments being per-channel mean values -- confirm against the library docs.
preprocessing_steps = [
    RowToImageFeature(),
    Resize(256, 256),
    CenterCrop(224, 224),
    ChannelNormalize(123.0, 117.0, 104.0),
    MatToTensor(),
    ImageFeatureToTensor(),
]
transformer = ChainedPreprocessing(preprocessing_steps)

creating: createRowToImageFeature
creating: createResize
creating: createCenterCrop
creating: createMatToTensor
creating: createImageFeatureToTensor
creating: createChainedPreprocessing


In [8]:
# Load the pre-trained Inception-v1 model from local disk.
full_model = Net.loadModel("/home/yang/sources/model/bigdl_inception-v1_imagenet_0.4.0.model")

# Create a new model by removing the layers after pool5/drop_7x7_s1.
output_layers = ["pool5/drop_7x7_s1"]
model = full_model.new_graph(output_layers)

# Freeze the layers from the input up to and including pool4/3x3_s2.
frozen_up_to = ["pool4/3x3_s2"]
model.freeze_up_to(frozen_up_to)

In [9]:
# Stack a new 2-class head on top of the truncated model:
# Reshape to 1024 values -> Linear(1024, 2) -> LogSoftMax.
lrModel = Sequential()
lrModel.add(model)
lrModel.add(Reshape([1024]))
lrModel.add(Linear(1024, 2))
lrModel.add(LogSoftMax())

creating: createSequential
creating: createReshape
creating: createLinear
creating: createLogSoftMax


In [10]:
# Build the classifier from the model, a ClassNLLCriterion loss and the
# preprocessing chain, then set the training parameters. Features are read
# from the "image" column.
criterion = ClassNLLCriterion()
classifier = (
    NNClassifier.create(lrModel, criterion, transformer)
    .setLearningRate(0.003)
    .setBatchSize(40)
    .setMaxEpoch(1)
    .setFeaturesCol("image")
)

creating: createClassNLLCriterion
creating: createScalarToTensor
creating: createFeatureLabelPreprocessing
creating: createNNClassifier


In [11]:
# Wrap the classifier in a single-stage Spark ML Pipeline.
pipeline = Pipeline(stages=[classifier])

In [12]:
# Fit the pipeline on the training split; the fitted model is applied to
# validationDF in the next cell.
catdogModel = pipeline.fit(trainingDF)

creating: createFeatureToTupleAdapter


In [13]:
# Apply the fitted model to the validation split. The result is cached
# because it is shown here and counted again in the accuracy cell.
predictionDF = catdogModel.transform(validationDF)
predictionDF.cache()
predictionDF.show()

+--------------------+-------------+-----+----------+
|               image|         name|label|prediction|
+--------------------+-------------+-----+----------+
|[file:/home/yang/...|cat.10099.jpg|  1.0|       1.0|
|[file:/home/yang/...|cat.10189.jpg|  1.0|       1.0|
|[file:/home/yang/...|cat.10402.jpg|  1.0|       1.0|
|[file:/home/yang/...|cat.10430.jpg|  1.0|       1.0|
|[file:/home/yang/...| cat.1049.jpg|  1.0|       1.0|
|[file:/home/yang/...|cat.10493.jpg|  1.0|       1.0|
|[file:/home/yang/...|cat.10499.jpg|  1.0|       1.0|
|[file:/home/yang/...|cat.10547.jpg|  1.0|       1.0|
|[file:/home/yang/...|cat.10603.jpg|  1.0|       1.0|
|[file:/home/yang/...|cat.10670.jpg|  1.0|       1.0|
|[file:/home/yang/...|cat.10778.jpg|  1.0|       1.0|
|[file:/home/yang/...|cat.10789.jpg|  1.0|       1.0|
|[file:/home/yang/...|cat.10832.jpg|  1.0|       1.0|
|[file:/home/yang/...|cat.10833.jpg|  1.0|       1.0|
|[file:/home/yang/...| cat.1087.jpg|  1.0|       1.0|
|[file:/home/yang/...|cat.11

In [17]:
# Accuracy = fraction of validation rows whose prediction equals the label.
correct = predictionDF.filter("label=prediction").count()
overall = predictionDF.count()

# An empty validation set (possible with a small dataset and a 10% random
# split) would otherwise raise ZeroDivisionError; report NaN instead.
accuracy = correct * 1.0 / overall if overall else float("nan")

# The parenthesised single-argument form prints the same text under both the
# Python 2 print statement and the Python 3 print function.
print("accuracy is %s" % accuracy)

accuracy is 0.991111111111
