<a href="https://colab.research.google.com/github/wayne0git/python_basics/blob/master/tensorflow/tensorflow_dataset_hub_example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## TensorFlow Datasets / Hub Example
Ref - https://learning.edx.org/course/course-v1:HarvardX+TinyML2+3T2020

### Data preparation with tensorflow_datasets

In [1]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

In [2]:
# Fetch the cats_vs_dogs dataset and carve its single 'train' split into
# 80% / 10% / 10% slices for training, validation and testing.
#   as_supervised=True -> each element is an (input, label) 2-tuple
#   with_info=True     -> tfds.load also returns the tfds.core.DatasetInfo
(raw_train, raw_validation, raw_test), metadata = tfds.load(
    'cats_vs_dogs',
    split=['train[:80%]', 'train[80%:90%]', 'train[90%:]'],
    as_supervised=True,
    with_info=True,
)

[1mDownloading and preparing dataset cats_vs_dogs/4.0.0 (download: 786.68 MiB, generated: Unknown size, total: 786.68 MiB) to /root/tensorflow_datasets/cats_vs_dogs/4.0.0...[0m


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Completed...', max=1.0, style=Progre…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Dl Size...', max=1.0, style=ProgressSty…







HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))



Shuffling and writing examples to /root/tensorflow_datasets/cats_vs_dogs/4.0.0.incompleteKQ81H1/cats_vs_dogs-train.tfrecord


HBox(children=(FloatProgress(value=0.0, max=23262.0), HTML(value='')))

[1mDataset cats_vs_dogs downloaded and prepared to /root/tensorflow_datasets/cats_vs_dogs/4.0.0. Subsequent calls will reuse this data.[0m


In [5]:
# Inspect the dataset metadata: size of the full 'train' split and label cardinality
train_split_info = metadata.splits['train']
print('Number of examples: ', train_split_info.num_examples)
label_feature = metadata.features['label']
print('Number of classes: ', label_feature.num_classes)

Number of examples:  23262
Number of classes:  2


In [6]:
# Preprocessing function
def format_image(image, label, image_size=(224, 224)):
    """Resize an image and rescale its pixel values.

    Args:
        image: Image tensor to preprocess; it is passed to `tf.image.resize`.
        label: Label paired with the image; returned unchanged.
        image_size: Target `(height, width)` for the resize. Defaults to
            `(224, 224)`, so existing two-argument callers (for example
            `dataset.map(format_image)`) behave exactly as before.

    Returns:
        A `(image, label)` tuple where `image` has been resized to
        `image_size` and divided by 255.0 (this maps pixel values into
        [0, 1], assuming 8-bit input images).
    """
    image = tf.image.resize(image, image_size) / 255.0
    return image, label

In [10]:
# Build the tf.data input pipelines for the three splits
BATCH_SIZE = 32
num_examples = metadata.splits['train'].num_examples

# Training: shuffle -> preprocess -> batch -> prefetch.
# The shuffle buffer is a quarter of the full 'train' split's example count.
train_batches = (
    raw_train
    .shuffle(num_examples // 4)
    .map(format_image)
    .batch(BATCH_SIZE)
    .prefetch(1)
)

# Validation: same preprocessing and batching, but no shuffling
validation_batches = (
    raw_validation
    .map(format_image)
    .batch(BATCH_SIZE)
    .prefetch(1)
)

# Test: preprocessed and served one example per batch
test_batches = (
    raw_test
    .map(format_image)
    .batch(1)
)

In [12]:
# Check data shape
# Pull a single batch from the training pipeline and inspect the image tensor's shape.
image_batch, label_batch = next(iter(train_batches.take(1)))

image_batch.shape

TensorShape([32, 224, 224, 3])

### Use a pretrained model from TensorFlow Hub

In [13]:
import tensorflow_hub as hub

In [16]:
# hub.KerasLayer wraps a SavedModel (or a legacy TF1 Hub format) as a Keras layer.
# The handle below selects the MobileNet V2 feature-vector module on TF Hub.
MODULE_HANDLE = "https://tfhub.dev/google/tf2-preview/{}/feature_vector/4".format("mobilenet_v2")

# trainable=False keeps the pretrained weights frozen
feature_extractor = hub.KerasLayer(
    MODULE_HANDLE,
    trainable=False,
    input_shape=(224, 224, 3),
    output_shape=[1280],
)

In [19]:
# Transfer learning: stack a 2-unit softmax classifier on top of the
# pretrained feature extractor
model = tf.keras.Sequential()
model.add(feature_extractor)
model.add(tf.keras.layers.Dense(2, activation='softmax'))
model.summary()

# Train for 5 epochs, evaluating on the validation batches after each epoch
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.fit(train_batches, validation_data=validation_batches, epochs=5)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer (KerasLayer)     (None, 1280)              2257984   
_________________________________________________________________
dense (Dense)                (None, 2)                 2562      
Total params: 2,260,546
Trainable params: 2,562
Non-trainable params: 2,257,984
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f1b3f61af60>