In [None]:
import tensorflow as tf
import os

In [None]:
IMG_SIZE = 224

# Build datasets

## Open Images fruits

In [None]:
# install openimages library to get the data
!pip install openimages

Collecting openimages
  Downloading https://files.pythonhosted.org/packages/49/ba/587944c183999aa9a0416d6979739b78adfe021eee74aa9db78f0beaea06/openimages-0.0.1-py2.py3-none-any.whl
Collecting boto3
[?25l  Downloading https://files.pythonhosted.org/packages/c3/db/a7e290eb77632c9d25247977bbfc99aef9cd59f7c13eea69f8fea44404af/boto3-1.16.63-py2.py3-none-any.whl (130kB)
[K     |████████████████████████████████| 133kB 5.9MB/s 
Collecting cvdata
[?25l  Downloading https://files.pythonhosted.org/packages/47/e5/5361375b284ac1da759cf78329f8484cb33c039c4c91e38862ca4cba2ae6/cvdata-0.0.7-py2.py3-none-any.whl (49kB)
[K     |████████████████████████████████| 51kB 7.8MB/s 
Collecting jmespath<1.0.0,>=0.7.1
  Downloading https://files.pythonhosted.org/packages/07/cb/5f001272b6faeb23c1c9e0acc04d48eaaf5c862c17709d20e3469c6e0139/jmespath-0.10.0-py2.py3-none-any.whl
Collecting s3transfer<0.4.0,>=0.3.0
[?25l  Downloading https://files.pythonhosted.org/packages/ea/43/4b4a1b26eb03a429a4c37ca7fdf369d938bd6

In [None]:
# Download only the images of the wanted categories.
from openimages.download import download_dataset

fruit_list = ["Apple", "Banana", "Grape", "Mango", "Orange", "Peach", "Pear"]

# Create the destination with the same absolute path that is handed to the downloader,
# and tolerate an existing directory so the cell can be re-run. The previous
# `!mkdir fruit_dataset` depended on the current working directory and complained
# when the folder already existed.
os.makedirs("/content/fruit_dataset", exist_ok=True)
download_dataset("/content/fruit_dataset", fruit_list)

2021-02-02  20:11:17 INFO NumExpr defaulting to 2 threads.
2021-02-02  20:11:21 INFO Downloading 312 train images for class 'apple'
100%|██████████| 312/312 [00:15<00:00, 20.03it/s]
2021-02-02  20:11:37 INFO Downloading 570 train images for class 'banana'
100%|██████████| 570/570 [00:28<00:00, 20.11it/s]
2021-02-02  20:12:05 INFO Downloading 67 train images for class 'grape'
100%|██████████| 67/67 [00:04<00:00, 13.90it/s]
2021-02-02  20:12:10 INFO Downloading 109 train images for class 'mango'
100%|██████████| 109/109 [00:06<00:00, 16.39it/s]
2021-02-02  20:12:17 INFO Downloading 758 train images for class 'orange'
100%|██████████| 758/758 [00:36<00:00, 20.69it/s]
2021-02-02  20:12:53 INFO Downloading 62 train images for class 'peach'
100%|██████████| 62/62 [00:04<00:00, 14.18it/s]
2021-02-02  20:12:58 INFO Downloading 114 train images for class 'pear'
100%|██████████| 114/114 [00:06<00:00, 17.14it/s]
2021-02-02  20:13:05 INFO Downloading 23 validation images for class 'apple'
100%|███

{'apple': {'images_dir': '/content/fruit_dataset/apple/images'},
 'banana': {'images_dir': '/content/fruit_dataset/banana/images'},
 'grape': {'images_dir': '/content/fruit_dataset/grape/images'},
 'mango': {'images_dir': '/content/fruit_dataset/mango/images'},
 'orange': {'images_dir': '/content/fruit_dataset/orange/images'},
 'peach': {'images_dir': '/content/fruit_dataset/peach/images'},
 'pear': {'images_dir': '/content/fruit_dataset/pear/images'}}

In [None]:
# reorganize images into the 'fruit_dataset' folder
import os
import shutil

# Flatten <class>/images/* into <class>/* so every class folder directly holds its images.
for fruit in fruit_list:
  class_dir = f"/content/fruit_dataset/{fruit.lower()}"
  images_dir = os.path.join(class_dir, "images")
  if not os.path.isdir(images_dir):
    # Already flattened by a previous run of this cell: nothing left to move.
    continue

  image_names = os.listdir(images_dir)  # listed once, reused for the count and the move
  print(fruit, len(image_names))
  for image_name in image_names:
    shutil.move(os.path.join(images_dir, image_name), class_dir)

  # rmdir removes only the now-empty 'images' folder; removedirs would additionally
  # prune any parent directory that happens to be empty.
  os.rmdir(images_dir)

Apple 389
Banana 589
Grape 108
Mango 115
Orange 883
Peach 99
Pear 154


## Stanford dogs

In [None]:
# download dogs dataset
!wget http://vision.stanford.edu/aditya86/ImageNetDogs/images.tar

--2021-02-02 20:14:22--  http://vision.stanford.edu/aditya86/ImageNetDogs/images.tar
Resolving vision.stanford.edu (vision.stanford.edu)... 171.64.68.10
Connecting to vision.stanford.edu (vision.stanford.edu)|171.64.68.10|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 793579520 (757M) [application/x-tar]
Saving to: ‘images.tar’


2021-02-02 20:15:04 (18.3 MB/s) - ‘images.tar’ saved [793579520/793579520]



In [None]:
# extract the dogs dataset archive; the images end up in the 'Images' folder
!tar -xf images.tar

# Training utils

In [None]:
# check whether TensorFlow sees the GPU
tf.test.gpu_device_name()

'/device:GPU:0'

In [None]:
# check name of GPU
!nvidia-smi

Tue Feb  2 20:10:06 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.39       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P0    22W / 300W |      0MiB / 16130MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# Heavily inspired by https://keras.io/examples/vision/image_classification_efficientnet_fine_tuning/
def build_model(num_classes, img_size=224):
    """Build and compile an EfficientNetB3 classifier whose backbone is frozen.

    Args:
        num_classes: number of units of the final softmax layer.
        img_size: height and width of the square, 3-channel model input.

    Returns:
        A compiled `tf.keras.Model` named "EfficientNet".
    """
    layers = tf.keras.layers
    inputs = layers.Input(shape=(img_size, img_size, 3))

    # ImageNet-pretrained backbone without its classification head; weights stay frozen.
    backbone = tf.keras.applications.EfficientNetB3(
        include_top=False, input_tensor=inputs, weights="imagenet"
    )
    backbone.trainable = False

    # New head: global pooling -> batch norm -> dropout -> softmax.
    features = layers.GlobalAveragePooling2D(name="avg_pool")(backbone.output)
    features = layers.BatchNormalization()(features)
    features = layers.Dropout(0.2, name="top_dropout")(features)
    predictions = layers.Dense(num_classes, activation="softmax", name="pred")(features)

    # Labels are integer class indices, hence the sparse loss and metric.
    classifier = tf.keras.Model(inputs, predictions, name="EfficientNet")
    classifier.compile(
        optimizer='adam',
        loss="sparse_categorical_crossentropy",
        metrics=["sparse_categorical_accuracy"],
    )
    return classifier

In [None]:
tf.__version__

'2.4.1'

# Keras generators

In [None]:
def use_keras_generators(path):
  """Train the classifier for 5 epochs, feeding it from a Keras `ImageDataGenerator`.

  Args:
    path: directory containing one sub-directory of images per class.
  """
  datagen = tf.keras.preprocessing.image.ImageDataGenerator()
  dataset = datagen.flow_from_directory(path, (IMG_SIZE, IMG_SIZE), batch_size=32, class_mode='sparse')

  # Use the classes the iterator actually discovered rather than counting directory
  # entries: os.listdir would also count stray files sitting next to the class folders.
  num_classes = len(dataset.class_indices)
  model = build_model(num_classes, img_size=IMG_SIZE)

  # The iterator already yields batches of 32, so `batch_size` is not passed to fit().
  model.fit(dataset, epochs=5)

In [None]:
use_keras_generators('/content/fruit_dataset')

Found 2337 images belonging to 7 classes.
Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb3_notop.h5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
use_keras_generators('/content/Images')

Found 20580 images belonging to 120 classes.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


# Tf.data

In [None]:
from glob import glob
import random

def make_dataset(path, batch_size):
  """Build a shuffled, batched, endlessly repeating `tf.data` pipeline of (image, label).

  Args:
    path: directory laid out as <path>/<class>/<image file>.
    batch_size: number of examples per batch.

  Returns:
    A `tf.data.Dataset` of (images, labels) batches. Images are decoded with
    `tf.image.decode_jpeg` and resized to IMG_SIZE x IMG_SIZE; labels are integer
    indices into the alphabetically sorted class-directory names.

  Raises:
    ValueError: if no file matches <path>/*/*.
  """

  def parse_image(filename):
    # read the file -> decode as 3-channel JPEG -> resize to the model input size
    image = tf.io.read_file(filename)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [IMG_SIZE, IMG_SIZE])
    return image

  def configure_for_performance(ds):
    ds = ds.shuffle(buffer_size=1000)
    ds = ds.batch(batch_size)
    ds = ds.repeat()  # infinite stream: the caller bounds an epoch with steps_per_epoch
    ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    return ds

  # Sorted, directories only: label indices are then reproducible across runs instead of
  # following the arbitrary order of os.listdir. Hidden entries are skipped because the
  # '*' pattern below never matches them either.
  classes = sorted(
      name for name in os.listdir(path)
      if not name.startswith('.') and os.path.isdir(os.path.join(path, name))
  )
  class_to_index = {name: index for index, name in enumerate(classes)}

  filenames = glob(os.path.join(path, '*', '*'))
  if not filenames:
    raise ValueError(f"No files found under {path!r}; expected <path>/<class>/<image> files.")
  # Shuffle the file list up front: the 1000-element shuffle buffer alone does not mix
  # a list that would otherwise come grouped by class.
  random.shuffle(filenames)
  labels = [class_to_index[os.path.basename(os.path.dirname(name))] for name in filenames]

  filenames_ds = tf.data.Dataset.from_tensor_slices(filenames)
  images_ds = filenames_ds.map(parse_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
  labels_ds = tf.data.Dataset.from_tensor_slices(labels)
  ds = tf.data.Dataset.zip((images_ds, labels_ds))
  ds = configure_for_performance(ds)

  return ds

In [None]:
from glob import glob
import math

def use_tf_data(path):
  """Train the classifier for 5 epochs, feeding it from the `make_dataset` pipeline.

  Args:
    path: directory laid out as <path>/<class>/<image file>.
  """
  batch_size = 32  # single source of truth for the pipeline and the step count
  dataset = make_dataset(path, batch_size)

  # Upper bound on the label indices produced by make_dataset for this directory.
  num_classes = len(os.listdir(path))
  num_images = len(glob(path + '/*/*'))
  model = build_model(num_classes, img_size=IMG_SIZE)

  # The dataset is already batched, so `batch_size` is not passed to fit(); it also
  # repeats forever, so steps_per_epoch is what delimits one pass over the images.
  model.fit(dataset, epochs=5, steps_per_epoch=math.ceil(num_images / batch_size))

In [None]:
use_tf_data('/content/fruit_dataset')

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb3_notop.h5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
use_tf_data('/content/Images')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
 44/644 [=>............................] - ETA: 41s - loss: 0.2885 - sparse_categorical_accuracy: 0.9119

KeyboardInterrupt: ignored

### image_dataset_from_directory

In [None]:
def use_keras_new(path):
  """Train the classifier for 5 epochs, feeding it from `image_dataset_from_directory`.

  Args:
    path: directory containing one sub-directory of images per class.
  """
  keras_ds = tf.keras.preprocessing.image_dataset_from_directory(path, batch_size=32, image_size=(IMG_SIZE, IMG_SIZE))

  # Read the discovered classes before prefetch(): `class_names` is set on the dataset
  # returned by the loader, not on datasets derived from it. Counting os.listdir entries
  # instead would also count stray files next to the class folders.
  num_classes = len(keras_ds.class_names)
  keras_ds = keras_ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

  model = build_model(num_classes, img_size=IMG_SIZE)

  # The dataset is already batched, so `batch_size` is not passed to fit().
  model.fit(keras_ds, epochs=5)

In [None]:
use_keras_new('/content/fruit_dataset')

Found 2337 files belonging to 7 classes.
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
use_keras_new('/content/Images')

Found 20580 files belonging to 120 classes.
Epoch 1/5
Epoch 2/5
104/644 [===>..........................] - ETA: 41s - loss: 0.4844 - sparse_categorical_accuracy: 0.8558

KeyboardInterrupt: ignored

# Tfrecords

## Make tfrecords

In [None]:
from glob import glob
import os
import random

def serialize_example(image, label):
    """Pack one (image, label) pair into a serialized `tf.train.Example`.

    Args:
        image: bytes stored under the 'image' key.
        label: integer stored under the 'label' key.

    Returns:
        The serialized example as a byte string.
    """
    image_feature = tf.train.Feature(bytes_list=tf.train.BytesList(value=[image]))
    label_feature = tf.train.Feature(int64_list=tf.train.Int64List(value=[label]))

    features = tf.train.Features(feature={'image': image_feature, 'label': label_feature})
    return tf.train.Example(features=features).SerializeToString()

def make_tfrecords(path, record_file='/content/images.tfrecords'):
  """Write every file matching <path>/*/* into a single TFRecord file, in random order.

  Each record holds the raw file bytes under 'image' and, under 'label', the index of the
  file's parent directory in the alphabetically sorted list of class directories.

  Args:
    path: directory laid out as <path>/<class>/<image file>.
    record_file: destination of the TFRecord file; it is overwritten if it exists.
  """
  # Sorted, directories only: label indices are reproducible instead of following the
  # arbitrary order of os.listdir. Hidden entries are skipped because the '*' pattern
  # below never matches them either.
  classes = sorted(
      name for name in os.listdir(path)
      if not name.startswith('.') and os.path.isdir(os.path.join(path, name))
  )
  class_to_index = {name: index for index, name in enumerate(classes)}

  files_list = glob(os.path.join(path, '*', '*'))
  random.shuffle(files_list)

  with tf.io.TFRecordWriter(record_file) as writer:
    for filename in files_list:
      # Close each image file right after reading it; the bare open(...).read() left
      # one handle per image waiting for garbage collection.
      with open(filename, 'rb') as image_file:
        image_string = image_file.read()
      category = os.path.basename(os.path.dirname(filename))
      tf_example = serialize_example(image_string, class_to_index[category])
      writer.write(tf_example)

In [None]:
make_tfrecords('/content/fruit_dataset')

In [None]:
make_tfrecords('/content/Images')

## Train with tfrecords

In [None]:
def _parse_image_function(example):
    """Turn one serialized example into an (image, label) pair.

    Args:
        example: serialized example holding a string 'image' and an int64 'label'.

    Returns:
        A tuple of the image decoded as 3-channel JPEG and resized to
        IMG_SIZE x IMG_SIZE, and the label cast to int32.
    """
    schema = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'label': tf.io.FixedLenFeature([], tf.int64),
    }
    parsed = tf.io.parse_single_example(example, schema)

    decoded = tf.image.decode_jpeg(parsed['image'], channels=3)
    resized = tf.image.resize(decoded, [IMG_SIZE, IMG_SIZE])

    return resized, tf.cast(parsed['label'], tf.int32)


def read_dataset(filename, batch_size):
    """Build an endlessly repeating (image, label) pipeline from a TFRecord file.

    Args:
        filename: path of the TFRecord file to read.
        batch_size: number of examples per batch; incomplete batches are dropped.

    Returns:
        A `tf.data.Dataset` that is parsed, shuffled, batched, repeated and prefetched.
    """
    autotune = tf.data.experimental.AUTOTUNE
    return (
        tf.data.TFRecordDataset(filename)
        .map(_parse_image_function, num_parallel_calls=autotune)
        .shuffle(500)
        .batch(batch_size, drop_remainder=True)
        .repeat()
        .prefetch(buffer_size=autotune)
    )

In [None]:
import math
from glob import glob

def use_tfrecords(path, tfrecords_path='/content/images.tfrecords'):
  """Train the classifier for 5 epochs, feeding it from a TFRecord file.

  Args:
    path: image directory the records were built from (<path>/<class>/<image file>);
      only used to count the classes and the images.
    tfrecords_path: TFRecord file to read. The default is also the default output of
      `make_tfrecords`, so it holds whichever dataset was written there last; pass an
      explicit path when several datasets have been converted.
  """
  batch_size = 32  # single source of truth for the pipeline and the step count
  dataset = read_dataset(tfrecords_path, batch_size)

  num_classes = len(os.listdir(path))
  num_images = len(glob(path + '/*/*'))
  model = build_model(num_classes, img_size=IMG_SIZE)

  # The dataset is already batched, so `batch_size` is not passed to fit(); it also
  # repeats forever, so steps_per_epoch is what delimits one epoch.
  model.fit(dataset, epochs=5, steps_per_epoch=math.ceil(num_images / batch_size))

In [None]:
use_tfrecords('/content/fruit_dataset')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
use_tfrecords('/content/Images')

NameError: ignored

## Tfrecords with TPU

In [None]:
import os
from tensorflow.python.profiler import profiler_client

# Derive the profile-service address from the Colab TPU address by swapping '8470'
# for '8466', then print what the profiler client's monitor call reports for it.
colab_tpu_address = os.environ['COLAB_TPU_ADDR']
tpu_profile_service_address = colab_tpu_address.replace('8470', '8466')
monitoring_report = profiler_client.monitor(tpu_profile_service_address, 100, 2)
print(monitoring_report)

  Timestamp: 19:49:53
  TPU type: TPU v2
  Utilization of TPU Matrix Units (higher is better): 0.000%




In [None]:
# Import PyDrive and
# associated libraries.
# This only needs to be done once in a notebook.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate the Colab user, then create the PyDrive client from the
# application-default credentials.
# This only needs to be done once in a notebook.
# NOTE(review): `drive` is not referenced by any other cell visible in this notebook;
# presumably the authentication step is what lets later cells write to gs:// paths - confirm.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [None]:
# Cloud Storage destinations of the TFRecord files: passed to make_tfrecords as the
# output file and to use_tfrecords_tpu as the file to read.
gs_path_fruit = 'gs://ai-decathlon-canada/data/test_yan/fruits/images.tfrecords'
gs_path_dog = 'gs://ai-decathlon-canada/data/test_yan/dogs/images.tfrecords'

In [None]:
make_tfrecords('/content/fruit_dataset', gs_path_fruit)

In [None]:
make_tfrecords('/content/Images', gs_path_dog)

In [None]:
import math
from glob import glob

def use_tfrecords_tpu(path, tfrecords_path):
  """Train the classifier for 5 epochs on a TPU, feeding it from a TFRecord file.

  Args:
    path: image directory the records were built from (<path>/<class>/<image file>);
      only used to count the classes and the images.
    tfrecords_path: TFRecord file to read.
  """
  batch_size = 32  # single source of truth for the pipeline and the step count
  dataset = read_dataset(tfrecords_path, batch_size)

  num_classes = len(os.listdir(path))
  num_images = len(glob(path + '/*/*'))

  # Connect to the TPU and initialise it before creating the distribution strategy.
  tpu_cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
  tf.config.experimental_connect_to_cluster(tpu_cluster_resolver)
  tf.tpu.experimental.initialize_tpu_system(tpu_cluster_resolver)
  strategy = tf.distribute.TPUStrategy(tpu_cluster_resolver)

  # The model (and therefore its variables) is created inside the strategy scope.
  with strategy.scope():
    model = build_model(num_classes, img_size=IMG_SIZE)

  # The dataset is already batched, so `batch_size` is not passed to fit(). Integer floor
  # division gives the number of full batches without a float round-trip.
  model.fit(dataset, epochs=5, steps_per_epoch=num_images // batch_size)

In [None]:
use_tfrecords_tpu('/content/fruit_dataset', gs_path_fruit)





INFO:tensorflow:Initializing the TPU system: grpc://10.8.217.138:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.8.217.138:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Found TPU system:


INFO:tensorflow:Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
