In [1]:
import matplotlib.pylab as plt
import tensorflow as tf
import tensorflow_hub as hub
import os
import numpy as np
import tensorflow_datasets as tfds
import warnings
warnings.filterwarnings('ignore')

This project attempts to classify cassava leaf images into five classes: four disease conditions — Cassava Mosaic Disease (CMD), Cassava Green Mite (CGM), Cassava Brown Streak Disease (CBSD) and Cassava Bacterial Blight (CBB) — and a healthy condition.

The `cassava` dataset from TensorFlow Datasets is used for this purpose.

In [2]:
# Fetch the three cassava splits as (image, label) pairs, plus the dataset
# metadata. `dataset` is a list ordered [train, test, validation].
dataset, info = tfds.load(
    name='cassava',
    split=['train', 'test', 'validation'],
    as_supervised=True,
    with_info=True,
)

Downloading and preparing dataset 1.26 GiB (download: 1.26 GiB, generated: Unknown size, total: 1.26 GiB) to ~/tensorflow_datasets/cassava/0.1.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/5656 [00:00<?, ? examples/s]

Shuffling ~/tensorflow_datasets/cassava/0.1.0.incompletePGOSVX/cassava-train.tfrecord*...:   0%|          | 0/…

Generating test examples...:   0%|          | 0/1885 [00:00<?, ? examples/s]

Shuffling ~/tensorflow_datasets/cassava/0.1.0.incompletePGOSVX/cassava-test.tfrecord*...:   0%|          | 0/1…

Generating validation examples...:   0%|          | 0/1889 [00:00<?, ? examples/s]

Shuffling ~/tensorflow_datasets/cassava/0.1.0.incompletePGOSVX/cassava-validation.tfrecord*...:   0%|         …

Dataset cassava downloaded and prepared to ~/tensorflow_datasets/cassava/0.1.0. Subsequent calls will reuse this data.


In [3]:
# Display the dataset metadata object returned by tfds.load (with_info=True).
info

tfds.core.DatasetInfo(
    name='cassava',
    full_name='cassava/0.1.0',
    description="""
    Cassava consists of leaf images for the cassava plant depicting healthy and
    four (4) disease conditions; Cassava Mosaic Disease (CMD), Cassava Bacterial
    Blight (CBB), Cassava Greem Mite (CGM) and Cassava Brown Streak Disease (CBSD).
    Dataset consists of a total of 9430 labelled images.
    The 9430 labelled images are split into a training set (5656), a test set(1885)
    and a validation set (1889). The number of images per class are unbalanced with
    the two disease classes CMD and CBSD having 72% of the images.
    """,
    homepage='https://www.kaggle.com/c/cassava-disease/overview',
    data_path='~/tensorflow_datasets/cassava/0.1.0',
    file_format=tfrecord,
    download_size=1.26 GiB,
    dataset_size=1.26 GiB,
    features=FeaturesDict({
        'image': Image(shape=(None, None, 3), dtype=tf.uint8),
        'image/filename': Text(shape=(), dtype=tf.string),
        'l

In [None]:
# Load the training split on its own and preview a grid of sample images.
# The split must be 'train' to match the variable names; the original loaded
# 'test' into a variable called `train`.
train, info_train = tfds.load(name='cassava', with_info=True, split='train')
# show_examples takes (dataset, info); the (info, dataset) order is deprecated.
tfds.show_examples(train, info_train)

In [None]:
def sc(image, label, image_size=(224, 224), num_classes=5):
  """Rescale and resize an image, and one-hot encode its label.

  Args:
    image: Image tensor. It is cast to float32 and divided by 255, which maps
      uint8 pixel values into the range [0, 1].
    label: Integer class index.
    image_size: (height, width) handed to tf.image.resize. The default of
      (224, 224) matches the input shape the model's hub layer is built with.
    num_classes: Length of the one-hot label vector. Defaults to 5.

  Returns:
    A tuple (resized_image, one_hot_label).
  """
  image = tf.cast(image, tf.float32) / 255.0
  return tf.image.resize(image, image_size), tf.one_hot(label, num_classes)

In [None]:
def get_dataset(batch_size = 32):
  """Build the batched train, test and validation input pipelines.

  Every split of the module-level `dataset` list is preprocessed with `sc` and
  batched; only the first (training) split is shuffled, using a 1000-element
  buffer, before batching.

  Args:
    batch_size: Number of examples per batch for all three pipelines.

  Returns:
    A tuple (train, test, validation) of batched datasets.
  """
  def _batched(split_index, shuffle_buffer=None):
    # Preprocess first, optionally shuffle, and batch last.
    pipeline = dataset[split_index].map(sc)
    if shuffle_buffer is not None:
      pipeline = pipeline.shuffle(shuffle_buffer)
    return pipeline.batch(batch_size)

  return _batched(0, shuffle_buffer=1000), _batched(1), _batched(2)

In [None]:
train_dataset, test_dataset, val_dataset = get_dataset()
# Dataset.cache() returns a new dataset instead of modifying the receiver, so
# its result has to be rebound; calling it and discarding the result caches
# nothing.
val_dataset = val_dataset.cache()
# The training pipeline is deliberately not cached here: it is already
# shuffled and batched, and caching after shuffle() would replay the first
# epoch's batches in the same order on every later epoch. If training-set
# caching is wanted, cache() must come before shuffle() in the pipeline.

In [None]:
# TensorFlow Hub handle of the MobileNet V2 feature-vector module that is
# wrapped in a hub.KerasLayer below.
f_e = "https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4"

In [None]:
# Wrap the hub module as a Keras layer that takes 224x224 RGB images.
f_e_layer = hub.KerasLayer(
    f_e,
    input_shape=(224, 224, 3),
)

In [None]:
# Mark the hub layer as non-trainable (presumably so that only the layers
# stacked on top of it are fitted — confirm this is the intended setup).
f_e_layer.trainable = False

In [None]:
# Classifier: hub feature extractor -> dropout -> 5-way softmax head.
model = tf.keras.Sequential()
model.add(f_e_layer)
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(5, activation='softmax'))

model.summary()

In [None]:
# The model's final Dense layer already applies softmax, so the loss receives
# probabilities rather than logits; from_logits must therefore be False.
# Labels are one-hot vectors, hence CategoricalCrossentropy.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
    metrics=['acc']
)

In [None]:
# Fit for 30 epochs, evaluating on the validation pipeline after each epoch.
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=30,
)

In [None]:
# Evaluate on the batched test pipeline; `res` holds whatever model.evaluate
# returns (presumably the loss followed by the 'acc' metric — confirm).
res = model.evaluate(test_dataset)

In [None]:
# Show ten raw test images together with their true and predicted class names.
for image, label in dataset[1].take(10):
  # Apply the same preprocessing used for the model inputs; the one-hot label
  # that sc also returns is not needed here.
  img_sc = sc(image, label)[0]
  # model.predict expects a batch, so add a leading batch axis of size 1.
  img_sc = np.expand_dims(img_sc, axis = 0)
  pred = model.predict(img_sc)
  plt.figure()
  plt.imshow(image)
  plt.show()
  print("Given: %s" % info.features["label"].names[label.numpy()])
  print("Predicted: %s" % info.features["label"].names[np.argmax(pred)])

In [None]:
# Collect true and predicted class indices over the whole test split, then
# build a single confusion matrix. Printing inside the loop would emit one
# partial matrix per 200-image batch instead of the overall result.
y_true_batches, y_pred_batches = [], []
for f0, f1 in dataset[1].map(sc).batch(200):
  # f1 is one-hot, so argmax recovers the integer class index.
  y_true_batches.append(np.argmax(f1, axis=1))
  y_pred_batches.append(np.argmax(model.predict(f0), axis=1))

y = np.concatenate(y_true_batches)
y_pred = np.concatenate(y_pred_batches)
print(tf.math.confusion_matrix(labels=y, predictions=y_pred, num_classes = 5))