In [250]:
!pip install -q split-folders

In [251]:
!pip install mlxtend

In [252]:
import tensorflow as tf
print('TensorFlow version:', tf.__version__)

from tensorflow.keras.preprocessing.image import ImageDataGenerator

import os
import zipfile
import splitfolders
import glob

from PIL import Image

from mlxtend.plotting import plot_confusion_matrix
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import numpy as np
import pandas as pd

## Flowers dataset overview

### Total number of images, overall and per category

In [253]:
path = '../input/flowers-recognition/flowers'
# os.listdir returns entries in arbitrary order. Sorting makes the category order
# reproducible and equal to the alphanumeric order Keras uses for class indices,
# which matters because `categories` is reused later as the list of class names.
categories = sorted(os.listdir(path))

# List every category directory once and reuse the counts below.
images_per_category = {cat: len(os.listdir(os.path.join(path, cat))) for cat in categories}

print('Total number of images:', sum(images_per_category.values()))
print('\nFlowers categories:', len(categories), categories)
print('\nTotal number images by each category:')
# Pad the labels to a common width so the counts line up in one column.
label_width = max((len(cat) for cat in categories), default=0) + len(' images:')
for cat in categories:
    print((cat.capitalize() + ' images:').ljust(label_width), images_per_category[cat])

### Resolution of the images

In [254]:
%%time

# Pixel dimensions of every JPEG in the dataset, one list entry per image.
img_shapes = {"height": [], "width": []}

for cat in categories:
    filelist = glob.glob(path + '/' + cat + '/*.jpg')
    for fname in filelist:
        # Open each file once and close it again via the context manager.
        # Image.size is (width, height) and does not require converting the
        # whole image to a NumPy array just to learn its shape.
        with Image.open(fname) as img:
            width, height = img.size
        img_shapes["height"].append(height)
        img_shapes["width"].append(width)

In [255]:
# One row per image, with columns "height" and "width" taken from img_shapes.
df_img_sizes = pd.DataFrame(img_shapes)
print(df_img_sizes.shape)
# Summary statistics of both columns; shown as the cell output.
df_img_sizes.describe()

In [256]:
# Histograms of the image heights and widths (one subplot per column).
df_img_sizes.hist()

In [257]:
# Image size used for the generators and the model input, chosen from the most
# common resolutions in the histograms of the previous cell.
IMG_HEIGHT = 240
IMG_WIDTH = 260

In [258]:
def draw_flowers(category, n_images=8, ncols=4):
    """Display the first images of one flower category in a grid.

    Parameters
    ----------
    category : str
        Name of the sub-directory of the module-level ``path`` whose images
        are shown.
    n_images : int, optional
        Maximum number of images to display. Defaults to 8, the number that
        was previously hard-coded.
    ncols : int, optional
        Number of grid columns. Defaults to 4.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.
    """
    category_dir = os.path.join(path, category)
    # os.listdir order is arbitrary; sorting shows the same images on every run.
    image_files = sorted(os.listdir(category_dir))[:n_images]
    if not image_files:
        print('No images found in', category_dir)
        return

    # Only allocate as many rows as needed, so the figure has no blank half.
    nrows = -(-len(image_files) // ncols)  # ceiling division
    fig, axes = plt.subplots(nrows, ncols,
                             figsize=(ncols * 4, nrows * 4),
                             squeeze=False)

    # Hide the axes of every cell, including cells that stay empty.
    for ax in axes.ravel():
        ax.axis('off')

    for ax, fname in zip(axes.ravel(), image_files):
        ax.imshow(mpimg.imread(os.path.join(category_dir, fname)))

    plt.show()

### Dandelion

In [259]:
# Preview sample images from the 'dandelion' folder.
draw_flowers('dandelion')

### Daisy

In [260]:
# Preview sample images from the 'daisy' folder.
draw_flowers('daisy')

### Sunflower

In [261]:
# Preview sample images from the 'sunflower' folder.
draw_flowers('sunflower')

### Tulip

In [262]:
# Preview sample images from the 'tulip' folder.
draw_flowers('tulip')

### Rose

In [263]:
# Preview sample images from the 'rose' folder.
draw_flowers('rose')

## Split data into train, validation and test sets

In [264]:
# Copy the dataset into "output" using a 60% / 20% / 20% ratio; the fixed seed
# keeps the split reproducible.
splitfolders.ratio(
    path,
    output="output",
    seed=100,
    ratio=(0.6, 0.2, 0.2),
    group_prefix=None,
)

In [265]:
path_train = 'output/train/'
path_valid = 'output/val/'
path_test = 'output/test/'

# For every split, add up the number of files in each category sub-directory.
for split_label, split_dir in (
    ('Train images:', path_train),
    ('Valid images:', path_valid),
    ('Test  images:', path_test),
):
    print(split_label, sum(len(os.listdir(split_dir + cat)) for cat in categories))

## ImageDataGenerator

In [266]:
BATCH_SIZE_TRAIN = 64

# The only preprocessing done by the generators is scaling pixels by 1/255.
train_datagen = ImageDataGenerator(rescale=1/255)
valid_datagen = ImageDataGenerator(rescale=1/255)
test_datagen = ImageDataGenerator(rescale=1/255)

# NOTE(review): Keras documents target_size as (height, width). Passing
# (IMG_WIDTH, IMG_HEIGHT) therefore resizes images to 260 px high and 240 px
# wide. The ordering is kept because the model's InputLayer is declared with
# the same (IMG_WIDTH, IMG_HEIGHT, 3) shape; change both together if at all.
train_generator = train_datagen.flow_from_directory(
    path_train,
    target_size=(IMG_WIDTH, IMG_HEIGHT),
    batch_size=BATCH_SIZE_TRAIN,
    class_mode='categorical')

valid_generator = valid_datagen.flow_from_directory(
    path_valid,
    target_size=(IMG_WIDTH, IMG_HEIGHT),
    batch_size=32,
    class_mode='categorical')

# shuffle=False keeps the batches in directory order, so predictions made from
# this generator line up with `test_generator.classes` (flow_from_directory
# shuffles by default, which would misalign them).
test_generator = test_datagen.flow_from_directory(
    path_test,
    target_size=(IMG_WIDTH, IMG_HEIGHT),
    batch_size=32,
    class_mode='categorical',
    shuffle=False
)

## Model

In [267]:
# Shorthand for the namespace that holds the augmentation layers.
_preproc = tf.keras.layers.experimental.preprocessing

# Random augmentation layers placed directly after the input.
# The trailing remarks are the author's own experiment notes.
_augmentation_layers = [
    _preproc.RandomTranslation(height_factor=(-0.2, 0.3), width_factor=(-0.2, 0.3)),  # author's note: very good
    _preproc.RandomFlip(mode='horizontal_and_vertical'),  # author's note: increases accuracy
    _preproc.RandomRotation(factor=(-0.2, 0.3)),  # author's note: increases accuracy
    # NOTE(review): per the Keras signature RandomContrast(factor, seed=None),
    # the second positional argument is the seed, so .7 is probably not acting
    # as an upper contrast factor - confirm the intent (factor=(.4, .7)?).
    _preproc.RandomContrast(.4, .7),  # author's note: unsure, increases accuracy a little
    _preproc.RandomZoom(.2, .2),  # author's note: OK
]

# Six Conv2D -> BatchNormalization -> MaxPooling2D stages with growing filter counts.
_conv_layers = []
for _n_filters in (32, 32, 64, 64, 128, 256):
    _conv_layers.extend([
        tf.keras.layers.Conv2D(_n_filters, (3, 3), activation='relu'),
        tf.keras.layers.BatchNormalization(renorm=True),
        tf.keras.layers.MaxPooling2D(2, 2),
    ])

# Classifier head: flatten, then two hidden Dense stages (each followed by
# BatchNormalization and Dropout), then a 5-way softmax output.
# Alternative to Flatten left by the author: tf.keras.layers.GlobalAvgPool2D()
_classifier_layers = [
    tf.keras.layers.Flatten(),
    tf.keras.layers.BatchNormalization(renorm=True),
    tf.keras.layers.Dropout(rate=.2),
]
for _n_units in (256, 128):
    _classifier_layers.extend([
        tf.keras.layers.Dense(_n_units, activation='relu'),
        tf.keras.layers.BatchNormalization(renorm=True),
        tf.keras.layers.Dropout(rate=.2),
    ])
_classifier_layers.append(tf.keras.layers.Dense(5, activation='softmax'))

model = tf.keras.models.Sequential(
    [tf.keras.layers.InputLayer(input_shape=(IMG_WIDTH, IMG_HEIGHT, 3))]
    + _augmentation_layers
    + _conv_layers
    + _classifier_layers
)

model.summary()

#### Compile

In [268]:
# Categorical cross-entropy matches the one-hot labels produced by the
# generators (class_mode='categorical'); accuracy is tracked as the metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

#### Callbacks

In [269]:
# Training accuracy (as a fraction, 0-1) above which training is stopped.
DESIRED_ACCURACY = 0.98


class DesiredAccuracy(tf.keras.callbacks.Callback):
    """Stop training once the training accuracy exceeds DESIRED_ACCURACY."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the 'accuracy' entry of ``logs`` after every epoch.

        Parameters
        ----------
        epoch : int
            Index of the epoch that just finished (unused).
        logs : dict, optional
            Metrics reported by Keras for the epoch. A missing dict or a
            missing 'accuracy' key is ignored instead of raising.
        """
        accuracy = (logs or {}).get('accuracy')
        if accuracy is not None and accuracy > DESIRED_ACCURACY:
            # DESIRED_ACCURACY is a fraction, so convert it before printing it
            # as a percentage (0.98 -> "98%").
            print("\nReached {:g}% accuracy so cancelling training!".format(DESIRED_ACCURACY * 100))
            self.model.stop_training = True


acc_threshold = DesiredAccuracy()

# Early stopping: no `monitor` is passed, so the Keras default metric is used.
# Training stops after 30 epochs without an improvement of at least 0.001, and
# the weights of the best epoch are restored.
overfit_threshold = tf.keras.callbacks.EarlyStopping(
    patience=30,
    min_delta=0.001,
    restore_best_weights=True,
)

# Checkpointing: overwrite the file only when validation accuracy reaches a new maximum.
checkpoint_filepath = 'checkpoint.hdf5'
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_best_only=True,
    monitor='val_accuracy',
    mode='max',
    verbose=1,
)

### Train

In [270]:
%%time

# Upper bound on the number of epochs; the callbacks may end training earlier.
epochs = 500

history = model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=epochs,
    callbacks=[acc_threshold, overfit_threshold, checkpoint],
    verbose=1,
)

### Plot results

In [271]:
# Per-epoch metrics recorded by model.fit.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

print('Max valid acc:', max(val_acc))
print('Min valid loss:', min(val_loss))

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(14, 5))

#------------------------------------------------
# Plot training and validation accuracy per epoch
#------------------------------------------------
ax_acc.plot(range(len(acc)), acc, label='Train Accuracy')
ax_acc.plot(range(len(val_acc)), val_acc, label='Valid Accuracy')
ax_acc.set_title('Train and Validation Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
# Without a legend the curve labels are never shown.
ax_acc.legend()

#------------------------------------------------
# Plot training and validation loss per epoch
#------------------------------------------------
ax_loss.plot(range(len(loss)), loss, label='Train Loss')
ax_loss.plot(range(len(val_loss)), val_loss, label='Valid Loss')
ax_loss.set_title('Train and Validation Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

plt.show()

### Load the best weights

In [272]:
# Load the weights from the file written by the ModelCheckpoint callback
# (saved only when val_accuracy improved).
model.load_weights(checkpoint_filepath)

## Evaluate on the test set

In [273]:
# Evaluate on the test split; the return value (loss followed by the compiled
# 'accuracy' metric) is shown as the cell output.
model.evaluate(test_generator)

### Predict & Confusion matrix

In [274]:
# Predict batch by batch and take the true labels from the very same batches.
# This keeps predictions and labels aligned even when the generator shuffles;
# comparing model.predict(test_generator) with test_generator.classes is only
# valid for an unshuffled generator.
batch_probs, batch_labels = [], []
for batch_idx in range(len(test_generator)):
    batch_images, batch_one_hot = test_generator[batch_idx]
    batch_probs.append(model.predict_on_batch(batch_images))
    batch_labels.append(np.argmax(batch_one_hot, axis=1))

Y_pred = np.concatenate(batch_probs)     # class probabilities, one row per test image
y_pred = np.argmax(Y_pred, axis=1)       # predicted class index
y_true = np.concatenate(batch_labels)    # true class index, same order as y_pred

# Class names ordered by the index the generator assigned to them. The order
# of os.listdir (used for `categories`) is not guaranteed to match it.
class_names = sorted(test_generator.class_indices, key=test_generator.class_indices.get)
class_ids = np.arange(len(class_names))

font = { 'size': 14}
plt.rc('font', **font)

# Passing `labels` fixes the matrix/report layout even if a class is never predicted.
mat = confusion_matrix(y_true, y_pred, labels=class_ids)
plot_confusion_matrix(conf_mat=mat, figsize=(6,6), class_names=class_names, show_normed=True)

# Classification Report
print(classification_report(y_true, y_pred, labels=class_ids, target_names=class_names))

## Model weights analysis

In [275]:
# Keep only the layers whose name contains 'dense' or 'flatten'.
layers = [
    layer
    for layer in model.layers
    if any(tag in layer.name for tag in ('dense', 'flatten'))
]

# Print shape, minimum, maximum and standard deviation of every weight tensor.
for layer in layers:
    print('\n### Layer:', layer.name)

    for idx, weight in enumerate(layer.weights):
        values = weight.numpy()
        print('\n', str(idx) + ') shape:', values.shape)
        print('min:', values.min())
        print('max', values.max())
        print('std:', values.std())
      