In [0]:
# This cell for use in Google Colab
# But you can do this in Linux too

!rm -r *
!wget http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz
!wget http://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat
!tar zxvf 102flowers.tgz

In [0]:
import scipy
import os
import cv2
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from random import shuffle
from PIL import Image
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder

import keras
from keras.callbacks import ModelCheckpoint
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, Activation, BatchNormalization
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator

%matplotlib inline

TRAIN_DIR = 'jpg/'
IMG_SIZE = 128
batch_size = 16

In [0]:
mat = scipy.io.loadmat('imagelabels.mat')
labels = mat['labels']
labels_name_st = ['pink primrose', 'hard-leaved pocket orchid', 'canterbury bells', 'sweet pea', 'english marigold', 'tiger lily', 'moon orchid', 'bird of paradise', 'monkshood', 'globe thistle', 'snapdragon', "colt's foot", 'king protea', 'spear thistle', 'yellow iris', 'globe-flower', 'purple coneflower', 'peruvian lily', 'balloon flower', 'giant white arum lily', 'fire lily', 'pincushion flower', 'fritillary', 'red ginger', 'grape hyacinth', 'corn poppy', 'prince of wales feathers', 'stemless gentian', 'artichoke', 'sweet william', 'carnation', 'garden phlox', 'love in the mist', 'mexican aster', 'alpine sea holly', 'ruby-lipped cattleya', 'cape flower', 'great masterwort', 'siam tulip', 'lenten rose', 'barbeton daisy', 'daffodil', 'sword lily', 'poinsettia', 'bolero deep blue', 'wallflower', 'marigold', 'buttercup', 'oxeye daisy', 'common dandelion', 'petunia', 'wild pansy', 'primula', 'sunflower', 'pelargonium', 'bishop of llandaff', 'gaura', 'geranium', 'orange dahlia', 'pink-yellow dahlia?', 'cautleya spicata', 'japanese anemone', 'black-eyed susan', 'silverbush', 'californian poppy', 'osteospermum', 'spring crocus', 'bearded iris', 'windflower', 'tree poppy', 'gazania', 'azalea', 'water lily', 'rose', 'thorn apple', 'morning glory', 'passion flower', 'lotus', 'toad lily', 'anthurium', 'frangipani', 'clematis', 'hibiscus', 'columbine', 'desert-rose', 'tree mallow', 'magnolia', 'cyclamen ', 'watercress', 'canna lily', 'hippeastrum ', 'bee balm', 'ball moss', 'foxglove', 'bougainvillea', 'camellia', 'mallow', 'mexican petunia', 'bromelia', 'blanket flower', 'trumpet creeper', 'blackberry lily']
labels_name = [i.replace(' ','_') for i in labels_name_st] 

In [0]:
def get_ind(filename, start="_0",end="."):
    """
    Take substring with index of jpg-file
    
    -example: 'image_00345.jpg' -> 345
    """
    filename = filename[filename.find(start) + len(start): filename.rfind(end)]
    return int(filename) - 1

In [0]:
def rename_files(TRAIN_DIR, labels, names):
    """
    Rename filenames in directory according to class of this image
    """
    labels = np.squeeze(labels) # from [[1,2,3]] to [1,2,3]
    jpg = '.jpg'
    for i, filename in enumerate(os.listdir(TRAIN_DIR)):
        label = labels[get_ind(os.path.join(TRAIN_DIR, filename))]
        os.rename(os.path.join(TRAIN_DIR,filename), os.path.join(TRAIN_DIR, str(i+1) + '.' + names[label-1] + jpg))
        
        
rename_files(TRAIN_DIR, labels,labels_name)

In [0]:
def create_class_folders(TRAIN_DIR, names):
    """
    Create folder for each class and move suit file to it
    """
    for dirname in names:
        os.mkdir(os.path.join(TRAIN_DIR, dirname))
    for filename in os.listdir(TRAIN_DIR):
        if filename.endswith('.jpg'):
            suit_dir = filename.split('.')[1] # like this -  111.suit_dir.jpg
            os.rename(os.path.join(TRAIN_DIR,filename), os.path.join(TRAIN_DIR, suit_dir, filename))

create_class_folders(TRAIN_DIR, labels_name)



Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [0]:
def build_finetune_model(dim, dropout, num_classes):

    model = Sequential()
    model.add(Conv2D(32, (3, 3),activation='relu', input_shape=(dim, dim, 3)))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    
    model.add(Conv2D(64, (3, 3),activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))

    
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())  # go from 3D to 1D
    model.add(Dense(1024))  # Fully connected layer
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(dropout))  # dropout to avoid over-fitting from: https://www.cs.toronto.edu/~hinton/absps/JMLRdropout.pdf
    model.add(Dense(num_classes))  # fully connected output layer
    model.add(Activation('softmax'))  # softmax the output within the range of (0 to 1) for prediction capabilities

    #  This compiles the model architecture and the necessary functions that we
    #  categorical crossentropy is the loss function for classification problems with more than 2 classes

    return model
 

finetune_model = build_finetune_model(IMG_SIZE, dropout=0.33, 
                                      num_classes=102)

In [36]:
train_datagen = ImageDataGenerator(rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2) # set validation split

train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training') # set as training data

validation_generator = train_datagen.flow_from_directory(
    TRAIN_DIR, # same directory as training data
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation') # set as validation data

Found 6587 images belonging to 102 classes.
Found 1602 images belonging to 102 classes.


In [0]:
finetune_model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',  # a very nice optimization function with an adaptable learning rate
                  metrics=['accuracy'])

history = finetune_model.fit_generator(train_generator, epochs=30, workers=1, 
                                       steps_per_epoch=6500 // batch_size, 
                                       shuffle=True)


plot_training(history)

def plot_training(history):

    acc = history.history['acc']
    val_acc = history.history['val_acc']
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(len(acc))
    plt.plot(epochs, acc, 'r.')
    plt.plot(epochs, val_acc, 'r')
    plt.title('Training and validation accuracy')
    plt.show()
    plt.savefig('acc_vs_epochs.png')

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
  2/406 [..............................] - ETA: 9:50 - loss: 0.3315 - acc: 0.9375