In [1]:
#datasci libraries
import math
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

#tensorflow
import tensorflow as tf
from tensorflow.python import keras
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras import layers
from tensorflow.keras.layers import Input, Add, Dense, Dropout, Activation, ZeroPadding2D, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D, BatchNormalization
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.optimizers import Adam, RMSprop

#visualization
from matplotlib.pyplot import imshow
import cv2

#misc
import os.path
from pathlib import Path
import glob
import random
from sklearn.metrics import classification_report

The dataset already has a pretrained EfficientNet model but I want to make my own

In [None]:
# Show the top-level entries of the Kaggle dataset directory.
dataset_root = '/kaggle/input/100-bird-species'
os.listdir(dataset_root)

Using some helper functions for deep learning from Github for fun

In [None]:
!wget https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/extras/helper_functions.py
from helper_functions import plot_loss_curves, walk_through_dir

The data is split into 3 folders (train, validation, test), and each folder contains 525 subfolders, one per species. The number of images per species is around 150 for training and 5 each for validation and testing.

In [None]:
# Absolute path, matching the '/kaggle/input/...' paths used by every other cell.
# The earlier relative "../input/..." form only resolved when the kernel's working
# directory happened to sit directly under /kaggle.
dataset = "/kaggle/input/100-bird-species/train"
walk_through_dir(dataset)

Will be using a batch size of 32 and image size of 224x224x3

In [None]:
# Locations of the three dataset splits.
train_path, val_path, test_path = (
    '/kaggle/input/100-bird-species/train',
    '/kaggle/input/100-bird-species/valid',
    '/kaggle/input/100-bird-species/test',
)

# Input pipeline settings: target image size (height, width) and batch size.
imgres = (224, 224)
batch = 32

Concatenating all files into a dataframe for visualization

In [None]:
# Build a (Filepath, Label) dataframe with one row per training image.
image_dir = Path(dataset)

# Collect images in a single pass, matching the extension case-insensitively.
# The previous version concatenated four globs and listed '**/*.png' twice, so
# every PNG was added to the dataframe twice and '.PNG' files were never matched.
IMAGE_SUFFIXES = {'.jpg', '.png'}
filepaths = sorted(
    path for path in image_dir.glob('**/*')
    if path.suffix.lower() in IMAGE_SUFFIXES
)

# The label is the name of the folder that directly contains the image.
labels = [path.parent.name for path in filepaths]

filepaths = pd.Series(filepaths, name='Filepath').astype(str)
labels = pd.Series(labels, name='Label')

image_df = pd.concat([filepaths, labels], axis=1)
print(image_df.shape)
image_df.head()

Shows a sample of 16 random bird images

In [None]:
# 4x4 grid of randomly chosen training images, each titled with its label.
fig, axes = plt.subplots(4, 4, figsize=(11, 11))
for ax in axes.flat:
    # randint is inclusive on both ends, hence len - 1.
    sample = image_df.loc[random.randint(0, len(image_df) - 1)]
    ax.imshow(plt.imread(sample['Filepath']))
    ax.set_title(sample['Label'])
    ax.axis('off')
plt.show()

Rescale the RGB values to a range of [0, 1]

In [None]:
# One generator per split; all three apply the same 1/255 pixel rescaling.
train_generator, val_generator, test_generator = (
    ImageDataGenerator(rescale=1./255) for _ in range(3)
)

Read train/val/test images straight from directory

In [None]:
# Settings shared by all three directory iterators.
flow_options = dict(
    target_size=imgres,
    class_mode='categorical',
    batch_size=batch,
)

train_images = train_generator.flow_from_directory(train_path, **flow_options)

val_images = val_generator.flow_from_directory(val_path, **flow_options)

# shuffle=False keeps the test batches in a fixed order so that predictions
# can later be matched back to files.
test_images = test_generator.flow_from_directory(
    test_path,
    shuffle=False,
    **flow_options
)

Using transfer learning: we freeze the early pretrained layers of the Inception network and fine-tune the rest. An Inception network works by running convolutional layers with different filter sizes in parallel to capture and combine features at different scales. It's great for image classification because it uses fewer parameters than a traditional deep CNN and can capture a diverse range of features in an image. Fun fact: it was named after Christopher Nolan's movie of the same name because of the quote "We need to go deeper", which is pretty much what it does -- go deeper.

In [None]:
# ImageNet-pretrained InceptionV3 base without its classification top.
inception = InceptionV3(
    input_shape=(224, 224, 3),
    include_top=False,
    weights='imagenet',
)
inception.trainable = True

# Freeze the first 197 layers, leave the rest trainable, and list every
# layer's trainable flag.
for index, layer in enumerate(inception.layers):
    if index < 197:
        layer.trainable = False
    print(layer.name, '--', layer.trainable)

Make sure we know the shape of the last layer so we can add in our own stuff

In [None]:
# Take the output of the 'mixed7' block as the feature map to build on.
last_layer = inception.get_layer('mixed7')
# Read the shape from the output tensor itself. `.output.shape` works across
# Keras versions, whereas the `Layer.output_shape` attribute is reported to be
# unavailable under Keras 3 (TODO confirm against the installed version).
print('last layer output shape: ', last_layer.output.shape)
layer_output = last_layer.output

# Use the classes the training iterator actually discovered, so the softmax
# width always matches the one-hot labels it yields. Counting os.listdir()
# entries would also count any stray non-class files in the train folder.
n_categories = len(train_images.class_indices)
print(n_categories)

Adding a flatten layer to transition from convolution to fully connected, a final hidden layer with 1024 nodes, and a dropout layer to prevent overfitting. Our output will be a 525-way softmax, with one class per bird species.

In [None]:
# Classifier head stacked on the selected Inception feature map:
# flatten -> 1024-unit ReLU layer with L2 penalty -> dropout -> softmax.
flattened = Flatten()(layer_output)
hidden = Dense(
    1024,
    activation='relu',
    kernel_regularizer=tf.keras.regularizers.l2(0.001),
)(flattened)
dropped = Dropout(0.4)(hidden)
class_probs = Dense(n_categories, activation='softmax')(dropped)

model = Model(inputs=inception.inputs, outputs=class_probs)
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(learning_rate=0.0001),
    metrics=['accuracy'],
)

Using learning rate decay to gradually reduce step size to ensure we converge towards a more optimal solution as we get closer to our goal

In [None]:
def scheduler(epoch, lr):
    """Learning-rate schedule: hold for 10 epochs, then decay exponentially.

    Args:
        epoch: Index of the epoch about to start.
        lr: Learning rate currently in use.

    Returns:
        `lr` unchanged while `epoch < 10`; otherwise `lr * exp(-0.1)`,
        returned as a plain float.
    """
    if epoch < 10:
        return lr
    # math.exp keeps the result a Python float. tf.math.exp returned a tensor,
    # which LearningRateScheduler rejects in Keras versions that require the
    # schedule function to return a float.
    return lr * math.exp(-0.1)
    
# Wrap the schedule function in the Keras callback that applies it each epoch.
callback = LearningRateScheduler(scheduler)

Training time! Not sure if 25 epochs was too many or too few, but I don't want to redo it and I'm not sure how to implement checkpoints in training yet. My computer went to sleep twice which is why those message rate warnings are there.

In [None]:
# Train for 25 epochs with val_images as validation data and the
# learning-rate scheduler callback attached.
fit_options = dict(
    validation_data=val_images,
    epochs=25,
    callbacks=[callback],
)
history = model.fit(train_images, **fit_options)

Looks like we had a bit of overfitting to the training set. Overall, I'm super happy with 99.6% for training and 94.93% for validation even though there's quite a bit of reducible variance. Ways to reduce this could be data augmentation to get a bigger training set, early stopping to prevent overfitting, or maybe adding more L2/dropout regularization. Test accuracy stands even better at 97.07%, and I'll try uploading my own images of birds to the test set in a future project to see how that goes.

In [None]:
# Evaluate on the test split; index 0 is the loss and index 1 the accuracy
# metric the model was compiled with.
results = model.evaluate(test_images, verbose=0)
test_loss, test_accuracy = results[0], results[1]

print("    Test Loss: {:.5f}".format(test_loss))
print("Test Accuracy: {:.2f}%".format(test_accuracy * 100))

Cool graphs. Not sure why validation accuracy started so high.

In [None]:
# Per-epoch metrics recorded during training.
curves = history.history
accuracy, val_accuracy = curves['accuracy'], curves['val_accuracy']
loss, val_loss = curves['loss'], curves['val_loss']

epochs = range(len(accuracy))

# First figure: accuracy curves.
plt.plot(epochs, accuracy, 'b', label='Training accuracy')
plt.plot(epochs, val_accuracy, 'r', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend()

# Second figure: loss curves.
plt.figure()
plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

Taking a look at our predictions on the test set

In [None]:
# Class probabilities for every test image, reduced to the most likely class index.
probabilities = model.predict(test_images)
class_ids = np.argmax(probabilities, axis=1)

# Invert the generator's name -> index mapping to translate indices back to names.
labels = {index: name for name, index in train_images.class_indices.items()}
pred = [labels[class_id] for class_id in class_ids]

In [None]:
# Preview the first 20 predicted class names.
pred[:20]

Making a dataframe for the true labels of the test set so we can compare the predicted vs true labels

In [None]:
# Build a (Filepath, Label) dataframe with one row per test image.
image_dir = Path(test_path)

# Collect images in a single pass, matching the extension case-insensitively.
# The previous version listed '**/*.png' twice, which duplicated every PNG row
# (and would make this frame longer than the prediction list) and never matched
# '.PNG' files.
IMAGE_SUFFIXES = {'.jpg', '.png'}
filepaths = [
    path for path in image_dir.glob('**/*')
    if path.suffix.lower() in IMAGE_SUFFIXES
]

# The label is the name of the folder that directly contains the image.
labels = [path.parent.name for path in filepaths]

filepaths = pd.Series(filepaths, name='Filepath').astype(str)
labels = pd.Series(labels, name='Label')

test_df = pd.concat([filepaths, labels], axis=1)
# NOTE(review): later cells compare these rows positionally with `pred`, so this
# path-sorted order is assumed to match the order in which the test iterator
# yields files -- confirm (e.g. compare against the iterator's file list).
test_df = test_df.sort_values(by='Filepath')
print(test_df.shape)
test_df.head()

Green for correct prediction, red for incorrect. Way cooler visualization method than a classification report

In [None]:
# Pick 15 random test rows. np.random.randint excludes the upper bound, so it
# must be len(test_df); the previous len(test_df) - 1 could never select the
# last row.
random_index = np.random.randint(0, len(test_df), 15)
fig, axes = plt.subplots(nrows=3, ncols=5, figsize=(25, 15),
                        subplot_kw={'xticks': [], 'yticks': []})

for ax, row in zip(axes.flat, random_index):
    true_label = test_df.Label.iloc[row]
    predicted_label = pred[row]
    # Green title for a correct prediction, red for a wrong one.
    color = "green" if true_label == predicted_label else "red"
    ax.imshow(plt.imread(test_df.Filepath.iloc[row]))
    ax.set_title(f"True: {true_label}\nPredicted: {predicted_label}", color=color)

# Adjust the layout before rendering; calling tight_layout() after show() no
# longer affects the figure that was just displayed.
plt.tight_layout()
plt.show()

But I'll do a classification report anyway. Not sure if 5 examples per species is enough, maybe I could try a bigger validation/test set in the future

In [None]:
# Compare the true test labels with the predicted ones, class by class.
y_test = test_df.Label.tolist()
report = classification_report(y_test, pred)
print(report)

And finally we save the model for later use. Hopefully TensorFlow doesn't bug out like last time, so I can actually reuse the model I spent so long training.

In [None]:
# Persist the trained model to an HDF5 file in the working directory.
model_file = 'inceptionv3_birdclassifier.h5'
model.save(model_file)