<a href="https://colab.research.google.com/github/vaneesa-writes/deep_learning_projects/blob/main/birds_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.layers import Dropout, Flatten, Dense, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix


In [2]:
import os
import numpy as np


In [3]:
def build_model(nbr_classes):

    base_model = InceptionV3(weights="imagenet", include_top=False, input_tensor=Input(shape=(229, 229, 3)))

    head_model = base_model.output
    head_model = Flatten()(head_model)
    head_model = Dense(512)(head_model)
    head_model = Dropout(0.5)(head_model)
    head_model = Dense(nbr_classes, activation="softmax")(head_model)

    model = Model(inputs=base_model.input, outputs=head_model)

    for layer in base_model.layers:
        layer.trainable = False

    return model

In [4]:
def build_data_pipelines(batch_size, train_data_path, val_data_path, eval_data_path):

    train_augmentor = ImageDataGenerator(
        rescale = 1. / 255,
        rotation_range=25,
        zoom_range=0.15,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.15,
        horizontal_flip=True,
        fill_mode="nearest"
    )

    val_augmentor = ImageDataGenerator(
        rescale = 1. / 255
    )

    train_generator = train_augmentor.flow_from_directory(
        train_data_path,
        class_mode="categorical",
        target_size=(229,229),
        color_mode="rgb",
        shuffle=True,
        batch_size=batch_size
    )

    val_generator = val_augmentor.flow_from_directory(
        val_data_path,
        class_mode="categorical",
        target_size=(229,229),
        color_mode="rgb",
        shuffle=False,
        batch_size=batch_size
    )

    eval_generator = val_augmentor.flow_from_directory(
        eval_data_path,
        class_mode="categorical",
        target_size=(229,229),
        color_mode="rgb",
        shuffle=False,
        batch_size=batch_size
    )


    return train_generator, val_generator, eval_generator


In [5]:
def get_number_of_imgs_inside_folder(directory):

    totalcount = 0

    for root, dirnames, filenames in os.walk(directory):
        for filename in filenames:
            _, ext = os.path.splitext(filename)
            if ext in [".png", ".jpg", ".jpeg"]:
                totalcount = totalcount + 1

    return totalcount

In [19]:
def train(path_to_data, batch_size, epochs):

    path_train_data = os.path.join(path_to_data, 'train')
    path_val_data = os.path.join(path_to_data, 'valid')
    path_eval_data = os.path.join(path_to_data, 'eval')
    print(path_eval_data)

    total_train_imgs = get_number_of_imgs_inside_folder(path_train_data)
    total_val_imgs = get_number_of_imgs_inside_folder(path_val_data)
    total_eval_imgs = get_number_of_imgs_inside_folder(path_eval_data)

    print(total_train_imgs, total_val_imgs, total_eval_imgs)
    

    train_generator, val_generator, eval_generator = build_data_pipelines(
        batch_size=batch_size,
        train_data_path=path_train_data,
        val_data_path=path_val_data,
        eval_data_path=path_eval_data
    )

    classes_dict = train_generator.class_indices

    model = build_model(nbr_classes=len(classes_dict.keys()))

    optimizer = Adam(lr=1e-5)

    model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

    ckpt_saver = ModelCheckpoint(
        path_to_save_model,
        monitor="val_accuracy",
        mode='max',
        save_best_only=True,
        save_freq='epoch',
        verbose=1
    )

    model.fit_generator(
        train_generator,
        steps_per_epoch=total_train_imgs // batch_size,
        validation_data=val_generator,
        validation_steps=total_val_imgs // batch_size,
        epochs=epochs,
        callbacks=[ckpt_saver]
    )

    print("[INFO] Evaluation phase...")

    predictions = model.predict_generator(eval_generator)
    predictions_idxs = np.argmax(predictions, axis=1)

    my_classification_report = classification_report(eval_generator.classes, predictions_idxs, 
                                                     target_names=eval_generator.class_indices.keys())

    my_confusion_matrix = confusion_matrix(eval_generator.classes, predictions_idxs)

    print("[INFO] Classification report : ")
    print(my_classification_report)

    print("[INFO] Confusion matrix : ")
    print(my_confusion_matrix)


In [None]:
if __name__ == "__main__":

    path_to_data = '/content/'
    path_to_save_model = '/content/drive/MyDrive/models/birds_weights'
    train(path_to_data, 32, 20)

/content/eval
58388 2000 2000
Found 58388 images belonging to 400 classes.
Found 2000 images belonging to 400 classes.
Found 2000 images belonging to 400 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


  super(Adam, self).__init__(name, **kwargs)


Epoch 1/20
Epoch 1: val_accuracy improved from -inf to 0.63609, saving model to /content/drive/MyDrive/models/birds_weights
Epoch 2/20
Epoch 2: val_accuracy improved from 0.63609 to 0.75353, saving model to /content/drive/MyDrive/models/birds_weights
Epoch 3/20
Epoch 3: val_accuracy improved from 0.75353 to 0.81552, saving model to /content/drive/MyDrive/models/birds_weights
Epoch 4/20
Epoch 4: val_accuracy improved from 0.81552 to 0.82913, saving model to /content/drive/MyDrive/models/birds_weights
Epoch 5/20
Epoch 5: val_accuracy improved from 0.82913 to 0.84829, saving model to /content/drive/MyDrive/models/birds_weights
Epoch 6/20
Epoch 6: val_accuracy improved from 0.84829 to 0.86643, saving model to /content/drive/MyDrive/models/birds_weights
Epoch 7/20
Epoch 7: val_accuracy improved from 0.86643 to 0.88155, saving model to /content/drive/MyDrive/models/birds_weights
Epoch 8/20
 128/1824 [=>............................] - ETA: 11:07 - loss: 1.5735 - accuracy: 0.6384

In [11]:
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json

mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [14]:
!kaggle datasets download -d gpiosenka/100-bird-species

Downloading 100-bird-species.zip to /content
 99% 1.48G/1.49G [00:12<00:00, 158MB/s]
100% 1.49G/1.49G [00:13<00:00, 121MB/s]


In [None]:
!unzip /content/100-bird-species.zip