In [1]:
import os
import numpy as np
import pandas as pd
import cv2
from glob import glob

import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam

from sklearn.model_selection import train_test_split

In [2]:
def build_model(size, num_classes):
    inputs = Input((size, size, 3))
    backbone = MobileNetV2(input_tensor=inputs, include_top=False, weights="imagenet")
    backbone.trainable = True
    x = backbone.output
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.2)(x)
    x = Dense(1024, activation="relu")(x)
    x = Dense(num_classes, activation="softmax")(x)

    model = tf.keras.Model(inputs, x)
    return model

In [3]:
def read_image(path, size):
    image = cv2.imread(path, cv2.IMREAD_COLOR)
    image = cv2.resize(image, (size, size))
    image = image / 255.0
    image = image.astype(np.float32)
    return image

In [4]:
def parse_data(x, y):
    x = x.decode()

    num_class = 120
    size = 224

    image = read_image(x, size)
    label = [0] * num_class
    label[y] = 1
    label = np.array(label)
    label = label.astype(np.int32)

    return image, label

In [5]:
def tf_parse(x, y):
    x, y = tf.numpy_function(parse_data, [x, y], [tf.float32, tf.int32])
    x.set_shape((224, 224, 3))
    y.set_shape((120))
    return x, y

In [6]:
def tf_dataset(x, y, batch=8):
    dataset = tf.data.Dataset.from_tensor_slices((x, y))
    dataset = dataset.map(tf_parse)
    dataset = dataset.batch(batch)
    dataset = dataset.repeat()
    return dataset

In [7]:
path = "dog_breed/"
train_path = os.path.join(path, "train/*")
test_path = os.path.join(path, "test/*")
labels_path = os.path.join(path, "labels.csv")


In [8]:
labels_df = pd.read_csv(labels_path)
breed = labels_df["breed"].unique()
print("Number of Breed: ", len(breed))


Number of Breed:  120


In [9]:
breed2id = {name: i for i, name in enumerate(breed)}

In [10]:
ids = glob(train_path)
labels = []

In [12]:
for image_id in ids:
    image_id = image_id.split("/")[-1].split("n")[-1].split(".")[0]
    try:
        breed_name = list(labels_df[labels_df.id == image_id[1:]]["breed"])[0]
        breed_idx = breed2id[breed_name]
        labels.append(breed_idx)
    except:
        print(image_id[:])

In [13]:
ids = ids[:1000]
labels = labels[:1000]
print(ids[0:1000])
print(labels[0:1000])

['dog_breed/train\\000bec180eb18c7604dcecc8fe0dba07.jpg', 'dog_breed/train\\001513dfcb2ffafc82cccf4d8bbaba97.jpg', 'dog_breed/train\\001cdf01b096e06d78e9e5112d419397.jpg', 'dog_breed/train\\00214f311d5d2247d5dfe4fe24b2303d.jpg', 'dog_breed/train\\0021f9ceb3235effd7fcde7f7538ed62.jpg', 'dog_breed/train\\002211c81b498ef88e1b40b9abf84e1d.jpg', 'dog_breed/train\\00290d3e1fdd27226ba27a8ce248ce85.jpg', 'dog_breed/train\\002a283a315af96eaea0e28e7163b21b.jpg', 'dog_breed/train\\003df8b8a8b05244b1d920bb6cf451f9.jpg', 'dog_breed/train\\0042188c895a2f14ef64a918ed9c7b64.jpg', 'dog_breed/train\\004396df1acd0f1247b740ca2b14616e.jpg', 'dog_breed/train\\0067dc3eab0b3c3ef0439477624d85d6.jpg', 'dog_breed/train\\00693b8bc2470375cc744a6391d397ec.jpg', 'dog_breed/train\\006cc3ddb9dc1bd827479569fcdc52dc.jpg', 'dog_breed/train\\0075dc49dab4024d12fafe67074d8a81.jpg', 'dog_breed/train\\00792e341f3c6eb33663e415d0715370.jpg', 'dog_breed/train\\007b5a16db9d9ff9d7ad39982703e429.jpg', 'dog_breed/train\\007b8a078828

In [14]:
## Spliting the dataset
train_x, valid_x = train_test_split(ids, test_size=0.2, random_state=42)
train_y, valid_y = train_test_split(labels, test_size=0.2, random_state=42)

In [15]:
## Parameters
size = 224
num_classes = 120
lr = 1e-4
batch = 16
epochs = 10

In [16]:
## Model
model = build_model(size, num_classes)
model.compile(loss="categorical_crossentropy", optimizer=Adam(lr), metrics=["acc"])
# model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5


In [17]:
## Dataset
train_dataset = tf_dataset(train_x, train_y, batch=batch)
valid_dataset = tf_dataset(valid_x, valid_y, batch=batch)

In [18]:
## Training
callbacks = [
    ModelCheckpoint("model.h5", verbose=1, save_best_only=True),
    ReduceLROnPlateau(factor=0.1, patience=5, min_lr=1e-6)
]
train_steps = (len(train_x)//batch) + 1
valid_steps = (len(valid_x)//batch) + 1
model.fit(train_dataset,
        steps_per_epoch=train_steps,
        validation_steps=valid_steps,
        validation_data=valid_dataset,
        epochs=epochs,
        callbacks=callbacks)

Epoch 1/10
Epoch 00001: val_loss improved from inf to 4.45511, saving model to model.h5
Epoch 2/10
Epoch 00002: val_loss improved from 4.45511 to 3.89181, saving model to model.h5
Epoch 3/10
Epoch 00003: val_loss improved from 3.89181 to 3.38448, saving model to model.h5
Epoch 4/10
Epoch 00004: val_loss improved from 3.38448 to 3.13200, saving model to model.h5
Epoch 5/10
Epoch 00005: val_loss improved from 3.13200 to 3.00485, saving model to model.h5
Epoch 6/10
Epoch 00006: val_loss improved from 3.00485 to 2.93436, saving model to model.h5
Epoch 7/10
Epoch 00007: val_loss improved from 2.93436 to 2.89521, saving model to model.h5
Epoch 8/10
Epoch 00008: val_loss improved from 2.89521 to 2.84883, saving model to model.h5
Epoch 9/10
Epoch 00009: val_loss improved from 2.84883 to 2.81632, saving model to model.h5
Epoch 10/10
Epoch 00010: val_loss improved from 2.81632 to 2.79742, saving model to model.h5


<tensorflow.python.keras.callbacks.History at 0x1fb8da20370>