In [89]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow_hub as hub
from tensorflow.keras import layers, models
import tf_keras
from sklearn.model_selection import train_test_split

In [81]:
labels_csv_df = pd.read_csv("dog-breed-data/labels.csv")
labels_csv_df.head(10)

Unnamed: 0,id,breed
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo
2,001cdf01b096e06d78e9e5112d419397,pekinese
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever
5,002211c81b498ef88e1b40b9abf84e1d,bedlington_terrier
6,00290d3e1fdd27226ba27a8ce248ce85,bedlington_terrier
7,002a283a315af96eaea0e28e7163b21b,borzoi
8,003df8b8a8b05244b1d920bb6cf451f9,basenji
9,0042188c895a2f14ef64a918ed9c7b64,scottish_deerhound


In [45]:
unique_breeds = sorted(labels_csv_df["breed"].unique().tolist())

In [47]:
labels_csv_df["breed_index"] = labels_csv_df["breed"].map({breed: idx for idx, breed in enumerate(unique_breeds)})

In [51]:
labels_csv_df.head(10)

Unnamed: 0,id,breed,breed_index
0,000bec180eb18c7604dcecc8fe0dba07,boston_bull,19
1,001513dfcb2ffafc82cccf4d8bbaba97,dingo,37
2,001cdf01b096e06d78e9e5112d419397,pekinese,85
3,00214f311d5d2247d5dfe4fe24b2303d,bluetick,15
4,0021f9ceb3235effd7fcde7f7538ed62,golden_retriever,49
5,002211c81b498ef88e1b40b9abf84e1d,bedlington_terrier,10
6,00290d3e1fdd27226ba27a8ce248ce85,bedlington_terrier,10
7,002a283a315af96eaea0e28e7163b21b,borzoi,18
8,003df8b8a8b05244b1d920bb6cf451f9,basenji,7
9,0042188c895a2f14ef64a918ed9c7b64,scottish_deerhound,97


#### Load Image labels

In [71]:
IMAGE_FOLDER = "dog-breed-data/train"
IMAGE_SIZE = (224,224)
def load_image(image_id,label):
    image_path = tf.strings.join([IMAGE_FOLDER, "/", image_id, ".jpg"])
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, IMAGE_SIZE)
    image = image/255.0
    return image, label

# load_image("0021f9ceb3235effd7fcde7f7538ed62",49)

In [72]:
image_ids = labels_csv_df["id"].values
labels = labels_csv_df["breed_index"].values

#### Create TF dataset

In [73]:
dataset = tf.data.Dataset.from_tensor_slices((image_ids, labels)) # Still raw data

In [74]:
dataset = dataset.map(load_image, num_parallel_calls=tf.data.AUTOTUNE) # Actual data loading

In [76]:
BATCH_SIZE = 32
dataset = dataset.shuffle(buffer_size=1000)
dataset = dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

#### Build model

In [85]:
MODEL_URL = "https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/classification/4"

feature_extractor_layer = hub.KerasLayer(
    MODEL_URL,
    input_shape=(224, 224, 3),
    trainable=False  # freeze weights
)

In [95]:
num_classes = labels_csv_df["breed"].nunique()

model = tf_keras.Sequential([
    feature_extractor_layer,
    tf_keras.layers.Dense(units=num_classes, activation='softmax')  # your dog breed classes
])

In [96]:
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

#### Train model

In [97]:
history = model.fit(dataset, epochs=10)  # Adjust epochs as needed

Epoch 1/10


2025-04-20 12:59:53.721394: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [98]:
test_loss, test_acc = model.evaluate(dataset)
print(f"Test accuracy: {test_acc}")

Test accuracy: 0.9978477954864502


In [100]:
model.save('models/dog_breed_model_updated.h5')

#### Predict on sample image

In [109]:
loaded_model = tf_keras.models.load_model("models/dog_breed_model_updated.h5",custom_objects={"KerasLayer": hub.KerasLayer})

from PIL import Image
# --- Set constants ---
IMAGE_SIZE = (224, 224)  # Input size used during training
CLASS_NAMES = unique_breeds  # The list of breed names (same order as label encoding)

def predict_dog_breed(image_path):
    # Extract the image ID from filename
    image_id = os.path.splitext(os.path.basename(image_path))[0]

    # Get true label from CSV
    true_label_row = labels_csv_df[labels_csv_df["id"] == image_id]
    if not true_label_row.empty:
        true_label = true_label_row["breed"].values[0]
    else:
        true_label = "Unknown"

    # Load and preprocess image
    img = Image.open(image_path).convert("RGB")
    img = img.resize(IMAGE_SIZE)
    img_array = np.array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)

    # Predict
    predictions = model.predict(img_array)
    predicted_index = np.argmax(predictions, axis=1)[0]
    predicted_breed = CLASS_NAMES[predicted_index]
    confidence = predictions[0][predicted_index] * 100  # convert to %

    # Print results
    print(f"✅ True Label      : {true_label}")
    print(f"🔮 Predicted Label : {predicted_breed}")
    print(f"📊 Confidence      : {confidence:.2f}%")

In [114]:
image_path = "dog-breed-data/test/0a50874d29b277cc889c0aa1cc8cd03c.jpg"
print("Predicted Breed:", predict_dog_breed(image_path))

✅ True Label      : Unknown
🔮 Predicted Label : great_dane
📊 Confidence      : 99.14%
Predicted Breed: None
