In [None]:
!pip install kaggle

In [None]:
from google.colab import files
# Upload kaggle.json from the local machine. The returned dict holds the raw bytes of each
# uploaded file, so bind it to a name instead of leaving it as the cell's last expression;
# otherwise the Kaggle API key would be echoed into the notebook output.
uploaded = files.upload()
print(f"Uploaded files: {list(uploaded)}")

In [None]:
import os

In [None]:
# Copy the uploaded kaggle.json into ~/.kaggle (creating the folder if needed) and
# restrict the file to owner read/write only (mode 600).
!mkdir -p ~/.kaggle
!cp /content/kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the anandkumarsahu09/cattle-breeds-dataset archive with the Kaggle CLI.
!kaggle datasets download -d anandkumarsahu09/cattle-breeds-dataset

In [None]:
# Print the current working directory.
!pwd


In [None]:
# List the current directory.
!ls

In [None]:
!unzip cattle-breeds-dataset.zip -d/content/cow_breed_data

In [None]:
# List the top level of the extraction directory.
!ls /content/cow_breed_data

In [None]:
# List the entries of the "Cattle Breeds" folder (the space in the name is backslash-escaped for the shell).
!ls /content/cow_breed_data/Cattle\ Breeds

In [None]:
# items = os.listdir('/content/cow_breed_data/Cattle Breeds')
# items

In [None]:
# Display the file names inside one breed folder as the cell output.
os.listdir('/content/cow_breed_data/Cattle Breeds/Brown Swiss cattle')

In [None]:
import os.path
# File suffixes (compared case-insensitively) that count as images.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def collect_images(root, extensions=IMAGE_EXTENSIONS):
  """Walk ``root`` and describe every image file found beneath it.

  Args:
    root: directory to scan recursively.
    extensions: lower-case file suffixes, including the dot, accepted as images.

  Returns:
    A list of dicts, one per image, in a deterministic (sorted) order, with keys
    'image_path' (path relative to ``root``), 'breed_class' (name of the folder
    that directly contains the file) and 'full_path' (``root`` joined with the
    relative path).
  """
  suffixes = tuple(extensions)  # str.endswith only accepts a str or a tuple
  records = []
  # os.walk yields, per directory: its path, its sub-directory names and its file names.
  for dirpath, dirnames, filenames in os.walk(root):
    # Directory entries come back in arbitrary order. Sorting dirnames in place fixes the
    # order in which os.walk descends, so the seeded train/test split sees the same
    # sequence of records on every run.
    dirnames.sort()
    breed_class = os.path.basename(dirpath)
    for file in sorted(filenames):
      if not file.lower().endswith(suffixes):
        continue
      full_path = os.path.join(dirpath, file)
      records.append({
          'image_path': os.path.relpath(full_path, root),
          'breed_class': breed_class,
          'full_path': full_path
      })
  return records


data = collect_images('/content/cow_breed_data')
print(f"Found {len(data)} images")


In [None]:
from collections import Counter
# Tally how many image records were collected for each breed folder and display the result.
item = []
for record in data:
  item.append(record['breed_class'])
item_ = dict(Counter(item))
item_


In [None]:
# The sorted entries of the breeds folder define the class order:
# a name's position in the sorted list is its integer label.
items = sorted(os.listdir('/content/cow_breed_data/Cattle Breeds'))
label_map = dict(zip(items, range(len(items))))
class_name = items

for breed_name in items:
  print(f"{label_map[breed_name]}:{breed_name}")

# Attach the integer label to every image record in place, then preview two records.
for record in data:
  record['label'] = label_map[record['breed_class']]
data[:2]

In [None]:
from sklearn.model_selection import train_test_split

# Integer labels, in record order, drive the stratification.
labels = []
for record in data:
  labels.append(record['label'])

# 80/20 split with a fixed seed; stratifying keeps each breed's share equal in both parts.
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42, stratify=labels)

# Images per class label in each part of the split.
train_classes = Counter(record['label'] for record in train_data)
test_classes = Counter(record['label'] for record in test_data)

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Dense,Flatten,Conv2D
from tensorflow.keras import Model
from io import BytesIO

In [None]:
def read_images(add_list, image_size=(300,300)):
  """Load the images listed in ``add_list`` into one normalised tensor.

  Args:
    add_list: non-empty sequence of record dicts; each must hold the image file
      path under the key 'full_path'.
    image_size: (height, width) every image is resized to. Defaults to (300, 300).

  Returns:
    A float32 tensor holding one resized RGB image per record, stacked along a new
    leading axis, with pixel values divided by 255.
  """
  images = []

  for d in add_list:
    raw = tf.io.read_file(d['full_path'])
    # The collected files include PNGs as well as JPEGs, so let TensorFlow detect the
    # format. expand_animations=False guarantees a 3-D (height, width, channels) result,
    # which tf.image.resize needs.
    img = tf.io.decode_image(raw,channels=3,expand_animations=False)
    img = tf.image.resize(img,image_size)
    img = tf.cast(img,tf.float32)/255.0
    images.append(img)
  return tf.stack(images)  # combine the list of image tensors into a single batch tensor



In [None]:
import pandas as pd
# Rebuild the breed-name -> integer-label mapping from the sorted folder listing and
# re-apply it to every record. This repeats the earlier labelling cell, so it assigns
# the same labels again.
items = sorted(os.listdir('/content/cow_breed_data/Cattle Breeds'))
label_map = {}
for position, breed_name in enumerate(items):
  label_map[breed_name] = position

for record in data:
  record['label'] = label_map[record['breed_class']]




In [None]:
import pandas as pd
def read_label(add_list):
  """Return the 'label' value of every record in ``add_list`` as an int32 tensor.

  Args:
    add_list: iterable of record dicts, each holding an integer under 'label'.

  Returns:
    A ``tf.int32`` constant with one entry per record, in the same order.
  """
  return tf.constant([record['label'] for record in add_list],dtype=tf.int32)

In [None]:
# Load both splits fully into memory as image / label tensors.
X_train = read_images(train_data)
y_train = read_label(train_data)
X_test = read_images(test_data)
y_test = read_label(test_data)
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")

# model.fit() does not shuffle a tf.data.Dataset itself, so reshuffle the training
# examples every epoch here. The buffer is capped to bound the extra memory it holds.
shuffle_buffer = min(len(train_data), 512)
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train,y_train))
    .shuffle(shuffle_buffer, seed=42, reshuffle_each_iteration=True)
    .batch(32)
    .prefetch(tf.data.AUTOTUNE)
)
# The test set keeps its original order so predictions line up with y_test by index.
test_dataset = tf.data.Dataset.from_tensor_slices((X_test,y_test)).batch(32).prefetch(tf.data.AUTOTUNE)

In [None]:
from tensorflow import keras
from tensorflow.keras import layers
def create_custom_cnn_functional(input_shape=(300,300,3),num_classes=5):
  """Build the CNN classifier with the Keras functional API.

  The network is six Conv2D(3x3, relu) -> BatchNormalization -> MaxPooling2D(2x2)
  stages with 16, 32, 64, 128, 256 and 512 filters, followed by global average
  pooling, two Dense/Dropout stages (256 units / 0.5, 128 units / 0.3) and a
  softmax output layer.

  Args:
    input_shape: shape of one input image, without the batch axis.
    num_classes: number of units in the softmax output layer.

  Returns:
    The uncompiled ``keras.Model``.
  """
  inputs = keras.Input(shape=input_shape)

  # Feature extractor: the filter count doubles at every stage.
  x = inputs
  for filters in (16, 32, 64, 128, 256, 512):
    x = layers.Conv2D(filters,(3,3),activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2,2))(x)

  # Classifier head: global average pooling collapses each feature map to a single
  # value before the dense layers.
  x = layers.GlobalAveragePooling2D()(x)
  for units, drop_rate in ((256, 0.5), (128, 0.3)):
    x = layers.Dense(units,activation='relu')(x)
    x = layers.Dropout(drop_rate)(x)
  outputs = layers.Dense(num_classes,activation='softmax')(x)

  return keras.Model(inputs=inputs,outputs=outputs)



In [None]:
# Size the softmax layer from the label map rather than relying on the builder's default
# of 5, so the output width always covers every label assigned to the data.
model = create_custom_cnn_functional(num_classes=len(label_map))
# Labels are plain integers, hence the sparse variant of categorical cross-entropy.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0005),
    loss = 'sparse_categorical_crossentropy',
    metrics=['accuracy']
)
model.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping,ModelCheckpoint
# Stop when validation loss has not improved for 20 epochs and restore the best weights.
early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# Save the model to 'best_cattle_model.keras' each time validation accuracy reaches a new best.
best_model = ModelCheckpoint(
    filepath='best_cattle_model.keras',
    save_best_only=True,
    monitor='val_accuracy',
    verbose=1,
)

callbacks = [early_stop, best_model]

In [None]:
# Train for at most 100 epochs with the early-stopping and checkpoint callbacks attached.
# NOTE(review): test_dataset is passed as the validation data, so early stopping and
# checkpoint selection both see the test set; metrics later reported on it are optimistic.
history = model.fit(train_dataset, epochs=100, validation_data=test_dataset, callbacks=callbacks, verbose=1)

In [None]:
# ModelCheckpoint saves the best model during training
# It monitors validation accuracy and keeps the model only when it improves

# train_dataset and test_dataset are created using tf.data for efficient loading
# .batch(32) splits data into groups of 32 samples each
# .prefetch() helps speed up training by preparing data while the model trains

# batch_size in model.fit() is NOT needed if using tf.data.Dataset (already batched)

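# model.fit() shows a running average of accuracy over the batches seen so far in the epoch,
# measured in training mode (dropout active) while the weights are still being updated
# model.evaluate() measures accuracy on the whole dataset in inference mode with the final weights (more exact)
# So a small difference between the training accuracy shown by fit() and evaluate() is normal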


In [None]:
# Final loss and accuracy on the test split (the model was compiled with metrics=['accuracy']).
test_loss,test_accuracy = model.evaluate(test_dataset,verbose=1)

# The same two metrics on the training split, for comparison.
train_loss,train_accuracy = model.evaluate(train_dataset)

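step_accuracy → the training accuracy shown in the progress bar during an epoch;
it is updated after every training step (one batch) as a running average over the batches seen so far

val_accuracy → accuracy on the validation data, which the model is not trained on;
it is calculated once at the end of each full epoch (after all batches)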



In [None]:
# Display the accuracy returned by model.evaluate() on the test dataset.
test_accuracy

In [None]:
import matplotlib.pyplot as plt

# Two side-by-side panels: accuracy on the left, loss on the right.
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

# Per panel: history key, training legend, validation legend, title, y-axis label.
panel_specs = [
    ('accuracy', 'Training Accuracy', 'validation Accuracy', 'Model Accuracy Over Epochs', 'Accuracy'),
    ('loss', "training loss", 'validation loss', 'Model Loss Over Epochs', 'Loss'),
]

for ax, (metric, train_legend, val_legend, panel_title, y_label) in zip(axes, panel_specs):
  ax.plot(history.history[metric], label=train_legend)
  ax.plot(history.history['val_' + metric], label=val_legend)
  ax.set_title(panel_title)
  ax.set_xlabel('Epoch')
  ax.set_ylabel(y_label)
  ax.legend()
  ax.grid(True)

plt.tight_layout()
plt.show()

Make Predictions

In [None]:
import numpy as np

# Class probabilities for every test image, in test_dataset order.
predictions = model.predict(test_dataset)
# Integer labels were assigned from the *sorted* folder listing, so the index -> name lookup
# must use the same order. os.listdir alone returns entries in arbitrary order, which can
# attach the wrong breed name to a label.
class_names = sorted(os.listdir('/content/cow_breed_data/Cattle Breeds'))
predicted_classes = np.argmax(predictions,axis=1)
# Show predicted and true labels for the first ten test images together; only the last
# expression of a cell is displayed.
predicted_classes[:10], y_test[:10].numpy()

In [None]:
# Print a short report for each of the first ten test images.
for i in range(10):
  true_label = y_test[i].numpy()
  pred_label = predicted_classes[i]
  confidence = predictions[i][pred_label]*100
  verdict = 'yes' if true_label == pred_label else 'No'
  report = [
      f"Image {i+1}:",
      f"  True: {class_names[true_label]}",
      f"  Predicted: {class_names[pred_label]} ({confidence:.2f}% confidence)",
      f"  Correct: {verdict}",
      "",  # trailing empty entry yields the blank line between reports
  ]
  print("\n".join(report))

In [None]:
def show_predictions(model, X_test, y_test, num_images=9):
  """Plot randomly chosen test images titled with their true and predicted breed.

  A title is green when the prediction matches the true label and red otherwise.
  Breed names are looked up in the notebook-level ``class_names`` list.

  Args:
    model: trained model whose ``predict`` returns one row of class probabilities
      per input image.
    X_test: tensor of test images, indexed along its first axis.
    y_test: tensor of integer labels aligned with ``X_test``.
    num_images: number of images to draw, laid out three per row. Nothing is drawn
      when it is less than 1.
  """
  if num_images < 1:
    return

  # Size the grid from num_images instead of assuming exactly nine panels.
  n_cols = 3
  n_rows = (num_images + n_cols - 1) // n_cols
  fig,axes = plt.subplots(n_rows,n_cols,figsize=(15,5*n_rows))
  axes = np.asarray(axes).ravel()

  indices = np.random.choice(len(X_test),num_images,replace=False)

  # One batched forward pass instead of a separate predict() call per image.
  probs = model.predict(tf.gather(X_test, indices), verbose=0)

  for i,idx in enumerate(indices):

    img = X_test[idx]
    # Must be a local: a misspelt name here would silently fall back to any global
    # `true_label` left over from another cell.
    true_label = y_test[idx].numpy()

    pred_label = np.argmax(probs[i])
    confidence = probs[i][pred_label] * 100

    axes[i].imshow(img)
    axes[i].axis('off')

    color = 'green' if true_label == pred_label else 'red'
    title = f"True: {class_names[true_label]}\n"
    title += f"Pred: {class_names[pred_label]}\n"
    title += f"Confidence: {confidence:.1f}%"

    axes[i].set_title(title, color=color, fontsize=10)

  # Blank out grid cells left over when num_images is not a multiple of three.
  for unused_ax in axes[num_images:]:
    unused_ax.axis('off')

  plt.tight_layout()
  plt.show()

In [None]:
# Draw nine randomly selected test images with their true and predicted breeds.
show_predictions(model, X_test, y_test, num_images=9)
