# Model Design , Dataset and Train Development

In [1]:
# Import necessary libraries
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image

# Dataset and transformation definitions
# Include all relevant dataset and transform code here (as in your current script)

# Define a custom PyTorch dataset
class AnimalDataset(Dataset):
    def __init__(self, data_dir, classes, transform=None):
        self.data_dir = data_dir
        self.classes = classes
        self.transform = transform
        self.images = []
        self.labels = []

        # Load images and their labels
        for label, class_name in enumerate(classes):
            class_dir = os.path.join(data_dir, class_name)
            for img_name in os.listdir(class_dir):
                img_path = os.path.join(class_dir, img_name)
                self.images.append(img_path)
                self.labels.append(label)

        # Print the number of loaded images and their labels
        print(f"Loaded {len(self.images)} images with labels: {set(self.labels)}")

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_path = self.images[idx]
        label = self.labels[idx]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label

# Transformation pipeline for the images
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224
    transforms.RandomHorizontalFlip(),  # Randomly flip images horizontally
    transforms.RandomRotation(15),     # Randomly rotate images within 15 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2),  # Adjust brightness and contrast
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),  # Normalize
])

# Define a simple custom model
class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),  # Added another Conv2D layer
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 28 * 28, 256),  # Adjusted dimensions
            nn.ReLU(),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

# Initialize weights using Xavier initialization
def initialize_weights(m):
    if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)


# Model definition and initialization
# Include your model definition and `initialize_weights`

# Set up dataset and data loaders
data_dir = "./images"
classes = ["elephant", "giraffe", "kangaroo", "penguin", "tiger", "zebra"]
num_classes = len(classes)
batch_size = 8
num_epochs = 10
learning_rate = 0.001

# Dataset loading and splitting
dataset = AnimalDataset(data_dir, classes, transform=transform)
# Display 2 samples per label for debugging
label_count = {label: 0 for label in range(len(classes))}
for idx in range(len(dataset)):
    image, label = dataset[idx]
    if label_count[label] < 2:
        print(f"Image: {dataset.images[idx]}, Label: {label}")
        label_count[label] += 1
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, _ = torch.utils.data.random_split(dataset, [train_size, test_size])
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Model initialization
model = SimpleCNN(num_classes=num_classes)
model.apply(initialize_weights)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# Training loop
for epoch in range(num_epochs):
    print(f"Starting epoch {epoch + 1} of {num_epochs}")
    model.train()
    running_loss = 0.0
    for i, (images, labels) in enumerate(train_loader):
        if i % 10 == 0:
            print(f"Processing batch {i + 1}/{len(train_loader)}")
        outputs = model(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    scheduler.step()
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader):.4f}")

# Save the model
torch.save(model.state_dict(), "animal_classification_model.pth")


Loaded 180 images with labels: {0, 1, 2, 3, 4, 5}
Image: ./images\elephant\input_10.png, Label: 0
Image: ./images\elephant\input_11.png, Label: 0
Image: ./images\giraffe\input_11.png, Label: 1
Image: ./images\giraffe\input_12.png, Label: 1
Image: ./images\kangaroo\input_11.png, Label: 2
Image: ./images\kangaroo\input_12.png, Label: 2
Image: ./images\penguin\input_102.png, Label: 3
Image: ./images\penguin\input_15.png, Label: 3
Image: ./images\tiger\input_1.png, Label: 4
Image: ./images\tiger\input_13.png, Label: 4
Image: ./images\zebra\input_14.png, Label: 5
Image: ./images\zebra\input_17.png, Label: 5
Starting epoch 1 of 10
Processing batch 1/18
Processing batch 11/18
Epoch 1/10, Loss: 2.3263
Starting epoch 2 of 10
Processing batch 1/18
Processing batch 11/18
Epoch 2/10, Loss: 1.6342
Starting epoch 3 of 10
Processing batch 1/18
Processing batch 11/18
Epoch 3/10, Loss: 1.3133
Starting epoch 4 of 10
Processing batch 1/18
Processing batch 11/18
Epoch 4/10, Loss: 1.1655
Starting epoch 5 o

# Test Set, Prediction Defines and Evaluation

In [2]:
# Import necessary libraries
import os
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
from sklearn.metrics import classification_report

# Dataset and transformation definitions
# Use the same dataset and transform code as in the training notebook

# Model definition
# Include your model definition

# Load dataset and test data
data_dir = "./images"
classes = ["elephant", "giraffe", "kangaroo", "penguin", "tiger", "zebra"]
batch_size = 8

# Dataset loading and splitting
dataset = AnimalDataset(data_dir, classes, transform=transform)
_, test_dataset = torch.utils.data.random_split(dataset, [int(0.8 * len(dataset)), len(dataset) - int(0.8 * len(dataset))])
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Load the saved model
model = SimpleCNN(num_classes=len(classes))
model.load_state_dict(torch.load("animal_classification_model.pth"))
model.eval()

# Evaluation
y_true = []
y_pred = []
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        y_true.extend(labels.numpy())
        y_pred.extend(predicted.numpy())

print("Classification Report:")
print(classification_report(y_true, y_pred, target_names=classes))

# Example prediction on a single image
def predict_image(image_path, model, transform):
    model.eval()
    image = Image.open(image_path).convert("RGB")
    if transform:
        image = transform(image).unsqueeze(0)
    with torch.no_grad():
        output = model(image)
        _, predicted = torch.max(output, 1)
    return classes[predicted.item()]

# Test prediction
example_image_path = "Sumatran_Tiger_Berlin_Tierpark.jpg"
predicted_class = predict_image(example_image_path, model, transform)
print(f"Predicted Class: {predicted_class}")


Loaded 180 images with labels: {0, 1, 2, 3, 4, 5}


  model.load_state_dict(torch.load("animal_classification_model.pth"))


Classification Report:
              precision    recall  f1-score   support

    elephant       0.86      0.75      0.80         8
     giraffe       1.00      1.00      1.00         8
    kangaroo       0.50      0.67      0.57         3
     penguin       1.00      1.00      1.00         5
       tiger       0.86      0.86      0.86         7
       zebra       1.00      1.00      1.00         5

    accuracy                           0.89        36
   macro avg       0.87      0.88      0.87        36
weighted avg       0.90      0.89      0.89        36

Predicted Class: tiger


# 1. Performance Analysis
Classification Report
The model achieved an accuracy of 89% on the test set, which is a strong result. Key highlights include:

Elephant: Precision = 0.86, Recall = 0.75, indicating a good balance but room for improvement in recall.
Giraffe: Perfect performance (Precision, Recall, and F1-score = 1.00).
Kangaroo: Precision = 0.50, Recall = 0.67, suggesting the model predicts this class conservatively.
Penguin, Tiger, and Zebra: Performed exceptionally well, with Precision and Recall ≥ 0.86.
Overall
The model is robust but struggles slightly with kangaroo and elephant, likely due to:

Data imbalance: If these classes have fewer samples or more noisy images.
Visual similarity: Confusion with other classes (e.g., elephant and giraffe in similar environments).