In [53]:
import os
import kagglehub
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import pandas as pd
from tqdm import tqdm
import cv2
from src import ImageClassifier

In [54]:
# Load dataset
path = os.path.join(
    kagglehub.dataset_download("rahmasleam/intel-image-dataset"), "Intel Image Dataset"
)
print("Dataset path:", path)

Dataset path: /Users/vladyoslav/.cache/kagglehub/datasets/rahmasleam/intel-image-dataset/versions/1/Intel Image Dataset


In [55]:
# Get list of categories
categories = list(filter(lambda category: category != ".DS_Store", os.listdir(path)))
print("Categories:", categories)

Categories: ['forest', 'buildings', 'glacier', 'street', 'mountain', 'sea']


In [56]:
# Function to get image arrays and labels
def get_image_arrays(dataset_path):
    """
    Retrieve the image arrays and their corresponding labels.

    Args:
        dataset_path (str): Path to the root folder containing images.

    Returns:
        tuple: A tuple containing two lists:
            - List of image arrays (np.ndarray).
            - List of labels for each image.
    """
    images = []
    labels = []

    for category in categories:
        category_path = os.path.join(dataset_path, category)
        for img_file in os.listdir(category_path):
            img_path = os.path.join(category_path, img_file)
            if os.path.isfile(img_path) and img_path.lower().endswith(
                (".png", ".jpg", ".jpeg")
            ):
                image = cv2.imread(img_path)
                if image is not None:  # Ensure the image was successfully loaded
                    images.append(image)
                    labels.append(category)

    return images, labels

In [57]:
# Get all image arrays and labels
images, labels = get_image_arrays(path)

# Split the dataset into training and test sets
train_images, test_images, train_labels, test_labels = train_test_split(
    images, labels, test_size=0.2, random_state=52
)

# Initialize the classifier
classifier = ImageClassifier()

# Train the model on the training set
classifier.fit(train_images, train_labels)

Extracting features from images...
Training the Random Forest classifier...
Training complete!


In [58]:
# Predict and evaluate the F1 score on the test set
y_true = []
y_pred = []
category_stats = {
    category: {"total": 0, "correct": 0, "incorrect": 0} for category in categories
}

# Loop through the test images and predict their categories
for img, true_label in tqdm(zip(test_images, test_labels), total=len(test_images)):
    # Get predicted probabilities and categorize the result
    predicted_probs = classifier.predict(img)
    predicted_label = max(predicted_probs, key=predicted_probs.get)

    # Add true and predicted labels to lists
    y_true.append(true_label)
    y_pred.append(predicted_label)

    # Update category statistics
    category_stats[true_label]["total"] += 1
    if predicted_label == true_label:
        category_stats[true_label]["correct"] += 1
    else:
        category_stats[true_label]["incorrect"] += 1

  5%|▍         | 28/600 [00:00<00:02, 279.70it/s]

Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting f

 14%|█▍        | 83/600 [00:00<00:01, 266.58it/s]

Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting f

 23%|██▎       | 139/600 [00:00<00:01, 273.83it/s]

Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting f

 32%|███▎      | 195/600 [00:00<00:01, 273.25it/s]

Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting f

 42%|████▏     | 252/600 [00:00<00:01, 273.93it/s]

Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting f

 52%|█████▏    | 310/600 [00:01<00:01, 277.29it/s]

Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting f

 61%|██████    | 366/600 [00:01<00:00, 275.31it/s]

Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting f

 70%|███████   | 422/600 [00:01<00:00, 276.05it/s]

Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting f

 80%|███████▉  | 478/600 [00:01<00:00, 275.83it/s]

Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting f

 89%|████████▉ | 534/600 [00:01<00:00, 274.10it/s]

Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting f

100%|██████████| 600/600 [00:02<00:00, 274.75it/s]

Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting features for prediction...
Extracting f




In [59]:
# Calculate F1 score
f1 = f1_score(y_true, y_pred, average="weighted")
print(f"Overall F1 score: {f1:.4f}")

Overall F1 score: 0.1256


In [60]:
# Prepare the category-wise report
report_data = []
for category in categories:
    total = category_stats[category]["total"]
    correct = category_stats[category]["correct"]
    incorrect = category_stats[category]["incorrect"]
    accuracy_category = correct / total if total > 0 else 0
    report_data.append(
        {
            "Category": category,
            "Total": total,
            "Correct": correct,
            "Incorrect": incorrect,
            "Accuracy (%)": accuracy_category * 100,
        }
    )

# Convert to DataFrame for easy display
report_df = pd.DataFrame(report_data)

# Add a row for the total F1 score (this will not be a weighted score, so it's not directly comparable)
report_df.loc["Total"] = report_df.sum(numeric_only=True)
report_df.loc["Total", "Category"] = "Total"
report_df.loc["Total", "Accuracy (%)"] = f1 * 100

# Print the report table
print(report_df)

        Category  Total  Correct  Incorrect  Accuracy (%)
0         forest   97.0      4.0       93.0      4.123711
1      buildings   89.0      5.0       84.0      5.617978
2        glacier  102.0     31.0       71.0     30.392157
3         street  110.0     13.0       97.0     11.818182
4       mountain  110.0     19.0       91.0     17.272727
5            sea   92.0      5.0       87.0      5.434783
Total      Total  600.0     77.0      523.0     12.561602
