In [1]:
import os
import cv2
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

In [2]:
def calculate_histogram(image):
    # Convert image to HSV color space
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # Calculate histogram for the hue channel
    hist = cv2.calcHist([hsv_image], [0], None, [180], [0, 180])
    # Normalize histogram
    hist = cv2.normalize(hist, hist)
    return hist.flatten()

In [3]:
def extract_color_histogram(image, bins):
    hist_red = np.histogram(image[:, :, 0], bins=bins, range=(0, 256))[0]
    hist_green = np.histogram(image[:, :, 1], bins=bins, range=(0, 256))[0]
    hist_blue = np.histogram(image[:, :, 2], bins=bins, range=(0, 256))[0]
    res = np.concatenate((hist_red, hist_green, hist_blue))
    return res

In [4]:
def load_data(folder):
    X = []
    y = []
    classes = os.listdir(folder)
    total_images = 0
    for cls in classes:
        class_folder = os.path.join(folder, cls)
        num_images = len(os.listdir(class_folder))
        print()
        print(f"Loading data for class {cls} with {num_images} images.")
        total_images += num_images
        curr = 0
        for filename in os.listdir(class_folder):
            image_path = os.path.join(class_folder, filename)
            image = cv2.imread(image_path)
            if image is not None:
                hist = extract_color_histogram(image,17)
                X.append(hist)
                y.append(cls)
                curr += 1
                if curr %100 == 0 :
                    print(curr, end = "=>")
    print(f"Total images loaded: {total_images}")
    return [np.array(X), np.array(y)]

In [5]:
def plot_histogram(hist, cls):
    plt.figure()
    plt.title(f'Color Histogram for Class {cls}')
    plt.xlabel('Hue Value')
    plt.ylabel('Frequency')
    plt.plot(hist, color='blue')
    plt.show()

In [6]:
validation_folder = "D:/Study\COMP 6721/COMP6721-project/Dataset/Validation"
features = load_data(validation_folder)
print(features)
# X_val, y_val = load_data(validation_folder)


Loading data for class airfield with 1000 images.
100=>200=>300=>400=>500=>600=>700=>800=>900=>1000=>
Loading data for class bus stand with 1000 images.
100=>200=>300=>400=>500=>600=>700=>800=>900=>1000=>
Loading data for class canyon with 1000 images.
100=>200=>300=>400=>500=>600=>700=>800=>900=>1000=>
Loading data for class market with 1000 images.
100=>200=>300=>400=>500=>600=>700=>800=>900=>1000=>
Loading data for class temple with 1000 images.
100=>200=>300=>400=>500=>600=>700=>800=>900=>1000=>Total images loaded: 5000
[array([[  4323,   5056,   6082, ...,  18237,  18311,   5054],
       [  2681,   5349,  14025, ...,   6932,   6151,   6413],
       [ 42222,  40671,  34653, ...,  63591,  47005, 119177],
       ...,
       [     3,   1165,  14878, ...,  10273,  11599,  16484],
       [  8398,  28284,  44513, ...,   5732,   3009,   1513],
       [ 39745,  38691,  48036, ...,  14480,   2497,    117]], dtype=int64), array(['airfield', 'airfield', 'airfield', ..., 'temple', 'temple',
 

In [250]:
# Train decision tree classifier
# classifier = DecisionTreeClassifier()
classifier = DecisionTreeClassifier(criterion='entropy',max_depth=10, max_features=5)
classifier.fit(X_val, y_val)

In [251]:
# Evaluate classifier
y_pred = classifier.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.5424666666666667


In [252]:
def classify_image(image_path, classifier):
    # Load the image
    image = cv2.imread(image_path)
    if image is None:
        print(f"Failed to load image at {image_path}")
        return None

    # Calculate histogram for the image
    hist = extract_color_histogram(image,17)

    # Reshape the histogram to match the shape expected by the classifier
    hist = hist.reshape(1, -1)

    # Predict the class of the image
    predicted_class = classifier.predict(hist)[0]
    return predicted_class

In [253]:
# test_path = "D:/Study/COMP 6721/Project/COMP6721-project/Dataset/Test/airfield/00000014.jpg"
# predicted_class = classify_image(image_path, classifier)
# print("Predicted class:", predicted_class)

test_path = "D:/Study/COMP 6721/Project/COMP6721-project/Dataset/Test"
X_test_val, y_test_val = load_data(test_path)
y_test_pred = classifier.predict(X_test_val)
test_accuracy = accuracy_score(y_test_val, y_test_pred)
print("Test Accuracy:", test_accuracy)


Loading data for class airfield with 1000 images.
100=>200=>300=>400=>500=>600=>700=>800=>900=>1000=>
Loading data for class bus stand with 1000 images.
100=>200=>300=>400=>500=>600=>700=>800=>900=>1000=>
Loading data for class canyon with 1000 images.
100=>200=>300=>400=>500=>600=>700=>800=>900=>1000=>
Loading data for class market with 1000 images.
100=>200=>300=>400=>500=>600=>700=>800=>900=>1000=>
Loading data for class temple with 1000 images.
100=>200=>300=>400=>500=>600=>700=>800=>900=>1000=>Total images loaded: 5000
Test Accuracy: 0.4258
