In [54]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
import math
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

Function for extracting a color histogram from a given image; used to extract features of (non)barnacle images:

In [55]:
def get_hist(image):
    """Build a colour-histogram feature vector for a single image.

    The image is converted from BGR to HSV, a 32-bin histogram over the
    value range [0, 256] is computed for each of the three channels, every
    histogram is normalized with ``cv2.normalize``'s default settings and
    flattened, and the results are joined in H, S, V order.

    Parameters
    ----------
    image : array
        Image in BGR channel order (it is converted with
        ``cv2.COLOR_BGR2HSV``).

    Returns
    -------
    numpy.ndarray
        1-D feature vector: the H bins, then the S bins, then the V bins.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # NOTE(review): OpenCV's 8-bit hue presumably only spans 0-179, so the
    # upper hue bins would stay empty with a [0, 256] range -- confirm before
    # changing it, because that would alter every extracted feature.
    channel_hists = []
    for channel in range(3):  # 0 = H, 1 = S, 2 = V
        hist = cv2.calcHist([hsv], [channel], None, [32], [0, 256])
        channel_hists.append(cv2.normalize(hist, hist).flatten())

    # one flat vector per image
    return np.concatenate(channel_hists)

Function to load images from a directory with two sub-directories (in this case, one folder of barnacle objects and one folder of non-barnacle objects) and extract features and labels from all images in each sub-directory.
The names of the sub-directories ('yes_barnacle' and 'no_barnacle') are used as the labels for the images.

In [65]:
def load_data(image_dir, sub1, sub2):
    """Load every readable image under two sub-directories as features and labels.

    Each file in ``image_dir/sub1`` and then ``image_dir/sub2`` is read with
    ``cv2.imread``; files that cannot be read as images are skipped. The
    feature vector of every remaining image comes from ``get_hist`` and its
    label is the name of the sub-directory it was found in.

    Parameters
    ----------
    image_dir : str
        Directory that contains both sub-directories.
    sub1, sub2 : str
        Sub-directory names; they double as the class labels.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Features (one row per image) and the matching labels, ordered by
        sub-directory (``sub1`` first) and then by file name.
    """
    features = []
    labels = []

    for label in (sub1, sub2):
        path = os.path.join(image_dir, label)
        # os.listdir gives entries in arbitrary order; sorting keeps the row
        # order (and any seeded train/test split built on it) reproducible.
        for image_name in sorted(os.listdir(path)):
            image = cv2.imread(os.path.join(path, image_name))
            if image is None:  # skip unreadable / non-image files
                continue
            features.append(get_hist(image))
            labels.append(label)

    return np.array(features), np.array(labels)

Function that uses a previously trained (fitted) model to make a prediction on an image (whether or not the object pictured is a barnacle; should return either 'no_barnacle' or 'yes_barnacle'):

In [57]:
def predict(img, model):
    """Classify one image with an already-fitted model.

    Parameters
    ----------
    img : array
        Image passed straight to ``get_hist`` for feature extraction.
    model : object
        Anything exposing ``predict(list_of_feature_vectors)``.

    Returns
    -------
    The first (and only) element of the model's prediction for this image.
    """
    # model.predict works on a batch, so wrap the single vector in a list
    return model.predict([get_hist(img)])[0]

Function to display the training images in a given sub-directory that were taken from a particular source png image (selected by file-name prefix):

In [64]:
def show_images(folder, img_num):
    """Display, in a grid, the images in ``folder`` whose names start with a prefix.

    Parameters
    ----------
    folder : str
        Directory to scan for image files.
    img_num : str
        File-name prefix to select on; the comparison is case-insensitive.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``. Nothing is drawn when no
        matching file can be read.
    """
    prefix = str(img_num).lower()

    # collect matching files; sorted so the grid order is stable between runs
    image_files = sorted(
        os.path.join(folder, f)
        for f in os.listdir(folder)
        if f.lower().startswith(prefix)
    )

    # load first, so the grid is sized by the images that can actually be shown
    loaded = []
    for file_path in image_files:
        image = cv2.imread(file_path)
        if image is None:
            print(f"couldn't load: {file_path}")
            continue
        # cv2.imread yields BGR, matplotlib expects RGB
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        loaded.append((file_path, image_rgb))

    if not loaded:
        # avoid creating a zero-height figure
        print(f"no images to show in {folder} for prefix {prefix!r}")
        return

    # images per row to display
    images_per_row = 6

    # find number of rows needed
    n_rows = math.ceil(len(loaded) / images_per_row)

    # set up the figure size
    plt.figure(figsize=(10, 5 * n_rows))

    # iterate over the images and plot them
    for idx, (file_path, image_rgb) in enumerate(loaded):
        plt.subplot(n_rows, images_per_row, idx + 1)
        plt.imshow(image_rgb)
        plt.title(os.path.basename(file_path), fontsize=10)
        plt.axis("off")

    # tighten the spacing between subplots
    plt.tight_layout()
    plt.show()

Function that evaluates the accuracy of the classification model on all the test images (images taken from unseen_img1.png): 

In [67]:
def _count_correct(model, folder, expected_label):
    """Return ``(correct, total)`` for the readable images in ``folder``."""
    correct = 0
    total = 0
    for img_file in sorted(os.listdir(folder)):
        img_path = os.path.join(folder, img_file)

        # read image; unreadable files are reported and left out of the total
        img = cv2.imread(img_path)
        if img is None:
            print(f"couldn't load: {img_path}")
            continue

        # extract features + predict
        predicted_label = model.predict([get_hist(img)])[0]
        if predicted_label == expected_label:
            correct += 1
        total += 1
    return correct, total


def evaluate_model(model, yes_b, no_b):
    """Measure the model's accuracy on two folders of test images.

    Every readable image in ``yes_b`` counts as correct when the model
    predicts ``'yes_barnacle'``; every readable image in ``no_b`` counts as
    correct when it predicts ``'no_barnacle'``. Unreadable files are reported
    and excluded from the total.

    Parameters
    ----------
    model : object
        Fitted classifier exposing ``predict(list_of_feature_vectors)``.
    yes_b : str
        Directory of barnacle test images.
    no_b : str
        Directory of non-barnacle test images.

    Returns
    -------
    float
        Accuracy as a percentage (0-100). ``0.0`` is returned, with a
        message, when neither folder contains a readable image.
    """
    correct_predictions = 0
    total_images = 0

    for folder, expected_label in ((yes_b, 'yes_barnacle'), (no_b, 'no_barnacle')):
        correct, total = _count_correct(model, folder, expected_label)
        correct_predictions += correct
        total_images += total

    # nothing to score: report it instead of dividing by zero
    if total_images == 0:
        print("Accuracy: n/a (no readable test images found)")
        return 0.0

    # calculate accuracy + print
    accuracy = (correct_predictions / total_images) * 100
    print(f"Accuracy: {accuracy:.2f}% ({correct_predictions}/{total_images})")
    return accuracy