In [57]:
import os
import tensorflow as tf
import cv2
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.decomposition import TruncatedSVD
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import DBSCAN
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, silhouette_score

for dimension reduction look into LDA for class separability and SVD for sparse data
SVD may be better for image data for its versatility and noise reduction

In [62]:
class ImageLoader(BaseEstimator, TransformerMixin):
    def __init__(self, data_directory, image_size=(128, 128)):
        self.data_directory = data_directory
        self.image_size = image_size
        
    def fit(self, X=None, y=None):
        return self
        
    def transform(self, X=None, y=None):
        images = []
        labels = []
        # get image labels from the folder names
        class_labels = os.listdir(self.data_directory)
        class_labels.sort()
        # loop through directory
        for label_idx, class_name in enumerate(class_labels):
            class_dir = os.path.join(self.data_directory, class_name)
            # wrong directory
            if not os.path.isdir(class_dir): 
                continue
            # loop through image files
            for file_name in os.listdir(class_dir):
                file_path = os.path.join(class_dir, file_name)
                # check for valid image to load
                if file_name.endswith(('.jpg', '.jpeg', '.png')):
                    # load with opencv
                    img = cv2.imread(file_path)
                    # preprocessing
                    # resize
                    img_resized = cv2.resize(img, self.image_size)
                    # grayscale
                    img_gray = cv2.cvtColor(img_resized, cv2.COLOR_BGR2GRAY)
                    # normalization
                    img_gray_norm = img_gray / 255.0
                    
                    # append image with label
                    images.append(img_gray_norm)
                    labels.append(label_idx)
                    
        # convert to numpy
        images = np.array(images)
        labels = np.array(labels)
        return images, labels


In [63]:
class SIFTFeatureExtractor(BaseEstimator, TransformerMixin):
    def __init__(self, n_components=100):
        # use opencv SIFT
        self.sift = cv2.SIFT_create()
        self.svd = TruncatedSVD(n_components=n_components)

    def fit(self, X, y=None):
        sift_features = []
        for img in X:
            # uint8 images
            img_uint8 = (img * 255).astype(np.uint8)
            kp, des = self.sift.detectAndCompute(img_uint8, None)
            sift_features.append(des.flatten() if des is not None else np.zeros(128))
        sift_features = np.array([f for f in sift_features if f is not None])
        # SVD dimension reduction
        self.svd.fit(sift_features)
        return self

    def transform(self, X):
        sift_features = []
        for img in X:
            # SIFT takes in uint8 format images
            img_uint8 = (img * 255).astype(np.uint8)
            kp, des = self.sift.detectAndCompute(img_uint8, None)
            # in case no features found
            sift_features.append(des.flatten() if des is not None else np.zeros(128))
        sift_features = np.array([f for f in sift_features if f is not None])
        return np.array(sift_features)

In [64]:
class FourierFeatureExtractor(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self

    def transform(self, X):
        fft_features = []
        for img in X:
            # fft
            f_transform = np.fft.fft2(img)
            # shift zero frequency to center
            f_shift = np.fft.fftshift(f_transform)
            magnitude_spectrum = 20 * np.log(np.abs(f_shift))
            fft_features.append(magnitude_spectrum.flatten())
        return np.array(fft_features)

In [65]:
# set variable
data_directory = 'data/BigCats'
RANDOM_SEED = 192
# kNN neighbors
number_neighbor = 5
# SVD components
n_components = 100

In [66]:
decision_tree = DecisionTreeClassifier(random_state=RANDOM_SEED)
naive_bayes = GaussianNB()
knn = KNeighborsClassifier(n_neighbors=number_neighbor)
dbscan = DBSCAN(eps=0.5, min_samples=5)

In [67]:
# define the pipelines for different classification methods
sift_decision_tree_pipeline = Pipeline([
    ('sift', SIFTFeatureExtractor(n_components=n_components)),
    ('classifier', DecisionTreeClassifier(random_state=RANDOM_SEED))
])
sift_naive_bayes_pipeline = Pipeline([
    ('sift', SIFTFeatureExtractor(n_components=n_components)),
    ('classifier', GaussianNB())
])
sift_knn_pipeline = Pipeline([
    ('sift', SIFTFeatureExtractor(n_components=n_components)),
    ('classifier', KNeighborsClassifier(n_neighbors=number_neighbor))
])
fourier_decision_tree_pipeline = Pipeline([
    ('fourier', FourierFeatureExtractor()),
    ('classifier', DecisionTreeClassifier(random_state=RANDOM_SEED))
])

fourier_naive_bayes_pipeline = Pipeline([
    ('fourier', FourierFeatureExtractor()),
    ('classifier', GaussianNB())
])

fourier_knn_pipeline = Pipeline([
    ('fourier', FourierFeatureExtractor()),
    ('classifier', KNeighborsClassifier(n_neighbors=number_neighbor))
])


In [68]:
# data load and split
image_loader = ImageLoader(data_directory=data_directory, image_size=(128, 128))
images, labels = image_loader.fit_transform(None)

X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=RANDOM_SEED)


In [69]:
print(X_train.shape)
print(y_train.shape)

(136, 128, 128)
(136,)


In [70]:
# example call so far
# still need parameter search
# maybe dimension reduction
# or other things
sift_decision_tree_pipeline.fit(X_train, y_train)
y_pred = sift_decision_tree_pipeline.predict(X_test)
print("SIFT + Decision Tree: Accuracy =", accuracy_score(y_test, y_pred), "F1-Score =", f1_score(y_test, y_pred, average='weighted'))


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (136,) + inhomogeneous part.