In [None]:
# https://github.com/whimian/SVM-Image-Classification/blob/master/Image%20Classification%20using%20scikit-learn.ipynb

from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np

from sklearn import svm, metrics, datasets
from sklearn.utils import Bunch
from sklearn.model_selection import GridSearchCV, train_test_split

from skimage.io import imread
from skimage.transform import resize

In [None]:
def load_image_files(container_path, dimension=(128, 128)):
    """
    Load an image dataset laid out as one subfolder per category.

    The returned object mimics the scikit-learn sample datasets: each
    immediate subfolder of ``container_path`` is one category, and every
    file inside it is read, resized to ``dimension`` and flattened into
    one row of the data matrix.

    Parameters
    ----------
    container_path : str or pathlib.Path
        Path to the main folder holding one subfolder per category.
    dimension : tuple
        Output size handed to ``resize``; every image is adjusted to it.

    Returns
    -------
    Bunch
        ``data``         : array with one flattened, resized image per row
        ``target``       : integer label per row (index into ``target_names``)
        ``target_names`` : category (subfolder) names, sorted by path
        ``images``       : the resized images before flattening
        ``DESCR``        : short description string

    Notes
    -----
    Subfolders and files are visited in sorted order so that the
    label <-> category mapping is identical on every run and machine
    (``Path.iterdir`` itself yields entries in arbitrary order).
    Entries inside a category folder that are not regular files are skipped.
    NOTE(review): all images are presumably expected to share the same
    number of channels, otherwise the rows cannot be stacked into a
    rectangular array -- confirm against the dataset.
    """
    image_dir = Path(container_path)
    # Sort for a deterministic category -> label assignment.
    folders = sorted(directory for directory in image_dir.iterdir() if directory.is_dir())
    categories = [fo.name for fo in folders]

    descr = "A image classification dataset"
    images = []         # resized images, original layout
    flat_data = []      # resized images flattened to 1-D rows
    target = []         # category index of each image
    for i, direc in enumerate(folders):
        for file in sorted(direc.iterdir()):
            if not file.is_file():
                # nested directories cannot be read as an image
                continue
            # `imread` is imported by name; the bare package name `skimage`
            # is never bound in this notebook, so `skimage.io.imread` would
            # raise NameError.
            img = imread(file)
            img_resized = resize(img, dimension, anti_aliasing=True, mode='reflect')
            flat_data.append(img_resized.flatten())
            images.append(img_resized)
            target.append(i)
    flat_data = np.array(flat_data)
    target = np.array(target)
    images = np.array(images)

    return Bunch(data=flat_data,
                 target=target,
                 target_names=categories,
                 images=images,
                 DESCR=descr)

In [None]:
# Load every image below "Prime_FULL/" (one subfolder per category) using the
# default 128x128 target size. The path is relative to the working directory.
image_dataset = load_image_files("Prime_FULL/")

SVM classifier from the homework (HW) assignment: a linear SVM trained with mini-batch gradient descent

In [None]:
import numpy as np
from numpy.linalg import norm

class SVM:
    """Linear soft-margin SVM trained with mini-batch sub-gradient descent.

    The objective being minimised is

        0.5 * ||w||^2 + C * sum_i max(0, 1 - y_i * (w . x_i + b))

    The same ``w . x + b`` sign convention is used by ``hingeloss``,
    ``fit`` and ``predict``. Labels are expected to be in {-1, +1};
    this is not validated.
    """

    def __init__(self, C = 1.0):
        # C weights the hinge (error) term against the regulariser
        self.C = C
        # placeholders, overwritten by fit()
        self.w = 0
        self.b = 0

    ## question i
    def hingeloss(self, w, b, x, y):
        """Compute the regularised hinge loss of a hyperplane on a data set.

        Parameters:
        -----------
        w: ndarray of shape (D,) or (1, D)
            Normal vector of the SVM hyperplane.

        b: float
            Offset (bias) of the hyperplane; the decision value is w . x + b.

        x: ndarray of shape (N, D)
            N examples having D dimensions each.

        y: ndarray of shape (N,)
            Ground truth class of each example, expected in {-1, +1}.

        Returns:
        --------
        loss: float
            0.5 * ||w||^2 + C * sum of the per-example hinge terms.
        """
        features = np.asarray(x, dtype=float)
        labels = np.asarray(y, dtype=float).reshape(-1)
        # flatten so that both (D,) and (1, D) weight layouts are accepted
        weights = np.asarray(w, dtype=float).reshape(-1)

        margins = labels * (features @ weights + b)
        hinge = np.maximum(0.0, 1.0 - margins)
        regulariser = 0.5 * np.dot(weights, weights)
        return float(regulariser + self.C * np.sum(hinge))

    def fit(self, X, Y, batch_size=100, learning_rate=0.001, epochs=1000):
        """Train the SVM with mini-batch sub-gradient descent.

        Parameters:
        -----------
        X: ndarray of shape (N, D)
            Training examples.

        Y: ndarray of shape (N,)
            Training labels, expected in {-1, +1}.

        batch_size: int
            Number of examples per gradient step.

        learning_rate: float
            Step size of each update.

        epochs: int
            Number of passes over the training set.

        Returns:
        --------
        w: ndarray of shape (1, D)
            Learned weight vector (also stored in self.w).

        b: float
            Learned bias (also stored in self.b).

        losses: list of float
            Hinge loss on the full training set, recorded at the start of
            every epoch.
        """
        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y, dtype=float).reshape(-1)
        number_of_samples, number_of_features = X.shape
        c = self.C

        # visiting order of the samples: shuffled once (global NumPy RNG)
        # and reused for every epoch
        ids = np.arange(number_of_samples)
        np.random.shuffle(ids)

        w = np.zeros(number_of_features)
        b = 0.0
        losses = []

        # gradient descent algorithm
        for _ in range(epochs):
            losses.append(self.hingeloss(w, b, X, Y))

            for batch_initial in range(0, number_of_samples, batch_size):
                batch = ids[batch_initial:batch_initial + batch_size]
                x_batch, y_batch = X[batch], Y[batch]

                ## question ii
                # only samples inside the margin (1 - y * f(x) > 0) contribute
                # to the hinge sub-gradient; w and b are held fixed per batch
                violating = 1 - y_batch * (x_batch @ w + b) > 0
                gradw = -c * (y_batch[violating] @ x_batch[violating])
                gradb = -c * np.sum(y_batch[violating])

                ## question iii
                # the extra `learning_rate * w` term is the gradient of the
                # 0.5 * ||w||^2 regulariser
                w = w - learning_rate * w - learning_rate * gradw
                b = b - learning_rate * gradb

        # keep the (1, D) row-vector layout for the stored/returned weights
        self.w = w.reshape(1, -1)
        self.b = float(b)

        return self.w, self.b, losses

    def predict(self, X):
        """Predict the class of each example with the fitted hyperplane.

        Parameters:
        -----------
        X: ndarray of shape (N, D)
            N testing examples having D dimensions each.

        Returns:
        --------
        svm_prediction: ndarray of shape (N, 1)
            1.0 where w . x + b > 0 and -1.0 otherwise. The result is a
            column vector; call ``.ravel()`` before comparing it with
            labels of shape (N,).

        Raises:
        -------
        ValueError
            If the stored weights do not match the number of features in X
            (for example when fit() has not been called yet).
        """
        ## question iv
        X = np.asarray(X, dtype=float)
        # use the fitted parameters stored on the instance, not notebook globals
        weights = np.asarray(self.w, dtype=float).reshape(-1)
        if weights.size != X.shape[1]:
            raise ValueError(
                "weight vector has {} entries but X has {} features; "
                "call fit() first".format(weights.size, X.shape[1]))

        scores = X @ weights + self.b
        return np.where(scores > 0, 1.0, -1.0).reshape(-1, 1)



In [None]:
# Split the flattened images and their labels: 30% is held out for testing.
# random_state is fixed so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    image_dataset.data, image_dataset.target, test_size=0.3,random_state=109)

In [None]:
# Hyper-parameter grid for scikit-learn's SVC: a linear kernel searched over C,
# and an RBF kernel searched over every (C, gamma) combination.
# NOTE: this cell trains sklearn's svm.SVC, not the SVM class defined in this notebook.
param_grid = [
  {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
  {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
 ]
svc = svm.SVC()
# No cv/scoring arguments are passed, so GridSearchCV runs with its library defaults.
clf = GridSearchCV(svc, param_grid)
# Search on the training split only; the test split is kept for evaluation.
clf.fit(X_train, y_train)

In [None]:

# Predict labels for the held-out test split with the fitted grid-search object.
y_pred = clf.predict(X_test)

In [None]:

# Print the repr of the fitted grid-search object followed by scikit-learn's
# classification report comparing the test labels with the predictions.
print("Classification report for - \n{}:\n{}\n".format(
    clf, metrics.classification_report(y_test, y_pred)))