# Image classification using SVM

In [117]:
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
%matplotlib notebook
from sklearn import svm, metrics, datasets
from sklearn.utils import Bunch
from sklearn.model_selection import GridSearchCV, train_test_split

from skimage.io import imread
from skimage.transform import resize

# Load the data and preprocess the data

In [120]:
def load_image_files(container_path, dimension=(64, 64)):
    """
    Load an image dataset laid out as one subfolder per category.

    Every immediate subdirectory of ``container_path`` is treated as one
    category. Subdirectories (and the files inside them) are visited in
    sorted name order so that the integer label assigned to each category
    is reproducible from run to run and from machine to machine.

    Parameters
    ----------
    container_path : str or path-like
        Path to the main folder holding one subfolder per category.
    dimension : tuple
        Size passed to ``resize`` for every image.

    Returns
    -------
    Bunch
        ``data`` : array with one flattened, resized image per row.
        ``target`` : array of integer category indices, one per image.
        ``target_names`` : list of subfolder names; ``target_names[i]`` is
        the category labelled ``i`` in ``target``.
        ``images`` : array of the resized (unflattened) images.
        ``DESCR`` : short description string.
    """
    image_dir = Path(container_path)
    # iterdir() yields entries in arbitrary order, so sort to keep the
    # label index <-> category name mapping stable.
    folders = sorted(
        (entry for entry in image_dir.iterdir() if entry.is_dir()),
        key=lambda entry: entry.name,
    )
    categories = [folder.name for folder in folders]

    descr = "A image classification dataset"
    images = []
    flat_data = []
    target = []
    for label, folder in enumerate(folders):
        # Only regular files are read; nested directories are skipped.
        image_paths = sorted(
            (entry for entry in folder.iterdir() if entry.is_file()),
            key=lambda entry: entry.name,
        )
        for image_path in image_paths:
            img = imread(image_path)
            img_resized = resize(img, dimension, anti_aliasing=True, mode='reflect')
            # NOTE(review): rows only stack into a 2-D array if every image
            # has the same number of channels - confirm for the dataset.
            flat_data.append(img_resized.flatten())
            images.append(img_resized)
            target.append(label)
    flat_data = np.array(flat_data)
    target = np.array(target)
    images = np.array(images)

    return Bunch(data=flat_data,
                 target=target,
                 target_names=categories,
                 images=images,
                 DESCR=descr)

In [121]:
# Root folder holding one subfolder of images per category.
# NOTE(review): this is a machine-specific absolute path - adjust before re-running elsewhere.
IMAGE_ROOT = 'C:/Users/haiva/Downloads/natural_images/'
image_dataset = load_image_files(IMAGE_ROOT)

# Split the dataset into training and test sets

In [122]:
# Hold out 30% of the samples for testing; the fixed random_state keeps the split repeatable.
split_parts = train_test_split(
    image_dataset.data,
    image_dataset.target,
    test_size=0.3,
    random_state=109,
)
X_train, X_test, y_train, y_test = split_parts

# Use GridSearchCV to find the best parameters
Apply `SVC` from `sklearn.svm` and fit it on the training data.

In [124]:
# Values of C tried for both kernels.
c_values = [1, 10, 100, 1000]

# Two sub-grids: a linear kernel searched over C only, and an RBF kernel
# searched over C and gamma.
linear_grid = {'C': c_values, 'kernel': ['linear']}
rbf_grid = {'C': c_values, 'gamma': [0.001, 0.0001], 'kernel': ['rbf']}
param_grid = [linear_grid, rbf_grid]

svc = svm.SVC()
clf = GridSearchCV(svc, param_grid)
clf.fit(X_train, y_train)

GridSearchCV(estimator=SVC(),
             param_grid=[{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
                         {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
                          'kernel': ['rbf']}])

# Predict on the test data
Make predictions on the test data with the fitted grid search.

In [125]:
# Predict a class index for every sample in the held-out test split
# using the fitted grid-search object.
y_pred = clf.predict(X_test)

# print classification report

In [126]:
# Build the per-class precision/recall/F1 table first, then print it
# together with the repr of the fitted estimator.
report_text = metrics.classification_report(y_test, y_pred)
print("Classification report for - \n{}:\n{}\n".format(clf, report_text))

Classification report for - 
GridSearchCV(estimator=SVC(),
             param_grid=[{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
                         {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
                          'kernel': ['rbf']}]):
              precision    recall  f1-score   support

           0       0.93      0.94      0.93       240
           1       0.87      0.92      0.89       285
           2       0.59      0.74      0.66       252
           3       0.57      0.48      0.52       225
           4       0.82      0.76      0.79       253
           5       0.98      1.00      0.99       292
           6       0.97      0.86      0.91       248
           7       0.99      0.97      0.98       275

    accuracy                           0.84      2070
   macro avg       0.84      0.83      0.83      2070
weighted avg       0.85      0.84      0.84      2070




# print accuracy of the model

In [127]:
from sklearn.metrics import accuracy_score
# accuracy_score takes the true labels first and the predictions second.
# Accuracy itself is symmetric, so the printed value is unchanged, but the
# documented argument order avoids confusion and matches the
# classification_report call.
print('the accuracy obtained on the test set is:', accuracy_score(y_test, y_pred))

the accuracy obtained on the test set is: 0.842512077294686


# make a data frame for predictions

In [128]:
import pandas as pd
# One row per test sample: the true class index next to the predicted one.
pred = pd.DataFrame({"test": y_test, "predctions": y_pred})

In [129]:
# Helper used with Series.apply to translate a value through a lookup table
def set_value(row_number, assigned_value):
    """Return the entry of ``assigned_value`` stored under ``row_number``.

    Parameters
    ----------
    row_number : hashable or int
        Key (or index) to look up.
    assigned_value : mapping or sequence
        Container indexed with ``row_number``.

    Returns
    -------
    object
        ``assigned_value[row_number]``; a failed lookup raises whatever
        the container raises (e.g. ``KeyError`` for a dict).
    """
    looked_up = assigned_value[row_number]
    return looked_up

In [130]:
# Map each integer label to its category name. Derive it from the loaded
# dataset (target_names[i] is the folder that was labelled i) instead of
# hard-coding the names, so the mapping cannot drift from the labels that
# were actually assigned while loading.
dictionary = dict(enumerate(image_dataset.target_names))

In [131]:
# Translate both index columns into readable category names:
# (source column of class indices, new column of names).
for index_col, name_col in (('test', 'original'), ('predctions', 'Predicated')):
    pred[name_col] = pred[index_col].apply(set_value, args=(dictionary,))

In [132]:
# Keep only the readable label columns. Selecting by name rather than by
# position keeps this correct if columns are added to or reordered in `pred`.
result = pred[['original', 'Predicated']]

In [133]:
# Display rows 1 through 19 of the true-vs-predicted label table.
# NOTE(review): the slice starts at 1, so row 0 is not shown - confirm this is intended.
result[1:20]

Unnamed: 0,original,Predicated
1,fruit,fruit
2,cat,dog
3,fruit,fruit
4,motorbike,motorbike
5,person,person
6,dog,flower
7,airplane,airplane
8,fruit,fruit
9,dog,cat
10,car,car


# save the model

In [134]:
import joblib
# Persist the fitted grid-search object to a pickle file via joblib.
model_file = 'saved_modell.pkl'
joblib.dump(clf, model_file)

['saved_modell.pkl']