In [1]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Import Warnings 
import warnings
# warnings.filterwarnings('ignore')
# import pandas as pd
# from sklearn.utils import shuffle
# from sklearn.model_selection import train_test_split

In [2]:
# SHOW ALL AVAILABLE CLASSES/LABELS
PATH = os.getcwd()
# Define data path(where data resides)
data_path = 'data'
data_dir_list = os.listdir(data_path)
# data_dir_list.remove('.ipynb_checkpoints')
print(data_dir_list)

['bike', 'cars', 'cats', 'dogs', 'flowers', 'horses', 'human']


In [3]:
# ANY CONSTANTS OR STANDARD PARAMETERS

# img_rows=128
# img_cols=128
# num_channel=1
# num_epoch=100

# Define the number of classes
num_classes = 7

# PCA Dimensions
pca_dims = 150

# Decision tree params
dt_max_depth = 7


In [4]:
# LOAD IMAGE DATA AND LABELS
img_data_list=[]
img_class_list = []
img_class_map = {}

i = 0
for dataset in data_dir_list:
  img_list=os.listdir(data_path+'/'+ dataset)
  print ('Loaded the images of dataset-'+'{}\n'.format(dataset))
  img_class_map[i] = dataset
  for img in img_list:
    input_img=cv2.imread(data_path + '/'+ dataset + '/'+ img )
    input_img=cv2.cvtColor(input_img, cv2.COLOR_BGR2GRAY)
    input_img_resize=cv2.resize(input_img,(128,128))
    img_data_list.append(input_img_resize)
    img_class_list.append(i)
  i += 1

img_data = np.array(img_data_list)
img_data = img_data.astype('float32')
img_data /= 255
print (img_data.shape)

Loaded the images of dataset-bike

Loaded the images of dataset-cars

Loaded the images of dataset-cats

Loaded the images of dataset-dogs

Loaded the images of dataset-flowers

Loaded the images of dataset-horses

Loaded the images of dataset-human

(1803, 128, 128)


In [5]:
# RESHAPE(FLATTEN) EACH IMAGE INTO A 1D ARRAY(FROM 128*128 TO 16384)
new_img_data = img_data.reshape(img_data.shape[0], (img_data.shape[1]*img_data.shape[2]))
new_img_data.shape

(1803, 16384)

In [6]:
# STANDARDIZE DATA
from sklearn.preprocessing import StandardScaler

new_img_data_std = StandardScaler().fit_transform(new_img_data)
new_img_data_std

array([[ 0.3595882 , -0.2872668 ,  0.10080811, ..., -0.27797526,
        -0.6658493 , -0.48141   ],
       [-0.6854825 , -0.69422996, -0.0481398 , ..., -0.37231088,
        -0.05459138,  0.17567816],
       [ 0.72604156,  0.6487485 ,  1.0080364 , ..., -0.18363966,
        -0.02324482,  0.09745339],
       ...,
       [ 1.4725205 ,  1.4898057 ,  1.4955022 , ..., -0.23080747,
        -0.71286917, -0.79430914],
       [-1.2962382 , -1.3318056 , -1.2938861 , ...,  0.05219936,
         0.13348798, -0.01206131],
       [ 1.0653502 ,  1.0692772 ,  1.0621992 , ...,  0.91694254,
         0.9171521 ,  0.9109912 ]], dtype=float32)

In [7]:
# APPLY PCA
from sklearn.decomposition import PCA
pca = PCA(n_components=pca_dims)
new_img_data_std_pca = pca.fit_transform(new_img_data_std)

In [8]:
# PCA VARIANCE RETAINED 
# print(pca.explained_variance_ratio_)
print(sum(pca.explained_variance_ratio_))

0.8432891964912415


In [9]:
from sklearn.model_selection import train_test_split

X = new_img_data_std_pca
y = img_class_list
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25)

In [10]:
print(X_train.shape)
print(X_test.shape)
print(len(y_train))
print(len(y_test))

(1352, 150)
(451, 150)
1352
451


In [11]:
# DECISION TREE
from sklearn.tree import DecisionTreeClassifier
dtree_model = DecisionTreeClassifier(max_depth = dt_max_depth).fit(X_train, y_train)

In [12]:
# DECISION TREE CLASSIFICATION(TESTING)
dtree_proba_predictions = dtree_model.predict_proba(X_test)
dtree_predictions = dtree_model.predict(X_test)
#print(dtree_proba_predictions)
#print(dtree_predictions, y_test)
print("Accuracy: ",dtree_model.score(X_test, y_test))

Accuracy:  0.352549889135255


In [13]:
# Naive Bayes
from sklearn.naive_bayes import GaussianNB
bayes_model = GaussianNB()
bayes_model.fit(X_train,y_train)

In [14]:
bayes_proba_predictions = bayes_model.predict_proba(X_test)
bayes_predictions = bayes_model.predict(X_test)
#print(dtree_proba_predictions)
#print(dtree_predictions, y_test)
print("Accuracy: ",bayes_model.score(X_test, y_test))

Accuracy:  0.4124168514412417


In [15]:
# Support Vector Machine
from sklearn.svm import SVC
SVM_model = SVC()
SVM_model.fit(X_train,y_train)

In [16]:
# SVM_proba_predictions = SVM_model.predict_proba(X_test)
# SVM_predictions = SVM_model.predict(X_test)
#print(dtree_proba_predictions)
#print(dtree_predictions, y_test)
print("Accuracy: ",SVM_model.score(X_test, y_test))

Accuracy:  0.5277161862527716


In [17]:
# K-Nearest Neighbour
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(X_train, y_train)

In [18]:
knn_proba_predictions = knn.predict_proba(X_test)
knn_predictions = knn.predict(X_test)
#print(dtree_proba_predictions)
#print(dtree_predictions, y_test)
print("Accuracy: ",knn.score(X_test, y_test))

Accuracy:  0.3946784922394678
