In [71]:
import numpy as np
import matplotlib.pyplot as plt
import cv2 as cv
from skimage.feature import hog
from skimage.transform import resize
from skimage.io import imread, imshow, imsave

In [70]:
import zipfile
import glob

file = glob.glob('./clothing-dataset-small-master.zip')

with zipfile.ZipFile(file[0], 'r') as zip_ref:
    zip_ref.extractall('data/')

In [75]:
from PIL import Image
import os

data_types = ["train", "test", "validation"]

for data_type in data_types:
    folder_path = "./data/clothing-dataset-small-master/" + data_type
    new_folder_path = "./data/clothing-dataset-resized/" + data_type

    for folder in os.listdir(folder_path):
        new_path = os.path.join(new_folder_path, folder)
        if not os.path.exists(new_path):
            os.makedirs(new_path)

        for filename in os.listdir(os.path.join(folder_path, folder)):
            # Open the image
            if filename.endswith(".jpg") or filename.endswith(".png"):
                image_path = os.path.join(folder_path, folder, filename)
                img = cv.imread(image_path,cv.IMREAD_GRAYSCALE)
                resized_img  = cv.resize(img, (64, 128),interpolation =cv.INTER_LINEAR)
                imshow(resized_img )
                fd, hog_image = hog(resized_img , orientations=9, pixels_per_cell=(8, 8), 
                    cells_per_block=(2, 2), visualize=True)
                # Display original and HOG images
#                 hog_img = Image.fromarray(hog_image.astype(np.uint8))
#                 imshow(hog_img)

                new_image_path = os.path.join(new_folder_path, folder, filename)
#                 imsave(new_image_path, hog)
                cv.imwrite(new_image_path, hog_image)

In [89]:
X_train = []
y_train = []
train_folder = "./data/clothing-dataset-resized/train"
for folder in os.listdir(train_folder):
    for filename in os.listdir(os.path.join(train_folder, folder)):
        if filename.endswith(".jpg") or filename.endswith(".png"):
            image_path = os.path.join(train_folder, folder, filename)
            img = Image.open(image_path)
            array = np.array(img).ravel()
            X_train.append(array)
            y_train.append(folder)
            
X_test = []
y_test = []
test_folder = "./data/clothing-dataset-resized/test"
for folder in os.listdir(test_folder):
    for filename in os.listdir(os.path.join(test_folder, folder)):
        if filename.endswith(".jpg") or filename.endswith(".png"):
            image_path = os.path.join(test_folder, folder, filename)
            img = Image.open(image_path)
            array = np.array(img).ravel()
            X_test.append(array)
            y_test.append(folder)
            
X_val = []
y_val = []
val_folder = "./data/clothing-dataset-resized/validation"
for folder in os.listdir(val_folder):
    for filename in os.listdir(os.path.join(val_folder, folder)):
        if filename.endswith(".jpg") or filename.endswith(".png"):
            image_path = os.path.join(val_folder, folder, filename)
            img = Image.open(image_path)
            array = np.array(img).ravel()
            X_val.append(array)
            y_val.append(folder)

In [93]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

max_accuracy = 0
k = 2
for i in range(2, 10):
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    if accuracy > max_accuracy:
        max_accuracy = accuracy
        k = i

print("Best k: " + str(k))
print("Best accuracy: " + str(max_accuracy))

knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_val)
print("Validation accuracy: " + str(accuracy_score(y_val, y_pred)))

Best k: 9
Best accuracy: 0.4596774193548387
Validation accuracy: 0.5659824046920822


In [94]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

max_accuracy = 0
max_depth = 2
for i in range(2, 10):
    dt = DecisionTreeClassifier(max_depth=i, max_features=2048)
    dt.fit(X_train, y_train)
    y_pred = dt.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    if accuracy > max_accuracy:
        max_accuracy = accuracy
        max_depth = i

print("Best depth: " + str(max_depth))
print("Best accuracy: " + str(max_accuracy))

dt = DecisionTreeClassifier(max_depth=max_depth, max_features=1024)
dt.fit(X_train, y_train)
y_pred = dt.predict(X_val)
print("Validation accuracy: " + str(accuracy_score(y_val, y_pred)))

Best depth: 6
Best accuracy: 0.34946236559139787
Validation accuracy: 0.39589442815249265
