In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import zipfile
import glob

# Locate the downloaded dataset archive in the current working directory.
# glob returns an empty list when nothing matches, so check before indexing
# to fail with a clear message instead of a bare IndexError.
file = glob.glob('./clothing-dataset-small-master.zip')
if not file:
    raise FileNotFoundError(
        "clothing-dataset-small-master.zip was not found in the current "
        "working directory; download it before running this cell."
    )

# Unpack the whole archive under data/.
with zipfile.ZipFile(file[0], 'r') as zip_ref:
    zip_ref.extractall('data/')

In [15]:
# Imported here so this cell also runs on a fresh kernel; it previously relied
# on imports made by a cell further down the notebook.
import os

from PIL import Image

# Rebuild the dataset under ./data/clothing-dataset/{train,test}: the source
# "train" and "validation" splits are merged into "train", "test" is kept.
for data_type in ["train", "validation", "test"]:
    folder_path = "./data/clothing-dataset-small-master/" + data_type

    # combine validation and train
    new_folder_path = "./data/clothing-dataset/" + ("test" if data_type == "test" else "train")

    for folder in os.listdir(folder_path):
        new_path = os.path.join(new_folder_path, folder)
        # exist_ok replaces the separate exists() check and keeps re-runs safe.
        os.makedirs(new_path, exist_ok=True)

        for filename in os.listdir(os.path.join(folder_path, folder)):
            # Only .jpg / .png files are copied.
            if not filename.endswith((".jpg", ".png")):
                continue

            image_path = os.path.join(folder_path, folder, filename)
            # The context manager releases the source file once the image
            # (and its mirrored copy, if any) has been written out.
            with Image.open(image_path) as img:
                img.save(os.path.join(new_path, filename))

                # flip all images besides t-shirts to balance the data
                if data_type != "test" and folder != "t-shirt":
                    flipped = img.transpose(Image.FLIP_LEFT_RIGHT)
                    flipped.save(os.path.join(new_path, "flipped_" + filename))

In [None]:
from PIL import Image
import os

# Feature: Grayscale
# Every image is resized to 64x64, converted to grayscale (mode 'L') and
# flattened into one feature vector; the label is the name of the folder the
# image was found in.
X_train, y_train = [], []
X_test, y_test = [], []

for data_type in ["train", "test"]:
    folder_path = "./data/clothing-dataset/" + data_type

    # Pick the destination lists once per split instead of once per image.
    if data_type == "test":
        features, labels = X_test, y_test
    else:
        features, labels = X_train, y_train

    for folder in os.listdir(folder_path):
        class_dir = os.path.join(folder_path, folder)
        for filename in os.listdir(class_dir):
            # Anything that is not a .jpg / .png file is ignored.
            if not filename.endswith((".jpg", ".png")):
                continue

            image_path = os.path.join(class_dir, filename)
            # Same order of operations as before: resize, then grayscale.
            img = Image.open(image_path).resize((64, 64)).convert('L')
            # flatten to 1D array
            array = np.array(img).ravel()

            features.append(array)
            labels.append(folder)

In [10]:
from skimage.io import imread, imshow
from skimage import transform
from skimage.filters import prewitt
import os

def preprocess_image_edge_only(image, output_shape=(64, 64)):
    """Return the flattened Prewitt edge map of a resized image.

    Parameters
    ----------
    image : array-like
        Image to process; callers in this notebook pass the result of
        ``imread(path, as_gray=True)``.
    output_shape : tuple of int, optional
        ``(rows, cols)`` the image is resized to before edge detection.
        Defaults to ``(64, 64)``, which reproduces the previous behaviour.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``rows * cols`` with the Prewitt filter response.

    Raises
    ------
    ValueError
        If the resized image does not hold exactly ``rows * cols`` values.
    """
    rows, cols = output_shape
    resized_image = transform.resize(image, (rows, cols), anti_aliasing=True)
    edges_prewitt = prewitt(resized_image)
    # reshape (rather than ravel) keeps the size check the original code had.
    return edges_prewitt.reshape(rows * cols)

def preprocess_image(image, output_shape=(64, 64)):
    """Return Prewitt edge features followed by the resized pixel values.

    Parameters
    ----------
    image : array-like
        Image to process; callers in this notebook pass the result of
        ``imread(path, as_gray=True)``.
    output_shape : tuple of int, optional
        ``(rows, cols)`` the image is resized to before feature extraction.
        Defaults to ``(64, 64)``, which reproduces the previous behaviour.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``2 * rows * cols``: the flattened Prewitt filter
        response first, then the flattened resized image.

    Raises
    ------
    ValueError
        If the resized image does not hold exactly ``rows * cols`` values.
    """
    rows, cols = output_shape
    n_pixels = rows * cols
    resized_image = transform.resize(image, (rows, cols), anti_aliasing=True)
    edges_prewitt = prewitt(resized_image)
    # reshape (rather than ravel) keeps the size check the original code had.
    return np.concatenate(
        (edges_prewitt.reshape(n_pixels), resized_image.reshape(n_pixels))
    )

In [11]:
# Feature: Edges
# Each image is read as grayscale and turned into a feature vector by
# preprocess_image_edge_only; the label is the name of its class folder.
X_train, y_train = [], []
X_test, y_test = [], []

data_types = ["train", "test"]

image = None
for data_type in data_types:
    folder_path = "./data/clothing-dataset/" + data_type

    # Pick the destination lists once per split instead of once per image.
    if data_type == "test":
        features, labels = X_test, y_test
    else:
        features, labels = X_train, y_train

    for folder in os.listdir(folder_path):
        class_dir = os.path.join(folder_path, folder)
        for filename in os.listdir(class_dir):
            # Anything that is not a .jpg / .png file is ignored.
            if not filename.endswith((".jpg", ".png")):
                continue

            image_path = os.path.join(class_dir, filename)
            image = imread(image_path, as_gray=True)
            result = preprocess_image_edge_only(image)

            features.append(result)
            labels.append(folder)

In [10]:
# Feature: Grayscale + Edges
# Each image is read as grayscale and turned into a feature vector by
# preprocess_image; the label is the name of its class folder.
X_train, y_train = [], []
X_test, y_test = [], []

data_types = ["train", "test"]

image = None
# count is initialised here but never updated within this cell.
count = 0
for data_type in data_types:
    folder_path = "./data/clothing-dataset/" + data_type
    is_test_split = data_type == "test"

    for folder in os.listdir(folder_path):
        class_dir = os.path.join(folder_path, folder)
        for filename in os.listdir(class_dir):
            # Anything that is not a .jpg / .png file is ignored.
            if not filename.endswith((".jpg", ".png")):
                continue

            image_path = os.path.join(class_dir, filename)
            image = imread(image_path, as_gray=True)
            result = preprocess_image(image)

            # Route the sample to the lists of the split it belongs to.
            (X_test if is_test_split else X_train).append(result)
            (y_test if is_test_split else y_train).append(folder)

In [12]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Try several neighbourhood sizes and keep the one with the highest accuracy.
# NOTE(review): k is selected using X_test / y_test, so the reported "best"
# accuracy is an optimistic estimate; consider choosing k on a held-out
# validation split or with cross-validation on the training data instead.
candidate_ks = [3, 5, 10, 15, 25, 49]

# Start below any attainable accuracy so the reported k is always one of the
# candidates that was actually evaluated (ties keep the earliest candidate).
max_accuracy = float("-inf")
k = None
for i in candidate_ks:
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(accuracy)
    if accuracy > max_accuracy:
        max_accuracy = accuracy
        k = i

print("Best k: " + str(k))
print("Best accuracy: " + str(max_accuracy))

0.3279569892473118
0.3279569892473118
0.34139784946236557
0.3682795698924731
0.3225806451612903
0.3064516129032258
Best k: 15
Best accuracy: 0.3682795698924731
