In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
import zipfile
import glob

file = glob.glob('./clothing-dataset-small-master.zip')

with zipfile.ZipFile(file[0], 'r') as zip_ref:
    zip_ref.extractall('data/')

In [5]:
from PIL import Image
import os

# combine train and validation datasets + resize and grayscale images
for data_type in ["train", "test", "validation"]:
    folder_path = "./data/clothing-dataset-small-master/" + data_type
    
    new_data_type = data_type if data_type == "test" else "train"
    new_folder_path = "./data/clothing-dataset-processed/" + new_data_type

    for folder in os.listdir(folder_path):
        new_path = os.path.join(new_folder_path, folder)
        if not os.path.exists(new_path):
            os.makedirs(new_path)

        for filename in os.listdir(os.path.join(folder_path, folder)):
            # Open the image
            if filename.endswith(".jpg") or filename.endswith(".png"):
                image_path = os.path.join(folder_path, folder, filename)
                img = Image.open(image_path)
                img = img.resize((64, 64))
                img = img.convert('L')
                new_image_path = os.path.join(new_folder_path, folder, filename)
                img.save(new_image_path)
                
                # flip all images besides t-shirts to balance the data
                if data_type != "test" and folder != "t-shirt":
                    flipped_filename = "flipped_" + filename
                    flipped_image_path = os.path.join(new_folder_path, folder, flipped_filename)
                    flippedImg = img.transpose(Image.FLIP_LEFT_RIGHT)
                    flippedImg.save(flipped_image_path)

PermissionError: [Errno 13] Permission denied: './data/clothing-dataset-processed/train\\shorts\\3f7d16eb-7c7b-45fe-8d3b-ffb38124b52c.jpg'

In [8]:
X_train = []
y_train = []
train_folder = "./data/clothing-dataset-processed/train"
for folder in os.listdir(train_folder):
    for filename in os.listdir(os.path.join(train_folder, folder)):
        if filename.endswith(".jpg") or filename.endswith(".png"):
            image_path = os.path.join(train_folder, folder, filename)
            img = Image.open(image_path)
            array = np.array(img).ravel()
            X_train.append(array)
            y_train.append(folder)
            
X_test = []
y_test = []
test_folder = "./data/clothing-dataset-processed/test"
for folder in os.listdir(test_folder):
    for filename in os.listdir(os.path.join(test_folder, folder)):
        if filename.endswith(".jpg") or filename.endswith(".png"):
            image_path = os.path.join(test_folder, folder, filename)
            img = Image.open(image_path)
            array = np.array(img).ravel()
            X_test.append(array)
            y_test.append(folder)

In [None]:
# KNN using grayscale pixel values

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

max_accuracy = 0
k = 2
for i in range(2, 100):
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    if accuracy > max_accuracy:
        max_accuracy = accuracy
        k = i

print("Best k: " + str(k))
print("Best accuracy: " + str(max_accuracy))

knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_val)
print("Validation accuracy: " + str(accuracy_score(y_val, y_pred)))

Best k: 49
Best accuracy: 0.3602150537634409
Validation accuracy: 0.41642228739002934


In [None]:
# DT using grayscale pixel values

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

max_accuracy = 0
max_depth = 2
for i in range(2, 100):
    dt = DecisionTreeClassifier(max_depth=i, max_features=2048)
    dt.fit(X_train, y_train)
    y_pred = dt.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    if accuracy > max_accuracy:
        max_accuracy = accuracy
        max_depth = i

print("Best depth: " + str(max_depth))
print("Best accuracy: " + str(max_accuracy))

dt = DecisionTreeClassifier(max_depth=max_depth, max_features=2048)
dt.fit(X_train, y_train)
y_pred = dt.predict(X_val)
print("Validation accuracy: " + str(accuracy_score(y_val, y_pred)))

Best depth: 45
Best accuracy: 0.3387096774193548
Validation accuracy: 0.34310850439882695


In [None]:
from skimage.io import imread, imshow
from skimage import transform
from skimage.filters import prewitt
import os

def preprocess_image_edge_only(image):
    resized_image = transform.resize(image, (256, 256), anti_aliasing=True)
    edges_prewitt = prewitt(resized_image)
    edges_prewitt_array = edges_prewitt.reshape(1, 256 * 256)[0]
    return edges_prewitt_array

def preprocess_image(image):
    resized_image = transform.resize(image, (256, 256), anti_aliasing=True)
    edges_prewitt = prewitt(resized_image)
    edges_prewitt_array = edges_prewitt.reshape(1, 256 * 256)
    image_array = resized_image.reshape(1, 256 * 256)
    return np.concatenate((edges_prewitt_array[0], image_array[0]))

In [2]:
X_train = []
y_train = []
X_test = []
y_test = []
X_val = []
y_val = []

data_types = ["train", "test", "validation"]

image = None
for data_type in data_types:
    folder_path = "./data/clothing-dataset-small-master/" + data_type

    for folder in os.listdir(folder_path):
        for filename in os.listdir(os.path.join(folder_path, folder)):
            # Open the image
            if filename.endswith(".jpg") or filename.endswith(".png"):
                image_path = os.path.join(folder_path, folder, filename)
                image = imread(image_path,as_gray=True)
                result = preprocess_image_edge_only(image)
                if data_type == "train":
                    X_train.append(result)
                    y_train.append(folder)
                elif data_type == "test":
                    X_test.append(result)
                    y_test.append(folder)
                elif data_type == "validation":
                    X_val.append(result)
                    y_val.append(folder)

In [3]:
# KNN using edge operators to extract shape

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

max_accuracy = 0
k = 2
for i in range(2, 100):
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    if accuracy > max_accuracy:
        max_accuracy = accuracy
        k = i

print("Best k: " + str(k))
print("Best accuracy: " + str(max_accuracy))

knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_val)
print("Validation accuracy: " + str(accuracy_score(y_val, y_pred)))

Best k: 79
Best accuracy: 0.19086021505376344
Validation accuracy: 0.2961876832844575


In [6]:
# DT using edge operators to extract shape

from sklearn.model_selection import ParameterSampler
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

max_accuracy = 0
depth = 5
for i in range(2, 100):
    dt = DecisionTreeClassifier(max_depth=i, max_features=2048)
    dt.fit(X_train, y_train)
    y_pred = dt.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    if accuracy > max_accuracy:
        max_accuracy = accuracy
        depth = i

print("Best maximum depth: " + str(depth))
print("Best accuracy: " + str(max_accuracy))

dt = DecisionTreeClassifier(max_depth=depth, max_features=2048)
dt.fit(X_train, y_train)
y_pred = dt.predict(X_val)
print("Validation accuracy: " + str(accuracy_score(y_val, y_pred)))

Best maximum depth: 70
Best accuracy: 0.22311827956989247
Validation accuracy: 0.22287390029325513


In [10]:
X_train = []
y_train = []
X_test = []
y_test = []
X_val = []
y_val = []

data_types = ["train", "test", "validation"]

image = None
count = 0
for data_type in data_types:
    folder_path = "./data/clothing-dataset-small-master/" + data_type

    for folder in os.listdir(folder_path):
        for filename in os.listdir(os.path.join(folder_path, folder)):
            # Open the image
            if filename.endswith(".jpg") or filename.endswith(".png"):
                image_path = os.path.join(folder_path, folder, filename)
                image = imread(image_path,as_gray=True)
                result = preprocess_image(image)
                if data_type == "train":
                    X_train.append(result)
                    y_train.append(folder)
                elif data_type == "test":
                    X_test.append(result)
                    y_test.append(folder)
                elif data_type == "validation":
                    X_val.append(result)
                    y_val.append(folder)

In [11]:
# KNN using both edge operators and grayscale pixel values

max_accuracy = 0
k = 2
for i in range(2, 100):
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    if accuracy > max_accuracy:
        max_accuracy = accuracy
        k = i

print("Best k: " + str(k))
print("Best accuracy: " + str(max_accuracy))

knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_val)
print("Validation accuracy: " + str(accuracy_score(y_val, y_pred)))

Best k: 64
Best accuracy: 0.3602150537634409
Validation accuracy: 0.40762463343108507


In [None]:
# DT using both edge operators and grayscale pixel values

from sklearn.model_selection import ParameterSampler
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

max_accuracy = 0
depth = 5
for i in range(2, 100):
    dt = DecisionTreeClassifier(max_depth=i, max_features=2048)
    dt.fit(X_train, y_train)
    y_pred = dt.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    if accuracy > max_accuracy:
        max_accuracy = accuracy
        depth = i

print("Best maximum depth: " + str(depth))
print("Best accuracy: " + str(max_accuracy))

dt = DecisionTreeClassifier(max_depth=depth, max_features=2048)
dt.fit(X_train, y_train)
y_pred = dt.predict(X_val)
print("Validation accuracy: " + str(accuracy_score(y_val, y_pred)))

Best maximum depth: 15
Best accuracy: 0.3575268817204301
Validation accuracy: 0.2903225806451613
