In [2]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os
import cv2 as cv
from skimage.feature import hog
from skimage.transform import resize
from skimage.io import imread, imshow, imsave

In [2]:
import zipfile
import glob

file = glob.glob('./clothing-dataset-small-master.zip')

with zipfile.ZipFile(file[0], 'r') as zip_ref:
    zip_ref.extractall('data/')

In [3]:
for data_type in ["train", "validation", "test"]:
    folder_path = "./data/clothing-dataset-small-master/" + data_type
    
    # combine validation and train
    new_folder_path = "./data/clothing-dataset/" + ("test" if data_type == "test" else "train")

    for folder in os.listdir(folder_path):
        new_path = os.path.join(new_folder_path, folder)
        if not os.path.exists(new_path):
            os.makedirs(new_path)

        for filename in os.listdir(os.path.join(folder_path, folder)):
            if filename.endswith(".jpg") or filename.endswith(".png"):
                image_path = os.path.join(folder_path, folder, filename)
                img = Image.open(image_path)
                new_image_path = os.path.join(new_folder_path, folder, filename)
                img.save(new_image_path)
                                              
                # flip all images besides t-shirts to balance the data
                if folder != "t-shirt":
                    img = img.transpose(Image.FLIP_LEFT_RIGHT)
                    new_image_path = os.path.join(new_folder_path, folder, "flipped_" + filename)
                    img.save(new_image_path)

  img = img.transpose(Image.FLIP_LEFT_RIGHT)


In [17]:
# Generate HOG data from above data
data_types = ["train", "test"]

for data_type in data_types:
    folder_path = "./data/clothing-dataset/" + data_type
    new_folder_path = "./data/clothing-dataset-hog/" + data_type

    for folder in os.listdir(folder_path):
        new_path = os.path.join(new_folder_path, folder)
        if not os.path.exists(new_path):
            os.makedirs(new_path)

        for filename in os.listdir(os.path.join(folder_path, folder)):
            # Open the image
            if filename.endswith(".jpg") or filename.endswith(".png"):
                image_path = os.path.join(folder_path, folder, filename)
                img = cv.imread(image_path,cv.IMREAD_GRAYSCALE)
                resized_img  = cv.resize(img, (64, 128),interpolation =cv.INTER_LINEAR)
                fd, hog_image = hog(resized_img , orientations=9, pixels_per_cell=(8, 8), 
                    cells_per_block=(2, 2), visualize=True)
                new_image_path = os.path.join(new_folder_path, folder, filename)
                cv.imwrite(new_image_path, hog_image)

In [3]:
# Generate SIFT data from above data
data_types = ["train", "test"]


for data_type in data_types:
    folder_path = "./data/clothing-dataset/" + data_type
    new_folder_path = "./data/clothing-dataset-sift/" + data_type

    for folder in os.listdir(folder_path):
        new_path = os.path.join(new_folder_path, folder)
        
        if not os.path.exists(new_path):
            os.makedirs(new_path)

        for filename in os.listdir(os.path.join(folder_path, folder)):
            # Open the image
            if filename.endswith(".jpg") or filename.endswith(".png"):
                image_path = os.path.join(folder_path, folder, filename)
                img = cv.imread(image_path, cv.IMREAD_GRAYSCALE)

                # Apply the SIFT algorithm
                sift = cv.SIFT_create()
                keypoints, descriptors = sift.detectAndCompute(img, None)

                # Draw the keypoints on the image
                img_with_keypoints = cv.drawKeypoints(img, keypoints, None, flags=cv.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

                # Save the image with keypoints to the new folder
                new_image_path = os.path.join(new_folder_path, folder, filename)
                cv.imwrite(new_image_path, img_with_keypoints)

In [4]:
# Feature: Grayscale
X_train = []
y_train = []
X_test = []
y_test = []

for data_type in ["train", "test"]:
    folder_path = "./data/clothing-dataset/" + data_type

    for folder in os.listdir(folder_path):
        for filename in os.listdir(os.path.join(folder_path, folder)):
            # Open the image
            if filename.endswith(".jpg") or filename.endswith(".png"):
                image_path = os.path.join(folder_path, folder, filename)
                img = Image.open(image_path)
                # resize the image
                img = img.resize((64, 64))
                # convert to grayscale
                img = img.convert('L')
                # flatten to 1D array
                array = np.array(img).ravel()
                
                if data_type == "test":
                    X_test.append(array)
                    y_test.append(folder)
                else:
                    X_train.append(array)
                    y_train.append(folder)

In [7]:
from skimage.io import imread, imshow
from skimage import transform
from skimage.filters import prewitt

def preprocess_image_edge_only(image):
    resized_image = transform.resize(image, (64, 64), anti_aliasing=True)
    edges_prewitt = prewitt(resized_image)
    edges_prewitt_array = edges_prewitt.reshape(1, 64 * 64)
    return edges_prewitt_array[0]

def preprocess_image(image):
    resized_image = transform.resize(image, (64, 64), anti_aliasing=True)
    edges_prewitt = prewitt(resized_image)
    edges_prewitt_array = edges_prewitt.reshape(1, 64 * 64)
    image_array = resized_image.reshape(1, 64 * 64)
    return np.concatenate((edges_prewitt_array[0], image_array[0]))

In [8]:
# Feature: Edges
X_train = []
y_train = []
X_test = []
y_test = []

data_types = ["train", "test"]

image = None
for data_type in data_types:
    folder_path = "./data/clothing-dataset/" + data_type

    for folder in os.listdir(folder_path):
        for filename in os.listdir(os.path.join(folder_path, folder)):
            # Open the image
            if filename.endswith(".jpg") or filename.endswith(".png"):
                image_path = os.path.join(folder_path, folder, filename)
                image = imread(image_path,as_gray=True)
                result = preprocess_image_edge_only(image)
                if data_type == "test":
                    X_test.append(result)
                    y_test.append(folder)
                else:
                    X_train.append(result)
                    y_train.append(folder)

In [9]:
# Feature: Grayscale + Edges
X_train = []
y_train = []
X_test = []
y_test = []

data_types = ["train", "test"]

image = None
count = 0
for data_type in data_types:
    folder_path = "./data/clothing-dataset/" + data_type

    for folder in os.listdir(folder_path):
        for filename in os.listdir(os.path.join(folder_path, folder)):
            # Open the image
            if filename.endswith(".jpg") or filename.endswith(".png"):
                image_path = os.path.join(folder_path, folder, filename)
                image = imread(image_path,as_gray=True)
                result = preprocess_image(image)
                if data_type == "test":
                    X_test.append(result)
                    y_test.append(folder)
                else:
                    X_train.append(result)
                    y_train.append(folder)

In [6]:
# Feature: HOG
X_train = []
y_train = []
X_test = []
y_test = []

data_types = ["train", "test"]

image = None
count = 0
for data_type in data_types:
    folder_path = "./data/clothing-dataset-hog/" + data_type

    for folder in os.listdir(folder_path):
        for filename in os.listdir(os.path.join(folder_path, folder)):
            # Open the image
            if filename.endswith(".jpg") or filename.endswith(".png"):
                image_path = os.path.join(folder_path, folder, filename)
                image = imread(image_path)
                result = np.array(img).ravel()
                if data_type == "test":
                    X_test.append(result)
                    y_test.append(folder)
                else:
                    X_train.append(result)
                    y_train.append(folder)

In [None]:
# Feature: SIFT
X_train = []
y_train = []
X_test = []
y_test = []

data_types = ["train", "test"]

image = None
count = 0
for data_type in data_types:
    folder_path = "./data/clothing-dataset-sift/" + data_type

    for folder in os.listdir(folder_path):
        for filename in os.listdir(os.path.join(folder_path, folder)):
            # Open the image
            if filename.endswith(".jpg") or filename.endswith(".png"):
                image_path = os.path.join(folder_path, folder, filename)
                image = imread(image_path)
                result = np.array(img).ravel()
                if data_type == "test":
                    X_test.append(result)
                    y_test.append(folder)
                else:
                    X_train.append(result)
                    y_train.append(folder)

In [7]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score, make_scorer
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# define the number of folds for cross validation
NUM_FOLDS = 5

# define param search space for knn, dt, and rf
knn_param_grid = {'n_neighbors': list(range(2, 100))}
dt_param_grid = {'max_depth': list(range(2, 100))}
rf_param_grid = {
    # 'n_estimators': [10, 50, 100, 200],
    'max_depth': list(range(2, 100)),
    # 'min_samples_split': [2, 5, 10],
    # 'min_samples_leaf': [1, 2, 4],
    # 'max_features': ['auto', 'sqrt', 'log2']
}

# define models
knn = KNeighborsClassifier()
dt = DecisionTreeClassifier(max_features=512)
rf = RandomForestClassifier()

# define scoring metric
f1_scorer = make_scorer(f1_score, average='micro')

In [4]:
# KNN
knn_model = RandomizedSearchCV(knn, knn_param_grid, cv=NUM_FOLDS, scoring=f1_scorer, n_jobs=-1)
knn_model.fit(X_train, y_train)

print("KNN:")
print("Best k: ", knn_model.best_params_)
print("Best F1 score: ", knn_model.best_score_)

knn_final = KNeighborsClassifier(n_neighbors=int(knn_model.best_params_['n_neighbors']))
knn_final.fit(X_train, y_train)
y_pred = knn_final.predict(X_test)
print("Test F1 Score: ", f1_score(y_test, y_pred, average='micro'))

NameError: name 'RandomizedSearchCV' is not defined

In [None]:
# DT
dt_model = RandomizedSearchCV(dt, dt_param_grid, cv=NUM_FOLDS, scoring=f1_scorer, n_jobs=-1)
dt_model.fit(X_train, y_train)

print("DT:")
print("Best depth: ", dt_model.best_params_)
print("Best F1 score: ", dt_model.best_score_)

dt_final = DecisionTreeClassifier(max_depth=int(dt_model.best_params_['max_depth']), max_features=512)
dt_final.fit(X_train, y_train)
y_pred = dt_final.predict(X_test)
print("Test F1 Score: ", f1_score(y_test, y_pred, average='micro'))

In [None]:
# Random Forest (RF)
rf_model = RandomizedSearchCV(rf, rf_param_grid, cv=NUM_FOLDS, scoring=f1_scorer, n_jobs=-1)
rf_model.fit(X_train, y_train)

print("Random Forest:")
print("Best parameters: ", rf_model.best_params_)
print("Best F1 score: ", rf_model.best_score_)

rf_final = RandomForestClassifier(**rf_model.best_params_)
rf_final.fit(X_train, y_train)
y_pred_rf = rf_final.predict(X_test)
print("Test F1 Score: ", f1_score(y_test, y_pred_rf, average='micro'))

In [7]:
from sklearn import svm
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDClassifier

# SVM
svc = make_pipeline(StandardScaler(), SGDClassifier(max_iter=3000))
svc.fit(X_train, y_train)
y_pred = svc.predict(X_test)
print("Test F1 Score: ", f1_score(y_test, y_pred, average='micro'))

# svm_model = RandomizedSearchCV(svc, param_distributions=param_dist, cv=2, scoring=f1_scorer, n_jobs=-1, n_iter=2)
# svm_model.fit(X_train, y_train)

# print("SVM")
# print("Best params: ", svm_model.best_params_)
# print("Best F1 score: ", svm_model.best_score_)

Test F1 Score:  0.30346820809248554


