# CHAPTER 2.3

### Creating a multi-label classifier to label watches

We'll implement a CNN that classifies both the gender and the style/usage of watches.


In [1]:
import os
import pathlib
from csv import DictReader

import glob
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import *

In [2]:
# Fixed seed so that NumPy-based randomness is repeatable.
SEED = 999
np.random.seed(SEED)

# Location of the dataset inside the user's Keras cache directory.
base_path = (pathlib.Path.home() / '.keras' / 'datasets' /
             'fashion-product-images-small')
styles_path = str(base_path / 'styles.csv')
images_path_pattern = str(base_path / 'images/*.jpg')
# glob returns matches in an arbitrary, filesystem-dependent order. Sort them
# so that every later step that depends on sample order (e.g. the seeded
# train/validation/test splits) is reproducible across machines and runs.
image_paths = sorted(glob.glob(images_path_pattern))

In [3]:
styles_path

'C:\\Users\\Zeki\\.keras\\datasets\\fashion-product-images-small\\styles.csv'

In [4]:
image_paths

['C:\\Users\\Zeki\\.keras\\datasets\\fashion-product-images-small\\images\\10000.jpg',
 'C:\\Users\\Zeki\\.keras\\datasets\\fashion-product-images-small\\images\\10001.jpg',
 'C:\\Users\\Zeki\\.keras\\datasets\\fashion-product-images-small\\images\\10002.jpg',
 'C:\\Users\\Zeki\\.keras\\datasets\\fashion-product-images-small\\images\\10003.jpg',
 'C:\\Users\\Zeki\\.keras\\datasets\\fashion-product-images-small\\images\\10004.jpg',
 'C:\\Users\\Zeki\\.keras\\datasets\\fashion-product-images-small\\images\\10005.jpg',
 'C:\\Users\\Zeki\\.keras\\datasets\\fashion-product-images-small\\images\\10006.jpg',
 'C:\\Users\\Zeki\\.keras\\datasets\\fashion-product-images-small\\images\\10007.jpg',
 'C:\\Users\\Zeki\\.keras\\datasets\\fashion-product-images-small\\images\\10008.jpg',
 'C:\\Users\\Zeki\\.keras\\datasets\\fashion-product-images-small\\images\\10009.jpg',
 'C:\\Users\\Zeki\\.keras\\datasets\\fashion-product-images-small\\images\\10010.jpg',
 'C:\\Users\\Zeki\\.keras\\datasets\\fashio

In [24]:
len(image_paths)

44441

In [5]:
# Read every row of the styles CSV into a list of dicts, one dict per row,
# keyed by the CSV header names.
# newline='' is what the csv module asks for when opening files, so that
# line endings (including newlines embedded in quoted fields) are handled by
# the csv parser itself instead of being translated by the file object.
with open(styles_path, 'r', newline='') as f:
    STYLES = list(DictReader(f))

In [6]:
STYLES

[{'id': '15970',
  'gender': 'Men',
  'masterCategory': 'Apparel',
  'subCategory': 'Topwear',
  'articleType': 'Shirts',
  'baseColour': 'Navy Blue',
  'season': 'Fall',
  'year': '2011',
  'usage': 'Casual',
  'productDisplayName': 'Turtle Check Men Navy Blue Shirt'},
 {'id': '39386',
  'gender': 'Men',
  'masterCategory': 'Apparel',
  'subCategory': 'Bottomwear',
  'articleType': 'Jeans',
  'baseColour': 'Blue',
  'season': 'Summer',
  'year': '2012',
  'usage': 'Casual',
  'productDisplayName': 'Peter England Men Party Blue Jeans'},
 {'id': '59263',
  'gender': 'Women',
  'masterCategory': 'Accessories',
  'subCategory': 'Watches',
  'articleType': 'Watches',
  'baseColour': 'Silver',
  'season': 'Winter',
  'year': '2016',
  'usage': 'Casual',
  'productDisplayName': 'Titan Women Silver Watch'},
 {'id': '21379',
  'gender': 'Men',
  'masterCategory': 'Apparel',
  'subCategory': 'Bottomwear',
  'articleType': 'Track Pants',
  'baseColour': 'Black',
  'season': 'Fall',
  'year': '20

In [10]:
type(STYLES)

list

In [11]:
len(STYLES)

44446

In [17]:
article_type = 'Watches'
genders = {'Men', 'Women'}
usages = {'Casual', 'Smart Casual', 'Formal'}

In [18]:
# Keep only the rows of the requested article type whose gender and usage
# are in the selected sets, and index them by product id.
# STYLES enters this cell as the list of CSV rows and leaves it as a dict.
# Accept either form so that re-running the cell is harmless; iterating the
# dict directly would yield its string keys and raise a TypeError.
_style_rows = STYLES.values() if isinstance(STYLES, dict) else STYLES
STYLES = {style['id']: style
          for style in _style_rows
          if (style['articleType'] == article_type and
              style['gender'] in genders and
              style['usage'] in usages)}

In [19]:
STYLES

{'59263': {'id': '59263',
  'gender': 'Women',
  'masterCategory': 'Accessories',
  'subCategory': 'Watches',
  'articleType': 'Watches',
  'baseColour': 'Silver',
  'season': 'Winter',
  'year': '2016',
  'usage': 'Casual',
  'productDisplayName': 'Titan Women Silver Watch'},
 '30039': {'id': '30039',
  'gender': 'Men',
  'masterCategory': 'Accessories',
  'subCategory': 'Watches',
  'articleType': 'Watches',
  'baseColour': 'Black',
  'season': 'Winter',
  'year': '2016',
  'usage': 'Casual',
  'productDisplayName': 'Skagen Men Black Watch'},
 '29928': {'id': '29928',
  'gender': 'Men',
  'masterCategory': 'Accessories',
  'subCategory': 'Watches',
  'articleType': 'Watches',
  'baseColour': 'Black',
  'season': 'Winter',
  'year': '2016',
  'usage': 'Casual',
  'productDisplayName': 'Police Men Black Dial Watch PL12889JVSB'},
 '17429': {'id': '17429',
  'gender': 'Men',
  'masterCategory': 'Accessories',
  'subCategory': 'Watches',
  'articleType': 'Watches',
  'baseColour': 'Black'

In [20]:
type(STYLES)

dict

In [21]:
len(STYLES)

2355

In [23]:
STYLES.keys()

dict_keys(['59263', '30039', '29928', '17429', '51658', '23278', '44984', '11188', '45258', '44970', '8110', '43190', '32335', '56670', '36795', '40527', '45869', '26351', '29917', '43164', '45293', '36761', '37481', '8117', '39375', '59290', '52678', '39711', '36759', '44977', '29745', '44983', '59264', '43163', '36766', '45294', '30001', '29910', '40520', '37472', '51660', '43197', '56677', '32332', '36792', '45260', '46418', '5068', '45893', '36905', '47992', '17418', '29919', '59252', '45269', '40529', '52671', '23282', '46427', '43155', '23276', '51656', '40516', '32304', '45256', '21713', '29926', '30037', '17427', '53565', '43199', '59255', '38050', '47995', '36902', '37488', '10098', '36768', '17420', '10605', '52682', '29921', '32303', '45251', '23271', '51651', '40511', '36757', '46420', '43152', '23285', '44979', '58942', '30064', '26333', '45037', '10235', '40545', '46480', '45205', '32357', '46474', '25314', '30090', '32368', '45008', '38004', '41038', '28434', '30097', '4

In [25]:
# Keep only the image files whose id (file name without its extension) is a
# key of STYLES. os.path.basename/splitext are used instead of manual
# slicing so the id is extracted correctly whatever the extension length.
image_paths = [path for path in image_paths
               if os.path.splitext(os.path.basename(path))[0] in STYLES]

In [26]:
len(image_paths)

2355

IMPORTANT NOTE: The images folder contains many different types of product images. Up to this point, we have used the ids from the CSV file to find the images that show watches, and we have kept only those paths in `image_paths`.

Next, let's walk through how each image is labelled. The same steps are applied to every path in `image_paths`: we take the image's id from its file name, look that id up in `STYLES`, and read the labels (gender and usage) from that dictionary entry.

In [27]:
image_paths[0]

'C:\\Users\\Zeki\\.keras\\datasets\\fashion-product-images-small\\images\\10098.jpg'

In [28]:
# Build the example path from base_path instead of hardcoding one user's
# absolute path, so the cell also runs on other machines.
example_img_path = str(base_path / 'images' / '10098.jpg')

In [38]:
img = load_img(example_img_path, target_size=(64,64))
img = img_to_array(img)

In [39]:
img.shape

(64, 64, 3)

In [30]:
image_id = example_img_path.split(os.path.sep)[-1][:-4]

In [31]:
image_id

'10098'

In [33]:
image_style = STYLES[image_id]

In [34]:
image_style

{'id': '10098',
 'gender': 'Men',
 'masterCategory': 'Accessories',
 'subCategory': 'Watches',
 'articleType': 'Watches',
 'baseColour': 'White',
 'season': 'Winter',
 'year': '2016',
 'usage': 'Casual',
 'productDisplayName': 'Fastrack Men Analogue Plastic White Watch'}

In [35]:
label = (image_style['gender'], image_style['usage'])

In [36]:
label

('Men', 'Casual')

In [37]:
def load_images_and_labels(image_paths, styles, target_size):
    """Load images from disk and pair each one with its (gender, usage) label.

    Parameters
    ----------
    image_paths : iterable of str
        Paths of the image files. The file name without its extension is
        used as the id to look up in ``styles``.
    styles : mapping
        Maps an image id (str) to a record that has at least the keys
        ``'gender'`` and ``'usage'``.
    target_size : tuple
        Forwarded unchanged to ``load_img`` as ``target_size``.

    Returns
    -------
    tuple of (np.ndarray, np.ndarray)
        The stacked image arrays and the matching ``(gender, usage)`` pairs,
        in the same order as ``image_paths``.

    Raises
    ------
    KeyError
        If an image id has no entry in ``styles``.
    """
    images = []
    labels = []

    for image_path in image_paths:
        # Derive the id with os.path helpers rather than slicing off a fixed
        # four characters, so extensions of any length are handled.
        image_id = os.path.splitext(os.path.basename(image_path))[0]

        # Look the style up before touching the disk: an unknown id fails
        # immediately instead of after the image has been read.
        image_style = styles[image_id]

        image = img_to_array(load_img(image_path, target_size=target_size))

        images.append(image)
        labels.append((image_style['gender'], image_style['usage']))

    return np.array(images), np.array(labels)

In [40]:
X, y = load_images_and_labels(image_paths, STYLES, (64, 64))
X = X.astype('float') / 255.0


In [41]:
print(y)

[['Men' 'Casual']
 ['Men' 'Casual']
 ['Women' 'Casual']
 ...
 ['Women' 'Casual']
 ['Men' 'Casual']
 ['Men' 'Casual']]


In [42]:
print(X.shape)
print(y.shape)

(2355, 64, 64, 3)
(2355, 2)


In [43]:
mlb = MultiLabelBinarizer()
y = mlb.fit_transform(y)
print(y)

[[1 0 1 0 0]
 [1 0 1 0 0]
 [1 0 0 0 1]
 ...
 [1 0 0 0 1]
 [1 0 1 0 0]
 [1 0 1 0 0]]


In [44]:
print(y.shape)

(2355, 5)


In [47]:
print(len(mlb.classes_))

5


In [45]:
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y,
                                     stratify=y,
                                     test_size=0.2,
                                     random_state=SEED)
(X_train, X_valid,
 y_train, y_valid) = train_test_split(X_train, y_train,
                                      stratify=y_train,
                                      test_size=0.2,
                                      random_state=SEED)

In [46]:
def _conv_relu_bn(x, filters):
    """Apply a 3x3 'same'-padded convolution, then ReLU, then batch norm."""
    x = Conv2D(filters=filters,
               kernel_size=(3, 3),
               padding='same')(x)
    x = ReLU()(x)
    return BatchNormalization(axis=-1)(x)


def build_network(width, height, depth, classes):
    """Build a small CNN with one sigmoid output per label.

    The network is two convolutional stages (32 then 64 filters), each made
    of two Conv -> ReLU -> BatchNorm units followed by 2x2 max pooling and
    dropout, then a 512-unit dense layer and a ``classes``-unit sigmoid
    output layer.

    Parameters
    ----------
    width : int
        Width of the input images, in pixels.
    height : int
        Height of the input images, in pixels.
    depth : int
        Number of channels of the input images.
    classes : int
        Number of output units (one per label).

    Returns
    -------
    Model
        The uncompiled Keras model.
    """
    # Channels-last image tensors are laid out as (rows, cols, channels),
    # i.e. (height, width, depth). The batch-norm layers below already assume
    # channels-last via axis=-1, so the input shape must use the same layout
    # for non-square images to match the arrays produced by img_to_array.
    input_layer = Input(shape=(height, width, depth))

    x = input_layer
    for filters in (32, 64):
        x = _conv_relu_bn(x, filters)
        x = _conv_relu_bn(x, filters)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        x = Dropout(rate=0.25)(x)

    x = Flatten()(x)
    x = Dense(units=512)(x)
    x = ReLU()(x)
    x = BatchNormalization(axis=-1)(x)
    x = Dropout(rate=0.25)(x)

    # Sigmoid (not softmax): each label is an independent yes/no output.
    x = Dense(units=classes)(x)
    output = Activation('sigmoid')(x)

    return Model(input_layer, output)

In [48]:
model = build_network(width=64,height=64,depth=3,classes=len(mlb.classes_))


In [49]:
model.compile(loss='binary_crossentropy',optimizer='rmsprop',metrics=['accuracy'])

In [50]:
BATCH_SIZE = 64
EPOCHS = 20
model.fit(X_train, y_train,
          validation_data=(X_valid, y_valid),
          batch_size=BATCH_SIZE,
          epochs=EPOCHS)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x244c344d940>

In [51]:
model.save('watches_multilabel.hdf5')

In [52]:
result = model.evaluate(X_test, y_test, batch_size=BATCH_SIZE)
print(f'Test accuracy: {result[1]}')

Test accuracy: 0.9808917045593262


LET'S LOOK AT AN EXAMPLE TO UNDERSTAND OUR PREDICTION RESULTS

In [54]:
X_test[0].shape

(64, 64, 3)

In [55]:
test_image = np.expand_dims(X_test[0], axis=0)

In [56]:
test_image.shape

(1, 64, 64, 3)

In [57]:
probabilities = model.predict(test_image)[0]

In [58]:
probabilities

array([9.9999416e-01, 4.1599713e-05, 5.6341656e-02, 1.2129077e-05,
       9.4986004e-01], dtype=float32)

In [59]:
for label, p in zip(mlb.classes_, probabilities):
    print(f'{label}: {p * 100:.2f}%')

Casual: 100.00%
Formal: 0.00%
Men: 5.63%
Smart Casual: 0.00%
Women: 94.99%


In [60]:
ground_truth_labels = np.expand_dims(y_test[0], axis=0)
ground_truth_labels = mlb.inverse_transform(ground_truth_labels)
print(f'Ground truth labels: {ground_truth_labels}')

Ground truth labels: [('Casual', 'Women')]


In [61]:
y_test[0]

array([1, 0, 0, 0, 1])

In [64]:
y_test[0].shape

(5,)

In [62]:
ground_truth_labels = np.expand_dims(y_test[0], axis=0)

In [63]:
ground_truth_labels

array([[1, 0, 0, 0, 1]])

In [65]:
ground_truth_labels.shape

(1, 5)

In [66]:
ground_truth_labels = mlb.inverse_transform(ground_truth_labels)

In [67]:
ground_truth_labels

[('Casual', 'Women')]