In [1]:
import tensorflow as tf
from tensorflow.keras.utils import plot_model
from tensorflow import keras
from tensorflow.keras.preprocessing import image
from tensorflow.keras import layers

from sklearn.preprocessing import LabelEncoder

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import itertools
import cv2

2022-07-19 10:04:23.330799: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-07-19 10:04:23.330824: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
from utils import remove_items, split_data, group_color

In [3]:
def clean_df(csv_path="data/styles.csv"):
    """
    function to fetch and clean the data

    Steps, in order:
      1. read the CSV, skipping malformed lines;
      2. drop the "productDisplayName" and "year" columns;
      3. keep only the "Apparel" and "Footwear" master categories;
      4. drop "Innerwear" rows, then every row containing a missing value;
      5. remove a fixed list of sub-categories via ``remove_items``;
      6. merge "Shoes", "Flip Flops" and "Sandal" into a single "Footwear"
         sub-category;
      7. drop a fixed list of row labels marked as incomplete;
      8. call ``group_color`` on the frame.

    input : csv_path, path to .csv file
    return: styles, dataframe
    """

    styles = pd.read_csv(csv_path, on_bad_lines='skip')

    # drop unnecessary columns which are not needed to make recommendation
    styles = styles.drop(columns=["productDisplayName", "year"])

    styles = styles[styles["masterCategory"].isin(["Apparel", "Footwear"])]
    styles = styles.drop(styles[styles["subCategory"] == "Innerwear"].index)
    styles = styles.dropna()

    styles = remove_items(
        styles,
        "subCategory",
        ["Apparel Set", "Dress", "Loungewear and Nightwear", "Saree", "Socks"],
    )

    # collapse the individual footwear types into one class
    styles["subCategory"] = styles["subCategory"].replace(
        ["Shoes", "Flip Flops", "Sandal"], "Footwear"
    )

    # drop incomplete rows; the labels are specific to the default CSV, so
    # errors="ignore" keeps the function usable when a label has already been
    # filtered out or a different csv_path is supplied
    styles = styles.drop(
        labels=[6695, 16194, 32309, 36381, 40000], axis=0, errors="ignore"
    )

    # group colors to the color-wheel
    # NOTE(review): the return value is discarded, so group_color presumably
    # modifies `styles` in place -- confirm against utils.group_color
    group_color(styles)

    return styles

In [4]:
# Load and clean the catalogue using the default CSV path.
styles = clean_df()
styles["subCategory"].unique() # sanity check after cleaning df: expect exactly three sub-categories

array(['Topwear', 'Bottomwear', 'Footwear'], dtype=object)

In [5]:
# Encode the sub-category strings as integer class ids. `le` is kept as a
# global because hyperparameter_tuning passes le.classes_ to build_model.
# NOTE(review): the column is overwritten in place, so re-running this cell
# re-fits the encoder on the already-encoded integers.
le = LabelEncoder()
styles["subCategory"] = le.fit_transform(styles["subCategory"])

In [6]:
def create_array(df):
    """
    function to fetch dataset

    Reads "data/images/<id>.jpg" for every id in ``df`` into one
    (n, 80, 60, 3) float32 array and pairs it with the "subCategory" column.

    All images are loaded with ``cv2.imread`` (3-channel, BGR order) and
    images of a different size are resized with ``cv2.resize``, so every
    sample shares the same channel order.

    input : dataframe with an "id" column and a "subCategory" column
    return: dataset, tf.data.Dataset yielding
            ({"images": image}, {"subCategory": label}) pairs
    raises: OSError if an image file is missing or cannot be decoded
    """

    n_images = len(df.id)

    # float32 instead of the float64 default: half the memory for the same
    # pixel values (0-255 is exactly representable)
    train_imgs = np.zeros((n_images, 80, 60, 3), dtype=np.float32)

    for i, ID in enumerate(df.id):
        img_path = f"data/images/{ID}.jpg"
        img = cv2.imread(img_path)

        # cv2.imread signals failure by returning None instead of raising
        if img is None:
            raise OSError(f"could not read image file: {img_path}")

        if img.shape != (80, 60, 3):
            # cv2.resize expects (width, height), i.e. (columns, rows)
            img = cv2.resize(img, (60, 80))

        train_imgs[i] = img

    data = tf.data.Dataset.from_tensor_slices(
        (
            {"images": train_imgs},
            {"subCategory": df[["subCategory"]]},
        )
    )

    return data

In [7]:
def build_model(width, height, num_classes, hparam):
    """
    function to build model for subCategories

    A frozen ImageNet ResNet50 backbone (without its top) is followed by a
    Conv2D layer and a stack of L2-regularised Dense layers with Dropout,
    ending in a softmax output named "subCategory". Random flip / rotation /
    zoom / contrast layers are applied to the input first.

    input : width, size of the first spatial axis of the input tensor
            height, size of the second spatial axis of the input tensor
            num_classes, number of classes (int) or a sequence of class
                         labels whose length is the number of classes
            hparam, indexable hyperparameters: hparam[1] is the dropout rate
                    and hparam[3] the L2 factor (other positions are not
                    read here)
    return: keras.Model
    """

    # accept both a plain count and a sequence such as LabelEncoder.classes_
    if np.ndim(num_classes) == 0:
        n_outputs = int(num_classes)
    else:
        n_outputs = len(num_classes)

    dropout_rate = hparam[1]
    l2_factor = hparam[3]

    # the backbone must be built for the same shape as the Input layer below,
    # otherwise any size other than the default fails when the layers connect
    resnet50 = keras.applications.ResNet50(
        weights='imagenet', include_top=False, input_shape=(width, height, 3)
    )
    resnet50.trainable = False

    inputs = keras.Input(shape=(width, height, 3), name="images")

    x = layers.RandomFlip("horizontal")(inputs)
    x = layers.RandomRotation(factor=0.2)(x)
    x = layers.RandomZoom(height_factor=0.1, width_factor=0.1)(x)
    x = layers.RandomContrast(factor=0.2)(x)

    # NOTE(review): no keras.applications.resnet50.preprocess_input is applied
    # before the backbone -- confirm this is intentional.
    # training=False keeps the backbone in inference mode
    x = resnet50(x, training=False)
    x = layers.Conv2D(32, (2, 2), activation='relu')(x)
    x = layers.Flatten()(x)

    def regularised_dense(units):
        # factor passed positionally so the call does not depend on the
        # keyword name of the regularizer argument
        return layers.Dense(
            units,
            activation='relu',
            kernel_regularizer=keras.regularizers.l2(l2_factor),
        )

    # the first Dense layer has no Dropout after it; the remaining ones do
    x = regularised_dense(1024)(x)
    for units in (512, 256, 128, 64):
        x = regularised_dense(units)(x)
        x = layers.Dropout(dropout_rate)(x)

    x = layers.Dense(n_outputs)(x)
    x = layers.Activation('softmax', name='subCategory')(x)

    model = keras.Model(inputs=inputs, outputs=x)

    return model

In [8]:
# Build the image dataset from the cleaned frame and split it into the three
# datasets used below for fitting, validation and final evaluation.
# NOTE(review): split_data comes from utils; split ratios and shuffling are
# not visible in this notebook.
batch_size = 32
sub_train, sub_val, sub_test = split_data(create_array(styles), batch_size=batch_size)

2022-07-19 10:04:42.546702: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2022-07-19 10:04:42.546786: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (vikram-HP-Pavilion-Laptop-14-bf1xx): /proc/driver/nvidia/version does not exist
2022-07-19 10:04:42.547665: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-07-19 10:04:42.550495: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 3140928000 exceeds 10% of free system memory.


In [9]:
# Candidate values for the grid search: one value from each list forms a
# combination, in the order (epochs, dropout, learning rate, regularization).
epochs_list = [5, 10, 15]  # number of training epochs
dropout_list = [0.1, 0.25]  # dropout rate
learning_rates = [1e-3, 1e-4]  # learning-rate candidates
regularization_list = [0.1, 0.25]  # L2 regularization factor

# function to get hyperparameter combinations
def get_hyperparameter_combinations(epochs, dropout, learning_rates, reg):
    """
    Build every combination of the given hyperparameter candidates.

    input : epochs, dropout, learning_rates, reg -- iterables of candidates
    return: list of (epochs, dropout, learning_rate, reg) tuples in
            Cartesian-product order (the last argument varies fastest)
    """
    return list(itertools.product(epochs, dropout, learning_rates, reg))

In [10]:
def hyperparameter_tuning(hyperparameters_comb):
    """
    Train one model per hyperparameter combination and keep the best one.

    Each combination is read as (epochs, dropout, learning_rate, reg):
    position 0 sets the number of epochs, position 2 the Adam learning rate,
    and the whole tuple is forwarded to build_model (which reads the dropout
    rate and L2 factor).

    Uses the notebook globals ``le``, ``sub_train``, ``sub_val`` and
    ``sub_test``.

    input : hyperparameters_comb, iterable of hyperparameter tuples
    return: (best_idx, best_stats, best_net, best_acc) -- index of the best
            combination, its fit History, the trained model and its accuracy
            on sub_test
    raises: ValueError if hyperparameters_comb is empty
    """
    hyperparameters_comb = list(hyperparameters_comb)
    if not hyperparameters_comb:
        raise ValueError("hyperparameters_comb must contain at least one combination")

    # start below any reachable accuracy so the first run is always recorded
    # and the returned names are always bound
    best_acc = float("-inf")
    best_idx, best_stats, best_net = None, None, None

    for i, hparam in enumerate(hyperparameters_comb):
        print('\nCOMBO', i, hparam)
        test_net = build_model(80, 60, num_classes=le.classes_, hparam=hparam)

        # pass the learning rate explicitly; the string 'adam' would always
        # use the optimizer's default and ignore hparam[2]
        test_net.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hparam[2]),
                         loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                         metrics=['accuracy'])
        with tf.device("/gpu:0"):
            test_stats = test_net.fit(sub_train, epochs=hparam[0], validation_data=sub_val)

        # NOTE(review): the winner is chosen by accuracy on sub_test, so the
        # reported best score is not an unbiased estimate -- consider
        # selecting on sub_val instead.
        score = test_net.evaluate(sub_test)
        current_acc = score[1]  # [loss, accuracy] as compiled above
        print('Current accuracy for', i, 'th combination is:', current_acc)

        if current_acc > best_acc:
            best_acc = current_acc
            best_idx = i
            best_net = test_net
            best_stats = test_stats

    return best_idx, best_stats, best_net, best_acc

In [11]:
# Run the grid search over every combination of the candidate lists.
hyperparameters_comb = get_hyperparameter_combinations(epochs_list, dropout_list, learning_rates, regularization_list)
best_idx, best_stats, best_net, best_acc = hyperparameter_tuning(hyperparameters_comb)

# Each combination is ordered (epochs, dropout, learning rate, regularization),
# matching the argument order of get_hyperparameter_combinations.
print('Best Test score : ', best_acc)
# position 1 is the dropout rate, not a hidden size
print('Dropout rate used : ', hyperparameters_comb[best_idx][1])
print('No of iterations required : ', hyperparameters_comb[best_idx][0])
print('Learning rate used : ', hyperparameters_comb[best_idx][2])
print('regularization strength : ', hyperparameters_comb[best_idx][3])


COMBO 0 (5, 0.1, 0.001, 0.001)


2022-07-19 10:04:52.815676: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 3140928000 exceeds 10% of free system memory.


Epoch 1/5

2022-07-19 10:07:19.094979: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 3140928000 exceeds 10% of free system memory.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


2022-07-19 10:21:21.662466: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 3140928000 exceeds 10% of free system memory.


Current accuracy for 0 th combination is: 0.9878676533699036

COMBO 1 (5, 0.1, 0.001, 0.1)
Epoch 1/5
Epoch 2/5

KeyboardInterrupt: 

In [None]:
# Layer-by-layer summary of the model returned by hyperparameter_tuning.
best_net.summary()

In [None]:
# Loss and accuracy of the selected model on sub_test.
best_net.evaluate(sub_test)

In [None]:
# batch_size = 32
# epochs = 15

# sub_train, sub_val, sub_test = split_data(create_array(styles), batch_size=batch_size)

# sub_model = build_model(80, 60, num_classes=le.classes_)
# sub_model.summary()

# sub_model.compile(optimizer='adam',
#                   loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
#                   metrics=['accuracy'])

In [None]:
# plot_model(sub_model, to_file='model_category.png')

# with tf.device("/gpu:0"):
#     best_stats = sub_model.fit(sub_train, epochs=epochs, validation_data=sub_val)

In [None]:
# Training vs. validation loss per epoch for the selected run.
plt.plot(best_stats.history["loss"], label="train_loss")
plt.plot(best_stats.history["val_loss"], label="val_loss")
plt.title("Loss Curve")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend(loc="best")
plt.tight_layout()
plt.show()

In [None]:
# Training vs. validation accuracy per epoch for the selected run.
plt.plot(best_stats.history["accuracy"], label="train_accuracy")
plt.plot(best_stats.history["val_accuracy"], label="val_accuracy")
plt.title("Accuracy Curve")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(loc="best")
plt.tight_layout()
plt.show()

In [None]:
# sub_model.evaluate(sub_test)
# sub_model.save("/models/models/model_category")