In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing import image
from tensorflow.keras.utils import plot_model

from sklearn.preprocessing import LabelEncoder

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import cv2
import itertools

In [None]:
from utils import remove_items, split_data, group_color

In [None]:
def clean_df(x):
    """
    Load the styles catalogue and return the cleaned rows of one sub-category.

    input : x, value of `subCategory` to keep (the notebook passes "Topwear",
            "Bottomwear" and "Footwear")
    output: styles, cleaned dataframe restricted to that sub-category, with
            `baseColour` overwritten by the `colorgroup` column
    """

    styles = pd.read_csv("data/styles.csv", on_bad_lines='skip')

    # drop columns which are not needed to make a recommendation
    styles = styles.drop(["productDisplayName", "year"], axis=1)
    styles = styles[styles["masterCategory"].isin(["Apparel", "Footwear"])]
    styles = styles[styles["subCategory"] != "Innerwear"]
    styles = styles.dropna()

    styles = remove_items(styles, "subCategory", ["Apparel Set", "Dress", "Loungewear and Nightwear", "Saree", "Socks"])
    # merge the three shoe sub-categories into a single "Footwear" class
    styles["subCategory"] = styles["subCategory"].replace(["Shoes", "Flip Flops", "Sandal"], "Footwear")
    styles = styles.drop(labels=[6695, 16194, 32309, 36381, 40000], axis=0) # drop incomplete rows

    # explicit copy: the frame is mutated below, and mutating a filtered slice
    # triggers pandas' SettingWithCopyWarning
    styles = styles[styles["subCategory"] == x].copy()

    # group colors to the color-wheel
    # NOTE(review): group_color's return value is ignored, so it presumably adds
    # the `colorgroup` column to `styles` in place - confirm in utils.py
    group_color(styles)
    styles["baseColour"] = styles["colorgroup"]

    return styles

In [None]:
# one cleaned dataframe per sub-category; each call re-reads data/styles.csv
topwear_df    = clean_df("Topwear")
bottomwear_df = clean_df("Bottomwear")
footwear_df   = clean_df("Footwear")

In [None]:
# preview the first rows of the cleaned topwear frame
topwear_df.head()

In [None]:
# preview the first rows of the cleaned bottomwear frame
bottomwear_df.head()

In [None]:
# preview the first rows of the cleaned footwear frame
footwear_df.head()

In [None]:
def encode_labels(styles):
    """
    Integer-encode the five label columns with one LabelEncoder per column.

    The encoding is applied to a copy, so the dataframe passed in keeps its
    original values.

    input : styles, dataframe with the columns articleType, gender, baseColour,
            season and usage
    output: styles, encoded copy of the dataframe
            articleTypeLB, genderLB, baseColourLB, seasonLB, usageLB: the fitted
            LabelEncoders, in that order
    """

    # work on a copy so the caller's frame is left untouched
    styles = styles.copy()

    encoders = []
    for column in ('articleType', 'gender', 'baseColour', 'season', 'usage'):
        encoder = LabelEncoder()
        styles[column] = encoder.fit_transform(styles[column])
        encoders.append(encoder)

    articleTypeLB, genderLB, baseColourLB, seasonLB, usageLB = encoders
    return styles, articleTypeLB, genderLB, baseColourLB, seasonLB, usageLB

In [None]:
# convert text fields to integer class ids (label encoding, not one-hot) and
# keep the fitted encoder of every column for each category
topwear_df, top_article, top_gender, top_color, top_season, top_usage = encode_labels(topwear_df)
bottomwear_df, bottom_article, bottom_gender, bottom_color, bottom_season, bottom_usage = encode_labels(bottomwear_df)
footwear_df, foot_article, foot_gender, foot_color, foot_season, foot_usage = encode_labels(footwear_df)

In [None]:
# show the encoded articleType column next to the encoder object that produced it
print(topwear_df['articleType'], top_article)

In [None]:
def create_array(df):
    """
    Build the tf.data dataset that is given as input to the model.

    The image of every row is read from data/images/<id>.jpg into an
    (80, 60, 3) array; the five encoded label columns become the targets.

    input : df, dataframe with an `id` column and the columns articleType,
            gender, baseColour, season and usage
    return: data, tf.data.Dataset of ({"images": ...}, {<label name>: ...}) pairs
    raises: FileNotFoundError when an image file cannot be read
    """

    target_shape = (80, 60, 3)  # (rows, cols, channels)

    # float32 instead of numpy's float64 default halves the size of this buffer;
    # 8-bit pixel values are represented exactly either way
    train_imgs = np.zeros((len(df.id),) + target_shape, dtype=np.float32)

    for i, ID in enumerate(df.id):
        img_path = f"data/images/{ID}.jpg"
        img = cv2.imread(img_path)

        # cv2.imread signals a missing/corrupt file by returning None
        if img is None:
            raise FileNotFoundError(f"could not read image: {img_path}")

        if img.shape != target_shape:
            # resize with OpenCV so the channel order stays the same (BGR) as for
            # images that need no resizing; dsize is given as (width, height)
            img = cv2.resize(img, (target_shape[1], target_shape[0]))

        train_imgs[i] = img

    data = tf.data.Dataset.from_tensor_slices(
        (
            {
                "images": train_imgs
            },

            # single-column frames, keyed by the name of the model output they feed
            {
                'articleType': df[['articleType']],
                'gender'    : df[['gender']],
                'baseColour': df[['baseColour']],
                'season'    : df[['season']],
                'usage'     : df[['usage']]
            }
        )
    )

    return data

In [None]:
def add_layers(resnet_input, num_classes, activation, name, hparam):
    """
    function to build one classification branch of the model

    input : resnet_input: tensor the branch is attached to
            num_classes : number of output classes
            activation  : type of activation applied to the final layer
            name        : output name (given to the final activation layer)
            hparam      : hyperparameter tuple; hparam[1] is the dropout rate and
                          hparam[3] the L2 regularization strength
    return: final layer output
    """

    dropout_rate = hparam[1]
    l2_strength = hparam[3]

    x = resnet_input
    for units in (512, 256, 128, 64):
        # strength passed positionally: the `l=` keyword is a legacy alias that
        # newer Keras releases no longer accept
        x = layers.Dense(units, activation="relu",
                         kernel_regularizer=keras.regularizers.l2(l2_strength))(x)
        x = layers.Dropout(dropout_rate)(x)

    x = layers.Dense(num_classes)(x)
    x = layers.Activation(activation, name=name)(x)

    return x

In [None]:
def build_model(width, height, list_branches, hparam):
  """
  function to build the multi-output model of one category

  input : width, size of the first image axis (the notebook passes 80)
          height, size of the second image axis (the notebook passes 60)
          list_branches, the five fitted encoders of the category in the order
                         articleType, gender, baseColour, season, usage; the
                         length of each `classes_` sets the size of that output
          hparam, hyperparameter tuple forwarded to add_layers
  return: keras.Model with the input "images" and one softmax output per label
  """

  input_shape = (width, height, 3)

  # backbone built for the requested size (was hard-coded to (80, 60, 3),
  # which silently ignored width/height)
  resnet50 = keras.applications.ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)
  resnet50.trainable=False

  inputs = keras.Input(shape=input_shape, name="images")
  x = layers.RandomFlip("horizontal")(inputs)
  x = layers.RandomRotation(factor=0.2)(x)
  x = layers.RandomZoom(height_factor=0.1, width_factor=0.1)(x)
  x = layers.RandomContrast(factor=0.2)(x)

  # NOTE(review): no resnet50.preprocess_input is applied before the backbone -
  # confirm this is intended for the imagenet weights
  x = resnet50(x, training=False)
  x = layers.Flatten()(x)
  x = layers.Dense(1024, activation='relu')(x)

  # one branch per label; the output names must match the label keys of the dataset
  output_names = ('articleType', 'gender', 'baseColour', 'season', 'usage')
  outputs = [
      add_layers(x, len(list_branches[i].classes_), 'softmax', output_name, hparam)
      for i, output_name in enumerate(output_names)
  ]

  model = keras.Model(inputs=inputs, outputs=outputs)

  return model

### Topwear recommendation

In [None]:
# fitted encoders per category, indexed 0 = topwear, 1 = bottomwear, 2 = footwear;
# each inner list is ordered articleType, gender, baseColour, season, usage
list_branches = [[top_article, top_gender, top_color, top_season, top_usage],
                     [bottom_article, bottom_gender, bottom_color, bottom_season, bottom_usage], 
                     [foot_article, foot_gender, foot_color, foot_season, foot_usage]]

In [None]:
# search grid; the notebook combines these into tuples ordered
# (epochs, dropout, learning rate, regularization)
epochs_list = [5, 10, 15]
dropout_list = [0.1, 0.25]
learning_rates = [1e-3, 1e-4]
regularization_list = [0.1, 0.25]

# function to get hyperparameter combinations
def get_hyperparameter_combinations(epochs, dropout, learning_rates, reg):
  """
  Return every combination of the given hyperparameter values.

  input : epochs, dropout, learning_rates, reg: iterables of candidate values
  return: list of (epochs, dropout, learning_rate, reg) tuples, with the last
          argument varying fastest
  """
  grid = itertools.product(epochs, dropout, learning_rates, reg)
  return list(grid)

In [None]:
def hyperparameter_tuning(hyperparameters_comb, list_branch, sub_train, sub_val, sub_test):
    """
    Train one model per hyperparameter combination and keep the best one.

    input : hyperparameters_comb, sequence of (epochs, dropout, learning rate,
                                  regularization) tuples
            list_branch, the five fitted encoders handed to build_model
            sub_train, sub_val, sub_test: datasets used for fitting, validation
                                          and scoring
    return: best_idx, index of the winning combination
            best_stats, object returned by fit() for the winning model
            best_net, the winning model
            best_loss, its loss on sub_test (first value returned by evaluate)
    raises: ValueError when hyperparameters_comb is empty

    NOTE(review): the winner is selected on sub_test, so the returned loss is an
    optimistic estimate; consider selecting on sub_val instead.
    """
    if not hyperparameters_comb:
        raise ValueError("hyperparameters_comb must contain at least one combination")

    best_loss = float('inf')
    # defined up front so the return statement cannot hit an unbound name when
    # no combination yields a finite loss
    best_idx = best_stats = best_net = None

    for i, hparam in enumerate(hyperparameters_comb):
        print('\nCOMBO', i, hparam)
        test_net = build_model(80, 60, list_branches=list_branch, hparam=hparam)
        # the learning rate of the combination (hparam[2]) was previously ignored
        # because the optimizer was given as the plain string 'adam'
        test_net.compile(optimizer=keras.optimizers.Adam(learning_rate=hparam[2]),
                         loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                         metrics=['accuracy'])
        with tf.device("/gpu:0"):
            test_stats = test_net.fit(sub_train, epochs=hparam[0], validation_data=sub_val)

        score = test_net.evaluate(sub_test)
        current_loss = score[0]
        print('Current loss for', i, 'th combination is:', current_loss)

        if current_loss < best_loss:
            best_loss = current_loss
            best_idx = i
            best_net = test_net
            best_stats = test_stats

    return best_idx, best_stats, best_net, best_loss

In [None]:
# model for topwear
batch_size = 32
top_train, top_val, top_test = split_data(create_array(topwear_df), batch_size)
hyperparameters_comb = get_hyperparameter_combinations(epochs_list, dropout_list, learning_rates, regularization_list)
# the fourth value returned by hyperparameter_tuning is the lowest test *loss*,
# so `best_acc` holds a loss despite its name
best_idx, best_stats, best_net, best_acc = hyperparameter_tuning(hyperparameters_comb, list_branch=list_branches[0], sub_train=top_train, sub_val=top_val, sub_test=top_test)
# combination layout: (epochs, dropout, learning rate, regularization)
print('Best test loss : ', best_acc)
print('Dropout rate used : ', hyperparameters_comb[best_idx][1])
print('No of epochs used : ', hyperparameters_comb[best_idx][0])
print('Learning rate used : ', hyperparameters_comb[best_idx][2])
print('regularization strength : ', hyperparameters_comb[best_idx][3])

In [None]:
# optional architecture diagram of the selected model:
# plot_model(best_net, to_file='model_topwear.png')

best_stats.history.keys()

In [None]:
# score the selected topwear model on the test split
best_net.evaluate(top_test)
# persist it under a path relative to the working directory
best_net.save("models/models/model_topwear")

#### Learning curves

In [None]:
# training vs. validation loss of the selected topwear model
plt.plot(best_stats.history['loss'], label='train_loss')
plt.plot(best_stats.history['val_loss'], label='val_loss')
plt.title("Topwear model loss curve")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend(loc="best")
plt.tight_layout()
plt.show()

In [None]:
# training vs. validation accuracy of the articleType output (topwear)
plt.plot(best_stats.history['articleType_accuracy'], label='Train')
plt.plot(best_stats.history['val_articleType_accuracy'], label='Val')
plt.title("Topwear article accuracy curve")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(loc="best")
plt.tight_layout()
plt.show()

In [None]:
# training vs. validation accuracy of the gender output (topwear)
plt.plot(best_stats.history['gender_accuracy'], label='Train')
plt.plot(best_stats.history['val_gender_accuracy'], label='Val')
plt.title("Topwear gender accuracy curve")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(loc="best")
plt.tight_layout()
plt.show()

In [None]:
# training vs. validation accuracy of the baseColour output (topwear)
plt.plot(best_stats.history['baseColour_accuracy'], label='Train')
plt.plot(best_stats.history['val_baseColour_accuracy'], label='Val')
plt.title("Topwear colour accuracy curve")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(loc="best")
plt.tight_layout()
plt.show()

In [None]:
# training vs. validation accuracy of the season output (topwear)
plt.plot(best_stats.history['season_accuracy'], label='Train')
plt.plot(best_stats.history['val_season_accuracy'], label='Val')
plt.title("Topwear season accuracy curve")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(loc="best")
plt.tight_layout()
plt.show()

In [None]:
# training vs. validation accuracy of the usage output (topwear)
plt.plot(best_stats.history['usage_accuracy'], label='Train')
plt.plot(best_stats.history['val_usage_accuracy'], label='Val')
plt.title("Topwear usage accuracy curve")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(loc="best")
plt.tight_layout()
plt.show()

### Bottomwear recommendation

In [None]:
# model for bottomwear
batch_size = 32
bottom_train, bottom_val, bottom_test = split_data(create_array(bottomwear_df),batch_size)
hyperparameters_comb = get_hyperparameter_combinations(epochs_list, dropout_list, learning_rates, regularization_list)
# the fourth value returned by hyperparameter_tuning is the lowest test *loss*,
# so `best_acc` holds a loss despite its name
best_idx, best_stats, best_net, best_acc = hyperparameter_tuning(hyperparameters_comb, list_branch=list_branches[1], sub_train=bottom_train, sub_val=bottom_val, sub_test=bottom_test)
# combination layout: (epochs, dropout, learning rate, regularization)
print('Best test loss : ', best_acc)
print('Dropout rate used : ', hyperparameters_comb[best_idx][1])
print('No of epochs used : ', hyperparameters_comb[best_idx][0])
print('Learning rate used : ', hyperparameters_comb[best_idx][2])
print('regularization strength : ', hyperparameters_comb[best_idx][3])

In [None]:
# optional architecture diagram of the selected model:
# plot_model(best_net, to_file='model_bottomwear.png')

# The previous version of this cell re-fitted `bottom_base_model`, a name that
# is never defined in this notebook (NameError on Restart & Run All). The tuned
# model and its training history already exist as best_net / best_stats, so the
# cell now only inspects the recorded metrics, like the topwear section does.
best_stats.history.keys()

In [None]:
# score the selected bottomwear model on the test split
best_net.evaluate(bottom_test)
# relative path, next to the topwear model ("/models/..." pointed at the
# filesystem root)
best_net.save("models/models/model_bottomwear")

#### Learning curves

In [None]:
# training vs. validation loss of the selected bottomwear model
plt.plot(best_stats.history['loss'], label='train_loss')
plt.plot(best_stats.history['val_loss'], label='val_loss')
plt.title("Bottomwear model loss curve")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend(loc="best")
plt.tight_layout()
plt.show()

In [None]:
# training vs. validation accuracy of the articleType output (bottomwear)
plt.plot(best_stats.history['articleType_accuracy'], label='Train')
plt.plot(best_stats.history['val_articleType_accuracy'], label='Val')
plt.title("Bottomwear article accuracy curve")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(loc="best")
plt.tight_layout()
plt.show()

In [None]:
# training vs. validation accuracy of the gender output (bottomwear)
plt.plot(best_stats.history['gender_accuracy'], label='Train')
plt.plot(best_stats.history['val_gender_accuracy'], label='Val')
plt.title("Bottomwear gender accuracy curve")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(loc="best")
plt.tight_layout()
plt.show()

In [None]:
# training vs. validation accuracy of the baseColour output (bottomwear)
plt.plot(best_stats.history['baseColour_accuracy'], label='Train')
plt.plot(best_stats.history['val_baseColour_accuracy'], label='Val')
plt.title("Bottomwear colour accuracy curve")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(loc="best")
plt.tight_layout()
plt.show()

In [None]:
# training vs. validation accuracy of the season output (bottomwear)
plt.plot(best_stats.history['season_accuracy'], label='Train')
plt.plot(best_stats.history['val_season_accuracy'], label='Val')
plt.title("Bottomwear season accuracy curve")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(loc="best")
plt.tight_layout()
plt.show()

In [None]:
# training vs. validation accuracy of the usage output (bottomwear)
plt.plot(best_stats.history['usage_accuracy'], label='Train')
plt.plot(best_stats.history['val_usage_accuracy'], label='Val')
plt.title("Bottomwear usage accuracy curve")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(loc="best")
plt.tight_layout()
plt.show()

### Footwear recommendation

In [None]:
# model for footwear
batch_size = 32
foot_train, foot_val, foot_test = split_data(create_array(footwear_df), batch_size)
hyperparameters_comb = get_hyperparameter_combinations(epochs_list, dropout_list, learning_rates, regularization_list)
# the fourth value returned by hyperparameter_tuning is the lowest test *loss*,
# so `best_acc` holds a loss despite its name
best_idx, best_stats, best_net, best_acc = hyperparameter_tuning(hyperparameters_comb, list_branch=list_branches[2], sub_train=foot_train, sub_val=foot_val, sub_test=foot_test)
# combination layout: (epochs, dropout, learning rate, regularization)
print('Best test loss : ', best_acc)
print('Dropout rate used : ', hyperparameters_comb[best_idx][1])
print('No of epochs used : ', hyperparameters_comb[best_idx][0])
print('Learning rate used : ', hyperparameters_comb[best_idx][2])
print('regularization strength : ', hyperparameters_comb[best_idx][3])

In [None]:
# plot_model(best_net, to_file='model_footwear.png')

In [None]:
# score the selected footwear model on the test split
best_net.evaluate(foot_test)
# relative path, next to the topwear model ("/models/..." pointed at the
# filesystem root)
best_net.save("models/models/model_footwear")

#### Learning curves

In [None]:
# training vs. validation loss of the selected footwear model
plt.plot(best_stats.history['loss'], label='train_loss')
plt.plot(best_stats.history['val_loss'], label='val_loss')
plt.title("Footwear model loss curve")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend(loc="best")
plt.tight_layout()
plt.show()

In [None]:
# training vs. validation accuracy of the articleType output (footwear)
plt.plot(best_stats.history['articleType_accuracy'], label='Train')
plt.plot(best_stats.history['val_articleType_accuracy'], label='Val')
plt.title("Footwear article accuracy curve")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(loc="best")
plt.tight_layout()
plt.show()

In [None]:
# training vs. validation accuracy of the gender output (footwear)
plt.plot(best_stats.history['gender_accuracy'], label='Train')
plt.plot(best_stats.history['val_gender_accuracy'], label='Val')
plt.title("Footwear gender accuracy curve")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(loc="best")
plt.tight_layout()
plt.show()

In [None]:
# training vs. validation accuracy of the baseColour output (footwear)
plt.plot(best_stats.history['baseColour_accuracy'], label='Train')
plt.plot(best_stats.history['val_baseColour_accuracy'], label='Val')
plt.title("Footwear colour accuracy curve")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(loc="best")
plt.tight_layout()
plt.show()

In [None]:
# training vs. validation accuracy of the season output (footwear)
plt.plot(best_stats.history['season_accuracy'], label='Train')
plt.plot(best_stats.history['val_season_accuracy'], label='Val')
plt.title("Footwear season accuracy curve")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(loc="best")
plt.tight_layout()
plt.show()

In [None]:
# training vs. validation accuracy of the usage output (footwear)
plt.plot(best_stats.history['usage_accuracy'], label='Train')
plt.plot(best_stats.history['val_usage_accuracy'], label='Val')
plt.title("Footwear usage accuracy curve")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend(loc="best")
plt.tight_layout()
plt.show()