In [4]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import json
from tensorflow.keras.preprocessing.text import tokenizer_from_json  
import re
from tensorflow.keras.models import load_model

In [5]:
# Restore the fitted Keras tokenizer from its JSON dump. The file handle is
# only needed while reading; the tokenizer itself is rebuilt afterwards.
with open('tokenizer.json') as f:
    data = json.load(f)
tokenizer = tokenizer_from_json(data)

In [6]:
def clean_name(name):
    """Normalise a product name into a lowercase, space-separated keyword string.

    Steps, in order:
      1. Drop every character that is not an ASCII letter or a space.
      2. Delete a standalone lowercase "x" token together with the spaces
         around it.
      3. Collapse whitespace runs to a single space, trim, and lowercase.
      4. Remove the stop words listed below.

    Args:
        name: Raw product name (str).

    Returns:
        The cleaned name; an empty string when nothing survives the filtering.
    """
    # NOTE(review): step 2 substitutes an empty string, so the words on either
    # side of a standalone "x" end up joined ("Shirt x Pants" -> "shirtpants").
    # Presumably this matches the preprocessing the models were trained with;
    # confirm before changing it.
    stop_words = (
        "general", "accessories", "and", "in", "the", "with", "up", "to",
        "of", "on", "is", "a", "be", "all", "for", "or",
    )

    letters_only = re.sub(r'[^a-zA-Z ]', "", name)
    without_x = re.sub(r'(^| )x( |$)', "", letters_only)
    collapsed = re.sub(r'\s+', " ", without_x)

    kept = []
    for token in collapsed.strip().lower().split(" "):
        if token in stop_words:
            continue
        kept.append(token)
    return " ".join(kept)

In [7]:
# Second-level category names for each top-level category.
# The prediction cell indexes these lists with the argmax of the matching
# second-level model's output, so the order inside each list is significant.
second_level_hash = {
    "Jewellery": [
        "Bangles, Bracelets & Armlets",
        "Rings",
        "Pendants & Lockets",
        "Necklaces & Chains",
        "Mangalsutras & Tanmaniyas",
        "Jewellery Sets",
        "Accessories",
        "Anklets",
        "Earrings",
        "Hair Accessories",
        "Artificial Jewellery",
        "Precious Jewellery",
        "Nose Rings & Studs",
    ],
    "Clothing": [
        "Women's Clothing",
        "Kids' Clothing",
        "Men's Clothing",
    ],
    "Footwear": [
        "Women's Footwear",
        "Men's Footwear",
        "Kids' & Infant Footwear",
        "REMSON INDIA Women Flats",
    ],
    "Automotive": [
        "Accessories & Spare parts",
        "Car & Bike Accessories",
        "Car Accessories",
    ],
    "Computers": [
        "Network Components",
        "Tablet Accessories",
        "Laptop Accessories",
        "Software",
        "Audio Players",
        "Computer Peripherals",
        "Storage",
        "Computer Components",
        "Laptops",
    ],
    "Watches": [
        "Wrist Watches",
        "Watch Accessories",
        "Clocks",
    ],
}

# The same data as a list of [top_level, [second_level, ...]] pairs, in the
# dict's insertion order. Sub-lists are copied so the two structures do not
# share list objects.
second_levels = [[top, list(subs)] for top, subs in second_level_hash.items()]

In [8]:
# Load the first-level classifier from "top_classifier.h5"; the prediction
# cell uses its output to choose the top-level category.
first_level_model = load_model("top_classifier.h5")

In [15]:
# Every top-level and second-level category name in one flat list,
# in alphabetical order.
all_labels = [
    "Accessories",
    "Accessories & Spare parts",
    "Anklets",
    "Artificial Jewellery",
    "Audio Players",
    "Automotive",
    "Bangles, Bracelets & Armlets",
    "Car & Bike Accessories",
    "Car Accessories",
    "Clocks",
    "Clothing",
    "Computer Components",
    "Computer Peripherals",
    "Computers",
    "Earrings",
    "Footwear",
    "Hair Accessories",
    "Jewellery",
    "Jewellery Sets",
    "Kids' & Infant Footwear",
    "Kids' Clothing",
    "Laptop Accessories",
    "Laptops",
    "Mangalsutras & Tanmaniyas",
    "Men's Clothing",
    "Men's Footwear",
    "Necklaces & Chains",
    "Network Components",
    "Nose Rings & Studs",
    "Pendants & Lockets",
    "Precious Jewellery",
    "REMSON INDIA Women Flats",
    "Rings",
    "Software",
    "Storage",
    "Tablet Accessories",
    "Watch Accessories",
    "Watches",
    "Women's Clothing",
    "Women's Footwear",
    "Wrist Watches",
]

# Top-level category names, kept sorted: the prediction cell maps the
# first-level model's argmax index onto this list.
first_level_labels = sorted(
    ["Automotive", "Clothing", "Computers", "Footwear", "Jewellery", "Watches"]
)

In [21]:
# Product names to run through the two-stage classifier; add one per line.
test_names = [
    "Embroidered Men's Waistcoat",
]

In [22]:
# One second-level classifier per top-level category, each loaded from
# "<label>_classifier.h5" and keyed by its top-level label.
models = {
    label: load_model(f'{label}_classifier.h5')
    for label in first_level_labels
}

In [24]:
# Length passed to pad_sequences for every model input.
sequence_maxlen = 250

for name in test_names:
    # Clean the raw name, convert it to token ids, and pad to the fixed length.
    cleaned = clean_name(name)
    padded = pad_sequences(
        tokenizer.texts_to_sequences([cleaned]),
        maxlen=sequence_maxlen,
    )

    # Stage 1: the first-level model picks the top-level category.
    top_index = np.argmax(first_level_model.predict(padded))
    top_label = first_level_labels[top_index]

    # Stage 2: the classifier for that category picks the sub-category.
    # Assumes each model's output order matches the order of its label list
    # -- TODO confirm against the training code.
    second_index = np.argmax(models[top_label].predict(padded))
    second_label = second_level_hash[top_label][second_index]

    print("%s : %s > %s" % (name, top_label, second_label))

Embroidered Men's Waistcoat : Clothing > Men's Clothing
