# Author

- Code by: Nguyen Van Toan
- GitHub: https://github.com/vantoan2905
- Email: toanvippk115@gmail.com



In [12]:
import sqlite3
import pandas as pd

# Location of the SQLite database file read by this notebook.
db_file = r'D:\recommend_product\recommend_product\db.sqlite3'

# Open the database and take a cursor from it; both names are reused by the
# loading code further down in this cell.
connection = sqlite3.connect(db_file)
cursor = connection.cursor()
def fetch_table_as_df(cursor, table_name):
    """Load every row of a table into a DataFrame, without its first column.

    Args:
        cursor: An open DB-API cursor (a ``sqlite3.Cursor`` in this notebook).
        table_name: Name of the table to read. A ``schema.table`` form is
            accepted; each dot-separated part is quoted separately.

    Returns:
        pandas.DataFrame: One row per table row, with column names taken from
        ``cursor.description``. The first column is dropped (presumably the
        auto-increment ``id`` primary key -- TODO confirm against the schema).
    """
    # Identifiers cannot be bound as SQL parameters, so quote them explicitly
    # (doubling embedded quotes). This keeps names with spaces/keywords working
    # and prevents the table name from being interpreted as arbitrary SQL.
    quoted_name = ".".join(
        '"' + part.replace('"', '""') + '"' for part in str(table_name).split(".")
    )
    cursor.execute(f"SELECT * FROM {quoted_name}")
    rows = cursor.fetchall()
    columns = [desc[0] for desc in cursor.description]
    df = pd.DataFrame(rows, columns=columns)

    # Return a new frame instead of mutating in place.
    return df.drop(columns=df.columns[0])

# Fetch data
# The reads are wrapped in try/finally so the connection is released even when
# one of the tables is missing or a query fails.
try:
    df_product = fetch_table_as_df(cursor, "shop_product")
    df_user = fetch_table_as_df(cursor, "auth_user")
    df_rating = fetch_table_as_df(cursor, "shop_rating")
    df_search_history = fetch_table_as_df(cursor, "shop_searchhistory")
    df_view_history = fetch_table_as_df(cursor, "shop_viewhistory")
finally:
    # Close connection
    connection.close()



# Search product by Image

1. Preprocessing data

In [13]:

# Embedding data

# Data for task process
# df_product
# df_user
# df_search_history



##############################################################################################################
#############################################################################################################
# Dropna
# DataFrame.dropna() returns a new frame and leaves the original untouched, so
# the result has to be assigned for the rows with missing values to actually
# be removed. Re-running this cell is safe (dropping NaNs twice is a no-op).
df_product = df_product.dropna()
df_product


Unnamed: 0,image,price,articleNumber,articleType,productDisplayName,masterCategory,subCategory,gender,baseColour,fashionType,season,year,usag
0,10000.jpg,910.0,3359453752,Skirts,Palm Tree Girls Sp Jace Sko White Skirts,Apparel,Bottomwear,Women,White,Fashion,Summer,2011.0,Casual
1,10001.jpg,834.0,1538933187,Skirts,Palm Tree Kids Girls Sp Jema Skt Blue Skirts,Apparel,Bottomwear,Women,Blue,Fashion,Summer,2011.0,Casual
2,10002.jpg,716.0,1982148719,Skirts,Palm Tree Kids Sp Jema Skt Blue Skirts,Apparel,Bottomwear,Women,Blue,Fashion,Summer,2011.0,Casual
3,10003.jpg,891.0,3931560500,Tshirts,Nike Women As Nike Eleme White T-Shirt,Apparel,Topwear,Women,White,Fashion,Fall,2011.0,Sports
4,10004.jpg,724.0,2796097505,Shorts,Nike Men As 7 Sw Temp Grey Shorts,Apparel,Bottomwear,Men,Grey,Fashion,Fall,2011.0,Sports
...,...,...,...,...,...,...,...,...,...,...,...,...,...
44414,9995.jpg,952.0,3032757920,Skirts,Gini And Jony Girls Su Scotia Sko Pink Skirts,Apparel,Bottomwear,Women,Pink,Fashion,Summer,2011.0,Casual
44415,9996.jpg,924.0,3183244550,Tops,Palm Tree Girls Sp Jelly Top Blue Tops,Apparel,Topwear,Women,Blue,Fashion,Summer,2011.0,Casual
44416,9997.jpg,740.0,9223084955,Tops,Palm Tree Girls Hs Livia Top Blue Tops,Apparel,Topwear,Women,Blue,Fashion,Summer,2011.0,Casual
44417,9998.jpg,722.0,4531210368,Tops,Palm Tree Girls Hs Livia Top Pink Tops,Apparel,Topwear,Women,Pink,Fashion,Summer,2011.0,Casual


# Check product table 


In [14]:
# Frequency of every category in the main categorical product columns.
articleType = df_product['articleType'].value_counts()
gender = df_product['gender'].value_counts()
baseColour = df_product['baseColour'].value_counts()
fashionType = df_product['fashionType'].value_counts()
season = df_product['season'].value_counts()

# Print each distribution under its column name. A single loop replaces the
# copy-pasted print pairs and fixes the misspelled "acticleType" label.
for column_name, counts in (
    ("articleType", articleType),
    ("gender", gender),
    ("baseColour", baseColour),
    ("fashionType", fashionType),
    ("season", season),
):
    print(column_name)
    print(counts)


acticleType
articleType
Tshirts                7066
Shirts                 3215
Casual Shoes           2845
Watches                2542
Sports Shoes           2036
                       ... 
Ipad                      1
Body Wash and Scrub       1
Mens Grooming Kit         1
Hair Accessory            1
Shoe Laces                1
Name: count, Length: 142, dtype: int64
gender
gender
Men       22142
Women     18631
Unisex     2161
Boys        830
Girls       655
Name: count, dtype: int64
baseColour
baseColour
Black                9727
White                5538
Blue                 4917
Brown                3494
Grey                 2741
Red                  2453
Green                2115
Pink                 1860
Navy Blue            1789
Purple               1640
Silver               1090
Yellow                778
Beige                 749
Gold                  628
Maroon                581
Orange                530
Olive                 410
Multi                 394
Cream              

# Classification model

In [15]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, applications
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Let TensorFlow grow GPU memory on demand instead of reserving it all up front.
# Memory growth has to be configured the same way on every visible GPU, so it
# is applied to all of them rather than only the first one.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)


In [16]:

class CustomDataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that loads and preprocesses image batches on demand.

    Each batch is a pair ``(X, y)`` where ``X`` has shape
    ``(batch, img_size[0], img_size[1], 3)`` and holds images passed through
    MobileNetV2's ``preprocess_input``, and ``y`` holds the matching labels.

    Args:
        img_paths: Sequence of image file paths, indexable by integer position.
        labels: Sequence of labels aligned with ``img_paths``.
        batch_size: Number of samples per batch (the last batch may be smaller).
        img_size: ``(height, width)`` every image is resized to.
        shuffle: Whether to shuffle the sample order initially and after
            every epoch.
        **kwargs: Forwarded to the Keras base class constructor.
    """

    def __init__(self, img_paths, labels, batch_size=32, img_size=(224, 224), shuffle=True, **kwargs):
        # Keras warns when a Sequence subclass skips the base constructor;
        # calling it also lets callers pass base-class options via **kwargs.
        super().__init__(**kwargs)
        self.img_paths = img_paths
        self.labels = labels
        self.batch_size = batch_size
        self.img_size = tuple(img_size)
        self.shuffle = shuffle
        self.indexes = np.arange(len(self.img_paths))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __len__(self):
        """Return the number of batches per epoch (last partial batch included)."""
        return int(np.ceil(len(self.img_paths) / self.batch_size))

    def __getitem__(self, idx):
        """Build batch number ``idx`` and return it as ``(X, y)``."""
        start = idx * self.batch_size
        batch_indexes = self.indexes[start:start + self.batch_size]

        # float32 is what the network consumes; the default float64 would
        # double the memory of every batch for no benefit.
        X = np.zeros((len(batch_indexes), self.img_size[0], self.img_size[1], 3), dtype=np.float32)
        y = np.array([self.labels[i] for i in batch_indexes])

        for row, sample_index in enumerate(batch_indexes):
            path = self.img_paths[sample_index]
            try:
                img = keras.preprocessing.image.load_img(path, target_size=self.img_size)
                img = keras.preprocessing.image.img_to_array(img)
                X[row] = applications.mobilenet_v2.preprocess_input(img)
            except Exception as e:
                # Best effort: report the failure and keep the row as the
                # all-zero image it was initialised with (its label is kept).
                print(f"Error loading image {path}: {str(e)}")

        return X, y

    def on_epoch_end(self):
        """Reshuffle the sample order between epochs when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indexes)

In [17]:
def analyze_class_distribution(df, label_col):
    """Count how many rows of ``df`` belong to each value of ``label_col``.

    Args:
        df: DataFrame containing the label column.
        label_col: Name of the column holding the class labels.

    Returns:
        pandas.Series: The result of ``value_counts()`` on that column
        (most frequent class first).
    """
    return df[label_col].value_counts()

def filter_rare_classes(df, label_col, min_examples=2):
    """Keep only the rows whose class occurs at least ``min_examples`` times.

    Args:
        df: DataFrame containing the label column.
        label_col: Name of the column holding the class labels.
        min_examples: Minimum number of rows a class needs to be kept.

    Returns:
        pandas.DataFrame: An independent copy of the surviving rows, with the
        original index values preserved.
    """
    counts_per_class = df[label_col].value_counts()
    frequent_classes = counts_per_class.loc[lambda counts: counts >= min_examples].index
    keep_row = df[label_col].isin(frequent_classes)
    return df.loc[keep_row].copy()

def create_model(num_classes, input_shape=(224, 224, 3), dropout_rate=0.2):
    """Build a MobileNetV2-based image classifier with a frozen backbone.

    Args:
        num_classes: Number of output classes (size of the softmax layer).
        input_shape: ``(height, width, channels)`` of the input images. Must
            match the ``img_size`` used by the data generator feeding the model.
        dropout_rate: Dropout rate applied before the classification layer.

    Returns:
        An uncompiled ``Sequential`` model: ImageNet-pretrained MobileNetV2
        (without its top, not trainable) -> global average pooling ->
        dropout -> dense softmax.
    """
    base_model = applications.MobileNetV2(
        weights='imagenet',
        include_top=False,
        input_shape=tuple(input_shape)
    )

    # Freeze the pretrained backbone so only the new head is trained.
    base_model.trainable = False

    model = models.Sequential([
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.Dropout(dropout_rate),
        layers.Dense(num_classes, activation='softmax')
    ])

    return model


In [18]:


def train_model(model, train_generator, validation_generator, epochs=5):
    """Compile ``model`` and fit it, writing checkpoints and logs to ``model_output``.

    Args:
        model: Keras model to train; it is compiled in place.
        train_generator: Training data passed to ``model.fit``.
        validation_generator: Validation data passed to ``model.fit``.
        epochs: Maximum number of epochs.

    Returns:
        The history object returned by ``model.fit``.
    """
    # NOTE(review): the saved cell output logs learning_rate 0.0010, which does
    # not match 0.003 here -- the output may stem from an earlier run.
    adam = keras.optimizers.Adam(learning_rate=0.003)
    model.compile(
        optimizer=adam,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

    # Checkpoints and TensorBoard logs both live under this directory.
    os.makedirs('model_output', exist_ok=True)

    # Keep only the weights with the best validation accuracy on disk.
    checkpoint = keras.callbacks.ModelCheckpoint(
        'model_output/best_model.keras',
        save_best_only=True,
        monitor='val_accuracy'
    )
    # Stop after 3 epochs without val_loss improvement and roll back.
    early_stopping = keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=3,
        restore_best_weights=True
    )
    # Halve the learning rate after 2 stagnant epochs, down to 1e-6.
    reduce_lr = keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=2,
        min_lr=1e-6
    )
    tensorboard = keras.callbacks.TensorBoard(
        log_dir='model_output/logs',
        histogram_freq=1
    )

    return model.fit(
        train_generator,
        validation_data=validation_generator,
        epochs=epochs,
        callbacks=[checkpoint, early_stopping, reduce_lr, tensorboard]
    )



In [19]:
def evaluate_model(model, test_generator):
    """Evaluate ``model`` on ``test_generator``.

    Args:
        model: Object exposing ``evaluate(data)`` that returns exactly two
            values (loss first, accuracy second).
        test_generator: Data handed to ``model.evaluate``.

    Returns:
        tuple: ``(loss, accuracy)``.
    """
    metrics = model.evaluate(test_generator)
    # Unpacking enforces that exactly two metrics came back.
    loss, accuracy = metrics
    return (loss, accuracy)


In [20]:

def plot_training_history(history, output_path='model_output/training_history.png'):
    """Plot training vs. validation accuracy and loss and save the figure.

    Args:
        history: Object whose ``history`` dict holds the per-epoch lists
            ``accuracy``, ``val_accuracy``, ``loss`` and ``val_loss``
            (as returned by ``model.fit``).
        output_path: Where the PNG is written. Missing parent directories
            are created.
    """
    fig, axes = plt.subplots(1, 2, figsize=(12, 4))

    # (history key, panel title, y-axis label) for each of the two panels.
    panels = (
        ('accuracy', 'Model Accuracy', 'Accuracy'),
        ('loss', 'Model Loss', 'Loss'),
    )
    for ax, (metric, title, ylabel) in zip(axes, panels):
        ax.plot(history.history[metric], label='Training')
        ax.plot(history.history[f'val_{metric}'], label='Validation')
        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(ylabel)
        ax.legend()

    fig.tight_layout()

    # Do not rely on train_model() having created the output directory.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    fig.savefig(output_path)

    # Close this specific figure rather than whatever is "current".
    plt.close(fig)

In [None]:

img_dir = r"D:\recommend_product\recommend_product\media"

# Show the class balance; as a bare mid-cell expression the result was
# silently discarded.
print(analyze_class_distribution(df_product, 'articleType'))


def _image_exists(img_name):
    """Return True when ``img_name`` is a string naming an existing file in ``img_dir``."""
    return isinstance(img_name, str) and os.path.isfile(os.path.join(img_dir, img_name))


# Drop products without an image file on disk. Otherwise the data generator
# substitutes an all-zero image while keeping the real label, which feeds
# wrong training examples to the model.
has_image = df_product['image'].map(_image_exists).astype(bool)
print(f"Skipping {int((~has_image).sum())} products without an image file")
df_with_images = df_product[has_image]

# Rare classes are filtered after the image check so that every remaining
# class still has enough examples for the stratified split below.
filtered_df = filter_rare_classes(df_with_images, 'articleType', min_examples=2)

# Map each class name to an integer index (order of first appearance).
label_mapping = {label: idx for idx, label in enumerate(filtered_df['articleType'].unique())}
num_classes = len(label_mapping)
labels = filtered_df['articleType'].map(label_mapping).values

img_paths = [os.path.join(img_dir, img_name) for img_name in filtered_df['image']]

# Stratified 80/20 split so both sets keep the same class proportions.
train_paths, val_paths, train_labels, val_labels = train_test_split(
    img_paths, labels, test_size=0.2, random_state=42, stratify=labels
)

train_generator = CustomDataGenerator(
    train_paths, train_labels, batch_size=32, shuffle=True
)
validation_generator = CustomDataGenerator(
    val_paths, val_labels, batch_size=32, shuffle=False
)

In [None]:




# Build the classifier, train it and plot the learning curves.
model = create_model(num_classes)
history = train_model(model, train_generator, validation_generator)
plot_training_history(history)

# Persist the trained model next to the checkpoints.
model.save('model_output/final_model.keras')

# Store the class-name -> index mapping so predictions can be decoded later.
label_mapping_df = pd.DataFrame(
    {
        'class': list(label_mapping.keys()),
        'index': list(label_mapping.values()),
    }
)
label_mapping_df.to_csv('model_output/label_mapping.csv', index=False)

# show SVM 



  self._warn_if_super_not_called()


Epoch 1/5
[1m1111/1111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m685s[0m 614ms/step - accuracy: 0.6329 - loss: 1.4726 - val_accuracy: 0.8010 - val_loss: 0.6604 - learning_rate: 0.0010
Epoch 2/5
[1m1111/1111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m452s[0m 407ms/step - accuracy: 0.8110 - loss: 0.5977 - val_accuracy: 0.8162 - val_loss: 0.6000 - learning_rate: 0.0010
Epoch 3/5
[1m1111/1111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m451s[0m 406ms/step - accuracy: 0.8406 - loss: 0.4861 - val_accuracy: 0.8237 - val_loss: 0.5814 - learning_rate: 0.0010
Epoch 4/5
[1m1111/1111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m450s[0m 405ms/step - accuracy: 0.8539 - loss: 0.4333 - val_accuracy: 0.8262 - val_loss: 0.5812 - learning_rate: 0.0010
Epoch 5/5
[1m1111/1111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m450s[0m 406ms/step - accuracy: 0.8606 - loss: 0.4111 - val_accuracy: 0.8326 - val_loss: 0.5701 - learning_rate: 0.0010


In [1]:

from tensorflow.keras.preprocessing.image import load_img, img_to_array
import pandas as pd
import numpy as np

# `load_model` was used without ever being imported, so this cell raised a
# NameError on a fresh kernel.
from tensorflow.keras.models import load_model

model = load_model('model_output/final_model.keras')

# Rebuild the index -> class-name lookup written at training time.
label_mapping_df = pd.read_csv('model_output/label_mapping.csv')
label_mapping = dict(zip(label_mapping_df['index'], label_mapping_df['class']))

def predict_image(image_path, model, label_mapping):
    """Predict the class name of a single image.

    Args:
        image_path: Path of the image file to classify.
        model: Trained model exposing ``predict``; expects 224x224 RGB input.
        label_mapping: Dict mapping the predicted class index to its name.

    Returns:
        The entry of ``label_mapping`` for the highest-scoring class.
    """
    # Imported locally so the function also works in a fresh kernel where the
    # training cells (and their imports) have not been run.
    from tensorflow.keras.applications.mobilenet_v2 import preprocess_input

    img = load_img(image_path, target_size=(224, 224))
    img_array = img_to_array(img)
    # Use the same preprocessing as the training generator. Training fed the
    # network MobileNetV2's preprocess_input output, so scaling with /255.0
    # here gave the model inputs in a range it was never trained on.
    img_array = preprocess_input(img_array)
    img_array = np.expand_dims(img_array, axis=0)

    predictions = model.predict(img_array)
    predicted_index = int(np.argmax(predictions, axis=1)[0])
    predicted_class = label_mapping[predicted_index]

    return predicted_class


# Classify one sample image with the reloaded model and report the result.
image_path = r'D:\recommend_product\recommend_product\recommend_model\image.png'
predicted_class = predict_image(image_path, model, label_mapping)

print(f"Predicted class: {predicted_class}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 862ms/step
Predicted class: Blazers
