In [2]:
import sqlite3
import pandas as pd

db_file = r'D:\recomments_product\recomment_product\db.sqlite3'

# Connect to the SQLite database
connection = sqlite3.connect(db_file)

# Create a cursor object
cursor = connection.cursor()
def fetch_table_as_df(cursor, table_name):
    query = f"SELECT * FROM {table_name}"
    cursor.execute(query)
    rows = cursor.fetchall()
    columns = [desc[0] for desc in cursor.description]  
    df = pd.DataFrame(rows, columns=columns)
    df.drop(df.columns[0], axis=1, inplace=True)
    
    return df

# Fetch data
df_product = fetch_table_as_df(cursor, "shop_product")
df_user = fetch_table_as_df(cursor, "auth_user")
df_rating = fetch_table_as_df(cursor, "shop_rating")
df_search_history = fetch_table_as_df(cursor, "shop_searchhistory")
df_view_history = fetch_table_as_df(cursor, "shop_viewhistory")

# Close connection
connection.close()



# Search product by Image

1. preprocessing data

In [3]:

# Embedding data

# Data for task process
# df_product 
# df_user 
# df_search_history



##############################################################################################################
#############################################################################################################
# View data 
print("Product table!")
print(df_product.head())
print("User table!")
print(df_user.head())
print("Search history table!")
print(df_search_history.head())


Product table!
       image  price articleNumber articleType  \
0  10000.jpg  834.0    5410480144      Skirts   
1  10001.jpg  834.0    5410480144      Skirts   
2  10002.jpg  834.0    5410480144      Skirts   
3  10003.jpg  834.0    5410480144     Tshirts   
4  10004.jpg  834.0    5410480144      Shorts   

                             productDisplayName masterCategory subCategory  \
0      Palm Tree Girls Sp Jace Sko White Skirts        Apparel  Bottomwear   
1  Palm Tree Kids Girls Sp Jema Skt Blue Skirts        Apparel  Bottomwear   
2        Palm Tree Kids Sp Jema Skt Blue Skirts        Apparel  Bottomwear   
3        Nike Women As Nike Eleme White T-Shirt        Apparel     Topwear   
4             Nike Men As 7 Sw Temp Grey Shorts        Apparel  Bottomwear   

  gender baseColour fashionType  season    year    usag  
0  Women      White     Fashion  Summer  2011.0  Casual  
1  Women       Blue     Fashion  Summer  2011.0  Casual  
2  Women       Blue     Fashion  Summer  2011.0

# Check product table 


In [4]:
df_product.head()

articleType = df_product['articleType'].value_counts()
gender = df_product['gender'].value_counts()
baseColour = df_product['baseColour'].value_counts()
fashionType = df_product['fashionType'].value_counts()
season = df_product['season'].value_counts()


print("acticleType")
print(articleType)
print("gender")
print(gender)
print("baseColour")
print(baseColour)
print("fashionType")
print(fashionType)
print("season")
print(season)


acticleType
articleType
Tshirts                7067
Shirts                 3217
Casual Shoes           2845
Watches                2542
Sports Shoes           2036
                       ... 
Ipad                      1
Body Wash and Scrub       1
Mens Grooming Kit         1
Hair Accessory            1
Shoe Laces                1
Name: count, Length: 143, dtype: int64
gender
gender
Men       22147
Women     18631
Unisex     2161
Boys        830
Girls       655
Name: count, dtype: int64
baseColour
baseColour
Black                9728
White                5538
Blue                 4918
Brown                3494
Grey                 2741
Red                  2455
Green                2115
Pink                 1860
Navy Blue            1789
Purple               1640
Silver               1090
Yellow                778
Beige                 749
Gold                  628
Maroon                581
Orange                530
Olive                 410
Multi                 394
Cream              

# Classificaion model

In [5]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, applications
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [7]:


# Enable GPU memory growth to prevent TF from taking all memory
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

class CustomDataGenerator(keras.utils.Sequence):
    def __init__(self, img_paths, labels, batch_size=32, img_size=(224, 224), shuffle=True):
        self.img_paths = img_paths
        self.labels = labels
        self.batch_size = batch_size
        self.img_size = img_size
        self.shuffle = shuffle
        self.indexes = np.arange(len(self.img_paths))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __len__(self):
        return int(np.ceil(len(self.img_paths) / self.batch_size))

    def __getitem__(self, idx):
        batch_indexes = self.indexes[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_paths = [self.img_paths[i] for i in batch_indexes]
        
        X = np.zeros((len(batch_indexes), self.img_size[0], self.img_size[1], 3))
        y = np.array([self.labels[i] for i in batch_indexes])
        
        for i, path in enumerate(batch_paths):
            try:
                img = keras.preprocessing.image.load_img(path, target_size=self.img_size)
                img = keras.preprocessing.image.img_to_array(img)
                img = applications.mobilenet_v2.preprocess_input(img)
                X[i] = img
            except Exception as e:
                print(f"Error loading image {path}: {str(e)}")
                # Fill with zeros if image loading fails
                X[i] = np.zeros(self.img_size + (3,))
            
        return X, y

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indexes)

def analyze_class_distribution(df, label_col):
    """Analyze class distribution and print statistics"""
    class_counts = df[label_col].value_counts()
    # print("\nClass Distribution Analysis:")
    # print(f"Total number of classes: {len(class_counts)}")
    # print(f"Classes with only one example: {sum(class_counts == 1)}")
    # print("\nClass counts:")
    # print(class_counts)
    return class_counts

def filter_rare_classes(df, label_col, min_examples=2):
    """Filter out classes with fewer than min_examples"""
    class_counts = df[label_col].value_counts()
    valid_classes = class_counts[class_counts >= min_examples].index
    filtered_df = df[df[label_col].isin(valid_classes)].copy()
    
    # print(f"\nFiltering results:")
    # print(f"Original number of samples: {len(df)}")
    # print(f"Filtered number of samples: {len(filtered_df)}")
    # print(f"Original number of classes: {len(class_counts)}")
    # print(f"Filtered number of classes: {len(valid_classes)}")
    
    return filtered_df

def create_model(num_classes):
    base_model = applications.MobileNetV2(
        weights='imagenet',
        include_top=False,
        input_shape=(224, 224, 3)
    )
    
    base_model.trainable = False
    
    model = models.Sequential([
        base_model,
        layers.GlobalAveragePooling2D(),
        layers.Dropout(0.2),
        layers.Dense(num_classes, activation='softmax')
    ])
    
    return model

def train_model(model, train_generator, validation_generator, epochs=5):
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    
    os.makedirs('model_output', exist_ok=True)
    
    callbacks = [
        keras.callbacks.ModelCheckpoint(
            'model_output/best_model.keras',  
            save_best_only=True,
            monitor='val_accuracy'
        ),
        keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=3,
            restore_best_weights=True
        ),
        keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=2,
            min_lr=1e-6
        ),
        keras.callbacks.TensorBoard(
            log_dir='model_output/logs',
            histogram_freq=1
        )
    ]
    
    history = model.fit(
        train_generator,
        validation_data=validation_generator,
        epochs=epochs,
        callbacks=callbacks
    )
    
    return history

def plot_training_history(history):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
    
    ax1.plot(history.history['accuracy'], label='Training')
    ax1.plot(history.history['val_accuracy'], label='Validation')
    ax1.set_title('Model Accuracy')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Accuracy')
    ax1.legend()
    
    ax2.plot(history.history['loss'], label='Training')
    ax2.plot(history.history['val_loss'], label='Validation')
    ax2.set_title('Model Loss')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Loss')
    ax2.legend()
    
    plt.tight_layout()
    plt.savefig('model_output/training_history.png')
    plt.close()


In [8]:

def main():
    img_dir = r"D:\recomments_product\recomment_product\media"
    
    analyze_class_distribution(df_product, 'articleType')
    
    filtered_df = filter_rare_classes(df_product, 'articleType', min_examples=2)
    
    label_mapping = {label: idx for idx, label in enumerate(filtered_df['articleType'].unique())}
    num_classes = len(label_mapping)
    labels = filtered_df['articleType'].map(label_mapping).values
    
    img_paths = [os.path.join(img_dir, img_name) for img_name in filtered_df['image']]
    
    train_paths, val_paths, train_labels, val_labels = train_test_split(
        img_paths, labels, test_size=0.2, random_state=42, stratify=labels
    )
    
    train_generator = CustomDataGenerator(
        train_paths, train_labels, batch_size=32, shuffle=True
    )
    validation_generator = CustomDataGenerator(
        val_paths, val_labels, batch_size=32, shuffle=False
    )
    
    
    
    model = create_model(num_classes)
    history = train_model(model, train_generator, validation_generator)
    
    plot_training_history(history)
    
    model.save('model_output/final_model.keras')  
    
    label_mapping_df = pd.DataFrame(list(label_mapping.items()), columns=['class', 'index'])
    label_mapping_df.to_csv('model_output/label_mapping.csv', index=False)

if __name__ == "__main__":
    main()

  self._warn_if_super_not_called()


Epoch 1/5
[1m 411/1111[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m5:21[0m 460ms/step - accuracy: 0.5203 - loss: 2.0905Error loading image D:\recomments_product\recomment_product\media\39425.jpg: [Errno 2] No such file or directory: 'D:\\recomments_product\\recomment_product\\media\\39425.jpg'
[1m 481/1111[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m4:49[0m 459ms/step - accuracy: 0.5407 - loss: 1.9816

KeyboardInterrupt: 