In [1]:
pip install pandas numpy scikit-learn tensorflow keras opencv-python matplotlib flask


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.3.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import pandas as pd

# Read the styles CSV. Fields are delimited with double quotes, and rows the
# parser cannot handle are skipped rather than raising an error.
data = pd.read_csv(
    r"C:/Users/admin/Downloads/dataset/Reduced_styles.csv",
    on_bad_lines='skip',
    quotechar='"',
)

# Preview the first rows to check the columns that were parsed
print(data.head())


   Unnamed: 0     id gender masterCategory subCategory  articleType  \
0           0  15970    Men        Apparel     Topwear       Shirts   
1           1  39386    Men        Apparel  Bottomwear        Jeans   
2           3  21379    Men        Apparel  Bottomwear  Track Pants   
3           4  53759    Men        Apparel     Topwear      Tshirts   
4           5   1855    Men        Apparel     Topwear      Tshirts   

  baseColour  season   usage                             productDisplayName  
0       Blue    Fall  Casual               Turtle Check Men Navy Blue Shirt  
1       Blue  Summer  Casual             Peter England Men Party Blue Jeans  
2      Black    Fall  Casual  Manchester United Men Solid Black Track Pants  
3       Grey  Summer  Casual                          Puma Men Grey T-shirt  
4       Grey  Summer  Casual           Inkfruit Mens Chain Reaction T-shirt  


In [5]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing import image
import numpy as np
import os
import pickle

# ResNet50 with ImageNet weights used as a fixed feature extractor:
# include_top=False omits the classification head, and pooling="avg" applies
# global average pooling so each image yields a single feature vector.
model = ResNet50(
    include_top=False,
    pooling="avg",
    weights="imagenet",
)

def extract_features(img_path, model):
    """Return the flattened prediction of ``model`` for a single image.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis, passed through ResNet50's ``preprocess_input`` and then fed
    to ``model.predict``.

    Args:
        img_path: Path of the image file to load.
        model: Object exposing ``predict``; it is called with a one-image batch.

    Returns:
        The prediction flattened to a 1-D array.
    """
    pil_img = image.load_img(img_path, target_size=(224, 224))

    # Add the batch dimension the model expects: (H, W, C) -> (1, H, W, C)
    batch = image.img_to_array(pil_img)[np.newaxis, ...]
    batch = preprocess_input(batch)

    return model.predict(batch).flatten()

# Path to your dataset images
image_folder = 'C:/Users/admin/Downloads/dataset/reduced_images'

# File extensions treated as images. os.listdir also returns sub-directories and
# stray files (e.g. Thumbs.db on Windows) that load_img cannot open, so anything
# else is skipped instead of crashing the whole extraction run.
# NOTE(review): extend this tuple if the dataset contains other image formats.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.webp')

# sorted() makes the processing order reproducible; os.listdir order is arbitrary.
image_files = [
    os.path.join(image_folder, file)
    for file in sorted(os.listdir(image_folder))
    if file.lower().endswith(IMAGE_EXTENSIONS)
    and os.path.isfile(os.path.join(image_folder, file))
]

# Extract features for all images, keyed by the image's path
image_features = {}
for img_path in image_files:
    features = extract_features(img_path, model)
    image_features[img_path] = features

print("Extracted features for all images.")

# Save the features to a pickle file so they can be reloaded without re-running
# the model (only unpickle files you created yourself: pickle can execute code).
with open('image_features.pkl', 'wb') as f:
    pickle.dump(image_features, f)

Extracted features for all images.


In [9]:
from sklearn.metrics.pairwise import cosine_similarity

def get_similar_images(liked_image_features, all_image_features, top_n=5):
    """Rank stored images by cosine similarity to a liked image.

    Args:
        liked_image_features: Feature vector of the liked image; it is
            flattened into a single row before comparison.
        all_image_features: Mapping of image path -> feature vector. Each
            vector must flatten to the same length as ``liked_image_features``.
        top_n: Maximum number of results to return. Values <= 0 give an
            empty list.

    Returns:
        A list of ``(img_path, similarity)`` tuples sorted by similarity in
        descending order, at most ``top_n`` long. The liked image is not
        filtered out, so if it is present in ``all_image_features`` it is
        ranked like any other entry.
    """
    # Nothing to compare against: keep returning an empty ranking
    if not all_image_features:
        return []

    # Convert liked image features into 2D array for similarity computation
    liked_image_features = np.ravel(liked_image_features).reshape(1, -1)

    # Stack every stored vector into one (n_images, n_features) matrix so the
    # query is validated and normalised once instead of once per image.
    img_paths = list(all_image_features)
    feature_matrix = np.vstack([np.ravel(all_image_features[path]) for path in img_paths])

    # Result has shape (1, n_images); row 0 holds the score of each stored image
    scores = cosine_similarity(liked_image_features, feature_matrix)[0]

    # Sort images by similarity score in descending order
    sorted_images = sorted(zip(img_paths, scores), key=lambda x: x[1], reverse=True)

    # Return top N most similar images (a negative top_n must not slice from the end)
    return sorted_images[:max(top_n, 0)]

# Demo query: rank every stored image against one image the user liked
liked_image_path = 'C:/Users/admin/Downloads/dataset/reduced_images/1163.jpg'
liked_image_features = extract_features(liked_image_path, model)

# NOTE: the liked image is itself part of image_features, so it is returned
# as the first hit with a similarity of ~1.0.
similar_images = get_similar_images(
    liked_image_features,
    all_image_features=image_features,
    top_n=5,
)

# Report each match together with its similarity score
for img, score in similar_images:
    print(f"Image: {img}, Similarity Score: {score}")


Image: C:/Users/admin/Downloads/dataset/reduced_images\1163.jpg, Similarity Score: 1.0000001192092896
Image: C:/Users/admin/Downloads/dataset/reduced_images\13891.jpg, Similarity Score: 0.9171352386474609
Image: C:/Users/admin/Downloads/dataset/reduced_images\1164.jpg, Similarity Score: 0.9167729616165161
Image: C:/Users/admin/Downloads/dataset/reduced_images\13892.jpg, Similarity Score: 0.9059172868728638
Image: C:/Users/admin/Downloads/dataset/reduced_images\43047.jpg, Similarity Score: 0.883743405342102
