<a href="https://colab.research.google.com/github/vkrisvasan/FashionAssist/blob/main/RecommendApparel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# GitHub account: vkrisvasan  Repo: FashionAssist
# Simple fashion assistant. Prerequisites:
  #1. Create a project in Google Vertex AI https://console.cloud.google.com/vertex-ai [note the project ID]
  #2. Create an Astra database account https://astra.datastax.com/ [create a database and note the API endpoint and application token]
  #3. Create a Hugging Face (HF) account [and note the API key: Profile > Settings > Access Tokens]

In [None]:
!pip install google-cloud-aiplatform ragstack-ai --upgrade


In [None]:
import pandas as pd
import numpy as np
import getpass, os, requests, sys, json, vertexai

In [None]:
# Ask (without echoing the input) for every credential that is not already
# present in the process environment; existing values are left untouched.
credential_names = ["GCP_PROJECT_ID", "ASTRA_DB_ENDPOINT", "ASTRA_DB_TOKEN","HF_TOKEN"]
for credential in credential_names:
  if credential in os.environ:
    continue  # already provided, nothing to ask
  os.environ[credential] = getpass.getpass(f"Provide your...{credential}")

In [None]:
from google.colab import userdata, auth, files, drive
from google.cloud import aiplatform

# Point the gcloud CLI at the project id entered above; IPython substitutes the
# {...} expression into the shell command before running it.
!gcloud config set project {os.environ["GCP_PROJECT_ID"]}
# Authenticate the current Colab user via google.colab.auth.
auth.authenticate_user()


Updated property [core/project].


In [None]:
from datasets import load_dataset
# Load the 'train' split of the "vkrisvasan/zara_embedding" dataset.
dataset = load_dataset("vkrisvasan/zara_embedding",split='train')


In [None]:
# Convert the loaded dataset into a pandas DataFrame for row-wise processing.
df = pd.DataFrame.from_dict(dataset)

In [None]:
# Add an empty 'embeddings1' column; None marks rows that have no freshly computed image embedding yet.
# Note: re-running this cell resets the column to None for every row.
df['embeddings1'] = None

In [None]:
# (rows, columns) of the product DataFrame.
df.shape

(936, 9)

In [None]:
# Preview the first two rows.
df.head(2)

Unnamed: 0,product_name,link,product_images,price,details,category,gender,embeddings,embeddings1
0,WORKOUT T-SHIRT,https://www.zara.com/in/en/workout-t-shirt-p05...,https://static.zara.net/photos///2023/I/0/2/p/...,32.06,T-shirt made of lightweight textured stretch f...,ACTIVEWEAR,men,"[0.00584839704, 0.00366304931, 0.0416106768, 0...",
1,SPORTS BACKPACK,https://www.zara.com/in/en/sports-backpack-p13...,https://static.zara.net/photos///2023/I/1/2/p/...,64.26,Multi-pocket backpack. Combination of material...,ACTIVEWEAR,men,"[0.00701390672, 0.00680825347, 0.0142998686, 0...",


In [None]:
# Column dtypes of the product DataFrame.
df.dtypes

product_name       object
link               object
product_images     object
price             float64
details            object
category           object
gender             object
embeddings         object
embeddings1        object
dtype: object

In [None]:
import io
from vertexai.preview.vision_models import MultiModalEmbeddingModel, Image
# Initialise the Vertex AI SDK for the project id held in GCP_PROJECT_ID,
# then load the "multimodalembedding@001" model used by the embedding helpers.
vertexai.init(project=os.getenv("GCP_PROJECT_ID"))
model = MultiModalEmbeddingModel.from_pretrained("multimodalembedding@001")

#Takes an image source (URL or local image file) and returns it as an Image
def get_image(image_source, timeout=30):
  """Load an image from an http(s) URL or from a local file path.

  Args:
    image_source: string; anything starting with "http://" or "https://" is
      downloaded, everything else is opened as a local file.
    timeout: seconds to wait for the HTTP server before giving up.

  Returns:
    An Image built from the raw bytes, or None when the source is not a
    non-empty string or the image cannot be loaded. Both branches report
    failure the same way (message printed, None returned) so callers only
    need a single None check.
  """
  if not isinstance(image_source, str) or not image_source:
    print(f"error loading image: invalid image source {image_source!r}")
    return None
  try:
    if image_source.startswith(("http://", "https://")):
      response = requests.get(image_source, timeout=timeout)
      # Without this check an HTTP error page would be wrapped as if it were image data.
      response.raise_for_status()
      image_bytes = response.content
    else:
      print(f" ready to load file {image_source}")
      with open(image_source, "rb") as f:
        image_bytes = f.read()
    return Image(image_bytes)
  except Exception as error:
    print(f"error loading image from {image_source}: {error}" )
    return None

#Gets image embeddings from the Vertex AI multimodal embedding model loaded as `model`
def get_img_embeddings(img, text=""):
  """Return the `image_embedding` that the module-level `model` produces for `img`.

  `text` is forwarded to the model unchanged as `contextual_text`.
  """
  response = model.get_embeddings(contextual_text=text, image=img)
  return response.image_embedding

In [None]:

numberofproductwithembedding=0
numberofproductwithoutembedding=0
# Iterate over the rows of the DataFrame and fill the embeddings1 column with the image embedding.
# Only the first 30 rows are processed to quickly test embedding creation: higher numbers have
# produced 400 errors that still need debugging. Failures are printed below to help with that.
# The limit is capped at len(df) so a smaller frame does not raise a KeyError.
dflen=min(30, len(df))
#dflen=len(df)
for i in range(dflen):
  # Rows that already hold an embedding are counted and skipped, so the cell can be re-run to resume.
  if df.loc[i, 'embeddings1'] is not None:
    numberofproductwithembedding+=1
    continue
  try:
    image = get_image(df.loc[i, 'product_images'])
    if image is None:
      # get_image reports a load failure by returning None; count it like any other failure.
      raise ValueError("image could not be loaded")
    embeddings1 = get_img_embeddings(image)
    df.at[i, 'embeddings1'] = embeddings1
    numberofproductwithembedding+=1
  except Exception as e:
    # Report the failure instead of hiding it; the row stays None and is retried on the next run.
    print(f"Error processing image from record #{i}: {df.loc[i, 'product_images']}: {e}")
    numberofproductwithoutembedding+=1
print("Product with Embedding",numberofproductwithembedding)
print("Product without Embedding",numberofproductwithoutembedding)

Product with Embedding 30
Product without Embedding 0


In [None]:

# Decode the JSON-encoded 'embeddings' strings into Python lists.
# Values that are no longer strings are left untouched, so re-running this cell
# does not fail with a TypeError on already-decoded lists.
df['embeddings']=df['embeddings'].apply(lambda value: json.loads(value) if isinstance(value, str) else value)

In [None]:
from astrapy.db import AstraDB, AstraDBCollection

# Astra DB client; the token and API endpoint are read from the environment variables collected earlier.
astra_db = AstraDB(token=os.getenv("ASTRA_DB_TOKEN"),api_endpoint=os.getenv("ASTRA_DB_ENDPOINT"))

In [None]:
# Create the vector collection used for similarity search (cosine metric).
# Set the dimension to match the embedding model's output length, otherwise
# inserts fail with the error "SHRED_DOC_LIMIT_VIOLATION".
# NOTE(review): 1408 is assumed to be the length of the vectors stored in df['embeddings'] - confirm.
collection = astra_db.create_collection(collection_name="fashion_buddy_workshop_collection",dimension=1408,metric="cosine")
print(f"* Collection: {collection.collection_name}\n")

* Collection: fashion_buddy_workshop_collection



In [None]:
from ipywidgets import IntProgress
from IPython.display import display
import urllib.request

def load_to_astra(df, collection, dflength=None):
  """Insert the first `dflength` rows of `df` into `collection`, one document per row.

  Args:
    df: DataFrame indexed 0..n-1 with the columns product_name, link,
      product_images, price, details, category, gender and embeddings.
    collection: object exposing `insert_one(document)`.
    dflength: number of leading rows to load. Defaults to every row and is
      capped at len(df) so an oversized value cannot raise a KeyError.

  Each document uses the row number as "_id" and the row's `embeddings` value
  as "$vector". A failed insert is counted and does not stop the load; the
  error count and the most recent error are printed at the end.
  """
  len_df = len(df) if dflength is None else min(dflength, len(df))
  errorinfocount=0
  error_info=""
  f=IntProgress(min=0,max=len_df)
  display(f)
  for i in range(len_df):
    f.value += 1
    f.description = str(f.value) + "/" + str(len_df)

    document = {
        "_id": i,
        "product_name": df.loc[i, "product_name"],
        "link": df.loc[i, "link"],
        "product_images": df.loc[i, "product_images"],
        "price": df.loc[i, "price"],
        "details": df.loc[i, "details"],
        "category": df.loc[i, "category"],
        "gender": df.loc[i, "gender"],
        "$vector": df.loc[i, "embeddings"], #Keep the key name as $vector to avoid SHRED_DOC_LIMIT_VIOLATION error
    }

    try:
      collection.insert_one(document)
    except Exception as error:
      errorinfocount+=1
      try:
        error_info = json.loads(str(error))
      except ValueError:
        # Not every exception message is JSON; keep the raw text instead of
        # letting the decode error abort the remaining inserts.
        error_info = str(error)

  print(f"total errors : {errorinfocount} : {error_info}")


In [None]:
# Load every row of the DataFrame into the Astra collection.
dflength =  len(df)
load_to_astra(df,collection,dflength)


IntProgress(value=0, max=936)

total errors : 0 : 


In [None]:
#Prints the recommendations returned by Astra Vector Search, optionally under a category heading
def show_recommendations(documents, category=None):
  """Print every document in `documents`, numbered from 1.

  When `category` is truthy, a blank line and a "Category:" heading are printed
  first. Each document must provide the keys product_name, price, gender,
  details, link, $similarity and product_images.
  """
  if category:
    print()
    print("Category:", category)
  for rank, item in enumerate(documents, start=1):
    print(f"\nRecommendation : {rank}")
    print(item["product_name"], "$" + str(item["price"]), ", Gender:", item["gender"])
    print(item["details"])
    print(item["link"])
    print("Similarity score: ", item["$similarity"])
    print(item["product_images"])

#Finds the items similar to the given reference image, optionally restricted to the given categories
def find_similar_items(reference_image, categories=None):
  """Print the top 3 vector-search matches for a reference image.

  Args:
    reference_image: URL or local file path, loaded through get_image.
    categories: optional list of category names (a single string is treated
      as a one-element list). When given, one filtered search is run per
      category; otherwise a single search runs across the whole collection.

  Returns:
    None. Results are printed by show_recommendations. If the reference image
    cannot be loaded, a message is printed and no search is run.
  """
  reference_img = get_image(reference_image)
  if reference_img is None:
    # get_image signals a load failure with None; embedding None would only fail later with a less helpful error.
    print(f"Could not load reference image {reference_image}; no recommendations generated.")
    return
  if isinstance(categories, str):
    # A bare string would otherwise be iterated character by character.
    categories = [categories]
  print("Selected Categories:", ", ".join(categories) if categories else "None")
  # NOTE(review): the prompts below are forwarded as contextual_text while only image_embedding
  # is used for the search - confirm in the model documentation that the text influences it.
  if categories:
    for category in categories:
      # Run one ANN search per requested category to pull its top 3 matches
      search_prompt= """
      I am trying to find pieces of apparel that are similar to what is in thIS picture.
      Ignore the-model and only focus on finding the most similar clothing.

      I only care about apparel that falls within the category contained within triple backticks:
      '''{category}'''
      """.format(category=category)
      reference_embeddings = get_img_embeddings(reference_img, text=search_prompt)
      #Run an ANN search filtering by apparel category; include_similarity is a boolean flag, not a string
      documents = collection.vector_find(reference_embeddings,limit=3,filter={"category": category},include_similarity=True)
      show_recommendations(documents, category=category)
  else:
    search_prompt = """
      I am trying to find pieces of apparel that are similar to what is in this picture.
      Pretend as if there is no model in the image, only clothing.
      """
    reference_embeddings = get_img_embeddings(reference_img, text=search_prompt)
    #Run single ANN search across entire DB; show_recommendations needs the "$similarity" field
    documents = collection.vector_find(reference_embeddings,limit=3,include_similarity=True)
    show_recommendations(documents)


In [None]:
from google.colab import files
# Upload the reference image(s) from the local machine into the Colab runtime.
uploaded = files.upload()

Saving zaratrouser1.jpeg to zaratrouser1.jpeg


In [None]:
# Path of the reference image to search with; keep exactly one assignment uncommented.
# NOTE(review): the selected file must already exist in the runtime. The upload cell's recorded
# output shows "zaratrouser1.jpeg", which differs from the file selected here - confirm the
# chosen file is uploaded before running on a fresh runtime.
#reference_image = "/content/sample_image1.jpeg"

#reference_image = "/content/zaratrouser1.jpeg"
reference_image = "/content/zara_shirt1.jpeg"
#reference_image = "/content/armani1.jpeg"
#reference_image = "/content/myntrashirt1.jpeg"
#reference_image = "/content/myntrapant1.jpg"
#reference_image = "/content/zara_workout1.jpeg"


In [None]:
# Echo the chosen file, then run a search without a category filter and print the matches.
print(f"The file being considered for recommendation is : {reference_image}")
find_similar_items(reference_image)

The file being considered for recommendation is : /content/zara_shirt1.jpeg
 ready to load file /content/zara_shirt1.jpeg
Selected Categories: None

Recommendation : 1
COTTON SHIRT $46.06 , Gender: men
Relaxed fit collared shirt made of a cotton fabric. Short sleeves. Side vents at the hem. Button-up front.
https://www.zara.com/in/en/cotton-shirt-p07545275.html
Similarity score:  0.88434076
https://static.zara.net/photos///2023/V/0/2/p/7545/275/712/2/w/448/7545275712_1_1_1.jpg?ts=1678983432463

Recommendation : 2
CREASED-EFFECT SHIRT $41.86 , Gender: men
Relaxed fit shirt made of cotton fabric. Stand collar. Long sleeves. Button-up front.
https://www.zara.com/in/en/creased-effect-shirt-p06103480.html
Similarity score:  0.87546265
https://static.zara.net/photos///2023/V/0/2/p/6103/480/800/2/w/448/6103480800_1_1_1.jpg?ts=1679654173740

Recommendation : 3
TEXTURED SHIRT $46.06 , Gender: men
Relaxed fit shirt with a camp collar and short sleeves. Ribbed trim at the hem and a button-up fron

In [None]:
# Record the Python version of the runtime this notebook was executed on.
!python --version

Python 3.10.12


In [None]:
#to list all python packages installed with version
#!pip list