In [7]:
from pymongo import MongoClient
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec
from uuid import uuid4
from dotenv import load_dotenv
from langchain.schema import Document 
from langchain_huggingface import HuggingFaceEndpoint
import os
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser


### Setting Up API Keys


In [3]:


# env_content = f"""
# HUGGINGFACE_API_KEY={HUGGINGFACE_API_KEY}
# PINECONE_API_KEY={PINECONE_API_KEY}
# """

# with open(".env", "w") as file:
#     file.write(env_content)

# print("Environment variables are saved to .env file.")

Environment variables are saved to .env file.


### Loading the Environment File

In [8]:
import dotenv

dotenv.load_dotenv()

MONGODB_URI = os.getenv("MONGODB_URI")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
PINECONE_API_KEY



'pcsk_iAgUU_FXUSfemuBAKgQTBG1eKLxZyoxA9RfUMgdpQJNkF8H1dYSaQtRbRAauDzviDsQ8w'


### MongoDB and Pinecone Setup for Embedding Storage and Querying ###
Connecting to MongoDB and Pinecone, we check if a specified Pinecone index exists, and create it if not. We then initialize an embedding model using Hugging Face and set up a vector store with Pinecone for storing and querying product embeddings.

In [10]:
# Load environment variables from .env file
load_dotenv()

# MongoDB connection setup
MONGODB_URI = os.getenv("MONGODB_URI")

# Connect to MongoDB
try:
    client = MongoClient(MONGODB_URI)
    # Ping the database to check if the connection is successful
    client.admin.command('ping')
    print("Connected to MongoDB successfully!")
except Exception as e:
    print("Failed to connect to MongoDB:", e)
    exit(1)

# Select the database and collection
db = client["test"]
collection = db["productdata"]



# Pinecone connection
pc = Pinecone("pcsk_iAgUU_FXUSfemuBAKgQTBG1eKLxZyoxA9RfUMgdpQJNkF8H1dYSaQtRbRAauDzviDsQ8w")

# Define Pinecone index name
index_name = "llms-project"

# Check if index exists, else create it
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )

# Initialize embedding model and vector store
embeddings = HuggingFaceEmbeddings()  # This should default to a model like "sentence-transformers/all-MiniLM-L6-v2"
index = pc.Index(index_name)
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

Connected to MongoDB successfully!


### Adding Product Embeddings to Pinecone and Updating MongoDB ###
This code iterates through products in the MongoDB collection, checks if embeddings have already been created for each product, and processes only those without embeddings. It generates embeddings using a detailed product description, adds them to the Pinecone vector store with a unique ID, and updates MongoDB to mark the product as processed.

In [12]:
products = collection.find()

for product in products:

    if product:
        # Check if the "Embeddings Created" flag exists and is set to True
        detailed_description = product.get("detailed_description", "")
        
        if not detailed_description:
            print("Product does not have a detailed description.")
        else:
            # Create a Document with the necessary fields
            document = Document(
                page_content=detailed_description,
                metadata={
                    "mongo_id": str(product["_id"]),
                    "description": detailed_description
                }
            )

            # Generate unique ID for Pinecone
            unique_id = str(uuid4())

            # Add the document to Pinecone
            vector_store.add_documents(documents=[document], ids=[unique_id])

            # Update MongoDB to mark embeddings as created
            collection.update_one(
                {"_id": product["_id"]},
                {"$set": {"Embeddings Created": True}}
            )

            print(f"Product with MongoDB ID {product['_id']} added to Pinecone with embedding.")
    else:
         print("No product found in the database.")

Product with MongoDB ID 676a38ac63fe6ff887c1d633 added to Pinecone with embedding.
Product with MongoDB ID 676a38ac63fe6ff887c1d634 added to Pinecone with embedding.
Product with MongoDB ID 676a38ac63fe6ff887c1d635 added to Pinecone with embedding.
Product with MongoDB ID 676a38ac63fe6ff887c1d636 added to Pinecone with embedding.
Product with MongoDB ID 676a38ac63fe6ff887c1d637 added to Pinecone with embedding.
Product with MongoDB ID 676a38ac63fe6ff887c1d638 added to Pinecone with embedding.
Product with MongoDB ID 676a38ac63fe6ff887c1d639 added to Pinecone with embedding.
Product with MongoDB ID 676a38ac63fe6ff887c1d63a added to Pinecone with embedding.
Product with MongoDB ID 676a38ac63fe6ff887c1d63b added to Pinecone with embedding.
Product with MongoDB ID 676a38ac63fe6ff887c1d63c added to Pinecone with embedding.
Product with MongoDB ID 676a38ac63fe6ff887c1d63d added to Pinecone with embedding.
Product with MongoDB ID 676a38ac63fe6ff887c1d63e added to Pinecone with embedding.
Prod

### Setting up the HuggingFace Model
To provide the LLM, we'll use HuggingFaceHub. HuggingFaceHub is a platform we can connect to and call the model without having to deploy it on our machine. We just define the ID of the model we want to use. In this case, it’s `mistralai/Mixtral-8x7B-Instruct-v0.1`.

In [13]:
# os.environ["HUGGINGFACE_API_KEY"] = "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"

# repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
# llm = HuggingFaceEndpoint(
#     repo_id=repo_id,
#     temperature=0.8,
#     top_k=50,
#     huggingfacehub_api_token=os.getenv('HUGGINGFACE_API_KEY')
# )

# print(llm("what is capitol of pakistan"))

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\Zainab\.cache\huggingface\token
Login successful


  print(llm("what is capitol of pakistan"))


?

Islamabad is the capital of Pakistan.

Islamabad is the capital of Pakistan. It is a city in the north of the country, between the Margalla Hills and the Rawalpindi District. Islamabad was officially made the capital in 1960, and it is the 10th largest city in Pakistan. It is known for its greenery and beautiful scenery. The city has a population of about 1.5 million people. The official language of Pakistan is Urdu, but many people also speak English. Pakistan is a predominantly Muslim country, and Islam is the official religion. The currency of Pakistan is the Pakistani rupee.


In [8]:
# retriever = vector_store.as_retriever(
#     search_type="similarity_score_threshold",
#     search_kwargs={"k": 3, "score_threshold": 0.6},
# )

Ask the LLm about the recommendation about the user query.

In [9]:
# from langchain import PromptTemplate

# general_template = """
# <Prompt>
#   <Context>
#     You are a virtual shopping assistant for an Ecommerce website which offers different clothing products (like sweatshirts, coats, pants and sneakers, boots etc). Your job is to give stylish personalized recommendation to the user.
#     Focus on keeping a friendly tone and incorporating any item or preference specified by the user in his query. 
#   </Context>
#   <Instructions>
#     - <Focus>Respond only to fashion-related queries. If the user asks a non-fashion question, politely let them know you specialize in fashion and encourage them to ask styling questions.</Focus>
#     - <Tone>Keep the response friendly, helpful, and engaging. Structure your suggestions in a conversational style to make the user feel at ease.</Tone>
#     - <Details>Explain why your suggestions work, considering factors like color coordination, occasion suitability, and style preferences. Ensure the recommendations are practical and fashionable.</Details>
#     - <Examples>Provide clear and concise answers, showcasing your expertise in fashion.</Examples>
#   </Instructions>
#   <Query>
#     <UserQuestion>{question}</UserQuestion>
#   </Query>
#   <Answer>
#     Please provide a concise and to-the-point response.
#   </Answer>
# </Prompt>
# """


# general_prompt = PromptTemplate(
#     template=general_template,
#     input_variables=["question"]
# )

# retriever_template = """

#   You are a chatbot designed to help users with clothing recommendations on an e-commerce website. The website offers tops (e.g., shirts, sweatshirts, dresses), bottoms (e.g., sweatpants, jeans, joggers), and shoes (e.g., boots, sneakers).

#   Answer only clothing-related questions by suggesting how to style specific items (e.g., shirts, pants, dresses, shoes).
#   If the question is general query (not about clothing), no need to give any descriptions or recommendaitons.

#   Examples:

#     - Question: How are you?
#       Answer: I am good, how are you?
      
#     - Question: What to wear with a knee-length slip dress?
#       Answer:
#       Description: Layer a black cropped zipper hoodie over a knee-length slip dress for a chic yet laid-back look. The cropped style balances the dress's silhouette, while the zipper adds a modern edge. Perfect for casual outings or evening strolls.
#       Description: Pair a knee-length slip dress with textured black trousers underneath for a unique layered style. This combination adds structure to the flowy dress, making it suitable for cooler weather or a more formal setting.

#     - Question: How to style a black sweatshirt?
#       Answer:
#       Description: Pair a black high crew tee under a black sweatshirt for a layered casual look. The high crew neckline adds a stylish dimension and makes the outfit cozy for everyday wear.
#       Description: Combine the black sweatshirt with textured black trousers for a refined casual outfit. The trousers’ versatility complements the relaxed vibe of the sweatshirt, making it suitable for casual office wear or a lunch outing.

#     - Question: What shoes go with white jeans?
#       Answer:
#       Description: Pair white jeans with textured black trousers for a bold, layered style. The black trousers peeking through offer a striking contrast and elevate the outfit’s sophistication.
#       Description: Match white jeans with black leather loafers to create a clean and polished look. The loafers' sleek design adds elegance, making the outfit suitable for semi-formal gatherings.

#     - Question: How can I style a red sweater with jeans?
#       Answer:
#       Description: Style a red sweater with textured black trousers instead of jeans for a striking yet polished outfit. The black trousers enhance the boldness of the red while adding a touch of sophistication.
#       Description: Pair the red sweater with a black cropped zipper hoodie layered over it for a trendy and sporty vibe. The cropped fit works well with high-waisted jeans or trousers, creating a balanced silhouette.
     
  
#     When providing recommendations, consider the following:
#   - Color Coordination: Suggest colors that complement or contrast in a flattering way, considering vibrant, neutral, or formal tones.
#   - Fabric Compatibility: Recommend fabric pairings (e.g., cotton with denim, silk blouse with tailored trousers).
#   - Style Matching: Ensure the look matches the intended vibe, whether casual, formal, or sporty.

#   Tailor your response to the needs of the user and take into account if user specifies something in its query (for exmaple, user specifies recommendations for one type of profuct). Respond with a detailed, stylish recommendation alongwith detailed description for the items that are being suggested, ensuring the items match well in terms of color, fabric, of the item inthe query. The output format should be as follows:
  
#   Give 4 suggestions and DO NOT add additional questions or information or examples outside of the recommendation for user query.

#   Question: {question}

#   Answer:

#   """

# retriever_prompt = PromptTemplate(
#     template=retriever_template,
#     input_variables=["question"]
#   )



# response_template = """
#   You are a virtual shopping assistant on an e-commerce platform, specializing in clothing recommendations. The platform offers a wide variety of items including tops (e.g., shirts, sweatshirts, dresses), bottoms (e.g., jeans, joggers, skirts), and shoes (e.g., boots, sneakers, flats).
#   You have been provided with detailed descriptions of relevant products based on the query. Style the product mentioned in the query with the items from these descriptions and try to come up with at least two complete outfits to complete the user’s desired look. For each suggestion:

# - Clearly reference the retrieved product(s) in your response.
# - Show how they complement the product mentioned in the query in terms of color, fabric, and overall vibe.
# - Ensure your response connects back to the query’s intent.
# - Make sure the outfits you come up with are complementary and matches well with eachother
# - Use phrases like "You can pair the [user's item] with our [product]" to personalize the recommendations.
# - If the query does not specify an item, recommend versatile products with clear justifications.
# Personalize your suggestions and structure the response to be friendly, helpful, and engaging. Ensure the suggestions are practical and stylish while explaining why the products match well with the user’s requirements.

# Invite the user to refine their preferences if needed, but *do not add unrelated details or unrelated examples.* DO NOT add more examples or questions to your response. DO not repeat your response, keep it concise and brief it well. 

#     <UserQuestion>{question}</UserQuestion>
#     <Descriptions>{descriptions}</Descriptions>
#     Answer:
    
# """


# response_prompt = PromptTemplate(
#   template=response_template,
#   input_variables=["question", "descriptions"]
# )

In [10]:
# from typing import List

# def processing(documents):
#     result_dict = {doc["id"]: doc["metadata"]["description"] for doc in documents}
#     return result_dict

# def extract_descriptions(documents: List[Document]) -> str:
#     """
#     Extract and concatenate detailed product descriptions from a list of Document objects.
    
#     Args:
#         documents (List[Document]): List of Document objects from the retriever.

#     Returns:
#         str: A single string containing all concatenated product descriptions.
#     """
#     ids = [doc.id for doc in documents]
#     descriptions = [doc.page_content for doc in documents]
#     # print(descriptions)
#     return "\n\n".join(descriptions)

In [12]:
# # # Chain 1: Generate initial recommendations (general prompt)
# # # Chain 1: Generate initial recommendations (general prompt)
# # general_chain = (
# #     RunnablePassthrough()                # Pass the user query
# #     | general_prompt                     # Generate initial recommendations
# #     | llm  
# #     | StrOutputParser()                              # Use LLM to produce suggestions
# # )

# # # Chain 2: Enhance with database products (retriever prompt)
# # retriever_chain = (
# #     {
# #         "question": RunnablePassthrough(),          # Pass the user query directly
# #         "recommendations": general_chain            # Take output from general_chain
# #     }
# #     | retriever_prompt                               # Combine recommendations with retrieval
# #     | llm  
# #     | retriever                                         # Use LLM for detailed descriptions
# #     # | extract_descriptions                          # Extract product descriptions
# # )

# # # Chain 3: Final response generation
# # response_chain = (
# #     {
# #         "question": RunnablePassthrough(),          # Pass the user query
# #         "descriptions": retriever_chain             # Take output from retriever_chain
# #     }
# #     | response_prompt                                # Generate final response
# #     | llm                                           # Use LLM for the final output
# #     | StrOutputParser()                             # Parse the output as a string
# # )

# retriever_chain = (
#     {"question": RunnablePassthrough()}
#     | retriever_prompt                               # Combine recommendations with retrieval
#     | llm  
#     | retriever                                         # Use LLM for detailed descriptions
#     # | extract_descriptions 
#     # | StrOutputParser()                        
# )
# response_chain = (
#     {
#         "question": RunnablePassthrough(),          # Pass the user query
#         "descriptions": retriever_chain             # Take output from retriever_chain
#     }
#     | response_prompt                                # Generate final response
#     | llm                                           # Use LLM for the final output
#     | StrOutputParser()                             # Parse the output as a string
# )


In [14]:

# question = "suggest black clothes?"
# # general = general_chain.invoke({"question": question})
# # print(general)
# retriever = retriever_chain.invoke({"question": question})
# print(retriever)
# # response = response_chain.invoke({"question": question, "descriptions" : retriever})
# # print(response)
# # result = rag_chain2.invoke({"question": question, "descriptions": descriptions})
# # print(result)

[Document(id='7dae5e8b-d6c8-4795-8ec7-48c8c39bfa7d', metadata={'description': 'A black high crew tee for women, ideal for casual wear and versatile styling. It pairs well with jeans, shorts, or skirts for a relaxed look, making it suitable for everyday outings, weekend brunches, or lounging at home. Layer it under a jacket or cardigan for cooler days. Available in sizes SMALL, MEDIUM, LARGE, and X-LARGE, and also offered in IVORY.', 'mongo_id': '67364f096b5f370c97b33e11'}, page_content='A black high crew tee for women, ideal for casual wear and versatile styling. It pairs well with jeans, shorts, or skirts for a relaxed look, making it suitable for everyday outings, weekend brunches, or lounging at home. Layer it under a jacket or cardigan for cooler days. Available in sizes SMALL, MEDIUM, LARGE, and X-LARGE, and also offered in IVORY.'), Document(id='7c45db55-9715-4580-b190-6ebf7b2ba747', metadata={'description': 'A classic black BASIC SWEATSHIRT designed for women, perfect for casual

In [None]:
# #retirveing the most revelant objects form the data base.

# results = vector_store.similarity_search(
#     "wide leg jeans",
#     k=7,
# )
# for res in results:
#     print(f"* {res.page_content} [{res.metadata}]")

* 
CROPPED WIDE LEG PANTS, a WHITE bottoms for women, are available in sizes X-SMALL, SMALL, MEDIUM, LARGE, and X-LARGE. Ideal for casual outings and summer events, these WHITE wide-leg pants match well with crop tops, tank tops, or blouses. Pair them with sandals or sneakers for a comfortable, stylish look. Suitable for various occasions, from weekend gatherings to beach vacations. [{'description': '\nCROPPED WIDE LEG PANTS, a WHITE bottoms for women, are available in sizes X-SMALL, SMALL, MEDIUM, LARGE, and X-LARGE. Ideal for casual outings and summer events, these WHITE wide-leg pants match well with crop tops, tank tops, or blouses. Pair them with sandals or sneakers for a comfortable, stylish look. Suitable for various occasions, from weekend gatherings to beach vacations.', 'mongo_id': '67376da7fd76308feadf78ba'}]
*  A pair of wide-leg mid-blue jeans for women, designed for a comfortable fit and flattering silhouette. Ideal for casual outings, these jeans can be worn with a fitte