In [None]:
import requests
import html
from bs4 import BeautifulSoup
import re
from langchain_core.documents.base import Document
# Upper bound on the page numbers requested by the download loop further down.
# NOTE(review): the recorded run stopped at page 156, so the listing presumably
# has far fewer than 1854 pages; this may actually be an item count — confirm.
total_pages = 1854 
import os 
from dotenv import load_dotenv

# Load settings from a local .env file (python-dotenv); GROQ_API_KEY is read
# through os.getenv when the LLM client is created later in the notebook.
load_dotenv()

def call_api(page, timeout=30):
    """Fetch one page of the Art Institute of Chicago products listing.

    Args:
        page: Page number interpolated into the ``page`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block forever on a stalled connection.

    Returns:
        The value stored under the ``'data'`` key of the JSON body when the
        server answers with HTTP 200; ``None`` for any failure (network error,
        timeout, non-200 status, or a body that is not the expected JSON).
    """
    url = f"https://api.artic.edu/api/v1/products?page={page}"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts follow the same "None on failure"
        # contract as a bad status code, so the caller's check still works.
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json()['data']
    except (ValueError, KeyError, TypeError):
        # Body was not JSON, or was JSON without a top-level 'data' entry.
        return None
    
def html_to_text(raw_html):
    """Convert an HTML fragment into plain, whitespace-normalised text.

    Backslash escape sequences embedded in the input (for example ``\\u003c``)
    are decoded first, then the markup is stripped with BeautifulSoup and all
    runs of whitespace are collapsed to single spaces.

    Args:
        raw_html: HTML string, or a falsy value (``None`` / ``''``).

    Returns:
        The extracted text, or ``''`` when ``raw_html`` is falsy.
    """
    if not raw_html:
        return ''

    decoded = raw_html
    if '\\' in raw_html:
        # The 'unicode_escape' codec reads its input as Latin-1, so feeding it
        # UTF-8 bytes turns every non-ASCII character into mojibake.  Encoding
        # with Latin-1 + backslashreplace keeps code points <= 0xFF as single
        # bytes and rewrites the rest as \uXXXX escapes, so the decode step
        # restores them while still processing escapes present in the text.
        try:
            decoded = raw_html.encode('latin-1', 'backslashreplace').decode('unicode_escape')
        except UnicodeDecodeError:
            # Truncated escape (e.g. a trailing backslash): keep the raw text
            # rather than aborting the whole download.
            decoded = raw_html

    soup = BeautifulSoup(decoded, 'html.parser')
    return ' '.join(soup.get_text(separator=' ').split())


def generate_information(item, default_price=20.0):
    """Build the title, price, image URL and text summary for one product.

    Args:
        item: Mapping for a single product. The keys read are ``title``,
            ``description``, ``price_display`` and ``image_url``; any of them
            may be missing or ``None``.
        default_price: Price used when no dollar amount can be found in
            ``price_display``.

    Returns:
        dict with keys ``product_title`` (str), ``price`` (float),
        ``image`` (str) and ``summary`` (str).
    """
    # `or` rather than a .get() default: a key that is present but None would
    # otherwise leak None into the summary or crash html.unescape().
    product_title = item.get('title') or 'Untitled'
    description = html_to_text(item.get('description') or '')

    decoded_price_html = html.unescape(item.get('price_display') or '')
    # Accept thousands separators so "$1,200.00" parses as 1200.0 instead of
    # stopping at the comma and yielding 1.0.
    match = re.search(
        r'\$((?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d{1,2})?)',
        decoded_price_html,
    )
    price = float(match.group(1).replace(',', '')) if match else default_price

    image_url = item.get('image_url') or ''

    # NOTE(review): this template calls every product a "poster" and ties it to
    # Frida Kahlo / Mary Reynolds regardless of what the item is — confirm that
    # this wording is intended for the whole catalogue.
    summary = (
        f"{product_title} is a beautifully crafted poster derived from the artwork featured in the exhibition. "
        f"It has the description as: {description}. "
        f"The piece reflects the powerful legacy of Frida Kahlo and Mary Reynolds as showcased in the Art Institute of Chicago. "
        f"Priced at ${price:.2f}, it makes a unique addition to any art lover’s collection."
    )

    return {
        'product_title': product_title,
        'price': price,
        'image': image_url,
        'summary': summary
    }
from tqdm.auto import tqdm

# Download the catalogue page by page and turn every product into a LangChain
# Document: the generated summary is the text, the rest goes into metadata.
documents = []
for page in tqdm(range(1, total_pages + 1)):
    data = call_api(page)

    if data is None:
        # call_api returns None when the request itself failed.
        print(f"Failed to retrieve data for page {page}")
        break

    if not data:
        # A successful response with an empty payload means we ran past the
        # last page; this is the normal end of the listing, not an error.
        print(f"No more products after page {page - 1}")
        break

    documents.extend(
        Document(
            page_content=information['summary'],
            metadata={
                'product_title': information['product_title'],
                'price': information['price'],
                'image': information['image'],
            },
        )
        for information in map(generate_information, data)
    )

len(documents)

  0%|          | 0/1854 [00:00<?, ?it/s]

  decoded = raw_html.encode('utf-8').decode('unicode_escape')


Failed to retrieve data for page 156


1854

In [42]:
from langchain_groq import ChatGroq
from langchain_community.embeddings import OllamaEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain.chains import create_retrieval_chain, create_history_aware_retriever
from langchain_chroma import Chroma
def create_vector_embeddings():
    """Chunk the module-level ``documents`` and index them in Chroma.

    The documents are split into chunks of at most 1000 characters with a
    50-character overlap, embedded with the Ollama ``mxbai-embed-large`` model
    and stored in the ``art_collection`` collection under ``./art_products``.

    Returns:
        The object returned by ``Chroma.from_documents``.
    """
    # NOTE(review): presumably each call adds the chunks to the persisted
    # collection again — confirm before re-running on an existing directory.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
    chunks = splitter.split_documents(documents)

    embedder = OllamaEmbeddings(model="mxbai-embed-large")
    print("Loaded Embedding Model")

    vector_store = Chroma.from_documents(
        chunks,
        embedder,
        collection_name="art_collection",
        persist_directory="./art_products",
    )
    print("Loaded Chroma")
    return vector_store

In [None]:
# Chat model served by Groq; the API key comes from the environment.
llm = ChatGroq(groq_api_key=os.getenv("GROQ_API_KEY"), model_name="meta-llama/llama-4-scout-17b-16e-instruct")

# Single-turn prompt. Together with `document_chain` below it is not referenced
# by the conversational chain assembled in this cell.
prompt = ChatPromptTemplate.from_template(
    """
    Answer the questions based on the provided context only.
    Please provide the most accurate response based on the question
    <context>
    {context}
    </context>
    Question:{input}

    """
)

# Build the vector index over the downloaded products and expose it as a retriever.
embeddings = create_vector_embeddings()

retriever = embeddings.as_retriever()

document_chain = create_stuff_documents_chain(llm, prompt)

# Instruction used to rewrite a follow-up question into a standalone one.
# Adjacent string literals are concatenated verbatim, so each fragment must
# carry its own trailing space.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}")
    ]
)

# Retriever that sees the chat history and the latest question.
history_aware_retriever = create_history_aware_retriever(llm, retriever, contextualize_q_prompt)

# Answering prompt: the retrieved documents are substituted for {context}.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

question_chain = create_stuff_documents_chain(llm, qa_prompt)

# Full pipeline: history-aware retrieval feeding the answering chain.
rag_chain = create_retrieval_chain(history_aware_retriever, question_chain)

# In-memory registry of chat histories, keyed by session id.
store = {}

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Histories live in the module-level ``store`` dict, so repeated calls with
    the same id return the same object.
    """
    try:
        return store[session_id]
    except KeyError:
        history = ChatMessageHistory()
        store[session_id] = history
        return history
    
# Wrap the RAG chain with per-session message history: the question is read
# from "input", prior turns are supplied as "chat_history", and the reply is
# taken from the "answer" key of the chain output.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

input_text = "Hey, i want some product having price less than 40.0 and they can be used for decoration purpose. Can you help me with that?"

# Fetch the history object before invoking so it can be printed afterwards.
session_history = get_session_history("default")
answer = conversational_rag_chain.invoke(
    {"input": input_text},
    config={"configurable": {"session_id": "default"}},
)
print("AI Response: ", answer["answer"])
print("History: ", session_history.messages)

Loaded Embedding Model
Loaded Chroma
AI Response:  The Festive Tree Flat Decorative Candle is priced at $20.00, which is less than $40.00, and can be used for decoration. It's a handmade flat candle made in Lithuania with high-quality materials. It makes a unique addition to any art lover's collection.
History:  [HumanMessage(content='Hey, i want some product having price less than 40.0 and they can be used for decoration purpose. Can you help me with that?', additional_kwargs={}, response_metadata={}), AIMessage(content="The Festive Tree Flat Decorative Candle is priced at $20.00, which is less than $40.00, and can be used for decoration. It's a handmade flat candle made in Lithuania with high-quality materials. It makes a unique addition to any art lover's collection.", additional_kwargs={}, response_metadata={})]


In [48]:
# Follow-up question in the same "default" session, so "the product" refers
# back to the item discussed in the previous turn.
input_text = "Describe the product in detail and provide the link to the product. I want to know about its price as well."

session_history = get_session_history("default")
answer = conversational_rag_chain.invoke(
    {"input": input_text},
    config={"configurable": {"session_id": "default"}},
)
print("AI Response: ", answer["answer"])


AI Response:  I can describe the product to you, but I'm a text-based AI and do not have the capability to provide direct links to products.

The Festive Tree Flat Decorative Candle is a handmade flat candle made in Lithuania using high-quality German materials. It is dripless, smokeless, and self-extinguishing, and comes with an easy-to-assemble steel base. The dimensions of the candle are 2.4 x 0.4 x 5.9 inches and it is priced at $20.00.

To find the product, I suggest searching online for "Festive Tree Flat Decorative Candle" or checking an online marketplace or store that sells it, such as the Art Institute of Chicago's website or an art store.
