In [35]:
import os
import json
from typing import List, Dict, Any
from dotenv import load_dotenv
from pinecone import Pinecone
from openai import OpenAI

# Load environment variables
load_dotenv()
# SECURITY NOTE(review): a literal API secret key had been pasted into a comment
# on this line. It is redacted here; treat the exposed key as compromised and
# revoke/rotate it. Credentials must only be supplied through the environment
# (see the os.getenv calls below), never written into the source.
# Initialize clients
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

def get_embedding(text: str) -> List[float]:
    """Return the embedding vector for ``text``.

    Sends ``text`` to the embeddings endpoint of the module-level OpenAI
    ``client`` using the ``text-embedding-3-small`` model and returns the
    embedding of the first item in the response.
    """
    embedding_response = client.embeddings.create(
        model="text-embedding-3-small",
        input=text,
    )
    first_item = embedding_response.data[0]
    return first_item.embedding

def _parse_nutrients(raw: Any) -> Any:
    """Decode the JSON-encoded ``nutrients`` metadata value, tolerating bad data."""
    if raw is None:
        return {}
    if not isinstance(raw, (str, bytes, bytearray)):
        # Already a structured value; there is nothing to decode.
        return raw
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        # A single malformed record must not abort the whole search.
        return {}


def search_products(query: str, index_name: str, top_k: int = 5) -> List[Dict[str, Any]]:
    """Run a vector similarity search for ``query`` and return product dicts.

    The query is embedded with ``get_embedding`` and sent to the index named
    ``index_name`` on the module-level Pinecone client ``pc``.

    Args:
        query: Free-text search query.
        index_name: Name of the index to query.
        top_k: Maximum number of matches to request.

    Returns:
        One dict per match with the keys ``id``, ``score``, ``name``,
        ``category``, ``ingredients``, ``nutrients`` and ``text``. Missing
        metadata fields default to ``""``; a missing or undecodable
        ``nutrients`` value becomes ``{}``.
    """
    index = pc.Index(index_name)
    search_results = index.query(
        vector=get_embedding(query),
        top_k=top_k,
        include_metadata=True,
    )

    products = []
    for match in search_results["matches"]:
        # Guard against a match whose metadata is None so .get() is always safe.
        metadata = match["metadata"] or {}
        products.append({
            "id": match["id"],
            "score": match["score"],
            "name": metadata.get("name", ""),
            "category": metadata.get("category", ""),
            "ingredients": metadata.get("ingredients", ""),
            "nutrients": _parse_nutrients(metadata.get("nutrients")),
            "text": metadata.get("text", ""),
        })
    return products

def _parse_refinement_response(result_text: Any) -> List[Dict[str, Any]]:
    """Turn the raw chat-completion text into a list of result dicts (or one error dict)."""
    if not result_text:
        # The message content can be absent; json.loads(None) would raise TypeError.
        return [{"error": "Empty response from model", "raw_response": result_text}]

    try:
        parsed = json.loads(result_text)
    except json.JSONDecodeError:
        return [{"error": "Could not parse results", "raw_response": result_text}]

    if isinstance(parsed, list):
        items = parsed
    elif isinstance(parsed, dict):
        if "results" in parsed:
            wrapped = parsed["results"]
            items = wrapped if isinstance(wrapped, list) else [wrapped]
        else:
            # The model may pick another wrapper key; unwrap it when unambiguous,
            # otherwise treat the object itself as a single result.
            list_values = [value for value in parsed.values() if isinstance(value, list)]
            items = list_values[0] if len(list_values) == 1 else [parsed]
    else:
        return [{"error": "Unexpected response format", "raw_response": result_text}]

    # Downstream code calls .get() on every entry, so keep only objects.
    return [item for item in items if isinstance(item, dict)]


def refine_results_with_llm(query: str, results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Ask the chat model to filter, rank and explain the search results.

    Args:
        query: The user's original search query.
        results: Product dicts containing at least the keys ``name``,
            ``ingredients``, ``nutrients`` and ``score``.

    Returns:
        A list of dicts with the keys requested in the prompt
        (``product_name``, ``match_score``, ``explanation``,
        ``meets_criteria``). When the model's reply cannot be interpreted, a
        single-element list holding a dict with ``error`` and
        ``raw_response`` keys is returned instead. An empty ``results``
        yields ``[]`` without calling the model.
    """
    if not results:
        return []

    # Create a formatted string of the results
    formatted_results = "\n".join(
        f"Product {i+1}: Name: {r['name']}, Ingredients: {r['ingredients']}, Nutrients: {r['nutrients']}, Score: {r['score']}"
        for i, r in enumerate(results)
    )

    # The request below uses response_format={"type": "json_object"}, which
    # constrains the reply to a top-level JSON object. The prompt therefore asks
    # for an object with a fixed "results" key rather than a bare array.
    system_message = """
    You are a nutrition expert assistant. The user is searching for products.
    You will receive a query and search results. Please:
    1. Filter out irrelevant products
    2. Rank relevant products
    3. Explain why they match the query

    Respond with a single JSON object only, in this format:
    {
        "results": [
            {
                "product_name": "product name here",
                "match_score": numeric score from 1-10,
                "explanation": "explanation here",
                "meets_criteria": true/false
            },
            ...
        ]
    }
    """

    user_message = f"""
    Query: "{query}"

    Search Results:
    {formatted_results}

    Please analyze these results according to the query criteria and provide the formatted JSON response.
    """

    # Make a direct call to OpenAI's chat completion API
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_message},
        ],
        temperature=0,
        response_format={"type": "json_object"},
    )

    choices = response.choices
    result_text = choices[0].message.content if choices else None
    return _parse_refinement_response(result_text)

def _match_score_sort_key(entry: Dict[str, Any]) -> float:
    """Return a numeric sort key for an entry, treating bad or missing scores as 0."""
    try:
        return float(entry.get("match_score", 0))
    except (TypeError, ValueError):
        return 0.0


def display_refined_results(refined_results: List[Dict[str, Any]]):
    """Print the refined results that meet the criteria, best score first.

    Args:
        refined_results: Normally a list of result dicts with the keys
            ``product_name``, ``match_score``, ``explanation`` and
            ``meets_criteria``. A single dict, optionally wrapping the list
            under a ``results`` key, is accepted as well. A first entry
            containing an ``error`` key is reported as an error.

    Returns:
        None. All output goes to stdout.
    """
    print("\n===== REFINED SEARCH RESULTS =====\n")

    # Normalise the input shape first so the checks below never index into a
    # dict or an empty list.
    if isinstance(refined_results, dict):
        entries = refined_results.get("results", [refined_results])
        if not isinstance(entries, list):
            entries = [entries]
    else:
        entries = list(refined_results or [])

    first = entries[0] if entries else None
    has_error = isinstance(first, dict) and "error" in first
    if not entries or has_error:
        print("No matching products found or error in processing results.")
        if has_error:
            print(f"Error: {first['error']}")
        return

    ranked = sorted(
        (entry for entry in entries if isinstance(entry, dict)),
        key=_match_score_sort_key,
        reverse=True,
    )
    matching = [entry for entry in ranked if entry.get("meets_criteria", False)]
    if not matching:
        print("No products met the search criteria.")
        return

    # Number only the entries that are actually printed so the list has no gaps.
    for position, result in enumerate(matching, start=1):
        print(f"{position}. {result.get('product_name')} - Match Score: {result.get('match_score')}/10")
        print(f"   Explanation: {result.get('explanation')}\n")

def main(index_name: str = "langchain"):
    """Run the interactive product search demo.

    Prompts for a query on stdin, searches the index, refines the matches
    with the LLM and prints the outcome.

    Args:
        index_name: Name of the index to search. Defaults to ``"langchain"``.
    """
    print("\n===== PRODUCT SEARCH DEMO =====\n")
    query = input("Enter your search query (e.g., 'low sugar protein bar with at least 15g protein'): ").strip()
    if not query:
        # An empty query cannot be embedded, so stop before any API call.
        print("No search query entered.")
        return

    print("\nSearching for products...")
    search_results = search_products(query, index_name)
    if not search_results:
        print("No products found matching your query.")
        return

    print(f"Found {len(search_results)} products. Refining with LLM...")
    refined_results = refine_results_with_llm(query, search_results)
    display_refined_results(refined_results)


if __name__ == "__main__":
    main()


===== PRODUCT SEARCH DEMO =====



Enter your search query (e.g., 'low sugar protein bar with at least 15g protein'):  low sugar protein bar with at least 15g protein



Searching for products...
Found 5 products. Refining with LLM...

===== REFINED SEARCH RESULTS =====

1. PowerProtein Bar - Match Score: 9/10
   Explanation: This bar contains 20g of protein and only 5g of sugar, meeting both the protein and low sugar criteria.

2. Plant Power Bar - Match Score: 7/10
   Explanation: This bar contains 15g of protein, meeting the minimum protein requirement, but has 8g of sugar, which is slightly higher than ideal for a low sugar product.

