In [1]:
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())

True

In [40]:
import os

# Set environment variables
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_PROJECT'] = 'cortex'

# Get keys from the environment
langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
groq_api_key = os.getenv("GROQ_API_KEY")

if langchain_api_key:
    os.environ['LANGCHAIN_API_KEY'] = langchain_api_key
else:
    raise ValueError("LANGCHAIN_API_KEY is not set in the environment.")

if groq_api_key:
    os.environ['GROQ_API_KEY'] = groq_api_key
else:
    raise ValueError("GROQ_API_KEY is not set in the environment.")

Retriever

In [20]:
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from sentence_transformers import SentenceTransformer

# Custom embedding class to wrap the SentenceTransformer model
class SentenceTransformerEmbedding:
    def __init__(self, model_name, device='cpu'):
        self.model = SentenceTransformer(model_name, device=device)
    
    def embed_documents(self, texts):
        return [self.model.encode(text) for text in texts]
    
    def embed_query(self, query):
        # Ensure this returns a 2D array (even if only one query)
        return [self.model.encode(query)]

# List of URLs to load documents from
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# Load documents from the URLs
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

# Split documents into smaller chunks for easier processing
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=250, chunk_overlap=0)
doc_splits = text_splitter.split_documents(docs_list)

# Initialize custom embedding class
model_name = "BAAI/bge-small-en"
embedding_function = SentenceTransformerEmbedding(model_name=model_name, device="cpu")

# Store the split documents in Chroma vector database using the custom embedding function
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding=embedding_function  # Pass the instance of SentenceTransformerEmbedding
)

# Create a retriever for querying the vectorstore
retriever = vectorstore.as_retriever()

# Optional: You can print the retriever to check if it's working correctly
print(retriever)

tags=['Chroma', 'SentenceTransformerEmbedding'] vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x17acbbe80> search_kwargs={}


LLMs

In [30]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from pydantic import BaseModel, Field
import numpy as np

# Data model
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""
    binary_score: str = Field(..., description="Documents are relevant to the question, 'yes' or 'no'")

# LLM with function call 
llm = ChatGroq(temperature=0)
structured_llm_grader = llm.with_structured_output(GradeDocuments)

# Prompt 
system = """You are a grader assessing relevance of a retrieved document to a user question. \n 
    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""

grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    ]
)

retrieval_grader = grade_prompt | structured_llm_grader


In [41]:
question = "agent memory"
docs = retriever.get_relevant_documents(question)
doc_txt = docs[1].page_content
print(retrieval_grader.invoke({"question": question, "document": doc_txt}))

ValueError: Expected embeddings to be a list of floats or ints, a list of lists, a numpy array, or a list of numpy arrays, got [[array([-5.06070517e-02,  5.97463269e-03,  2.03445759e-02, -2.69595329e-02,
       -2.03757975e-02,  2.80002970e-02,  5.12210988e-02,  2.03084257e-02,
        5.20406887e-02, -4.87228297e-03,  7.47223245e-03, -1.37139037e-02,
        7.51490444e-02,  5.91166541e-02,  1.55815640e-02,  1.40016228e-02,
       -3.54385525e-02,  5.04436828e-02,  4.38578706e-03, -3.48372012e-02,
        2.74588075e-02, -6.84176162e-02, -8.07437953e-03, -1.36269713e-02,
       -6.49731532e-02,  2.87238527e-02, -4.87621687e-02, -2.01405920e-02,
       -3.68303619e-02, -1.67949498e-01,  1.84508990e-02,  1.47046603e-03,
        2.28697825e-02,  1.14864006e-03,  9.20567941e-03, -2.64571495e-02,
       -2.37961505e-02,  1.43749304e-02, -4.25855666e-02,  8.24343506e-03,
        4.37707528e-02,  2.43634116e-02, -3.13222818e-02, -2.15302575e-02,
       -1.84855163e-02, -4.35628854e-02,  5.97639708e-03, -2.16741692e-02,
        2.68250927e-02, -4.97780070e-02,  9.29589616e-04, -2.75718654e-03,
        2.21071523e-02,  1.53738735e-02,  2.61903834e-02,  5.69965206e-02,
        3.97896320e-02,  8.79201591e-02,  2.73467321e-02, -9.14174505e-03,
        4.57712673e-02,  5.39637096e-02, -1.41799212e-01,  6.55034482e-02,
        4.37518805e-02,  2.74643786e-02, -2.97501031e-02, -3.10534853e-02,
       -9.78837535e-03,  2.36403402e-02, -2.27469802e-02,  2.16385573e-02,
        8.53271224e-03,  8.50541517e-03,  3.25006433e-02,  1.93956811e-02,
       -2.53703594e-02, -1.84268393e-02,  2.01860983e-02, -2.86276732e-02,
       -2.41019912e-02, -5.49252564e-03, -2.10723244e-02, -8.14018678e-03,
       -1.23851653e-02, -6.49305210e-02, -1.75483583e-03, -1.76455900e-02,
        3.33020464e-02, -2.39961818e-02, -1.61984563e-02,  3.31948586e-02,
        8.12890416e-04,  2.15870352e-03, -2.65990030e-02,  5.73266670e-03,
        3.24266069e-02,  3.35806832e-02, -3.53800729e-02,  5.02837360e-01,
       -1.84634738e-02,  9.17410478e-03,  3.43778804e-02, -2.94342451e-02,
        1.39944181e-02, -4.86274995e-02, -4.70952038e-03, -5.13015427e-02,
       -5.97946681e-02,  2.57694069e-02, -1.84467342e-02, -1.44617436e-02,
        5.10029159e-02,  6.25290733e-04,  5.09900413e-02,  2.46231779e-02,
        2.19300967e-02,  1.36708599e-02,  3.09565049e-02, -3.57943438e-02,
       -3.16529647e-02,  1.34994369e-02,  2.16652825e-02,  1.10441900e-03,
        8.36670981e-04, -5.55954911e-02, -4.29707859e-03,  1.27219304e-01,
       -1.77437242e-03,  3.41928601e-02,  1.94236543e-02,  1.42267905e-02,
       -3.61597608e-03,  6.76565524e-03,  2.95063127e-02, -3.67683009e-04,
        8.36492050e-03, -2.49140672e-02,  1.05797201e-02, -3.84328216e-02,
       -3.93848233e-02, -1.53888771e-02, -4.80911898e-04, -4.95640598e-02,
       -5.47943003e-02,  1.19785368e-01,  2.94943675e-02,  1.49188554e-02,
       -4.43637334e-02, -5.65678114e-04, -2.34345980e-02,  1.20799495e-02,
        2.75444034e-02, -3.14469635e-02,  4.02604369e-03,  9.71968099e-03,
        1.80770569e-02,  3.01018404e-03, -6.95357025e-02,  3.09022497e-02,
       -3.28014456e-02, -1.17571540e-02, -2.07450781e-02,  6.96032196e-02,
       -2.73452923e-02, -3.61572467e-02, -2.30542887e-02, -2.80066300e-02,
        3.31181400e-02, -7.83240981e-03,  1.62130948e-02,  1.79317445e-02,
       -2.74823271e-02, -2.96494598e-03,  1.30399978e-02,  1.21108582e-02,
       -3.82872559e-02, -6.57101208e-03, -7.01458630e-05, -2.61560846e-02,
        5.61462110e-03, -2.43807882e-02, -2.78421547e-02,  3.95850316e-02,
        1.53425299e-02, -3.73874120e-02, -1.51089085e-02, -1.65969394e-02,
        6.98615750e-03,  1.59019753e-02, -1.81240886e-02,  2.89189015e-02,
        1.65251102e-02, -8.12974572e-03, -4.08844985e-02, -1.98714007e-02,
       -3.85452947e-03, -1.00383488e-02, -4.34385464e-02, -2.77600158e-02,
        3.08463331e-02,  1.07367784e-02, -6.09184802e-02,  1.11348527e-02,
        2.53551416e-02,  1.19891090e-04, -2.39971839e-02, -1.21778427e-02,
        4.19401564e-02,  1.30643947e-02, -2.11110301e-02, -1.76947899e-02,
        1.68610066e-02,  1.57546822e-03, -6.71790615e-02, -3.43238772e-03,
       -6.64287480e-03,  2.17840839e-02,  1.85414217e-02, -8.79211351e-03,
        3.06329373e-02,  2.61663459e-04, -6.64864015e-03, -2.57574409e-01,
       -8.33812170e-03, -2.83031967e-02, -5.04065230e-02,  2.29076743e-02,
       -3.39929126e-02,  5.87143935e-02, -2.04354841e-02,  5.15595032e-03,
        4.05228399e-02,  3.82520780e-02, -6.81813657e-02, -2.93883961e-02,
        2.72621494e-03, -3.06014810e-03,  5.60409483e-03,  2.21736524e-02,
        2.69140862e-02, -3.06594968e-02,  4.61364277e-02,  1.11773731e-02,
       -2.41410211e-02,  1.98951233e-02, -6.34861216e-02,  6.37077615e-02,
       -4.26355749e-03,  2.54786074e-01,  1.97292510e-02, -2.13978048e-02,
        2.88166981e-02, -8.63612071e-03,  2.14625169e-02, -3.60639691e-02,
       -9.83528271e-02,  2.78047659e-02,  1.02095902e-02,  2.09225994e-02,
       -9.73082334e-03,  2.35221051e-02, -5.19373342e-02, -2.92548705e-02,
        1.26222065e-02, -8.89744982e-03, -7.42038041e-02, -4.12503779e-02,
       -4.39423285e-02, -5.03256992e-02, -1.43280416e-03, -2.35120542e-02,
       -9.40674730e-03,  3.26487012e-02, -1.61242895e-02,  1.63563117e-02,
        2.05794033e-02, -1.95864551e-02, -5.16826771e-02, -4.21754345e-02,
       -1.19759226e-02, -3.34176309e-02,  5.27794920e-02,  1.33662885e-02,
       -2.62658913e-02, -2.50422605e-03, -3.21321711e-02,  7.71857351e-02,
       -1.43390093e-02,  1.47105753e-02, -7.38090724e-02,  1.19574983e-02,
       -2.55365726e-02, -2.44117267e-02,  6.09615371e-02, -1.64830666e-02,
       -1.82305705e-02,  4.73785065e-02,  1.50061992e-03,  1.68396402e-02,
       -2.52318047e-02, -2.52858419e-02,  7.65216071e-03,  4.84909639e-02,
       -1.49675766e-02,  1.47299860e-02, -9.42961546e-04,  4.86176647e-02,
       -1.35445064e-02,  3.34642082e-02, -1.95900146e-02,  6.44785762e-02,
        1.34265665e-02,  3.59408222e-02,  5.40988855e-02, -5.03674001e-02,
       -2.02594884e-02,  2.32609198e-03, -2.76508462e-02, -3.05763692e-01,
        4.33131456e-02, -1.40268914e-02,  5.73758967e-02, -1.90570951e-03,
        1.65583491e-02, -2.26147589e-03,  2.14686971e-02, -6.41008392e-02,
        1.42551372e-02, -1.18634300e-02,  5.57387099e-02,  1.35175325e-02,
        4.72619869e-02, -1.12300999e-02,  3.51900272e-02,  9.35478434e-02,
       -2.82668453e-02,  4.12231386e-02, -5.65125123e-02, -3.77950608e-03,
        1.01956818e-02,  2.51749575e-01, -1.86080914e-02,  5.23024425e-02,
        3.31834964e-02,  2.10109968e-02, -1.41304228e-02,  1.58193859e-03,
       -3.20743918e-02,  1.21844448e-02, -1.99431647e-03,  5.30952699e-02,
       -3.33657451e-02,  3.80866602e-02,  4.33834381e-02, -1.65163893e-02,
        2.36306456e-03,  1.84816252e-02, -2.78157328e-04, -1.77706257e-02,
        5.33394981e-03, -2.57758163e-02,  5.68716903e-04,  8.65156129e-02,
        3.78861651e-02, -1.37932161e-02, -5.29564656e-02, -4.11841832e-02,
        1.92392711e-03, -1.21165421e-02, -2.89163683e-02, -1.03095118e-02,
       -1.25527121e-02,  4.04560454e-02,  6.20743609e-04, -2.26488896e-02,
       -3.59288789e-02, -3.57535854e-02,  1.40415728e-02, -2.14719977e-02,
       -3.85697708e-02,  4.19987477e-02, -6.65607816e-03, -7.41343200e-03],
      dtype=float32)]] in query.