In [9]:
import os
from dotenv import load_dotenv
import bs4

# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Every variable this notebook relies on. The previous
# `os.environ[k] = os.getenv(k)` pattern was a no-op when the variable existed
# and raised an opaque "TypeError: str expected, not NoneType" when it did not,
# so check explicitly and report exactly which names are missing.
REQUIRED_ENV_VARS = ("OPENAI_API_KEY", "LANGCHAIN_API_KEY", "LANGCHAIN_PROJECT")
missing_env_vars = [name for name in REQUIRED_ENV_VARS if os.getenv(name) is None]
if missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_vars)
        + ". Define them in your shell or in a .env file before running this notebook."
    )

# LangSmith tracking
os.environ["LANGCHAIN_TRACING_V2"] = "true"

### Data Loader

In [10]:
# Data Ingestion - scrape the data from https://docs.smith.langchain.com/administration/tutorials/manage_spend
from langchain_community.document_loaders import WebBaseLoader

In [11]:
# Page to ingest: the LangSmith "manage spend" tutorial.
MANAGE_SPEND_URL = "https://docs.smith.langchain.com/administration/tutorials/manage_spend"

# Build the loader for that page; the bare name below shows its repr.
loader = WebBaseLoader(MANAGE_SPEND_URL)
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x20894134a30>

In [12]:
# Run the loader. The displayed result is a list of Document objects, each
# carrying the page's metadata and its extracted text in `page_content`.
docs = loader.load()
docs

[Document(metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='\n\n\n\n\nOptimize tracing spend on LangSmith | 🦜️🛠️ LangSmith\n\n\n\n\n\n\nSkip to main contentWe are growing and hiring for multiple roles for LangChain, LangGraph and LangSmith. Join our team!API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationPrompt EngineeringDeployment (LangGraph Platform)AdministrationTutorialsOptimize tracing spend on LangSmithHow-to GuidesSetupConceptual GuideSelf-hostingPricingReferenceCloud architecture and scalabilityAuthz and AuthnAuthentication methodsdata_formatsEvaluationDataset transformationsRegions FAQsdk_referenceChangelogCloud architecture and scalabilityAuthz and AuthnAuthentication methodsdata_formatsEvaluationDataset tra

### Text Splitter

In [14]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunking parameters handed to the splitter, named so they are easy to tune.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Split the loaded page into overlapping chunks and display them.
documents = text_splitter.split_documents(docs)
documents

[Document(metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith\n\n\n\n\n\n\nSkip to main contentWe are growing and hiring for multiple roles for LangChain, LangGraph and LangSmith. Join our team!API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppGet StartedObservabilityEvaluationPrompt EngineeringDeployment (LangGraph Platform)AdministrationTutorialsOptimize tracing spend on LangSmithHow-to GuidesSetupConceptual GuideSelf-hostingPricingReferenceCloud architecture and scalabilityAuthz and AuthnAuthentication methodsdata_formatsEvaluationDataset transformationsRegions FAQsdk_referenceChangelogCloud architecture and scalabilityAuthz and AuthnAuthentication methodsdata_formatsEvaluationDataset transformatio

### Embeddings

In [15]:
from langchain_openai import OpenAIEmbeddings

# Embedding model wrapper, constructed with the library's default settings.
embeddings = OpenAIEmbeddings()

### Vector Store DB

In [None]:
from langchain_community.vectorstores import FAISS

# Embed every chunk and index the vectors in a FAISS store.
vectorstore_db = FAISS.from_documents(
    documents=documents,
    embedding=embeddings,
)
vectorstore_db

<langchain_community.vectorstores.faiss.FAISS at 0x208a6b16050>

### Query the DB

In [None]:
# Search text: a sentence fragment about LangSmith's two usage limits.
query = "LangSmith has two usage limits: total traces and extended"

# Look up the stored chunks that are most similar to the search text.
result = vectorstore_db.similarity_search(query=query)

In [20]:
# Display the Document objects returned by the similarity search.
result

[Document(id='1b73e2fb-aa63-4c12-9de7-4b621a427521', metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content='Lets start by setting limits on our production usage, since that is where the majority of spend comes from.\nSetting a good total traces limit\u200b\nPicking the right "total traces" limit depends on the expected load of traces that you will send to LangSmith. You should\nclearly think about your assumptions before setting a limit.\nFor example:\n\nCurrent Load: Our gen AI application is called between 1.2-1.5 times per second, and each API request has a trace associated with it,\nmeaning we log around 100,000-130,000 traces per day\nExpected Growth in Load: We expect to double in size in the near future.\n\nFrom these assumptions, we can do a quick bac

In [18]:
# Print the first returned document's text so its newlines render readably.
print(result[0].page_content)

Lets start by setting limits on our production usage, since that is where the majority of spend comes from.
Setting a good total traces limit​
Picking the right "total traces" limit depends on the expected load of traces that you will send to LangSmith. You should
clearly think about your assumptions before setting a limit.
For example:

Current Load: Our gen AI application is called between 1.2-1.5 times per second, and each API request has a trace associated with it,
meaning we log around 100,000-130,000 traces per day
Expected Growth in Load: We expect to double in size in the near future.

From these assumptions, we can do a quick back-of-the-envelope calculation to get a good limit of:
limit = current_load_per_day * expected_growth * days/month      = 130,000 * 2 * 30      = 7,800,000 traces / month
We click on the edit icon on the right side of the table for our Prod row, and can enter this limit as follows:


In [19]:
# Same topic, this time phrased as a natural-language question.
r = vectorstore_db.similarity_search(
    "LangSmith has two usage limits. Which are they?"
)

# Show only the text of the first hit.
first_hit = r[0]
print(first_hit.page_content)

your use case with LangSmith. For example, if you run evals as part of CI/CD in dev or staging, you may
want to be more liberal with your usage limits to avoid test failures.
Now that our limits are set, we can see that LangSmith shows a maximum spend estimate across all workspaces:


### Retrieval Chain, Document Chain

In [26]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Chat model that will answer the questions.
llm = ChatOpenAI(model="gpt-4o-mini")

# Prompt text with two placeholders: {input} for the question and {context}
# for the supporting documents. It is spelled out piece by piece so the exact
# whitespace of the template is visible.
PROMPT_TEMPLATE = (
    "Answer the given question based only on the provided context: {input}\n"
    "    <context> \n"
    "    {context}\n"
    "    </context>\n"
    "    "
)
prompt = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)

# Chain that formats the documents into the prompt and sends it to the model.
document_chain = create_stuff_documents_chain(llm, prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='Answer the given question based only on the provided context: {input}\n    <context> \n    {context}\n    </context>\n    '), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x00000208E14F5D20>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x00000208E14F43A0>, root_client=<openai.OpenAI object at 0x00000208E14F68C0>, root_async_client=<openai.AsyncOpenAI object at 0x00000208E14F7A60>, model_name='gpt-4o-mini', model_kwargs={}, openai_api_key=SecretStr('**********'))

In [None]:
# Without a retrieval chain: hand the document chain a hand-written context.
from langchain_core.documents import Document

manual_context = [
    Document(
        page_content="'LangSmith has two usage limits: total traces and extended retention traces.'"
    )
]
manual_payload = {
    "input": "LangSmith has two usage limits. Which are they?",
    "context": manual_context,
}

# The last expression is the chain's answer, shown as the cell output.
document_chain.invoke(manual_payload)

'LangSmith has two usage limits: total traces and extended retention traces.'

In [None]:
# Set up the retrieval chain: retriever first, then the document chain.
from langchain.chains import create_retrieval_chain

# Expose the FAISS store through the retriever interface.
retriever = vectorstore_db.as_retriever()

retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000208A6B16050>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='Answer the given question based only on the provided context: {input}\n    <context> \n    {context}\n    </context>\n    '), addit

In [None]:
# Ask a question through the retrieval chain; the context comes from the retriever.
billing_question = "If we are not a big enterprise, how much would be the per month bill?"
response = retrieval_chain.invoke({"input": billing_question})

# Show only the generated answer.
response["answer"]

'If we are not a big enterprise, the estimated per month bill would be around ~$7.5k.'

In [34]:
# Inspect the whole response dict: it carries the original 'input' and the
# retrieved 'context' documents alongside the answer.
response

{'input': 'If we are not a big enterprise, how much would be the per month bill?',
 'context': [Document(id='d4eae317-7575-439a-ad10-5c44ace65ac5', metadata={'source': 'https://docs.smith.langchain.com/administration/tutorials/manage_spend', 'title': 'Optimize tracing spend on LangSmith | 🦜️🛠️ LangSmith', 'description': 'Before diving into this content, it might be helpful to read the following:', 'language': 'en'}, page_content="This translates to the invoice, where we've only spent about $900 in the last 7 days, as opposed to $2,000 in the previous 4.\nThat's a cost reduction of nearly 75% per day!\n\nOptimization 2: limit usage\u200b\nIn the previous section, we managed data retention settings to optimize existing spend. In this section, we will\nuse usage limits to prevent future overspend.\nLangSmith has two usage limits: total traces and extended retention traces. These correspond to the two metrics we've\nbeen tracking on our usage graph. We can use these in tandem to have granu