Skip to content

Commit db5c363

Browse files
authored
feat(cookbook): add /cookbook and export all cookbooks to it (langfuse#376)
1 parent 3058446 commit db5c363

15 files changed

+2297
-24
lines changed

components/MainContentWrapper.tsx

+14-5
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,28 @@ import { ProductUpdateSignup } from "./productUpdateSignup";
1818

1919
const pathsWithoutFooterWidgets = ["/imprint", "/blog"];
2020

21+
const cleanedCookbookRoutes = cookbookRoutes.flatMap(
22+
({ notebook, destinations }) => {
23+
return [
24+
...destinations,
25+
"cookbook/" + notebook.replace(".ipynb", ".md"), // add cookbook path that all notebooks are published to
26+
].map((d) => ({
27+
notebook,
28+
destination: "/" + d,
29+
}));
30+
}
31+
);
32+
2133
export const MainContentWrapper = (props) => {
2234
const router = useRouter();
23-
const notebook = cookbookRoutes.find(
35+
const notebook = cleanedCookbookRoutes.find(
2436
({ destination }) => destination === router.pathname + ".md"
2537
);
2638

2739
return (
2840
<>
2941
{notebook ? (
30-
<NotebookBanner
31-
src={notebook.source.replace(".md", ".ipynb")}
32-
className="mb-4"
33-
/>
42+
<NotebookBanner src={notebook.notebook} className="mb-4" />
3443
) : null}
3544
{props.children}
3645
{!pathsWithoutFooterWidgets.includes(router.pathname) ? (

components/NotebookBanner.tsx

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ export const NotebookBanner: React.FC<{ src: string; className?: string }> = ({
1111
<span>This is a Jupyter notebook</span>
1212
<div className="flex gap-2 flex-wrap">
1313
<a
14-
href={`https://github.com/langfuse/langfuse-docs/blob/main/${src}`}
14+
href={`https://github.com/langfuse/langfuse-docs/blob/main/cookbook/${src}`}
1515
target="_blank"
1616
rel="noopener noreferrer"
1717
>
@@ -20,7 +20,7 @@ export const NotebookBanner: React.FC<{ src: string; className?: string }> = ({
2020
</Button>
2121
</a>
2222
<a
23-
href={`https://colab.research.google.com/github/langfuse/langfuse-docs/blob/main/${src}`}
23+
href={`https://colab.research.google.com/github/langfuse/langfuse-docs/blob/main/cookbook/${src}`}
2424
target="_blank"
2525
rel="noopener noreferrer"
2626
>

cookbook/_routes.json

+16-12
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,30 @@
11
[
22
{
3-
"source": "cookbook/datasets.md",
4-
"destination": "/docs/datasets/python-cookbook.md"
3+
"notebook": "datasets.ipynb",
4+
"destinations": ["docs/datasets/python-cookbook.md"]
55
},
66
{
7-
"source": "cookbook/integration_langchain.md",
8-
"destination": "/docs/integrations/langchain/example-python.md"
7+
"notebook": "integration_langchain.ipynb",
8+
"destinations": ["docs/integrations/langchain/example-python.md"]
99
},
1010
{
11-
"source": "cookbook/integration_openai_sdk.md",
12-
"destination": "/docs/integrations/openai.md"
11+
"notebook": "integration_openai_sdk.ipynb",
12+
"destinations": ["docs/integrations/openai.md"]
1313
},
1414
{
15-
"source": "cookbook/evaluation_with_langchain.md",
16-
"destination": "/docs/scores/model-based-evals/langchain.md"
15+
"notebook": "evaluation_with_langchain.ipynb",
16+
"destinations": ["docs/scores/model-based-evals/langchain.md"]
1717
},
1818
{
19-
"source": "cookbook/python_sdk.md",
20-
"destination": "/docs/sdk/python.md"
19+
"notebook": "integration_azure_openai_langchain.ipynb",
20+
"destinations": []
2121
},
2222
{
23-
"source": "cookbook/evaluation_of_rag_with_ragas.md",
24-
"destination": "/docs/scores/model-based-evals/ragas.md"
23+
"notebook": "python_sdk.ipynb",
24+
"destinations": ["docs/sdk/python.md"]
25+
},
26+
{
27+
"notebook": "evaluation_of_rag_with_ragas.ipynb",
28+
"destinations": ["docs/scores/model-based-evals/ragas.md"]
2529
}
2630
]

pages/_meta.json

+5
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@
2929
"type": "page",
3030
"title": "Docs"
3131
},
32+
"cookbook": {
33+
"type": "page",
34+
"title": "Cookbook",
35+
"display": "hidden"
36+
},
3237
"pricing": {
3338
"title": "Pricing",
3439
"type": "page",

pages/cookbook/_meta.json

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"index": "Overview",
3+
"-- Cookbook": {
4+
"type": "separator",
5+
"title": "Cookbook"
6+
}
7+
}

pages/cookbook/datasets.md

+239
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
# Langfuse Datasets Cookbook
2+
3+
In this cookbook, we'll iterate on system prompts with the goal of getting only the capital of a given country. We use Langfuse datasets to store a list of example inputs and expected outputs.
4+
5+
This is a very simple example; you can run experiments on any LLM application that you trace either with the [Langfuse SDKs](https://langfuse.com/docs/sdk) (Python, JS/TS) or via one of our [integrations](https://langfuse.com/docs/integrations) (e.g. Langchain).
6+
7+
_Simple example application_
8+
9+
- **Model**: gpt-3.5-turbo
10+
- **Input**: country name
11+
- **Output**: capital
12+
- **Evaluation**: exact match of completion and ground truth
13+
- **Experiment on**: system prompt
14+
15+
## Setup
16+
17+
18+
```python
19+
%pip install langfuse openai langchain --upgrade
20+
```
21+
22+
23+
```python
24+
import os
25+
26+
# get keys for your project from https://cloud.langfuse.com
27+
os.environ["LANGFUSE_PUBLIC_KEY"] = ""
28+
os.environ["LANGFUSE_SECRET_KEY"] = ""
29+
30+
# your openai key
31+
os.environ["OPENAI_API_KEY"] = ""
32+
33+
# Your host, defaults to https://cloud.langfuse.com
34+
# For US data region, set to "https://us.cloud.langfuse.com"
35+
# os.environ["LANGFUSE_HOST"] = "http://localhost:3000"
36+
```
37+
38+
39+
```python
40+
# import
41+
from langfuse import Langfuse
42+
import openai
43+
44+
# init
45+
langfuse = Langfuse()
46+
```
47+
48+
## Create a dataset
49+
50+
51+
```python
52+
langfuse.create_dataset(name="capital_cities");
53+
```
54+
55+
### Items
56+
57+
Load local items into the Langfuse dataset. Alternatively, you can add items from production via the Langfuse UI.
58+
59+
60+
```python
61+
# example items, could also be json instead of strings
62+
local_items = [
63+
{"input": {"country": "Italy"}, "expected_output": "Rome"},
64+
{"input": {"country": "Spain"}, "expected_output": "Madrid"},
65+
{"input": {"country": "Brazil"}, "expected_output": "Brasília"},
66+
{"input": {"country": "Japan"}, "expected_output": "Tokyo"},
67+
{"input": {"country": "India"}, "expected_output": "New Delhi"},
68+
{"input": {"country": "Canada"}, "expected_output": "Ottawa"},
69+
{"input": {"country": "South Korea"}, "expected_output": "Seoul"},
70+
{"input": {"country": "Argentina"}, "expected_output": "Buenos Aires"},
71+
{"input": {"country": "South Africa"}, "expected_output": "Pretoria"},
72+
{"input": {"country": "Egypt"}, "expected_output": "Cairo"},
73+
]
74+
```
75+
76+
77+
```python
78+
# Upload to Langfuse
79+
for item in local_items:
80+
langfuse.create_dataset_item(
81+
dataset_name="capital_cities",
82+
# any python object or value
83+
input=item["input"],
84+
# any python object or value, optional
85+
expected_output=item["expected_output"]
86+
)
87+
```
88+
89+
## Define application and run experiments
90+
91+
We implement the application in two ways to demonstrate how it's done:
92+
93+
1. Custom LLM app using e.g. OpenAI SDK, traced with Langfuse Python SDK
94+
2. Langchain Application, traced via native Langfuse integration
95+
96+
97+
```python
98+
# we use a very simple eval here, you can use any eval library
99+
# see https://langfuse.com/docs/scores/model-based-evals for details
100+
def simple_evaluation(output, expected_output):
101+
return output == expected_output
102+
```
103+
104+
### Custom app
105+
106+
107+
```python
108+
from datetime import datetime
109+
110+
def run_my_custom_llm_app(input, system_prompt):
111+
messages = [
112+
{"role":"system", "content": system_prompt},
113+
{"role":"user", "content": input["country"]}
114+
]
115+
116+
generationStartTime = datetime.now()
117+
118+
openai_completion = openai.chat.completions.create(
119+
model="gpt-3.5-turbo",
120+
messages=messages
121+
).choices[0].message.content
122+
123+
langfuse_generation = langfuse.generation(
124+
name="guess-countries",
125+
input=messages,
126+
output=openai_completion,
127+
model="gpt-3.5-turbo",
128+
start_time=generationStartTime,
129+
end_time=datetime.now()
130+
)
131+
132+
return openai_completion, langfuse_generation
133+
```
134+
135+
136+
```python
137+
def run_experiment(experiment_name, system_prompt):
138+
dataset = langfuse.get_dataset("capital_cities")
139+
140+
for item in dataset.items:
141+
completion, langfuse_generation = run_my_custom_llm_app(item.input, system_prompt)
142+
143+
item.link(langfuse_generation, experiment_name) # pass the observation/generation object or the id
144+
145+
langfuse_generation.score(
146+
name="exact_match",
147+
value=simple_evaluation(completion, item.expected_output)
148+
)
149+
```
150+
151+
152+
```python
153+
run_experiment(
154+
"famous_city",
155+
"The user will input countries, respond with the most famous city in this country"
156+
)
157+
run_experiment(
158+
"directly_ask",
159+
"What is the capital of the following country?"
160+
)
161+
run_experiment(
162+
"asking_specifically",
163+
"The user will input countries, respond with only the name of the capital"
164+
)
165+
run_experiment(
166+
"asking_specifically_2nd_try",
167+
"The user will input countries, respond with only the name of the capital. State only the name of the city."
168+
)
169+
```
170+
171+
### Langchain application
172+
173+
174+
```python
175+
from datetime import datetime
176+
from langchain.chat_models import ChatOpenAI
177+
from langchain.chains import LLMChain
178+
from langchain.schema import AIMessage, HumanMessage, SystemMessage
179+
180+
def run_my_langchain_llm_app(input, system_message, callback_handler):
181+
182+
# needs to include {country}
183+
messages = [
184+
SystemMessage(
185+
content=system_message
186+
),
187+
HumanMessage(
188+
content=input
189+
),
190+
]
191+
chat = ChatOpenAI(callbacks=[callback_handler])
192+
completion = chat(messages)
193+
194+
return completion.content
195+
```
196+
197+
198+
```python
199+
def run_langchain_experiment(experiment_name, system_message):
200+
dataset = langfuse.get_dataset("capital_cities")
201+
202+
for item in dataset.items:
203+
handler = item.get_langchain_handler(run_name=experiment_name)
204+
205+
completion = run_my_langchain_llm_app(item.input["country"], system_message, handler)
206+
207+
handler.root_span.score(
208+
name="exact_match",
209+
value=simple_evaluation(completion, item.expected_output)
210+
)
211+
```
212+
213+
214+
```python
215+
run_langchain_experiment(
216+
"langchain_famous_city",
217+
"The user will input countries, respond with the most famous city in this country"
218+
)
219+
run_langchain_experiment(
220+
"langchain_directly_ask",
221+
"What is the capital of the following country?"
222+
)
223+
run_langchain_experiment(
224+
"langchain_asking_specifically",
225+
"The user will input countries, respond with only the name of the capital"
226+
)
227+
run_langchain_experiment(
228+
"langchain_asking_specifically_2nd_try",
229+
"The user will input countries, respond with only the name of the capital. State only the name of the city."
230+
)
231+
```
232+
233+
## Evaluate experiments in Langfuse UI
234+
235+
- Average scores per experiment run
236+
- Browse each run for an individual item
237+
- Look at traces to debug issues
238+
239+
![Experiment runs in Langfuse](https://langfuse.com/images/docs/dataset-runs-cookbook.jpg)

0 commit comments

Comments
 (0)