In [None]:
# pip uninstall -y trulens_eval
# pip install git+https://github.com/truera/trulens@piotrm/azure_bugfixes#subdirectory=trulens_eval

# trulens_eval notebook dev

%load_ext autoreload
%autoreload 2
from pathlib import Path
import sys

def find_trulens_eval_root(start):
    """Locate the `trulens_eval` project folder at or above `start`.

    Walks from `start` up through its ancestors and, for the nearest directory
    that contains `trulens_eval/trulens_eval`, returns `<directory>/trulens_eval`.

    Returns None when no such directory exists. The search is bounded by the
    ancestors of `start`, so it cannot spin forever once the filesystem root
    (which is its own parent) has been reached.
    """
    start = Path(start)
    for candidate in (start, *start.parents):
        if (candidate / "trulens_eval" / "trulens_eval").exists():
            return candidate / "trulens_eval"
    return None


base = find_trulens_eval_root(Path.cwd())
if base is None:
    # Not running from inside a trulens checkout: keep going with the working
    # directory, but say so, since the dev-mode settings below rely on `base`.
    base = Path.cwd()
    print(
        "warning: no trulens_eval/trulens_eval folder found above "
        f"{base}; using the working directory as base",
        file=sys.stderr,
    )

print(base)

# If running from github repo, can use this:
sys.path.append(str(base))

# Uncomment for more debugging printouts.
"""
import logging
root = logging.getLogger()
root.setLevel(logging.DEBUG)

handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
root.addHandler(handler)
"""

from trulens_eval.keys import check_keys

# Check for both API keys up front, before any app or database work.
check_keys(
    "OPENAI_API_KEY",
    "HUGGINGFACE_API_KEY"
)

from trulens_eval import Tru
tru = Tru()
# NOTE(review): presumably this wipes previously logged records -- confirm
# before running against a database you want to keep.
tru.reset_database()

# Start the dashboard with the dev option pointing at the `base` folder
# computed earlier in this cell.
tru.run_dashboard(_dev=base, force=True)

# 📓 _LangChain_ Quickstart

In this quickstart you will create a simple LLM Chain and learn how to log it and get feedback on an LLM response.

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/main/trulens_eval/examples/quickstart/langchain_quickstart.ipynb)

## Setup
### Add API keys
For this quickstart you will need OpenAI and Hugging Face API keys.

In [None]:
# ! pip install trulens_eval openai langchain chromadb langchainhub bs4 tiktoken

In [None]:
import os

# Keep a key that is already present in the environment (exported in the shell
# or loaded by an earlier cell); only fall back to the placeholder when none is
# set. Replace "sk-..." with your own key and never commit a real one.
os.environ.setdefault("OPENAI_API_KEY", "sk-...")

### Import from LangChain and TruLens

In [None]:
# Imports main tools:
from trulens_eval import TruChain, Tru
tru = Tru()
# NOTE(review): presumably clears previously logged records so the quickstart
# starts from an empty database -- confirm before running on real data.
tru.reset_database()

# Imports from LangChain to build app
import bs4
from langchain import hub
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_core.runnables import RunnablePassthrough

### Load documents

In [None]:
# Load the blog post, restricting HTML parsing to the elements whose CSS class
# is one of "post-content", "post-title" or "post-header".
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        ),
    },
)
docs = loader.load()

### Create Vector Store

In [None]:
# Cut the loaded documents into chunks of 1000 with an overlap of 200, then
# index the chunks in Chroma using OpenAI embeddings.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())

### Create RAG

In [None]:
# Retriever view over the vector store built above.
retriever = vectorstore.as_retriever()

# Prompt pulled from the LangChain hub under the name "rlm/rag-prompt", and the
# chat model that will answer (temperature 0).
prompt = hub.pull("rlm/rag-prompt")
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

def format_docs(docs):
    """Return the `page_content` of every document, separated by blank lines."""
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

# Compose the RAG pipeline with the `|` operator. The first step is a dict with
# two entries: "context" comes from `retriever | format_docs`, and "question"
# comes from RunnablePassthrough(). Its result is piped into the prompt, then
# the LLM, then StrOutputParser.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

### Send your first request

In [None]:
rag_chain.invoke("What is Task Decomposition?")

In [None]:
rag_chain

In [None]:
from langchain_core.runnables import RunnablePassthrough, RunnableSequence

# How we mark a method as instrumented: the method carries a
# `__tru_instrumented` attribute. Read it with a default so this cell prints
# None, instead of raising AttributeError, when the chain's methods have not
# been instrumented yet (the recorder is only created later in the notebook).
print(getattr(rag_chain.invoke, "__tru_instrumented", None))

# A method may have multiple implementations in different classes across the
# MRO ("method resolution order").
# NOTE(review): the loop inspects `ainvoke` while the line above inspects
# `invoke` -- confirm that this is intended.
for superclass in rag_chain.__class__.__mro__:
    print(superclass.__name__, superclass.__module__, end="")
    if hasattr(superclass, "ainvoke") and hasattr(superclass.ainvoke, "__tru_instrumented"):
        print(" is instrumented")
    else:
        print()

In [None]:
# rag_chain.invoke != RunnableSequence.invoke
# rag_chain.invoke is "bound" in that it has a value for "self", whereas
# RunnableSequence.invoke expects self as an argument.
# `__func__` is the plain function underneath a bound method.
print(RunnableSequence.invoke == rag_chain.invoke)                # false
print(RunnableSequence.invoke == rag_chain.invoke.__func__)       # true
print(RunnablePassthrough.invoke == rag_chain.invoke.__func__)    # false
# `__self__` is the instance the bound method is attached to.
print(rag_chain.invoke.__self__)

In [None]:
from trulens_eval.app import App

# `context` is otherwise only assigned further down, in the feedback-function
# section, so compute the selector here to keep this cell runnable when the
# notebook is executed top to bottom.
context = App.select_context(rag_chain)

# Look at one inner component, a retriever:
retriever_step = rag_chain.steps[0].steps['context'].steps[0]
print(type(retriever_step))
# And its method of interest:
print(retriever_step.get_relevant_documents)
# Recall selector for context:
print(context)

In [None]:
from trulens_eval.app import App
from trulens_eval.utils.serial import Lens

# (Re)compute the context selector so this cell does not depend on `context`
# lingering from a cell that appears later in the notebook.
context = App.select_context(rag_chain)

# Keep the selector's path without its first two steps and its last step.
# NOTE(review): presumably what remains addresses the method inside the app
# object itself -- confirm against the printed selector.
context_part = Lens(path=context.path[2:-1])
print(context_part)

# Resolve the lens against the chain once, then compare the result with the
# retriever method reached by walking the chain's steps by hand.
selected = context_part.get_sole_item(rag_chain)
print(selected)
print(selected == rag_chain.steps[0].steps['context'].steps[0].get_relevant_documents)

In [None]:
# List the public attributes of the chain together with the type of each value.
print(type(rag_chain))
for attr in dir(rag_chain):
    if attr.startswith("_"):
        continue
    try:
        value = getattr(rag_chain, attr)
    except Exception as exc:
        # Attribute access itself can raise; report which attribute failed
        # rather than silently dropping it from the listing.
        print(attr, f"<unreadable: {type(exc).__name__}>")
    else:
        print(attr, type(value))

## Initialize Feedback Function(s)

In [None]:
import numpy as np

from trulens_eval import Feedback
from trulens_eval.app import App
from trulens_eval.feedback import Groundedness
from trulens_eval.feedback.provider import OpenAI

# Initialize provider class
provider = OpenAI()

# select context to be used in feedback. the location of context is app specific.
context = App.select_context(rag_chain)

# Reuse the provider created above instead of constructing a second OpenAI()
# instance just for groundedness.
grounded = Groundedness(groundedness_provider=provider)
# Define a groundedness feedback function
f_groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons)
    .on(context.collect()) # collect context chunks into a list
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

# Question/answer relevance between overall question and answer.
f_answer_relevance = (
    Feedback(provider.relevance)
    .on_input_output()
)
# Question/statement relevance between question and each context chunk.
f_context_relevance = (
    Feedback(provider.context_relevance_with_cot_reasons)
    .on_input()
    .on(context)
    .aggregate(np.mean)
)

## Instrument chain for logging with TruLens

In [None]:
# Wrap the chain in a TruChain recorder registered under the given app id, with
# the three feedback functions defined above attached to it.
tru_recorder = TruChain(
    rag_chain,
    app_id='Chain1_ChatApplication',
    feedbacks=[
        f_answer_relevance,
        f_context_relevance,
        f_groundedness,
    ],
)

In [None]:
# Two ways of recording the same question (note the chain is invoked twice).
# 1) `with_record` hands back the response together with its record.
response, tru_record = tru_recorder.with_record(
    rag_chain.invoke,
    "What is Task Decomposition?"
)

# 2) Use the recorder as a context manager and fetch the record afterwards.
with tru_recorder as recorder:
    rag_chain.invoke("What is Task Decomposition?")

# NOTE: this rebinds `tru_record`, replacing the record returned by
# `with_record` above.
tru_record = recorder.get()

In [None]:
json_like = tru_record.layout_calls_as_app()

In [None]:
json_like

In [None]:
from ipytree import Tree, Node

def display_call_stack(data):
    """Build an ipytree `Tree` summarising a record laid out as JSON-like data.

    `data` is indexed with the keys 'record_id', 'app_id', 'cost', 'perf',
    'ts', 'tags', 'main_input', 'main_output', 'main_error' and 'calls'.
    Each of the first nine becomes one labelled top-level node. Under a
    'Calls' node, every call gets a 'Call' node holding one 'Step' node per
    entry of its 'stack'; a step that has an 'expanded' entry additionally
    gets an 'Expanded' child with one 'Step' node per expanded item.

    Returns the populated tree.
    """
    # (label template, key in `data`) in display order.
    summary_fields = (
        ('Record ID: {}', 'record_id'),
        ('App ID: {}', 'app_id'),
        ('Cost: {}', 'cost'),
        ('Performance: {}', 'perf'),
        ('Timestamp: {}', 'ts'),
        ('Tags: {}', 'tags'),
        ('Main Input: {}', 'main_input'),
        ('Main Output: {}', 'main_output'),
        ('Main Error: {}', 'main_error'),
    )

    root = Tree()
    for template, key in summary_fields:
        root.add_node(Node(template.format(data[key])))

    calls_branch = Node('Calls')
    root.add_node(calls_branch)

    for call in data['calls']:
        call_branch = Node('Call')
        calls_branch.add_node(call_branch)

        for frame in call['stack']:
            frame_node = Node('Step: {}'.format(frame['path']))
            call_branch.add_node(frame_node)

            if 'expanded' not in frame:
                continue

            expanded_branch = Node('Expanded')
            frame_node.add_node(expanded_branch)
            for inner in frame['expanded']:
                expanded_branch.add_node(Node('Step: {}'.format(inner['path'])))

    return root

# Usage
tree = display_call_stack(json_like)
tree

In [None]:
tree

In [None]:
# Invoke the chain inside the recorder's `with` block; the object bound to
# `recording` is what the record is fetched from afterwards.
with tru_recorder as recording:
    llm_response = rag_chain.invoke("What is Task Decomposition?")

display(llm_response)

## Retrieve records and feedback

In [None]:
# The record of the app invocation can be retrieved from the `recording`:

rec = recording.get() # use .get if only one record
# recs = recording.records # use .records if multiple

display(rec)

In [None]:
# The results of the feedback functions can be retrieved from
# `Record.feedback_results` or using the `wait_for_feedback_results` method. The
# results, if retrieved directly, are `Future` instances (see
# `concurrent.futures`). You can use `as_completed` to wait until they have
# finished evaluating or use the utility method:

for feedback, feedback_result in rec.wait_for_feedback_results().items():
    print(feedback.name, feedback_result.result)

# See more about wait_for_feedback_results:
# help(rec.wait_for_feedback_results)

In [None]:
records, feedback = tru.get_records_and_feedback(app_ids=["Chain1_ChatApplication"])

records.head()

In [None]:
tru.get_leaderboard(app_ids=["Chain1_ChatApplication"])

## Explore in a Dashboard

In [None]:
tru.run_dashboard() # open a local streamlit app to explore

# tru.stop_dashboard() # stop if needed

Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard.

Note: Feedback functions evaluated in the deferred manner can be seen in the "Progress" page of the TruLens dashboard.