In [1]:
#import statements
from IPython.display import JSON

# Imports main tools:
from trulens_eval import TruChain, Feedback, Huggingface, Tru
from trulens_eval.schema import FeedbackResult
# Create the TruLens handle used by the rest of the notebook (records, leaderboard, dashboard).
tru = Tru()
# NOTE(review): this runs on every execution of the import cell; presumably it wipes
# all previously stored apps/records in the TruLens database — confirm this is intended.
tru.reset_database()

# Imports from langchain to build app
import bs4
from langchain import hub
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_core.runnables import RunnablePassthrough


# Bocconi implementation
import os
import openai
from langchain.document_loaders import UnstructuredMarkdownLoader
from langchain.text_splitter import MarkdownHeaderTextSplitter
from trulens_eval.feedback.provider import OpenAI
import numpy as np
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.


In [2]:
#SET UP
# The OpenAI key is read from the environment and only prompted for when it is
# missing. Secrets must never be hard-coded in a notebook: they end up in version
# control and in every shared copy of the file.
# NOTE(review): any key that was ever committed in this cell should be treated as
# compromised and revoked.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass  # local import: only needed for the interactive fallback
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")
openai.api_key = os.environ["OPENAI_API_KEY"]

#DOCUMENT LOADING
# Read the scraped markdown corpus into memory as a single string.
# The encoding is given explicitly because the text is Italian (accented
# characters) and the platform default encoding is not UTF-8 everywhere.
# NOTE(review): assumes the file is stored as UTF-8 — confirm against the scraper output.
file_path = "../../Data/Scraping_Bocconi_converted_no_dup_check.md"
with open(file_path, 'r', encoding="utf-8") as file:
    markdown_content = file.read()

#CREATE VECTOR STORE
# Markdown heading markers paired with the metadata key each level is stored under.
headers_to_split_on = [
    ("#", "Header 1"),
    ("##", "Header 2"),
    ("###", "Header 3"),
    ("####", "Header 4"),
]

# Split the corpus along those headings, one chunk per section.
markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
splits = markdown_splitter.split_text(markdown_content)

# Embed every chunk with OpenAI embeddings and index the result in Chroma.
# NOTE(review): no persist directory / collection name is given; presumably re-running
# this cell in the same kernel adds the chunks again — confirm.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
)

#CREATE RAG
# Expose the vector store through the retriever interface consumed by the chain below.
retriever = vectorstore.as_retriever()

# Fetch the "rlm/rag-prompt" template from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")
# Chat model that generates the answer; temperature=0 presumably to keep answers as stable as possible.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

def format_docs(docs):
    """Merge retrieved documents into one context string.

    Args:
        docs: iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` of every document, in order, separated by a
        blank line (``"\\n\\n"``); an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    page_texts = [document.page_content for document in docs]
    return separator.join(page_texts)

# RAG pipeline: the incoming question is forwarded unchanged under "question",
# while "context" is produced by running the retriever on it and joining the
# retrieved documents with format_docs. Both values fill the prompt, the prompt
# goes to the LLM, and the model reply is parsed into a plain string.
rag_chain_base = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [43]:
# Check which class the hub prompt is (a ChatPromptTemplate) before writing a custom one.
type(prompt)

langchain_core.prompts.chat.ChatPromptTemplate

In [51]:
#TODO: FINETUNE PROMPT
# ChatPromptTemplate must be imported here: no other cell of the notebook binds
# that name, so without it this cell raises NameError on a fresh kernel.
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_core.messages import SystemMessage

# Draft of a custom chat prompt: a fixed system message followed by the user's text.
# NOTE(review): the system message is still a placeholder and the template expects a
# single "text" variable; `chat_template` is not wired into rag_chain_base yet.
chat_template = ChatPromptTemplate.from_messages(
    [
        SystemMessage(
            content=(
                "You are a helpful assistant that re-writes the user's text to "
                "sound more upbeat."
            )
        ),
        HumanMessagePromptTemplate.from_template("{text}"),
    ]
)

In [8]:
# Smoke test: ask the chain one question directly (this call is not recorded by TruLens).
rag_chain_base.invoke("Come funziona per il check-in in residenza? ")


"Per effettuare il check-in in residenza, è necessario consultare l'esito dell'assegnazione, effettuare il pagamento della prima rata alloggi e compilare la procedura di check-in online entro le scadenze indicate. Successivamente, è possibile prendere possesso dell'alloggio presentandosi alla reception con un documento di identità valido."

---

In [10]:
from trulens_eval.app import App
from trulens_eval.feedback import Groundedness

# Initialize provider class
# The provider is bound to `openai_provider`, not `openai`: rebinding `openai`
# would shadow the `openai` module imported at the top of the notebook, so that
# re-running the setup cell would no longer configure the module.
openai_provider = OpenAI()

#SET UP FEEDBACK FUNCTIONS FOR EVAL
# select context to be used in feedback. the location of context is app specific.
context = App.select_context(rag_chain_base)

# Groundedness reuses the same provider instead of instantiating a second one.
grounded = Groundedness(groundedness_provider=openai_provider)
# Define a groundedness feedback function
f_groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons)
    .on(context.collect()) # collect context chunks into a list
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

# Question/answer relevance between overall question and answer.
f_qa_relevance = Feedback(openai_provider.relevance).on_input_output()
# Question/statement relevance between question and each context chunk;
# the per-chunk scores are averaged into a single value.
f_context_relevance = (
    Feedback(openai_provider.qs_relevance)
    .on_input()
    .on(context)
    .aggregate(np.mean)
)


✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.first.steps.context.first.get_relevant_documents.rets.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In qs_relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In qs_relevance, input statement will be set to __record__.app.first.steps.context.first.get_relevant_documents.rets .


In [11]:
# Wrap the chain in a TruLens recorder registered under a fixed app id; the three
# feedback functions are attached so each recorded call gets scored.
tru_recorder = TruChain(
    rag_chain_base,
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness],
    app_id='Chain1_ChatApplication',
)

In [12]:
# Invoke the chain inside the recorder context so the call is captured in `recording`.
with tru_recorder as recording:
    llm_response = rag_chain_base.invoke("Come funziona l'ingresso in residenza? ")

# Show the generated answer.
display(llm_response)

"L'ingresso in residenza avviene attraverso la reception, dove il residente deve compilare un registro con le informazioni dell'ospite esterno e depositare un documento di identità. L'ospite esterno può entrare in residenza dopo aver firmato il registro e depositato il proprio documento di identità. Il residente è responsabile del comportamento dell'ospite esterno e deve assicurarsi che rispetti le regole della residenza."

In [17]:
# The record of the app invocation can be retrieved from the `recording`:

rec = recording.get() # use .get if only one record
# recs = recording.records # use .records if multiple

# Show the full Record object for the recorded call.
display(rec)


Record(record_id='record_hash_6472182f98bb17f06ebad986ac1c8e56', app_id='Chain1_ChatApplication', cost=Cost(n_requests=2, n_successful_requests=2, n_classes=0, n_tokens=1725, n_stream_chunks=0, n_prompt_tokens=1618, n_completion_tokens=107, cost=0.0026185), perf=Perf(start_time=datetime.datetime(2023, 12, 29, 11, 46, 6, 26703), end_time=datetime.datetime(2023, 12, 29, 11, 46, 12, 747333)), ts=datetime.datetime(2023, 12, 29, 11, 46, 12, 754210), tags='-', meta=None, main_input="Come funziona l'ingresso in residenza? ", main_output="L'ingresso in residenza avviene attraverso la reception, dove il residente deve compilare un registro con le informazioni dell'ospite esterno e depositare un documento di identità. L'ospite esterno può entrare in residenza dopo aver firmato il registro e depositato il proprio documento di identità. Il residente è responsabile del comportamento dell'ospite esterno e deve assicurarsi che rispetti le regole della residenza.", main_error=None, calls=[RecordAppCal

In [20]:
# Load the stored records for this app; `feedback` is used further down as a list
# of feedback column names when selecting columns from `records`.
records, feedback = tru.get_records_and_feedback(app_ids=["Chain1_ChatApplication"])

# Preview the first rows only.
records.head()


Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,relevance,qs_relevance,groundedness_measure_with_cot_reasons,relevance_calls,qs_relevance_calls,groundedness_measure_with_cot_reasons_calls,latency,total_tokens,total_cost
0,Chain1_ChatApplication,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_6472182f98bb17f06ebad986ac1c8e56,"""Come funziona l'ingresso in residenza? ""","""L'ingresso in residenza avviene attraverso la...",-,"{""record_id"": ""record_hash_6472182f98bb17f06eb...","{""n_requests"": 2, ""n_successful_requests"": 2, ...","{""start_time"": ""2023-12-29T11:46:06.026703"", ""...",2023-12-29T11:46:12.754210,0.9,0.5,1.0,[{'args': {'prompt': 'Come funziona l'ingresso...,[{'args': {'question': 'Come funziona l'ingres...,"[{'args': {'source': [[{'page_content': ""Se lo...",6,1725,0.002619


In [21]:
# Per-app summary table (feedback scores, latency, total cost) for this app id.
tru.get_leaderboard(app_ids=["Chain1_ChatApplication"])


Unnamed: 0_level_0,groundedness_measure_with_cot_reasons,relevance,qs_relevance,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chain1_ChatApplication,1.0,0.9,0.5,6.0,0.002619


In [26]:
# Disable column-width truncation so complete questions and answers are readable.
# NOTE(review): set_option changes the setting for the rest of the session, including
# re-runs of earlier cells; pd.option_context would limit it to this display.
pd.set_option("display.max_colwidth", None)
# Question, answer and every feedback score side by side.
records[["input", "output"] + feedback]

Unnamed: 0,input,output,groundedness_measure_with_cot_reasons,relevance,qs_relevance
0,"""Come funziona l'ingresso in residenza? ""","""L'ingresso in residenza avviene attraverso la reception, dove il residente deve compilare un registro con le informazioni dell'ospite esterno e depositare un documento di identit\u00e0. L'ospite esterno pu\u00f2 entrare in residenza dopo aver firmato il registro e depositato il proprio documento di identit\u00e0. Il residente \u00e8 responsabile del comportamento dell'ospite esterno e deve assicurarsi che rispetti le regole della residenza.""",1.0,0.9,0.5


In [41]:
# NOTE(review): the saved output of this cell ends with
# "RuntimeError: Dashboard failed to start in time" — inspect the dashboard logs
# before relying on this cell in a Restart & Run All.
tru.run_dashboard() # open a local streamlit app to explore
# tru.stop_dashboard() # stop if needed

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

RuntimeError: Dashboard failed to start in time. Please inspect dashboard logs for additional information.