# Langchain Quickstart

In this quickstart you will create a simple LLM Chain and learn how to log it and get feedback on an LLM response.

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/truera/trulens/blob/main/trulens_eval/examples/quickstart/langchain_quickstart.ipynb)

## 0. Setup
### 0.1. Import statements & add API keys
For this quickstart you will need an OpenAI API key and a LangChain (LangSmith) API key.

In [13]:
#pip install -U langchain
#! pip install trulens_eval==0.21.0 openai==1.3.7 langchain chromadb langchainhub bs4

In [2]:
# Imports main tools:
from trulens_eval import TruChain, Feedback, Huggingface, Tru
from trulens_eval.schema import FeedbackResult
tru = Tru()
tru.reset_database()

# Imports from langchain to build app
import bs4
from langchain import hub
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_core.runnables import RunnablePassthrough

from langchain.document_loaders import UnstructuredMarkdownLoader
from langchain.text_splitter import MarkdownHeaderTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI

from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo

from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

from langchain.memory import ConversationBufferMemory

from langchain.chains import StuffDocumentsChain,LLMChain,ReduceDocumentsChain,MapReduceDocumentsChain

from langchain_core.prompts import PromptTemplate
from langchain_community.llms import OpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

import json
import os
import textwrap
from getpass import getpass
from pathlib import Path

import chromadb
import langchain
import openai
from langchain.chains import ConversationChain
from langchain.chat_models import ChatOpenAI
from langchain.docstore import InMemoryDocstore
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.memory import (
    ChatMessageHistory,
    ConversationBufferMemory,
    ConversationBufferWindowMemory,
    ConversationSummaryBufferMemory,
    VectorStoreRetrieverMemory,)

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.


In [3]:

from langchain.prompts.prompt import PromptTemplate
from langchain.schema import messages_from_dict, messages_to_dict
from langchain.vectorstores import Chroma


import langchain 
print(langchain.__version__) 

import trulens_eval
print(trulens_eval.__version__)

import openai 
print(openai.__version__) #version update 

import os
from getpass import getpass

# SECURITY: API keys must never be hard-coded in a notebook or committed to
# version control. The literal keys that used to be assigned here have been
# exposed and must be revoked and re-issued.
# Each key is taken from the environment and only prompted for (without echo)
# when it is not already set.
for _key_name in ("OPENAI_API_KEY", "LANGCHAIN_API_KEY"):
    if not os.environ.get(_key_name):
        os.environ[_key_name] = getpass(f"{_key_name}: ")

# LangSmith tracing configuration (not secret).
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"] ="Bocconi-chat"

0.1.7
0.22.2
1.11.1


## 1. Implementation 

### Load documents & Create Vector stores & Create RAG  

In [4]:
## paths
# reformat 
path_full_p = "../../Data/New/Markdown/Full_plain.md"

Header splitters

In [5]:
headers_to_split_on_plain = [
    ("#", "Category"),
    ("##", "Subcategory"),
    ("###", "Question"),
    ("####", "URL"),
    ("#####", "ID"), 
]

#### Basic retriever + Vectorstore

In [6]:
from langchain_openai import OpenAIEmbeddings

In [7]:
## SETTING 
# Read the whole markdown FAQ corpus. The encoding is explicit so the result
# does not depend on the platform's default locale
# (assumes the file is UTF-8 -- TODO confirm).
with open(path_full_p, 'r', encoding='utf-8') as file:
    markdown_content = file.read()

# Split on the header levels configured in `headers_to_split_on_plain`.
markdown_splitter = MarkdownHeaderTextSplitter(
    headers_to_split_on=headers_to_split_on_plain)
splits = markdown_splitter.split_text(markdown_content)

embedding=OpenAIEmbeddings()

# temperature=0 keeps the answers as deterministic as the model allows.
llm_name = "gpt-3.5-turbo"
llm = ChatOpenAI(model_name=llm_name, temperature=0)


  warn_deprecated(


In [8]:
vs_full_plain = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
ret_full_plain = vs_full_plain.as_retriever()

In [16]:
! pip --version

pip 22.0.4 from /Users/valedipalo/miniforge3/lib/python3.9/site-packages/pip (python 3.9)


#### Self query retriever

SETUP

In [9]:
prompt = hub.pull("rlm/rag-prompt")
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

metadata_field_info_plain = [
    AttributeInfo(
        name="Category",
        description="a primary category or a general topic. It introduces the broader theme under which more specific information is grouped. In a retrieval task, it acts as the first level of data filtering or organization, offering a broad overview of the context or subject area.",
        type="string",
    ),
    AttributeInfo(
        name="Subcategory",
        description="This is a subtheme or subcategory of Header 1. It provides a further level of detail, focusing on a specific aspect of the main theme. It serves to refine the search or understanding within the general topic defined by Header 1, guiding the user towards more targeted information.",
        type="string",
    ),
    AttributeInfo(
        name="Question",
        description="This represents an even more specific subdivision of Header 2. This level contains the actual question. In a retrieval task, this header helps to focus on a very specific question, making the search even more targeted. ",
        type="string",
    ),
        AttributeInfo(
        name="URL",
        description="A reference to the URL from which the Question has been obtained. It is not relevant in any way for retrieving",
        type="string",
    ),
    AttributeInfo(
        name="ID",
        description="A reference to the specific question. It is not relevant in any way for retrieving",
        type="string",
    ),
]

document_content_description = "Frequently asked questions"




In [10]:
self_full_plain = SelfQueryRetriever.from_llm(
    llm,
    vs_full_plain,
    document_content_description, #
    metadata_field_info_plain,          #
    verbose= True
)

### Prompt engineering 

In [1]:
# Build prompt
template0 = """
Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
If you the question is too broad ask the user for clarifications.
Keep the answer as concise as possible.
Be exhaustive if the user is asking for it. 
For text provided in the format [some text](link) always include the link. 
Try to keep the same the "text" when it surrounded by quotation marks.  
Always say "If you need any further information, don't hesitate to ask!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""

template_2 = """
This chatbot is designed to provide assistance with university-related inquiries. Please ensure that all responses are relevant to the domain of universities and adhere to the following constraints:

1. Responses should only contain information relevant to universities, including but not limited to admissions, academics, campus life, and student services.
2. Avoid generating responses that stray into unrelated topics or provide general information outside the scope of university-related queries.
3. Responses should be accurate and informative, drawing from a designated knowledge base consisting of FAQs and guides specific to universities.
4. Maintain a professional and helpful tone in all responses, reflecting the expected demeanor of a university representative or advisor.
5. Prioritize providing concise and clear answers to questions, avoiding unnecessary verbosity or repetition.
6. If the response is mentioning a date, suggest the user to refer to sources to actually verify the answer is right

As general suggestion ashere to the following instructions:

1. If you don't know the answer, just say that you don't know, don't try to make up an answer and suggest the user to refer to a University Associations or University Advisor for further informations.
2. If you the question is too broad and could lead to multiple answers ask the user for clarifications.
3. Keep the answer as concise as possible while providing all the relevant information.
4. Be exhaustive if the user is asking for it.
5. For text provided in the format [some text](link) always include the link.
6.  Incentivize the user to ask more question if needed at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
# Prompt objects used by the retrieval chains. The template strings above are
# bound to `template0` and `template_2`; the previous `template` / `template2`
# names were never defined.
QA_CHAIN_PROMPT = PromptTemplate.from_template(template0)

QA_CHAIN_PROMPT2 = PromptTemplate.from_template(template_2)
#llm_name = "gpt-3.5-turbo"
#llm = ChatOpenAI(model_name=llm_name, temperature=0)


NameError: name 'PromptTemplate' is not defined

### RetrievalQA Chain + Base retriever 

In [12]:
rqa_basic_full_plain = RetrievalQA.from_chain_type(
    llm,
    retriever=ret_full_plain,
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)


# SAVE CHAIN 
https://github.com/langchain-ai/langchain/discussions/16542

In [None]:
rqa_basic_full_plain.save("trail_save.yaml")

In [None]:
rqa_basic_full_plain.save("trail_save.json")

In [None]:
cat trial_save.json 

# Load
https://github.com/langchain-ai/langchain/issues/13696

In [None]:
from langchain.chains.question_answering import load_qa_chain

In [None]:
? sace.save

In [None]:
from langchain.chains import load_chain

# `load_qa_chain` builds a new chain and cannot read a file; `load_chain`
# deserialises the saved one. The retriever is passed again here on the
# assumption that it is not part of the saved file.
# NOTE(review): confirm against the installed LangChain version.
chain = load_chain("trail_save.json", retriever=ret_full_plain)

## Load t2

In [None]:
? Chroma.from_documents

In [None]:
from langchain.vectorstores import Chroma
persist_directory = 'docs/chroma/'
#!rm -rf ./docs/chroma  # remove old database files if any
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=embedding,
    persist_directory=persist_directory
)

In [None]:
#vectordb = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings()) #persist directory missing 

retriever = vectordb.as_retriever()

qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=retriever)

In [None]:
rqa_basic_full_plain = RetrievalQA.from_chain_type(
    llm,
    retriever=ret_full_plain,
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)


# SAVE CHAIN 
https://github.com/langchain-ai/langchain/discussions/16542

In [14]:
? Chroma

In [None]:
rqa_basic_full_plain.save("trail_save.yaml")

In [None]:
rqa_basic_full_plain.save("trail_save.json")

In [None]:
cat trial_save.json 

# Load
https://github.com/langchain-ai/langchain/issues/13696

In [None]:
from langchain.chains.question_answering import load_qa_chain

In [None]:
? sace.save

In [None]:
from langchain.chains import load_chain

# `load_qa_chain` builds a new chain and cannot read a file; `load_chain`
# deserialises the saved one. The retriever is passed again here on the
# assumption that it is not part of the saved file.
# NOTE(review): confirm against the installed LangChain version.
chain = load_chain("trail_save.json", retriever=ret_full_plain)

## Load t2

In [None]:
? Chroma.from_documents

In [None]:
from langchain.vectorstores import Chroma
persist_directory = 'docs/chroma/'
#!rm -rf ./docs/chroma  # remove old database files if any
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=embedding,
    persist_directory=persist_directory
)

In [None]:
vectordb = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings()) #persist directory missing 

retriever = vectordb.as_retriever()

qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

### Memory RetrievalQA Chain

In [None]:
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True 
)

In [None]:
# With return_source_documents=True the chain returns two keys ("result" and
# "source_documents"). The memory must be told which one to store, otherwise
# saving the context fails because it expects a single output key.
rqa_basic_full_plain_memory = RetrievalQA.from_chain_type(
    llm,
    retriever=ret_full_plain,
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    memory=ConversationBufferMemory(
        memory_key="chat_history",
        input_key="query",
        output_key="result",
        return_messages=True,
    ),
)

In [None]:
rqa_basic_full_plain_memory.invoke("What are the dotations in the room? ")

### Conversational retriever chain 

In [None]:
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")

memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True, output_key='answer')
retriever = ret_full_plain

# Create the multipurpose chain
qachat = ConversationalRetrievalChain.from_llm(
    llm=ChatOpenAI(temperature=0),
    memory=memory,
    retriever=retriever, 
    return_source_documents=True
)

In [None]:
qachat.invoke("what are the dotation in the rooms?")

In [None]:
qachat.invoke("what about the kitchens?")

In [None]:
qachat.invoke("are they shared?")

### RetrievalQA Chain + Self retriever 

In [None]:
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")

memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True, output_key='answer')
retriever = ret_full_plain

# Create the multipurpose chain
qachat = ConversationalRetrievalChain.from_llm(
    llm=ChatOpenAI(temperature=0),
    memory=memory,
    retriever=retriever, 
    return_source_documents=True
)

### RetrievalQA Chain + Self retriever 

In [None]:
# RetrievalQA writes its answer under "result". The module-level `memory` was
# configured for the conversational chain (output_key='answer'), so this chain
# gets its own memory keyed on RetrievalQA's input and output names.
rqa_self_full_plain = RetrievalQA.from_chain_type(
    llm,
    retriever=self_full_plain,
    memory=ConversationBufferMemory(
        memory_key="chat_history",
        input_key="query",
        output_key="result",
        return_messages=True,
    ),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

In [None]:
rqa_self_full_plain.invoke("Who are the resident representatives")

### Mapreduce Chain
[link](https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.map_reduce.MapReduceDocumentsChain.html#langchain.chains.combine_documents.map_reduce.MapReduceDocumentsChain)

In [None]:
# RetrievalQA writes its answer under "result". The module-level `memory` was
# configured with output_key='answer' for the conversational chain, so this
# chain gets a dedicated memory with matching keys.
map_red_full_plain = RetrievalQA.from_chain_type(
    llm,
    retriever=ret_full_plain,
    memory=ConversationBufferMemory(
        memory_key="chat_history",
        input_key="query",
        output_key="result",
        return_messages=True,
    ),
    chain_type = "map_reduce"
)

In [None]:
map_red_full_plain.invoke("who are the resident representatives?")

---- 

### ConversationalRetrievalChain

In [None]:
# Chat history is stored as message objects under "chat_history".
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True 
)


retriever=ret_full_plain
# `ConversationalRetrievalChain.from_llm` takes the answer-prompt override via
# `combine_docs_chain_kwargs`. `chain_type_kwargs` belongs to
# `RetrievalQA.from_chain_type` and is rejected here.
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    memory=memory,
    combine_docs_chain_kwargs={"prompt": QA_CHAIN_PROMPT}
)

In [None]:
# Display the current conversation memory (was the undefined name `memor`).
memory

In [None]:
qa.invoke("who are the resident representatives?")

In [None]:
qa.invoke("what are the dotations in the rooms ?")

In [None]:
qa.invoke("what about the kitchen ?")

In [None]:
qa.invoke("Are they shared ?")

In [None]:
qa.invoke("who are the resident representative?")

## 1.5. Memory and sourcing implementation

# Prototype

In [None]:
print("Hello I'm the Bocconichatbot, I'm designed to answer you questions and provide as much help as I can!") 

In [None]:
#user specific 
history = ChatMessageHistory()

In [None]:
#example commands 
history.add_user_message("Hello")
history.add_ai_message("What can I do you for?")
history.messages

In [None]:
#example commands - converting in a suitable format 
memory = ConversationBufferMemory(chat_memory=history)
memory

In [None]:
#
history_buffer = memory.load_memory_variables({})

In [None]:
print(history_buffer["history"])

In [None]:
template_gen = """
Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
If you the question is too broad and could lead to multiple answers ask the user for clarifications.
Keep the answer as concise as possible.
Be exhaustive if the user is asking for it. 
For text provided in the format [some text](link) always include the link. 
Try to keep the same the "text" when it surrounded by quotation marks.  
Always say "If you need any further information, don't hesitate to ask!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template_gen)

# With return_source_documents=True the chain returns both "result" and
# "source_documents"; a default ConversationBufferMemory cannot choose between
# them, so the input and output keys are given explicitly.
rqa_basic_full_plain_memory = RetrievalQA.from_chain_type(
    llm,
    memory = ConversationBufferMemory(input_key="query", output_key="result"),
    retriever=ret_full_plain,
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    verbose = True
)

In [None]:
print("Hello I'm the Bocconichatbot, I'm designed to answer you questions and provide as much help as I can!") 
# Simple console chat loop. Type "exit" or "quit" (or send EOF / interrupt the
# kernel) to leave it.
while True:
    try:
        user_input = input()
    except (EOFError, KeyboardInterrupt):
        break
    if user_input.strip().lower() in {"exit", "quit"}:
        break
    print()
    result = rqa_basic_full_plain_memory.invoke(user_input)
    # NOTE(review): the original called `print_response(result)`, which is not
    # defined anywhere in the visible notebook. The answer text is printed
    # directly instead.
    print(result["result"])
    print()

In [None]:
template = """The following is a conversation between a human and Dwight K. Schrute from the TV show The Office.
Your goal is to outwit the human and show how much smarter Dwight is. No matter the question, Dwight responds as he's talking in The Office.

Current conversation:
{history}
Human: {input}
Dwight:"""

PROMPT = PromptTemplate(input_variables=["history", "input"], template=template)

conversation = ConversationChain(
    prompt=PROMPT,
    llm=chat_gpt,
    verbose=False,
    memory=ConversationBufferMemory(ai_prefix="Dwight"),
)

In [None]:
history = ChatMessageHistory()
history.add_user_message("Hello")
history.add_ai_message("What can I do you for?")

history.messages
     

In [None]:
memory = ConversationBufferMemory(chat_memory=history)

In [None]:
history_buffer = memory.load_memory_variables({})
history_buffer

In [None]:
print(history_buffer["history"])

In [None]:
chat_gpt = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")

conversation = ConversationChain(
    llm=chat_gpt, verbose=True, memory=ConversationBufferMemory()
)

In [None]:
conversation("hello")

In [None]:
conversation_messages = conversation.memory.chat_memory.messages

In [None]:
messages = messages_to_dict(conversation_messages)

In [None]:
messages

In [None]:
with Path("messages.json").open("w") as f:
    json.dump(messages, f, indent=4)

In [None]:
with Path("messages.json").open("r") as f:
    loaded_messages = json.load(f)

In [None]:
history = ChatMessageHistory(messages=messages_from_dict(loaded_messages))
#retrieve informations 
history.messages[0].content

---

In [None]:
# Condensing conversations 

In [None]:
history = ChatMessageHistory(messages=messages_from_dict(loaded_messages))

memory = ConversationBufferWindowMemory(
    chat_memory=history,
    k=1, #take last message 
    ai_prefix="Dwight",
)

In [None]:
history = ChatMessageHistory(messages=messages_from_dict(loaded_messages))

memory = ConversationSummaryBufferMemory(
    chat_memory=history, ai_prefix="Dwight", llm=chat_gpt, max_token_limit=10
)

---

In [None]:
rqa_basic_full_plain_memory = RetrievalQA.from_chain_type(
    llm,
    retriever=ret_full_plain,
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    verbose = True
)

In [None]:
rqa_basic_full_plain_memory("hello")

## 2. Evaluations

- Langchain: [link](https://smith.langchain.com/o/917d7cd4-4420-5477-8a36-902a60673259/projects?paginationState=%7B%22pageIndex%22%3A0%2C%22pageSize%22%3A10%7D&chartedColumn=latency_p50)
- Trulens: 

### Trulens set up

In [None]:
from trulens_eval import TruChain, Feedback, Huggingface, Tru
from trulens_eval.schema import FeedbackResult
tru = Tru()
#tru.reset_database()

In [None]:
import numpy as np
from trulens_eval.app import App
from trulens_eval.feedback import Groundedness
# Imported under an alias so the provider class does not shadow the LangChain
# `OpenAI` LLM class imported earlier, and the instance below does not shadow
# the `openai` module.
from trulens_eval.feedback.provider import OpenAI as TruOpenAI

# A single provider instance backs every feedback function.
provider = TruOpenAI()

# Select the context to be used in feedback. Its location is app specific.
context = App.select_context(rqa_basic_full_plain)

grounded = Groundedness(groundedness_provider=provider)
# Groundedness of the answer with respect to the retrieved context.
f_groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons)
    .on(context.collect()) # collect context chunks into a list
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

# Question/answer relevance between overall question and answer.
f_qa_relevance = Feedback(provider.relevance_with_cot_reasons).on_input_output()
# Question/statement relevance between question and each context chunk,
# averaged over the chunks.
f_context_relevance = (
    Feedback(provider.qs_relevance_with_cot_reasons)
    .on_input()
    .on(context)
    .aggregate(np.mean)
    )

f_conciseness = Feedback(provider.conciseness_with_cot_reasons).on_output()
f_helpfulness = Feedback(provider.helpfulness_with_cot_reasons).on_output()
#f_comprensiveness = Feedback(provider.comprehensiveness_with_cot_reasons).on_output()
#f_comprensiveness = Feedback(openai.comprehensiveness_with_cot_reasons).on_output()


### Eval_question tests 

In [None]:
# test tabular data 
question1 = ("Who are the resident representatives?") # 18
question2 = ("Are there any grants for international mobility programs?") #65 - Good 
question3 = ("I'm going to Danmark with my mobility program, what grant I will receive?") #65 - good 
question4 = ("I'm a second year student who is using a student loan, how many credit do I need by the end of the year?")#69 - almost perfect, should ask for info about if you are bachelor or master 
question5 = ("I'm a second year master student who is using a student loan, how many credit do I need by the end of the year?")#69 - perfect 
question6 = ("what are the deadlines for payment of tuition?") # 74 Bad - test sources 

#test notes 
question10 = ("what is maximum occupacy in the library?") #29 Good 

#test factual info 
question11 = ("where is located the library?") # 29 Good 

#test specific info retrieval in answers which have to mention many points 
question17 = ("What is the necessary documentation to apply for fees revaluation? ")#41
question20 = ("I want to apply for fees revaluation, what should I do?")#41
question21 = ("what can u tell me abou fees revaluation?")# 41 check references 
question22 = ("what are the steps for fees revaluation?")#41 

#number list testing 
question23 = ("what is the application procedure for international mobility grant?") #ok
    
#bullet point testing 
question18 = ("What are the requirements to apply for open reservation monthly?") #6 Perfect 
question19 = ("How can I apply for open reservation monthly?") #6 Good 

#hard questions 
question12 = ("Is it possible to visit the library without being a student?") # 29 Perfect 
question13 = ("I've booked an accomodation in Openreservation, but I can't pay the deposit. What should I do?") #7 Good 
question14 = ("What are the coordinates for making a bank transfer for securing the open reservation given that I can't pay with Paytool?") # 7 Perfect

#very hard 
question15 = ("I broke my arm, I'm a bocconi student, who can I contact?") #174 Verify sources 

#link retrieval 
question16 = ("I broke my arm, does the Bocconi have a medical center?") # 174 good helpful and secure 

In [None]:
#rqa_basic_full_plain.invoke(question4)['result']

### (tests) Instrument chain for logging with TruLens


In [None]:
#OK 
tru_recorder = TruChain(rqa_basic_full_plain,
    app_id='eval_question_rqa_basic_full_plain',
    tags = 'ciao',
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness,f_conciseness,f_helpfulness])#,f_comprensiveness])

In [None]:
with tru_recorder as recording:
    llm_response = rqa_basic_full_plain.invoke("Who are the resident representatives ?")

In [None]:
from concurrent.futures import as_completed

# Record produced by the `with tru_recorder as recording:` block.
rec = recording.get()

# Iterate the feedback futures as they complete, so every feedback result is
# available before the records are queried.
for pending in as_completed(rec.feedback_results):
    feedback = pending.result()

#display(feedback.name, feedback_result.result)

records, feedback = tru.get_records_and_feedback(app_ids=['eval_question_rqa_basic_full_plain'])


In [None]:
records

In [None]:
tru_recorder = TruChain(rqa_basic_full_ref,
    app_id='eval_question_rqa_basic_full_ref',
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness,f_conciseness,f_helpfulness])#,f_comprensiveness])

In [None]:
with tru_recorder as recording:
    llm_response = rqa_basic_full_ref.invoke("Who are the resident representatives ?")

In [None]:
from trulens_eval import TruChain

llm_response = rqa_basic_full_ref.invoke("Who are the resident representatives? ")

# The recorder assignment was indented for no reason, which is an
# IndentationError at cell level.
tru_recorder = TruChain(
    rqa_basic_full_ref,
    app_id='trial2')

# Runs the chain once and returns both the response and the TruLens record.
response, tru_record = tru_recorder.with_record(rqa_basic_full_ref, "Who are the resident representatives? ")
json_like = tru_record.layout_calls_as_app()

In [None]:
rqa_basic_full_ref.invoke("What is the necessary documentation to apply for fees revaluation?")

In [None]:
#gettig cot 
#records.groundedness_measure_with_cot_reasons_calls[0][0]['meta']

---

# Iterating and inspecting results

In [None]:
tru.reset_database()

In [None]:
# Load the evaluation questions, one per line. Surrounding whitespace is
# stripped and blank lines are skipped so no empty prompt is sent to a chain.
with open('../../Data/New/new_eval2.txt', 'r', encoding='utf-8') as file:
    eval_questions = [line.strip() for line in file if line.strip()]

In [None]:
#eval_questions
eval_questions1 = eval_questions[:5]
eval_questions2 = eval_questions[5:10]
eval_questions3 = eval_questions[10:]

## rqa_basic_full_plain

In [None]:
# single model 
from concurrent.futures import as_completed

# One id is shared by the recorder and the query below. The query previously
# used 'eval_question_rqa_basic_full_plain', which is not the id recorded here.
app_id = "rqa_basic_full_plain"
tru_recorder = TruChain(rqa_basic_full_plain,
    app_id=app_id,
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness,f_conciseness,f_helpfulness])#,f_comprensiveness])

for question in eval_questions1:
    with tru_recorder as recording:
        rqa_basic_full_plain.invoke(question)

    rec = recording.get()

    # Wait until every feedback function has finished for this record.
    for feedback_future in as_completed(rec.feedback_results):
        feedback_future.result()

# Query once after the run instead of on every iteration.
records, feedback = tru.get_records_and_feedback(app_ids=[app_id])

In [None]:
from concurrent.futures import as_completed

# One id is shared by the recorder and the query below. The query previously
# used 'eval_question_rqa_basic_full_plain', which is not the id recorded here.
app_id = "rqa_basic_full_plain"
tru_recorder = TruChain(rqa_basic_full_plain,
    app_id=app_id,
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness,f_conciseness,f_helpfulness])#,f_comprensiveness])

for question in eval_questions2:
    with tru_recorder as recording:
        rqa_basic_full_plain.invoke(question)

    rec = recording.get()

    # Wait until every feedback function has finished for this record.
    for feedback_future in as_completed(rec.feedback_results):
        feedback_future.result()

# Query once after the run instead of on every iteration.
records, feedback = tru.get_records_and_feedback(app_ids=[app_id])

In [None]:
from concurrent.futures import as_completed

# One id is shared by the recorder and the query below. The query previously
# used 'eval_question_rqa_basic_full_plain', which is not the id recorded here.
app_id = "rqa_basic_full_plain"
tru_recorder = TruChain(rqa_basic_full_plain,
    app_id=app_id,
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness,f_conciseness,f_helpfulness])#,f_comprensiveness])

for question in eval_questions3:
    with tru_recorder as recording:
        rqa_basic_full_plain.invoke(question)

    rec = recording.get()

    # Wait until every feedback function has finished for this record.
    for feedback_future in as_completed(rec.feedback_results):
        feedback_future.result()

# Query once after the run instead of on every iteration.
records, feedback = tru.get_records_and_feedback(app_ids=[app_id])

### Example use-case 

In [None]:
chat = rqa_basic_full_plain.invoke("Till when I have to convert Language certificates for exchanges?")

In [None]:
chat['result']

In [None]:
# Collect the distinct source URLs of the retrieved chunks. LangChain
# `Document` objects expose their metadata as the `.metadata` attribute and
# cannot be subscripted like a dict. Chunks without a "URL" entry (presumably
# those not under a "####" header) are skipped.
unique_urls = {
    document.metadata["URL"]
    for document in chat['source_documents']
    if "URL" in document.metadata
}

print(list(unique_urls))

---

## rqa_self_full_plain

In [None]:
from concurrent.futures import as_completed

# One id is shared by the recorder and the query below. The query previously
# used 'eval_question_rqa_self_full_plain', which is not the id recorded here.
app_id = "rqa_self_full_plain"
tru_recorder = TruChain(rqa_self_full_plain,
    app_id=app_id,
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness,f_conciseness,f_helpfulness])#,f_comprensiveness])

for question in eval_questions1:
    with tru_recorder as recording:
        rqa_self_full_plain.invoke(question)

    rec = recording.get()

    # Wait until every feedback function has finished for this record.
    for feedback_future in as_completed(rec.feedback_results):
        feedback_future.result()

# Query once after the run instead of on every iteration.
records, feedback = tru.get_records_and_feedback(app_ids=[app_id])

In [None]:
from concurrent.futures import as_completed

# One id is shared by the recorder and the query below. The query previously
# used 'eval_question_rqa_self_full_plain', which is not the id recorded here.
app_id = "rqa_self_full_plain"
tru_recorder = TruChain(rqa_self_full_plain,
    app_id=app_id,
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness,f_conciseness,f_helpfulness])#,f_comprensiveness])

for question in eval_questions2:
    with tru_recorder as recording:
        rqa_self_full_plain.invoke(question)

    rec = recording.get()

    # Wait until every feedback function has finished for this record.
    for feedback_future in as_completed(rec.feedback_results):
        feedback_future.result()

# Query once after the run instead of on every iteration.
records, feedback = tru.get_records_and_feedback(app_ids=[app_id])

In [None]:
from concurrent.futures import as_completed

# One id is shared by the recorder and the query below. The query previously
# used 'eval_question_rqa_self_full_plain', which is not the id recorded here.
app_id = "rqa_self_full_plain"
tru_recorder = TruChain(rqa_self_full_plain,
    app_id=app_id,
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness,f_conciseness,f_helpfulness])#,f_comprensiveness])

for question in eval_questions3:
    with tru_recorder as recording:
        rqa_self_full_plain.invoke(question)

    rec = recording.get()

    # Wait until every feedback function has finished for this record.
    for feedback_future in as_completed(rec.feedback_results):
        feedback_future.result()

# Query once after the run instead of on every iteration.
records, feedback = tru.get_records_and_feedback(app_ids=[app_id])

--- 

## map_red_full_plain

In [None]:
from concurrent.futures import as_completed

# One id is shared by the recorder and the query below. The query previously
# used 'eval_question_map_red_full_plain', which is not the id recorded here.
app_id = "map_red_full_plain"
tru_recorder = TruChain(map_red_full_plain,
    app_id=app_id,
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness,f_conciseness,f_helpfulness])#,f_comprensiveness])

for question in eval_questions1:
    with tru_recorder as recording:
        map_red_full_plain.invoke(question)

    rec = recording.get()

    # Wait until every feedback function has finished for this record.
    for feedback_future in as_completed(rec.feedback_results):
        feedback_future.result()

# Query once after the run instead of on every iteration.
records, feedback = tru.get_records_and_feedback(app_ids=[app_id])

In [None]:
from concurrent.futures import as_completed

# One id is shared by the recorder and the query below. The query previously
# used 'eval_question_map_red_full_plain', which is not the id recorded here.
app_id = "map_red_full_plain"
tru_recorder = TruChain(map_red_full_plain,
    app_id=app_id,
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness,f_conciseness,f_helpfulness])#,f_comprensiveness])

for question in eval_questions2:
    with tru_recorder as recording:
        map_red_full_plain.invoke(question)

    rec = recording.get()

    # Wait until every feedback function has finished for this record.
    for feedback_future in as_completed(rec.feedback_results):
        feedback_future.result()

# Query once after the run instead of on every iteration.
records, feedback = tru.get_records_and_feedback(app_ids=[app_id])

In [None]:
from concurrent.futures import as_completed

# One id is shared by the recorder and the query below. The query previously
# used 'eval_question_map_red_full_plain', which is not the id recorded here.
app_id = "map_red_full_plain"
tru_recorder = TruChain(map_red_full_plain,
    app_id=app_id,
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness,f_conciseness,f_helpfulness])#,f_comprensiveness])

for question in eval_questions3:
    with tru_recorder as recording:
        map_red_full_plain.invoke(question)

    rec = recording.get()

    # Wait until every feedback function has finished for this record.
    for feedback_future in as_completed(rec.feedback_results):
        feedback_future.result()

# Query once after the run instead of on every iteration.
records, feedback = tru.get_records_and_feedback(app_ids=[app_id])

## ConversationalRetrievalChain

In [None]:
from concurrent.futures import as_completed

# This section evaluates the conversational chain. The original cell was a
# copy of the map-reduce one and still wrapped and invoked `map_red_full_plain`.
# NOTE(review): `qachat` is the ConversationalRetrievalChain built earlier; it
# keeps chat history between questions -- confirm this is the chain to score.
app_id = "conversationalchain"
tru_recorder = TruChain(qachat,
    app_id=app_id,
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness,f_conciseness,f_helpfulness])#,f_comprensiveness])

for question in eval_questions1:
    with tru_recorder as recording:
        qachat.invoke(question)

    rec = recording.get()

    # Wait until every feedback function has finished for this record.
    for feedback_future in as_completed(rec.feedback_results):
        feedback_future.result()

# Query once after the run, under the same id the recorder used.
records, feedback = tru.get_records_and_feedback(app_ids=[app_id])

In [None]:
from concurrent.futures import as_completed

# This section evaluates the conversational chain. The original cell was a
# copy of the map-reduce one and still wrapped and invoked `map_red_full_plain`.
# NOTE(review): `qachat` is the ConversationalRetrievalChain built earlier; it
# keeps chat history between questions -- confirm this is the chain to score.
app_id = "conversationalchain"
tru_recorder = TruChain(qachat,
    app_id=app_id,
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness,f_conciseness,f_helpfulness])#,f_comprensiveness])

for question in eval_questions2:
    with tru_recorder as recording:
        qachat.invoke(question)

    rec = recording.get()

    # Wait until every feedback function has finished for this record.
    for feedback_future in as_completed(rec.feedback_results):
        feedback_future.result()

# Query once after the run, under the same id the recorder used.
records, feedback = tru.get_records_and_feedback(app_ids=[app_id])

In [None]:
from concurrent.futures import as_completed

# This section evaluates the conversational chain. The original cell was a
# copy of the map-reduce one and still wrapped and invoked `map_red_full_plain`.
# NOTE(review): `qachat` is the ConversationalRetrievalChain built earlier; it
# keeps chat history between questions -- confirm this is the chain to score.
app_id = "conversationalchain"
tru_recorder = TruChain(qachat,
    app_id=app_id,
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness,f_conciseness,f_helpfulness])#,f_comprensiveness])

for question in eval_questions3:
    with tru_recorder as recording:
        qachat.invoke(question)

    rec = recording.get()

    # Wait until every feedback function has finished for this record.
    for feedback_future in as_completed(rec.feedback_results):
        feedback_future.result()

# Query once after the run, under the same id the recorder used.
records, feedback = tru.get_records_and_feedback(app_ids=[app_id])

--- 

In [None]:
# multiple models 

from concurrent.futures import as_completed

# Maps each TruLens app id to the chain evaluated under that id.
models = {
    "rqa_basic_full_plain": rqa_basic_full_plain}

#SET UP MODELS 

def invoke_model_with_inputs(model, inputs):
    """Run the chain registered under ``model`` in ``models`` on ``inputs``.

    Args:
        model: Key into the module-level ``models`` dict.
        inputs: Value passed unchanged to the chain's ``invoke``.

    Returns:
        Whatever the chain's ``invoke`` returns.

    Raises:
        KeyError: If ``model`` is not a key of ``models``.
    """
    print(f"🤖 starting execution of the model: {model}") 
    return models[model].invoke(inputs)


for model_name, model_instance in models.items():
    # TruChain takes the chain object first and the string id as `app_id`;
    # the two were previously swapped.
    tru_recorder = TruChain(model_instance,
        app_id=model_name,
        feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness,f_conciseness,f_helpfulness])#,f_comprensiveness])

    for question in eval_questions:
        with tru_recorder as recording:
            # The loop variable is `model_name`; `model` was never defined.
            invoke_model_with_inputs(model_name, question)

        rec = recording.get()

        # Wait until every feedback function has finished for this record.
        for feedback_future in as_completed(rec.feedback_results):
            feedback_future.result()

# Query once after all runs, restricted to the apps evaluated above.
records, feedback = tru.get_records_and_feedback(app_ids=list(models))

### See in Dashboard
For reference see the following [link](https://www.trulens.org/trulens_eval/api/tru/#trulens_eval.trulens_eval.tru.Tru)
def run_dashboard(
        self,
        port: Optional[int] = 8501,
        address: Optional[str] = None,
        force: bool = False,
        _dev: Optional[Path] = None
    ) -> Process:
        """
        Run a streamlit dashboard to view logged results and apps.

        Args:
            - port: int: port number to pass to streamlit through server.port.

In [None]:
#troubleshooting 
#!pip uninstall trulens_eval -y # to remove existing PyPI version
#!pip install git+https://github.com/truera/trulens#subdirectory=trulens_eval

In [None]:
from trulens_eval import TruChain, Feedback, Huggingface, Tru
from trulens_eval.schema import FeedbackResult
tru = Tru()
#tru.reset_database()

In [None]:
tru.run_dashboard() # open a local streamlit app to explore 

In [None]:
tru.stop_dashboard(force = True) # stop if needed

### Text Dashboard

### 3.2 Retrieve records and feedback Trulens + Langchain

In [None]:
# The results of the feedback functions can be retrieved from the record. These
# are `Future` instances (see `concurrent.futures`). You can use `as_completed`
# to wait until they have finished evaluating.

from concurrent.futures import as_completed

for feedback_future in  as_completed(rec.feedback_results):
    feedback = feedback_future.result()

    # Bare annotation only: it documents the expected type and assigns nothing.
    # NOTE(review): depending on the installed trulens_eval version the future
    # may resolve to a FeedbackResult rather than a Feedback — confirm.
    feedback: Feedback

    display(feedback.name)


In [None]:
# Pretty-print one evaluated record: the question, the answer and, for each
# feedback function, its score followed by the explanation.

# Positional index of the record to display.
n = 0

row = records.iloc[n]

print("Question:"+ row.input + str("\n") + "Answer:" + row.output)
print("---------------------" + "\n" + "EVALUATION:")

for feedback_name in (
    "qs_relevance_with_cot_reasons",
    "groundedness_measure_with_cot_reasons",
    "conciseness_with_cot_reasons",
    "helpfulness_with_cot_reasons",
):
    # The explanation is stored in the "<feedback>_calls" column; the column
    # named after the feedback itself only holds the numeric score.
    meta = row[feedback_name + "_calls"][0]['meta']
    # NOTE(review): some feedbacks appear to store the text under "reasons"
    # instead of "reason" — confirm against the installed trulens_eval version.
    reason = meta.get('reason', meta.get('reasons', ''))

    print("\n")
    print(feedback_name)
    print("SCORE: " + str(row[feedback_name]) +" | " + str(reason))


In [None]:
# Print the comprehensiveness score and its explanation for record `n`.
print("\n")
print("comprehensiveness_with_cot_reasons")
# The explanation is read from the "<feedback>_calls" column; the score column
# holds a number and cannot be subscripted.
print("SCORE: " + str(records.iloc[n].comprehensiveness_with_cot_reasons) +" | " + records.comprehensiveness_with_cot_reasons_calls.iloc[n][0]['meta']['reason'])


In [None]:
print("\n")
print("groundedness_measure_with_cot_reasons")
print("SCORE: " + str(records.iloc[n].groundedness_measure_with_cot_reasons))
     # +" | " + 

In [None]:
records.groundedness_measure_with_cot_reasons[0]

In [None]:
records, feedback = tru.get_records_and_feedback(app_ids=[])

records.head()

In [None]:
tru.get_leaderboard(app_ids=[])

### Readable evaluations

In [None]:
# NOTE(review): incomplete fragment — the expression being subscripted (probably
# a `records.<feedback>_calls` column) appears to be missing. As written,
# `[0][0]` evaluates to the int 0, so subscripting it with 'meta' raises TypeError.
[0][0]['meta']['reason']

### 3.3. Multiple questions evaluations

In [None]:
# Load the evaluation questions, one question per line.
# The encoding is explicit so accented characters are read the same way on
# every platform.
with open('eval_questions.txt', 'r', encoding='utf-8') as file:
    # Strip the trailing newline and surrounding whitespace; skip blank lines
    # so that no empty question is sent to the chains.
    eval_questions = [line.strip() for line in file if line.strip()]

for item in eval_questions:
    print(item)

In [None]:
for question in eval_questions:
    with tru_recorder as recording:
        rag_chain.invoke(question)

In [None]:
self_retriever.invoke("Vorrei prenotare un alloggio a tariffa intera per l'a.a. 2023-24. Come posso procedere?")

In [None]:
for question in eval_questions:
    with tru_recorder3 as recording:
        self_retriever.invoke(question)
        
        #__record__.app.first.steps.context.first.get_relevant_documents

In [None]:
# tru_recorder4 wraps `multi_retriever`, so that is the app that has to be
# invoked inside its recording context (the loop previously invoked
# `self_retriever`, which is instrumented by tru_recorder3).
for question in eval_questions:
    with tru_recorder4 as recording:
        multi_retriever.invoke(question)

In [None]:
for question in eval_questions:
    with tru_recorder2 as recording:
        rag_chain_compressed.invoke(question)

In [None]:
records, feedback = tru.get_records_and_feedback(app_ids=[])
records.head()

In [None]:
import pandas as pd

pd.set_option("display.max_colwidth", None)
records[["input", "output"] + feedback]

In [None]:
tru.get_leaderboard(app_ids=[])

In [None]:
# The recorder is initialized as prebuilt; we will need some more lessons to understand how to actually implement it.

# ---

# Chatbot

## Save/Load
- [save](https://api.python.langchain.com/en/latest/chains/langchain.chains.retrieval_qa.base.RetrievalQA.html#:~:text=save(file_path%3A%20Union%5BPath%2C%20str%5D)%20%E2%86%92%20None%C2%B6)
- [load](https://api.python.langchain.com/en/latest/chains/langchain.chains.loading.load_chain.html#langchain.chains.loading.load_chain)

In [None]:
rqa_basic_full_ref.save(file_path="models/rqa_basic_full_ref.yaml")


In [None]:
import json
from pathlib import Path
from typing import Any, Union

import yaml

In [None]:
new_chain = RetrievalQA.load("models/rqa_basic_full_ref.yaml")

In [None]:
a = langchain.chains.loading.load_chain("models/rqa_basic_full_ref.yaml", retriever=ret_full_ref)


In [None]:
a = langchain.chains.loading.load_chain_from_file("models/rqa_basic_full_ref.yaml")

In [None]:
# The original statement had no module name and was a SyntaxError.
# NOTE(review): `langchain` is assumed to be the intended module, since the
# surrounding cells use `langchain.chains.loading` and `langchain.__version__`.
import langchain

In [None]:
import langchain.chains.loading

In [None]:
langchain.__version__

In [None]:
!sudo pip install langchain --upgrade

### 2. MODEL COMPARISONS 

#### 2.1. COMPARISON ON RETRIEVALQA 
We will use a ceteris paribus comparison (changing one factor at a time) to evaluate which one is the best model. 
We will choose the best model in the RetrievalQA setting. 
We want to investigate: 
1. HOW THE MODELS PERFORM 

#### Trulens troubleshooting

In [None]:
# terminal commands (run these in a shell; they are not valid Python):
#   conda activate aienv
#   cd Finetuning/BOT_V3_Langchain
# PORT problem solved by changing the port number in /Users/valedipalo/miniforge3/lib/python3.9/site-packages/tru.py

Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard.

Note: Feedback functions evaluated in the deferred manner can be seen in the "Progress" page of the TruLens dashboard.


# 3. Comparison
## 3.1. Amount of data 
How do the models perform depending on the amount of data we give them? 
To find out, we will evaluate the following pairs with a basic retriever: 
1. rqa_basic_house_ref VS rqa_basic_full_ref 
2. rqa_basic_house_plain VS rqa_basic_full_plain 

## 3.2 Importance of Data Cleaning 
We will evaluate how much structuring data is relevant for the models to properly work. 
To do so we will evaluate the performance for 
1. rqa_basic_house_ref VS rqa_basic_house_plain
2. rqa_basic_full_ref VS rqa_basic_full_plain

## 3.3 How important is the retriever 
The same tests will be run with the self-query retriever to evaluate whether it performs better or worse than the basic one. 

## 3.4. Different chains 
Once the best-scoring configuration from the previous tests has been determined, we will evaluate how different chains perform, using: 
- RetrievalQA
- MapReduce 
- MapRerank 

# determining eval question 

In [None]:
rqa_basic_full_plain.invoke

In [None]:
# TEST EVAL_QUESTIONS 

In [None]:

import pandas as pd

# Function to invoke models
def invoke_model_with_inputs(models,model, inputs):
    """Run the chain stored under ``model`` in ``models`` and return its output.

    Args:
        models: mapping from model name to an object exposing ``invoke``.
        model: key selecting the chain inside ``models``.
        inputs: value handed unchanged to the chain's ``invoke``.

    Returns:
        Whatever the selected chain's ``invoke`` returns.

    Raises:
        KeyError: if ``model`` is not present in ``models``.
    """
    # Announce the run before the lookup so the log line appears even if the
    # lookup or the invocation fails.
    print(f"🤖 starting execution of the model: {model}")
    chain = models[model]
    return chain.invoke(inputs)

# Map model names to instances
models_basic = {
    "rqa_basic_house_ref": rqa_basic_house_ref,
    "rqa_basic_house_plain": rqa_basic_house_plain,
    "rqa_basic_full_ref": rqa_basic_full_ref,
    "rqa_basic_full_plain": rqa_basic_full_plain,
}

models_self = {
    "rqa_self_house_ref": rqa_self_house_ref,
    "rqa_self_house_plain": rqa_self_house_plain,
    "rqa_self_full_ref": rqa_self_full_ref,
    "rqa_self_full_plain": rqa_self_full_plain,
}

In [None]:
# define the question to ask to the model
question = "Does Bocconi has a Medical Center?" 

# Initialize an empty list to collect data
data = []

# Iterate over your models dictionary and invoke them
for model_name, model_instance in models_basic.items():
    # Invoke the model with a question and get the result
    result = invoke_model_with_inputs(models_basic,model_name, question)
    print(result)#
    print("--")#

    # Extract the question and answer from the result
    question_asked = result["query"]
    answer_received = result["result"]
    
    # Append a dictionary with model name, question, and answer to the data list
    data.append({"Model Name": model_name, "Question": question_asked, "Answer": answer_received})

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(data)

# Display the DataFrame
df

# ---
# Model appendix 

In [None]:
#PATHS 
path_fees_r = "../../Data/New/Markdown/Fees_reformat.md"
path_full_r = "../../Data/New/Markdown/Full_reformat.md"
path_funding_r = "../../Data/New/Markdown/Funding_reformat.md"
path_housing_r = "../../Data/New/Markdown/Housing_reformat.md"
path_oth_r = "../../Data/New/Markdown/Library-Freemover_DD_reformat.md"
#plain 
path_fees_p = "../../Data/New/Markdown/Fees_plain.md"
path_funding_p = "../../Data/New/Markdown/Funding_plain.md"
path_housing_p = "../../Data/New/Markdown/Housing_plain.md"
path_exc_p = "../../Data/New/Markdown/Incoming-Exc_plain.md"
path_oth_p = "../../Data/New/Markdown/Library-Freemover-DD_plain.md"

In [None]:
headers_to_split_on_reformat = [
    ("#", "Category"),
    ("##", "Subcategory"),
    ("###", "Question"),
    ("####", "Subquestion"),
    ("#####", "Subsubquestion"),
    ("######", "URL"),
    ("#######","ID"), ]

### Vector store and basic retrievers
In this section there are models that probably are not necessary given the way in which retrievers work. 

In [None]:
# Build the vector store and basic retriever for the full refined document.
# The encoding is explicit so the markdown is decoded the same way on every
# platform.
with open(path_full_r, 'r', encoding='utf-8') as file:
    markdown_content = file.read()

# Split on the refined header hierarchy; each header name becomes a metadata
# key on the resulting chunks.
markdown_splitter = MarkdownHeaderTextSplitter(
    headers_to_split_on=headers_to_split_on_reformat)
splits = markdown_splitter.split_text(markdown_content)
vs_full_ref = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
ret_full_ref = vs_full_ref.as_retriever()

#### Refined

In [None]:
# oth

def _build_store(path, headers):
    """Split the markdown file at ``path`` on ``headers`` and index it in Chroma.

    Args:
        path: location of the markdown file to read.
        headers: ``(marker, name)`` pairs passed to MarkdownHeaderTextSplitter.

    Returns:
        A ``(vector_store, retriever)`` tuple for the indexed chunks.
    """
    # The encoding is explicit so the markdown is decoded the same way on
    # every platform.
    with open(path, 'r', encoding='utf-8') as file:
        markdown_content = file.read()

    markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers)
    splits = markdown_splitter.split_text(markdown_content)
    store = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
    return store, store.as_retriever()


# One vector store and basic retriever per refined source document.
vs_house_ref, ret_house_ref = _build_store(path_housing_r, headers_to_split_on_reformat)
vs_fees_ref, ret_fees_ref = _build_store(path_fees_r, headers_to_split_on_reformat)
vs_funding_ref, ret_funding_ref = _build_store(path_funding_r, headers_to_split_on_reformat)
vs_oth_ref, ret_oth_ref = _build_store(path_oth_r, headers_to_split_on_reformat)

#### Plain vector stores 

In [None]:
#oth 

def _build_store(path, headers):
    """Split the markdown file at ``path`` on ``headers`` and index it in Chroma.

    Args:
        path: location of the markdown file to read.
        headers: ``(marker, name)`` pairs passed to MarkdownHeaderTextSplitter.

    Returns:
        A ``(vector_store, retriever)`` tuple for the indexed chunks.
    """
    # The encoding is explicit so the markdown is decoded the same way on
    # every platform.
    with open(path, 'r', encoding='utf-8') as file:
        markdown_content = file.read()

    markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers)
    splits = markdown_splitter.split_text(markdown_content)
    store = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
    return store, store.as_retriever()


# One vector store and basic retriever per plain source document.
vs_house_plain, ret_house_plain = _build_store(path_housing_p, headers_to_split_on_plain)
vs_fees_plain, ret_fees_plain = _build_store(path_fees_p, headers_to_split_on_plain)
vs_funding_plain, ret_funding_plain = _build_store(path_funding_p, headers_to_split_on_plain)
# Backward-compatible alias: the retriever was originally bound only under
# this misspelled name.
ret_fudning_plain = ret_funding_plain
vs_exc_plain, ret_exc_plain = _build_store(path_exc_p, headers_to_split_on_plain)
vs_oth_plain, ret_oth_plain = _build_store(path_oth_p, headers_to_split_on_plain)

### Self retrievers

In [None]:
# self_full_ref
metadata_field_info_ref = [
    AttributeInfo(
        name="Category",
        description="a primary category or a general topic. It introduces the broader theme under which more specific information is grouped. In a retrieval task, it acts as the first level of data filtering or organization, offering a broad overview of the context or subject area.",
        type="string",
    ),
    AttributeInfo(
        name="Subcategory",
        description="This is a subtheme or subcategory of Header 1. It provides a further level of detail, focusing on a specific aspect of the main theme. It serves to refine the search or understanding within the general topic defined by Header 1, guiding the user towards more targeted information.",
        type="string",
    ),
    AttributeInfo(
        name="Question",
        description="This represents an even more specific subdivision of Header 2. This level contains the actual question. In a retrieval task, this header helps to focus on a very specific question, making the search even more targeted. ",
        type="string",
    ),
    AttributeInfo(
        name="Subquestion",
        description="For questions which are represented by multiple section,it serves to direct the user or the retrieval system towards a highly detailed and specific answer or information. It's the level that directly responds to the user's questions or needs. Oftentime is defined as General as a placeholder. ",
        type="string",
    ),
        AttributeInfo(
        name="Subsubquestion",
        description="This is the most specific level, is used in case of further and specific details. In most of the cases is defined as general as a placeholder",
        type="string",
    ),
        AttributeInfo(
        name="URL",
        description="A reference to the URL from which the Question has been obtained. It is not relevant in any way for retrieving",
        type="string",
    ),
    AttributeInfo(
        name="ID",
        description="A reference to the specific question. It is not relevant in any way for retrieving",
        type="string",
    ),
]

# Self-query retriever over the full refined vector store, built from `llm`,
# the document description and the refined metadata field definitions.
self_full_ref = SelfQueryRetriever.from_llm(
    llm,
    vs_full_ref,
    document_content_description, # description of what the documents contain
    metadata_field_info_ref,          # AttributeInfo list declared above in this cell
    verbose= True
)

In [None]:
# Self-query retrievers over the housing vector stores.
self_house_ref = SelfQueryRetriever.from_llm(
    llm,
    vs_house_ref,
    document_content_description, # description of what the documents contain
    # vs_house_ref is split with headers_to_split_on_reformat, so it is paired
    # with the refined metadata definitions, as self_full_ref is.
    metadata_field_info_ref,
    verbose= True
)
self_house_plain = SelfQueryRetriever.from_llm(
    llm,
    vs_house_plain,
    document_content_description, # description of what the documents contain
    metadata_field_info,          # metadata definitions for the plain split
    verbose= True
)

##### 2.1.2 RETRIEVAL_QA BASIC

In [None]:
rqa_basic_house_ref = RetrievalQA.from_chain_type(
    llm,
    retriever=ret_house_ref,
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    tags = ["base", "house","refined"]
)

rqa_basic_house_plain = RetrievalQA.from_chain_type(
    llm,
    retriever=ret_house_plain,
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

##### 2.1.3 RETRIEVAL_QA SELF 

In [None]:
#
def _self_rqa(retriever):
    """Build a RetrievalQA chain around ``retriever`` with the shared QA prompt.

    The chain returns its source documents alongside the answer.
    """
    return RetrievalQA.from_chain_type(
        llm,
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    )


# One RetrievalQA chain per self-query retriever; they differ only in the retriever.
rqa_self_house_ref = _self_rqa(self_house_ref)
rqa_self_house_plain = _self_rqa(self_house_plain)
rqa_self_full_ref = _self_rqa(self_full_ref)
rqa_self_full_plain = _self_rqa(self_full_plain)

# --- 

### 1.7. Langchain evaluation 
To access the results from the dashboard you can use the following [link](https://smith.langchain.com/o/917d7cd4-4420-5477-8a36-902a60673259/projects?paginationState=%7B%22pageIndex%22%3A0%2C%22pageSize%22%3A10%7D&chartedColumn=latency_p50)

#### 1.7.1. Single question eval

In [None]:
rqa_self.invoke("Who are the student representatives?")

## 2. Memory and Sourcing 

### 2.1. Memory 

In [None]:
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True 
)

from langchain.chains import ConversationalRetrievalChain
retriever=vectorstore.as_retriever()
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    memory=memory
)

In [None]:
# New type of chain: it adds a step on top that combines the chat history with the new question to create a new standalone question
from langchain.chains import ConversationalRetrievalChain
retriever=vectorstore.as_retriever()
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    memory=memory
)

In [None]:
question = "Quali sono le dotazioni disponibili all'interno delle camere? "
# Use .invoke(), as the rest of the notebook does; calling the chain object
# directly is deprecated in recent LangChain releases.
result = qa.invoke({"question": question})

In [None]:
result["answer"] 

In [None]:
question = "Per quanto riguarda la cucina?"
# Use .invoke(), as the rest of the notebook does; calling the chain object
# directly is deprecated in recent LangChain releases.
result = qa.invoke({"question": question})

In [None]:
result["answer"] 

In [None]:
question = "Sono quindi comuni?"
# Use .invoke(), as the rest of the notebook does; calling the chain object
# directly is deprecated in recent LangChain releases.
result = qa.invoke({"question": question})

In [None]:
result["answer"] 

In [None]:
# Comparison with model with no memory 


In [None]:
qa_chain1.invoke(question)

### 2.2. Sourcing 
https://python.langchain.com/docs/use_cases/question_answering/sources

In [None]:
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
from langchain_core.runnables import RunnableParallel

rag_chain_from_docs = (
    RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain_with_source = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
).assign(answer=rag_chain_from_docs)

rag_chain_with_source.invoke("Cosa troverà nella stanza in residenza? ")

## 3. Initialize Feedback Function(s)
For iterations over different models
N.B. in case of problems refer to the langchain_quickstart in this folder, or to: [Optimize RAG application - Trulens](https://colab.research.google.com/drive/1bjplY8jIUYtkiKzM4tXmZ5U5U10BaiCd)

In [None]:
from trulens_eval import TruChain, Feedback, Huggingface, Tru
from trulens_eval.schema import FeedbackResult
tru = Tru()
tru.reset_database()

In [None]:
import numpy as np

from trulens_eval.app import App
from trulens_eval.feedback import Groundedness
# NOTE(review): this rebinds the name `OpenAI`, which the first cell of the
# notebook imports from langchain_community.llms. From here on `OpenAI` is the
# TruLens feedback provider.
from trulens_eval.feedback.provider import OpenAI

# Initialize the provider class once; every feedback function below shares it.
openai = OpenAI()

# Select the context to be used in feedback. The location of the context is
# app specific.
context = App.select_context(rqa_base)

# Reuse the provider created above instead of instantiating a second one.
grounded = Groundedness(groundedness_provider=openai)
# Define a groundedness feedback function
f_groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons)
    .on(context.collect()) # collect context chunks into a list
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

# Question/answer relevance between overall question and answer.
f_qa_relevance = Feedback(openai.relevance).on_input_output()
# Question/statement relevance between question and each context chunk,
# averaged over the chunks.
f_context_relevance = (
    Feedback(openai.qs_relevance)
    .on_input()
    .on(context)
    .aggregate(np.mean)
    )

### 3.1 Instrument chain for logging with TruLens


In [None]:
#OK 
tru_recorder = TruChain(rqa_base,
    app_id='Chain1_ChatApplication',
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness])

In [None]:
with tru_recorder as recording:
    llm_response = rqa_base.invoke("Come funziona l'ingresso in residenza")

print(llm_response)

In [None]:
rqa_base.invoke("Come funziona l'ingresso in residenza")

In [None]:
tru_recorder2 = TruChain(rqa_compressed,
    app_id='Chain2_ChatApplication',
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness])

with tru_recorder2 as recording:
    llm_response = rqa_compressed.invoke("What is the purpose of the source?")

display(llm_response)

In [None]:


tru_recorder3 = TruChain(self_retriever,
    app_id='ChainSelf_ChatApplication',
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness])

tru_recorder4 = TruChain(multi_retriever,
    app_id='Chainmulti_ChatApplication',
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness])



### 3.2 Retrieve records and feedback (single question) 

In [None]:
# The record of the app invocation can be retrieved from the `recording`:

rec = recording.get() # use .get if only one record
#recs = recording.records # use .records if multiple

#display(rec)

In [None]:
# The results of the feedback functions can be retrieved from the record. These
# are `Future` instances (see `concurrent.futures`). You can use `as_completed`
# to wait until they have finished evaluating.

from concurrent.futures import as_completed

for feedback_future in  as_completed(rec.feedback_results):
    # NOTE(review): this unpacking expects each future to resolve to a
    # (Feedback, FeedbackResult) pair — confirm against the installed
    # trulens_eval version.
    feedback, feedback_result = feedback_future.result()

    # Bare annotations only: they document the expected types and assign nothing.
    feedback: Feedback
    feedback_result: FeedbackResult

    display(feedback.name, feedback_result.result)


In [None]:
records, feedback = tru.get_records_and_feedback(app_ids=["Chain1_ChatApplication"])

records.head()

In [None]:
records, feedback = tru.get_records_and_feedback(app_ids=["Chain2_ChatApplication"])

records.head()

In [None]:
tru.get_leaderboard(app_ids=[])

### 3.3. Multiple questions evaluations

In [None]:
# Load the evaluation questions, one question per line.
# The encoding is explicit so accented characters are read the same way on
# every platform.
with open('eval_questions.txt', 'r', encoding='utf-8') as file:
    # Strip the trailing newline and surrounding whitespace; skip blank lines
    # so that no empty question is sent to the chains.
    eval_questions = [line.strip() for line in file if line.strip()]

for item in eval_questions:
    print(item)

In [None]:
for question in eval_questions:
    with tru_recorder as recording:
        rag_chain.invoke(question)

In [None]:
self_retriever.invoke("Vorrei prenotare un alloggio a tariffa intera per l'a.a. 2023-24. Come posso procedere?")

In [None]:
for question in eval_questions:
    with tru_recorder3 as recording:
        self_retriever.invoke(question)
        
        #__record__.app.first.steps.context.first.get_relevant_documents

In [None]:
# tru_recorder4 wraps `multi_retriever`, so that is the app that has to be
# invoked inside its recording context (the loop previously invoked
# `self_retriever`, which is instrumented by tru_recorder3).
for question in eval_questions:
    with tru_recorder4 as recording:
        multi_retriever.invoke(question)

In [None]:
for question in eval_questions:
    with tru_recorder2 as recording:
        rag_chain_compressed.invoke(question)

In [None]:
records, feedback = tru.get_records_and_feedback(app_ids=[])
records.head()

In [None]:
import pandas as pd

pd.set_option("display.max_colwidth", None)
records[["input", "output"] + feedback]

In [None]:
tru.get_leaderboard(app_ids=[])

### 3.4. Explore in a Dashboard
For reference see the following [link](https://www.trulens.org/trulens_eval/api/tru/#trulens_eval.trulens_eval.tru.Tru)
def run_dashboard(
        self,
        port: Optional[int] = 8501,
        address: Optional[str] = None,
        force: bool = False,
        _dev: Optional[Path] = None
    ) -> Process:
        """
        Run a streamlit dashboard to view logged results and apps.

        Args:
            - port: int: port number to pass to streamlit through server.port.

In [None]:
tru = Tru()
#tru.reset_database()
tru.run_dashboard(port = 8503) # open a local streamlit app to explore

# tru.stop_dashboard() # stop if needed

In [None]:
# terminal commands (run these in a shell; they are not valid Python):
#   conda activate aienv
#   cd Finetuning/BOT_V3_Langchain
# PORT problem solved by changing the port number in tru.py

Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard.

Note: Feedback functions evaluated in the deferred manner can be seen in the "Progress" page of the TruLens dashboard.


In [None]:
# The recorder is initialized as prebuilt; we will need some more lessons to understand how to actually implement it.


In [None]:
rqa_basic_full_ref.save(file_path="models/rqa_basic_full_ref.yaml")


In [None]:
import json
from pathlib import Path
from typing import Any, Union

import yaml

In [None]:
new_chain = RetrievalQA.load("models/rqa_basic_full_ref.yaml")

In [None]:
a = langchain.chains.loading.load_chain("models/rqa_basic_full_ref.yaml", retriever=ret_full_ref)


In [None]:
a = langchain.chains.loading.load_chain_from_file("models/rqa_basic_full_ref.yaml")

In [None]:
# The original statement had no module name and was a SyntaxError.
# NOTE(review): `langchain` is assumed to be the intended module, since the
# surrounding cells use `langchain.chains.loading` and `langchain.__version__`.
import langchain

In [None]:
import langchain.chains.loading

In [None]:
langchain.__version__

In [None]:
!sudo pip install langchain --upgrade

### 2. MODEL COMPARISONS 

#### 2.1. COMPARISON ON RETRIEVALQA 
We will use a ceteris paribus comparison (changing one factor at a time) to evaluate which one is the best model. 
We will choose the best model in the RetrievalQA setting. 
We want to investigate: 
1. HOW THE MODELS PERFORM 

#### Trulens troubleshooting

In [None]:
# terminal commands (run these in a shell; they are not valid Python):
#   conda activate aienv
#   cd Finetuning/BOT_V3_Langchain
# PORT problem solved by changing the port number in /Users/valedipalo/miniforge3/lib/python3.9/site-packages/tru.py

Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard.

Note: Feedback functions evaluated in the deferred manner can be seen in the "Progress" page of the TruLens dashboard.


# 3. Comparison
## 3.1. Amount of data 
How do the models perform depending on the amount of data we give them? 
To find out, we will evaluate the following pairs with a basic retriever: 
1. rqa_basic_house_ref VS rqa_basic_full_ref 
2. rqa_basic_house_plain VS rqa_basic_full_plain 

## 3.2 Importance of Data Cleaning 
We will evaluate how much structuring data is relevant for the models to properly work. 
To do so we will evaluate the performance for 
1. rqa_basic_house_ref VS rqa_basic_house_plain
2. rqa_basic_full_ref VS rqa_basic_full_plain

## 3.3 How important is the retriever 
The same tests will be run with the self-query retriever to evaluate whether it performs better or worse than the basic one. 

## 3.4. Different chains 
Once the best-scoring configuration from the previous tests has been determined, we will evaluate how different chains perform, using: 
- RetrievalQA
- MapReduce 
- MapRerank 

# determining eval question 

In [None]:
rqa_basic_full_plain.invoke

In [None]:
# TEST EVAL_QUESTIONS 

In [None]:

import pandas as pd

# Function to invoke models
def invoke_model_with_inputs(models,model, inputs):
    """Run the chain stored under ``model`` in ``models`` and return its output.

    Args:
        models: mapping from model name to an object exposing ``invoke``.
        model: key selecting the chain inside ``models``.
        inputs: value handed unchanged to the chain's ``invoke``.

    Returns:
        Whatever the selected chain's ``invoke`` returns.

    Raises:
        KeyError: if ``model`` is not present in ``models``.
    """
    # Announce the run before the lookup so the log line appears even if the
    # lookup or the invocation fails.
    print(f"🤖 starting execution of the model: {model}")
    chain = models[model]
    return chain.invoke(inputs)

# Map model names to instances
models_basic = {
    "rqa_basic_house_ref": rqa_basic_house_ref,
    "rqa_basic_house_plain": rqa_basic_house_plain,
    "rqa_basic_full_ref": rqa_basic_full_ref,
    "rqa_basic_full_plain": rqa_basic_full_plain,
}

models_self = {
    "rqa_self_house_ref": rqa_self_house_ref,
    "rqa_self_house_plain": rqa_self_house_plain,
    "rqa_self_full_ref": rqa_self_full_ref,
    "rqa_self_full_plain": rqa_self_full_plain,
}

In [None]:
# define the question to ask to the model
question = "Does Bocconi has a Medical Center?" 

# Initialize an empty list to collect data
data = []

# Iterate over your models dictionary and invoke them
for model_name, model_instance in models_basic.items():
    # Invoke the model with a question and get the result
    result = invoke_model_with_inputs(models_basic,model_name, question)
    print(result)#
    print("--")#

    # Extract the question and answer from the result
    question_asked = result["query"]
    answer_received = result["result"]
    
    # Append a dictionary with model name, question, and answer to the data list
    data.append({"Model Name": model_name, "Question": question_asked, "Answer": answer_received})

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(data)

# Display the DataFrame
df

# ---
# Model appendix 

In [None]:
#PATHS 
path_fees_r = "../../Data/New/Markdown/Fees_reformat.md"
path_full_r = "../../Data/New/Markdown/Full_reformat.md"
path_funding_r = "../../Data/New/Markdown/Funding_reformat.md"
path_housing_r = "../../Data/New/Markdown/Housing_reformat.md"
path_oth_r = "../../Data/New/Markdown/Library-Freemover_DD_reformat.md"
#plain 
path_fees_p = "../../Data/New/Markdown/Fees_plain.md"
path_funding_p = "../../Data/New/Markdown/Funding_plain.md"
path_housing_p = "../../Data/New/Markdown/Housing_plain.md"
path_exc_p = "../../Data/New/Markdown/Incoming-Exc_plain.md"
path_oth_p = "../../Data/New/Markdown/Library-Freemover-DD_plain.md"

In [None]:
headers_to_split_on_reformat = [
    ("#", "Category"),
    ("##", "Subcategory"),
    ("###", "Question"),
    ("####", "Subquestion"),
    ("#####", "Subsubquestion"),
    ("######", "URL"),
    ("#######","ID"), ]

### Vector store and basic retrievers
In this section there are models that probably are not necessary given the way in which retrievers work. 

In [None]:
# Build the vector store and basic retriever for the full refined document.
# The encoding is explicit so the markdown is decoded the same way on every
# platform.
with open(path_full_r, 'r', encoding='utf-8') as file:
    markdown_content = file.read()

# Split on the refined header hierarchy; each header name becomes a metadata
# key on the resulting chunks.
markdown_splitter = MarkdownHeaderTextSplitter(
    headers_to_split_on=headers_to_split_on_reformat)
splits = markdown_splitter.split_text(markdown_content)
vs_full_ref = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
ret_full_ref = vs_full_ref.as_retriever()

#### Refined

In [None]:
# oth

def _build_store(path, headers):
    """Split the markdown file at ``path`` on ``headers`` and index it in Chroma.

    Args:
        path: location of the markdown file to read.
        headers: ``(marker, name)`` pairs passed to MarkdownHeaderTextSplitter.

    Returns:
        A ``(vector_store, retriever)`` tuple for the indexed chunks.
    """
    # The encoding is explicit so the markdown is decoded the same way on
    # every platform.
    with open(path, 'r', encoding='utf-8') as file:
        markdown_content = file.read()

    markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers)
    splits = markdown_splitter.split_text(markdown_content)
    store = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
    return store, store.as_retriever()


# One vector store and basic retriever per refined source document.
vs_house_ref, ret_house_ref = _build_store(path_housing_r, headers_to_split_on_reformat)
vs_fees_ref, ret_fees_ref = _build_store(path_fees_r, headers_to_split_on_reformat)
vs_funding_ref, ret_funding_ref = _build_store(path_funding_r, headers_to_split_on_reformat)
vs_oth_ref, ret_oth_ref = _build_store(path_oth_r, headers_to_split_on_reformat)

#### Plain vector stores 

In [None]:
def _build_markdown_store(path, headers, collection_name):
    """Index one Markdown file in its own Chroma collection.

    Args:
        path: Location of the Markdown file to read (decoded as UTF-8).
        headers: ``(marker, metadata_name)`` pairs handed to
            ``MarkdownHeaderTextSplitter``.
        collection_name: Chroma collection to write to. It must be unique per
            store, otherwise the stores share Chroma's default collection.

    Returns:
        A ``(vector_store, retriever)`` pair for the indexed chunks.
    """
    with open(path, 'r', encoding='utf-8') as file:
        markdown_content = file.read()
    markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers)
    splits = markdown_splitter.split_text(markdown_content)
    vector_store = Chroma.from_documents(
        documents=splits,
        embedding=OpenAIEmbeddings(),
        collection_name=collection_name,
    )
    return vector_store, vector_store.as_retriever()


# Plain (unstructured) sources: one store and one retriever per topic.
vs_house_plain, ret_house_plain = _build_markdown_store(
    path_housing_p, headers_to_split_on_plain, "house_plain")
vs_fees_plain, ret_fees_plain = _build_markdown_store(
    path_fees_p, headers_to_split_on_plain, "fees_plain")
vs_funding_plain, ret_funding_plain = _build_markdown_store(
    path_funding_p, headers_to_split_on_plain, "funding_plain")
# Alias kept so code written against the original misspelled name still works.
ret_fudning_plain = ret_funding_plain
vs_exc_plain, ret_exc_plain = _build_markdown_store(
    path_exc_p, headers_to_split_on_plain, "exc_plain")
vs_oth_plain, ret_oth_plain = _build_markdown_store(
    path_oth_p, headers_to_split_on_plain, "oth_plain")

### Self retrievers

In [None]:
# Metadata schema for the refined knowledge base, written as
# (attribute name, description) pairs; every attribute is typed as a string.
# NOTE(review): the descriptions refer to "Header 1"/"Header 2" while the
# attributes are named Category/Subcategory - confirm the wording is intended.
_REF_FIELD_DESCRIPTIONS = [
    ("Category", "a primary category or a general topic. It introduces the broader theme under which more specific information is grouped. In a retrieval task, it acts as the first level of data filtering or organization, offering a broad overview of the context or subject area."),
    ("Subcategory", "This is a subtheme or subcategory of Header 1. It provides a further level of detail, focusing on a specific aspect of the main theme. It serves to refine the search or understanding within the general topic defined by Header 1, guiding the user towards more targeted information."),
    ("Question", "This represents an even more specific subdivision of Header 2. This level contains the actual question. In a retrieval task, this header helps to focus on a very specific question, making the search even more targeted. "),
    ("Subquestion", "For questions which are represented by multiple section,it serves to direct the user or the retrieval system towards a highly detailed and specific answer or information. It's the level that directly responds to the user's questions or needs. Oftentime is defined as General as a placeholder. "),
    ("Subsubquestion", "This is the most specific level, is used in case of further and specific details. In most of the cases is defined as general as a placeholder"),
    ("URL", "A reference to the URL from which the Question has been obtained. It is not relevant in any way for retrieving"),
    ("ID", "A reference to the specific question. It is not relevant in any way for retrieving"),
]

metadata_field_info_ref = [
    AttributeInfo(name=field_name, description=field_description, type="string")
    for field_name, field_description in _REF_FIELD_DESCRIPTIONS
]

# Self-query retriever over the full refined store.
self_full_ref = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=vs_full_ref,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info_ref,
    verbose=True,
)

In [None]:
# Self-query retrievers over the housing-only stores (refined and plain).
# NOTE(review): both use `metadata_field_info`, defined outside this cell,
# whereas the full refined retriever uses `metadata_field_info_ref` - confirm
# that schema matches the metadata stored in `vs_house_ref`.
self_house_ref = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=vs_house_ref,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    verbose=True,
)

self_house_plain = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=vs_house_plain,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    verbose=True,
)

##### 2.1.2 RETRIVAL_QA BASIC

In [None]:
# RetrievalQA chains over the basic housing retrievers. Both answer with
# QA_CHAIN_PROMPT and return the retrieved source documents; only the refined
# variant is tagged.
rqa_basic_house_ref = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=ret_house_ref,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
    tags=["base", "house", "refined"],
)

rqa_basic_house_plain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=ret_house_plain,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
)

##### 2.1.2 RETRIVAL_QA SELF 

In [None]:
def _self_retrieval_qa(retriever):
    """Wrap *retriever* in a RetrievalQA chain using QA_CHAIN_PROMPT that also returns its sources."""
    return RetrievalQA.from_chain_type(
        llm,
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    )


# RetrievalQA chains driven by the self-query retrievers.
rqa_self_house_ref = _self_retrieval_qa(self_house_ref)
rqa_self_house_plain = _self_retrieval_qa(self_house_plain)
rqa_self_full_ref = _self_retrieval_qa(self_full_ref)
# NOTE(review): `self_full_plain` is not created in the nearby cells - confirm
# it is defined before this cell runs.
rqa_self_full_plain = _self_retrieval_qa(self_full_plain)

# --- 

### 1.7. Langchain evaluation 
To access the results from the dashboard you can use the following [link](https://smith.langchain.com/o/917d7cd4-4420-5477-8a36-902a60673259/projects?paginationState=%7B%22pageIndex%22%3A0%2C%22pageSize%22%3A10%7D&chartedColumn=latency_p50)

#### 1.7.1. Single question eval

In [None]:
rqa_self.invoke("Who are the student representatives?")

## 2. Memory and Sourcing 

### 2.1. Memory 

In [None]:
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

# Buffer memory exposed to the chain under the "chat_history" key, holding
# message objects rather than a single string.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Conversational chain: retriever over `vectorstore` plus the shared memory.
retriever = vectorstore.as_retriever()
qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

In [None]:
# New type of chain: on top of retrieval it keeps the chat history and turns
# the history plus the new question into a new standalone question.
from langchain.chains import ConversationalRetrievalChain

retriever = vectorstore.as_retriever()
# Reuses the `memory` object created in the previous cell.
qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

In [None]:
# First turn (Italian: "What amenities are available inside the rooms?").
question = "Quali sono le dotazioni disponibili all'interno delle camere? "
# Use `invoke`, as the rest of the notebook does, instead of calling the chain
# object directly (the direct call is deprecated in LangChain 0.1).
result = qa.invoke({"question": question})

In [None]:
result["answer"] 

In [None]:
# Follow-up turn (Italian: "What about the kitchen?"); relies on the chat history.
question = "Per quanto riguarda la cucina?"
# Use `invoke`, as the rest of the notebook does, instead of calling the chain
# object directly (the direct call is deprecated in LangChain 0.1).
result = qa.invoke({"question": question})

In [None]:
result["answer"] 

In [None]:
# Follow-up turn (Italian: "So are they shared?"); relies on the chat history.
question = "Sono quindi comuni?"
# Use `invoke`, as the rest of the notebook does, instead of calling the chain
# object directly (the direct call is deprecated in LangChain 0.1).
result = qa.invoke({"question": question})

In [None]:
result["answer"] 

In [None]:
# Comparison with model with no memory 


In [None]:
qa_chain1.invoke(question)

### 2.2. Sourcing 
https://python.langchain.com/docs/use_cases/question_answering/sources

In [None]:
# Inputs for the prompt: "context" is the retriever output passed through
# `format_docs`; "question" is the user input forwarded unchanged.
_rag_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# prompt -> LLM -> plain-string answer.
rag_chain = _rag_inputs | prompt | llm | StrOutputParser()

In [None]:
# Inputs for the prompt: "context" is the retriever output passed through
# `format_docs`; "question" is the user input forwarded unchanged.
_rag_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# prompt -> LLM -> plain-string answer.
rag_chain = _rag_inputs | prompt | llm | StrOutputParser()

In [None]:
from langchain_core.runnables import RunnableParallel

# Answering sub-chain: expects a dict whose "context" entry holds the
# retrieved documents, formats them, then runs prompt -> LLM -> string.
_format_context = RunnablePassthrough.assign(
    context=(lambda x: format_docs(x["context"]))
)
rag_chain_from_docs = _format_context | prompt | llm | StrOutputParser()

# Outer chain: keeps the retrieved documents ("context") and the question in
# the output, and adds the generated "answer" next to them.
_retrieve_and_forward = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)
rag_chain_with_source = _retrieve_and_forward.assign(answer=rag_chain_from_docs)

rag_chain_with_source.invoke("Cosa troverà nella stanza in residenza? ")

## 3. Initialize Feedback Function(s)
For iterations over different models
N.B. in case of problems refer to the langchain_quickstart in this folder, or to: [Optimize RAG application - Trulens](https://colab.research.google.com/drive/1bjplY8jIUYtkiKzM4tXmZ5U5U10BaiCd)

In [None]:
# Same TruLens imports and session setup as the setup cell at the top of the
# notebook.
from trulens_eval import TruChain, Feedback, Huggingface, Tru
from trulens_eval.schema import FeedbackResult
tru = Tru()
# NOTE(review): reset_database() presumably discards previously logged records
# and feedback - confirm before re-running this cell.
tru.reset_database()

In [None]:
import numpy as np
from trulens_eval.app import App
from trulens_eval.feedback import Groundedness
from trulens_eval.feedback.provider import OpenAI

# Feedback provider used for the relevance scores.
# NOTE: the import above rebinds `OpenAI`, which the setup cell imported from
# `langchain_community.llms`; from here on it names the TruLens provider.
openai = OpenAI()

# Selector for the retrieved context of `rqa_base`; where the context lives is
# specific to the app.
context = App.select_context(rqa_base)

# Groundedness is scored with its own provider instance.
grounded = Groundedness(groundedness_provider=OpenAI())

# Groundedness of the answer against all context chunks collected into a list.
f_groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons)
    .on(context.collect())
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

# Relevance of the overall answer to the overall question.
f_qa_relevance = Feedback(openai.relevance).on_input_output()

# Relevance of each context chunk to the question, averaged over the chunks.
f_context_relevance = (
    Feedback(openai.qs_relevance)
    .on_input()
    .on(context)
    .aggregate(np.mean)
)

### 3.1 Instrument chain for logging with TruLens


In [None]:
# OK - recorder for the baseline chain `rqa_base`, logged under
# 'Chain1_ChatApplication' with the three feedback functions attached.
tru_recorder = TruChain(
    rqa_base,
    app_id='Chain1_ChatApplication',
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness],
)

In [None]:
# Invoke the baseline chain inside the recorder context; the question is
# Italian for "How does check-in at the residence work".
with tru_recorder as recording:
    llm_response = rqa_base.invoke("Come funziona l'ingresso in residenza")

print(llm_response)

In [None]:
rqa_base.invoke("Come funziona l'ingresso in residenza")

In [None]:
# Recorder for `rqa_compressed`, logged under 'Chain2_ChatApplication'.
tru_recorder2 = TruChain(
    rqa_compressed,
    app_id='Chain2_ChatApplication',
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness],
)

# One recorded call to check the setup.
with tru_recorder2 as recording:
    llm_response = rqa_compressed.invoke("What is the purpose of the source?")

display(llm_response)

In [None]:


# Recorder for `self_retriever`, logged under 'ChainSelf_ChatApplication'.
tru_recorder3 = TruChain(
    self_retriever,
    app_id='ChainSelf_ChatApplication',
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness],
)

# Recorder for `multi_retriever`, logged under 'Chainmulti_ChatApplication'.
tru_recorder4 = TruChain(
    multi_retriever,
    app_id='Chainmulti_ChatApplication',
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness],
)



### 3.2 Retrieve records and feedback (single question) 

In [None]:
# The record of the app invocation can be retrieved from the `recording`:

rec = recording.get() # use .get if only one record
#recs = recording.records # use .records if multiple

#display(rec)

In [None]:
# The results of the feedback functions can be retrieved from the record. These
# are `Future` instances (see `concurrent.futures`). You can use `as_completed`
# to wait until they have finished evaluating.

from concurrent.futures import as_completed

for feedback_future in as_completed(rec.feedback_results):
    feedback, feedback_result = feedback_future.result()

    # Annotation-only lines: they document the types of the two names above.
    feedback: Feedback
    feedback_result: FeedbackResult

    display(feedback.name, feedback_result.result)


In [None]:
records, feedback = tru.get_records_and_feedback(app_ids=["Chain1_ChatApplication"])

records.head()

In [None]:
records, feedback = tru.get_records_and_feedback(app_ids=["Chain2_ChatApplication"])

records.head()

In [None]:
tru.get_leaderboard(app_ids=[])

### 3.3. Multiple questions evaluations

In [None]:
# Load the evaluation questions, one per line.
eval_questions = []
# Decode explicitly as UTF-8 so reading does not depend on the platform's
# default locale encoding.
with open('eval_questions.txt', 'r', encoding='utf-8') as file:
    for line in file:
        # Remove the newline character and surrounding whitespace.
        item = line.strip()
        if not item:
            # Skip blank lines so no empty question is sent to the chains.
            continue
        print(item)
        eval_questions.append(item)

In [None]:
# Run every evaluation question through the baseline chain while recording.
# The call has to go through the app that `tru_recorder` wraps (`rqa_base`);
# invoking a different chain inside the context is not logged for this app.
for question in eval_questions:
    with tru_recorder as recording:
        rqa_base.invoke(question)

In [None]:
self_retriever.invoke("Vorrei prenotare un alloggio a tariffa intera per l'a.a. 2023-24. Come posso procedere?")

In [None]:
# Run every evaluation question through `self_retriever` inside the
# `tru_recorder3` context.
for question in eval_questions:
    with tru_recorder3 as recording:
        self_retriever.invoke(question)
        
        #__record__.app.first.steps.context.first.get_relevant_documents

In [None]:
# Run every evaluation question through the multi retriever while recording.
# `tru_recorder4` wraps `multi_retriever`, so that is the app to invoke here.
for question in eval_questions:
    with tru_recorder4 as recording:
        multi_retriever.invoke(question)

In [None]:
# Run every evaluation question through the compressed chain while recording.
# `tru_recorder2` wraps `rqa_compressed`, so that is the app to invoke here.
for question in eval_questions:
    with tru_recorder2 as recording:
        rqa_compressed.invoke(question)

In [None]:
records, feedback = tru.get_records_and_feedback(app_ids=[])
records.head()

In [None]:
import pandas as pd

pd.set_option("display.max_colwidth", None)
records[["input", "output"] + feedback]

In [None]:
tru.get_leaderboard(app_ids=[])

### 3.4. Explore in a Dashboard
For reference see the following [link](https://www.trulens.org/trulens_eval/api/tru/#trulens_eval.trulens_eval.tru.Tru)
def run_dashboard(
        self,
        port: Optional[int] = 8501,
        address: Optional[str] = None,
        force: bool = False,
        _dev: Optional[Path] = None
    ) -> Process:
        """
        Run a streamlit dashboard to view logged results and apps.

        Args:
            - port: int: port number to pass to streamlit through server.port.

In [None]:
tru = Tru()
#tru.reset_database()
tru.run_dashboard(port = 8503) # open a local streamlit app to explore

# tru.stop_dashboard() # stop if needed

In [None]:
conda activate aienv
cd Finetuning/BOT_V3_Langchain  
# Port conflict solved by changing the port number in tru.py

Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard.

Note: Feedback functions evaluated in the deferred manner can be seen in the "Progress" page of the TruLens dashboard.


In [None]:
# The recorder is initialized as prebuilt; we will need some more lessons to understand how to actually implement it.


In [None]:
# The results of the feedback functions can be retrieved from the record. These
# are `Future` instances (see `concurrent.futures`). You can use `as_completed`
# to wait until they have finished evaluating.

from concurrent.futures import as_completed

for feedback_future in as_completed(rec.feedback_results):
    feedback, feedback_result = feedback_future.result()

    # Annotation-only lines: they document the types of the two names above.
    feedback: Feedback
    feedback_result: FeedbackResult

    display(feedback.name, feedback_result.result)


In [None]:
records, feedback = tru.get_records_and_feedback(app_ids=["Chain1_ChatApplication"])

records.head()

In [None]:
records, feedback = tru.get_records_and_feedback(app_ids=["Chain2_ChatApplication"])

records.head()

In [None]:
tru.get_leaderboard(app_ids=[])

### 3.3. Multiple questions evaluations

In [None]:
# Load the evaluation questions, one per line.
eval_questions = []
# Decode explicitly as UTF-8 so reading does not depend on the platform's
# default locale encoding.
with open('eval_questions.txt', 'r', encoding='utf-8') as file:
    for line in file:
        # Remove the newline character and surrounding whitespace.
        item = line.strip()
        if not item:
            # Skip blank lines so no empty question is sent to the chains.
            continue
        print(item)
        eval_questions.append(item)

In [None]:
# Run every evaluation question through the baseline chain while recording.
# The call has to go through the app that `tru_recorder` wraps (`rqa_base`);
# invoking a different chain inside the context is not logged for this app.
for question in eval_questions:
    with tru_recorder as recording:
        rqa_base.invoke(question)

In [None]:
self_retriever.invoke("Vorrei prenotare un alloggio a tariffa intera per l'a.a. 2023-24. Come posso procedere?")

In [None]:
# Run every evaluation question through `self_retriever` inside the
# `tru_recorder3` context.
for question in eval_questions:
    with tru_recorder3 as recording:
        self_retriever.invoke(question)
        
        #__record__.app.first.steps.context.first.get_relevant_documents

In [None]:
# Run every evaluation question through the multi retriever while recording.
# `tru_recorder4` wraps `multi_retriever`, so that is the app to invoke here.
for question in eval_questions:
    with tru_recorder4 as recording:
        multi_retriever.invoke(question)

In [None]:
# Run every evaluation question through the compressed chain while recording.
# `tru_recorder2` wraps `rqa_compressed`, so that is the app to invoke here.
for question in eval_questions:
    with tru_recorder2 as recording:
        rqa_compressed.invoke(question)

In [None]:
records, feedback = tru.get_records_and_feedback(app_ids=[])
records.head()

In [None]:
import pandas as pd

pd.set_option("display.max_colwidth", None)
records[["input", "output"] + feedback]

In [None]:
tru.get_leaderboard(app_ids=[])

### 3.4. Explore in a Dashboard
For reference see the following [link](https://www.trulens.org/trulens_eval/api/tru/#trulens_eval.trulens_eval.tru.Tru)
def run_dashboard(
        self,
        port: Optional[int] = 8501,
        address: Optional[str] = None,
        force: bool = False,
        _dev: Optional[Path] = None
    ) -> Process:
        """
        Run a streamlit dashboard to view logged results and apps.

        Args:
            - port: int: port number to pass to streamlit through server.port.

In [None]:
tru = Tru()
#tru.reset_database()
tru.run_dashboard(port = 8503) # open a local streamlit app to explore

# tru.stop_dashboard() # stop if needed

In [None]:
conda activate aienv
cd Finetuning/BOT_V3_Langchain  
# Port conflict solved by changing the port number in tru.py

Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard.

Note: Feedback functions evaluated in the deferred manner can be seen in the "Progress" page of the TruLens dashboard.


In [None]:
# The recorder is initialized as prebuilt; we will need some more lessons to understand how to actually implement it.


In [None]:
rqa_basic_full_ref.save(file_path="models/rqa_basic_full_ref.yaml")


In [None]:
import json
from pathlib import Path
from typing import Any, Union

import yaml

In [None]:
new_chain = RetrievalQA.load("models/rqa_basic_full_ref.yaml")

In [None]:
a = langchain.chains.loading.load_chain("models/rqa_basic_full_ref.yaml", retriever=ret_full_ref)


In [None]:
a = langchain.chains.loading.load_chain_from_file("models/rqa_basic_full_ref.yaml")

In [None]:
import 

In [None]:
import langchain.chains.loading

In [None]:
langchain.__version__

In [None]:
!sudo pip install langchain --upgrade

### 2. MODEL COMPARISONS 

#### 2.1. COMPARISON ON RETRIVALQA 
We will compare the models ceteris paribus (changing one factor at a time) to determine which one is the best. 
We will choose the best model in the RetrievalQA setting. 
We want to investigate: 
1. How the models perform 

#### Trulens troubleshooting

In [None]:
# terminal commands 
conda activate aienv
cd Finetuning/BOT_V3_Langchain  
# Port conflict solved by changing the port number in /Users/valedipalo/miniforge3/lib/python3.9/site-packages/tru.py

Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard.

Note: Feedback functions evaluated in the deferred manner can be seen in the "Progress" page of the TruLens dashboard.


# 3. Comparison
## 3.1. Amount of data 
How do the models perform depending on the amount of data we give them? 
To find out, we will evaluate with a basic retriever: 
1. rqa_basic_house_ref VS rqa_basic_full_ref 
2. rqa_basic_house_plain VS rqa_basic_full_plain 

## 3.2 Importance of Data Cleaning 
We will evaluate how much structuring the data matters for the models to work properly. 
To do so we will evaluate the performance for 
1. rqa_basic_house_ref VS rqa_basic_house_plain
2. rqa_basic_full_ref VS rqa_basic_full_plain

## 3.3 How important is the retriever 
The same tests will be run with the self-query retriever to evaluate whether it performs better or worse than the basic one. 

## 3.4. Different chains 
Once the best-scoring setup from the previous tests has been determined, we will evaluate how different chains perform, using 
- RetrivalQA
- MAPreduce 
- MAPrerank 

# determining eval question 

In [None]:
rqa_basic_full_plain.invoke

In [None]:
# TEST EVAL_QUESTIONS 

In [None]:

import pandas as pd

def invoke_model_with_inputs(models, model, inputs):
    """Run the chain registered under ``model`` in ``models`` on ``inputs``.

    Args:
        models: Mapping from model name to an object exposing ``invoke``.
        model: Key of the chain to run; also printed in the progress line.
        inputs: Value handed unchanged to the chain's ``invoke``.

    Returns:
        Whatever the selected chain's ``invoke`` returns.
    """
    # Announce the run first, then look the chain up and call it.
    print(f"🤖 starting execution of the model: {model}")
    selected_chain = models[model]
    return selected_chain.invoke(inputs)

# Registries mapping a model's display name to the chain instance, one for
# the basic-retriever chains and one for the self-query chains.
models_basic = dict(
    rqa_basic_house_ref=rqa_basic_house_ref,
    rqa_basic_house_plain=rqa_basic_house_plain,
    rqa_basic_full_ref=rqa_basic_full_ref,
    rqa_basic_full_plain=rqa_basic_full_plain,
)

models_self = dict(
    rqa_self_house_ref=rqa_self_house_ref,
    rqa_self_house_plain=rqa_self_house_plain,
    rqa_self_full_ref=rqa_self_full_ref,
    rqa_self_full_plain=rqa_self_full_plain,
)

In [None]:
# Question put to every model in `models_basic`.
question = "Does Bocconi has a Medical Center?"

# One row per model: its name, the question echoed back and the answer.
data = []

# Only the names are needed here; `invoke_model_with_inputs` looks the chain
# up in the registry itself.
for model_name in models_basic:
    result = invoke_model_with_inputs(models_basic, model_name, question)
    # Print the raw result followed by a separator line.
    print(result)
    print("--")

    # The chain's result carries the question under "query" and the answer
    # under "result".
    data.append({
        "Model Name": model_name,
        "Question": result["query"],
        "Answer": result["result"],
    })

# Collect the rows in a DataFrame and display it as the cell output.
df = pd.DataFrame(data)
df

# ---
# Model appendix 

In [None]:
# Locations of the Markdown knowledge-base files, relative to the notebook.
_MARKDOWN_DIR = "../../Data/New/Markdown"

# Refined ("reformat") sources.
path_fees_r = f"{_MARKDOWN_DIR}/Fees_reformat.md"
path_full_r = f"{_MARKDOWN_DIR}/Full_reformat.md"
path_funding_r = f"{_MARKDOWN_DIR}/Funding_reformat.md"
path_housing_r = f"{_MARKDOWN_DIR}/Housing_reformat.md"
# NOTE(review): this name uses "Freemover_DD" while the plain counterpart below
# uses "Freemover-DD" - confirm both match the files on disk.
path_oth_r = f"{_MARKDOWN_DIR}/Library-Freemover_DD_reformat.md"

# Plain sources.
path_fees_p = f"{_MARKDOWN_DIR}/Fees_plain.md"
path_funding_p = f"{_MARKDOWN_DIR}/Funding_plain.md"
path_housing_p = f"{_MARKDOWN_DIR}/Housing_plain.md"
path_exc_p = f"{_MARKDOWN_DIR}/Incoming-Exc_plain.md"
path_oth_p = f"{_MARKDOWN_DIR}/Library-Freemover-DD_plain.md"

In [None]:
# Metadata names for the header levels of the refined Markdown, from the
# outermost level (one '#') to the innermost (seven '#').
_REFORMAT_HEADER_FIELDS = (
    "Category",
    "Subcategory",
    "Question",
    "Subquestion",
    "Subsubquestion",
    "URL",
    "ID",
)

# (marker, metadata name) pairs: level N is marked by N '#' characters.
headers_to_split_on_reformat = [
    ("#" * level, field_name)
    for level, field_name in enumerate(_REFORMAT_HEADER_FIELDS, start=1)
]

### Vector store and basic retrievers
In this section there are models that probably are not necessary given the way in which retrievers work. 

In [None]:
# Build the vector store and the plain retriever for the full refined
# knowledge base.
# Decode explicitly as UTF-8 so reading does not depend on the platform's
# default locale encoding.
with open(path_full_r, 'r', encoding='utf-8') as file:
    markdown_content = file.read()

# Split the document on the configured Markdown headers.
markdown_splitter = MarkdownHeaderTextSplitter(
    headers_to_split_on=headers_to_split_on_reformat)
splits = markdown_splitter.split_text(markdown_content)
# A dedicated collection name keeps this store out of Chroma's default
# collection, which every other `Chroma.from_documents` call in this notebook
# would otherwise write to as well.
vs_full_ref = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
    collection_name="full_ref",
)
ret_full_ref = vs_full_ref.as_retriever()

#### Refined

In [None]:
def _build_markdown_store(path, headers, collection_name):
    """Index one Markdown file in its own Chroma collection.

    Args:
        path: Location of the Markdown file to read (decoded as UTF-8).
        headers: ``(marker, metadata_name)`` pairs handed to
            ``MarkdownHeaderTextSplitter``.
        collection_name: Chroma collection to write to. It must be unique per
            store, otherwise the stores share Chroma's default collection.

    Returns:
        A ``(vector_store, retriever)`` pair for the indexed chunks.
    """
    with open(path, 'r', encoding='utf-8') as file:
        markdown_content = file.read()
    markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers)
    splits = markdown_splitter.split_text(markdown_content)
    vector_store = Chroma.from_documents(
        documents=splits,
        embedding=OpenAIEmbeddings(),
        collection_name=collection_name,
    )
    return vector_store, vector_store.as_retriever()


# Refined (reformatted) sources: one store and one retriever per topic.
vs_house_ref, ret_house_ref = _build_markdown_store(
    path_housing_r, headers_to_split_on_reformat, "house_ref")
vs_fees_ref, ret_fees_ref = _build_markdown_store(
    path_fees_r, headers_to_split_on_reformat, "fees_ref")
vs_funding_ref, ret_funding_ref = _build_markdown_store(
    path_funding_r, headers_to_split_on_reformat, "funding_ref")
vs_oth_ref, ret_oth_ref = _build_markdown_store(
    path_oth_r, headers_to_split_on_reformat, "oth_ref")

#### Plain vector stores 

In [None]:
def _build_markdown_store(path, headers, collection_name):
    """Index one Markdown file in its own Chroma collection.

    Args:
        path: Location of the Markdown file to read (decoded as UTF-8).
        headers: ``(marker, metadata_name)`` pairs handed to
            ``MarkdownHeaderTextSplitter``.
        collection_name: Chroma collection to write to. It must be unique per
            store, otherwise the stores share Chroma's default collection.

    Returns:
        A ``(vector_store, retriever)`` pair for the indexed chunks.
    """
    with open(path, 'r', encoding='utf-8') as file:
        markdown_content = file.read()
    markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers)
    splits = markdown_splitter.split_text(markdown_content)
    vector_store = Chroma.from_documents(
        documents=splits,
        embedding=OpenAIEmbeddings(),
        collection_name=collection_name,
    )
    return vector_store, vector_store.as_retriever()


# Plain (unstructured) sources: one store and one retriever per topic.
vs_house_plain, ret_house_plain = _build_markdown_store(
    path_housing_p, headers_to_split_on_plain, "house_plain")
vs_fees_plain, ret_fees_plain = _build_markdown_store(
    path_fees_p, headers_to_split_on_plain, "fees_plain")
vs_funding_plain, ret_funding_plain = _build_markdown_store(
    path_funding_p, headers_to_split_on_plain, "funding_plain")
# Alias kept so code written against the original misspelled name still works.
ret_fudning_plain = ret_funding_plain
vs_exc_plain, ret_exc_plain = _build_markdown_store(
    path_exc_p, headers_to_split_on_plain, "exc_plain")
vs_oth_plain, ret_oth_plain = _build_markdown_store(
    path_oth_p, headers_to_split_on_plain, "oth_plain")

### Self retrievers

In [None]:
# Metadata schema for the refined knowledge base, written as
# (attribute name, description) pairs; every attribute is typed as a string.
# NOTE(review): the descriptions refer to "Header 1"/"Header 2" while the
# attributes are named Category/Subcategory - confirm the wording is intended.
_REF_FIELD_DESCRIPTIONS = [
    ("Category", "a primary category or a general topic. It introduces the broader theme under which more specific information is grouped. In a retrieval task, it acts as the first level of data filtering or organization, offering a broad overview of the context or subject area."),
    ("Subcategory", "This is a subtheme or subcategory of Header 1. It provides a further level of detail, focusing on a specific aspect of the main theme. It serves to refine the search or understanding within the general topic defined by Header 1, guiding the user towards more targeted information."),
    ("Question", "This represents an even more specific subdivision of Header 2. This level contains the actual question. In a retrieval task, this header helps to focus on a very specific question, making the search even more targeted. "),
    ("Subquestion", "For questions which are represented by multiple section,it serves to direct the user or the retrieval system towards a highly detailed and specific answer or information. It's the level that directly responds to the user's questions or needs. Oftentime is defined as General as a placeholder. "),
    ("Subsubquestion", "This is the most specific level, is used in case of further and specific details. In most of the cases is defined as general as a placeholder"),
    ("URL", "A reference to the URL from which the Question has been obtained. It is not relevant in any way for retrieving"),
    ("ID", "A reference to the specific question. It is not relevant in any way for retrieving"),
]

metadata_field_info_ref = [
    AttributeInfo(name=field_name, description=field_description, type="string")
    for field_name, field_description in _REF_FIELD_DESCRIPTIONS
]

# Self-query retriever over the full refined store.
self_full_ref = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=vs_full_ref,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info_ref,
    verbose=True,
)

In [None]:
# Self-query retrievers over the housing-only stores (refined and plain).
# NOTE(review): both use `metadata_field_info`, defined outside this cell,
# whereas the full refined retriever uses `metadata_field_info_ref` - confirm
# that schema matches the metadata stored in `vs_house_ref`.
self_house_ref = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=vs_house_ref,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    verbose=True,
)

self_house_plain = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=vs_house_plain,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    verbose=True,
)

##### 2.1.2 RETRIVAL_QA BASIC

In [None]:
# RetrievalQA chains over the basic housing retrievers. Both answer with
# QA_CHAIN_PROMPT and return the retrieved source documents; only the refined
# variant is tagged.
rqa_basic_house_ref = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=ret_house_ref,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
    tags=["base", "house", "refined"],
)

rqa_basic_house_plain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=ret_house_plain,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
)

##### 2.1.2 RETRIVAL_QA SELF 

In [None]:
def _self_retrieval_qa(retriever):
    """Wrap *retriever* in a RetrievalQA chain using QA_CHAIN_PROMPT that also returns its sources."""
    return RetrievalQA.from_chain_type(
        llm,
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    )


# RetrievalQA chains driven by the self-query retrievers.
rqa_self_house_ref = _self_retrieval_qa(self_house_ref)
rqa_self_house_plain = _self_retrieval_qa(self_house_plain)
rqa_self_full_ref = _self_retrieval_qa(self_full_ref)
# NOTE(review): `self_full_plain` is not created in the nearby cells - confirm
# it is defined before this cell runs.
rqa_self_full_plain = _self_retrieval_qa(self_full_plain)

# --- 

### 1.7. Langchain evaluation 
To access the results from the dashboard you can use the following [link](https://smith.langchain.com/o/917d7cd4-4420-5477-8a36-902a60673259/projects?paginationState=%7B%22pageIndex%22%3A0%2C%22pageSize%22%3A10%7D&chartedColumn=latency_p50)

#### 1.7.1. Single question eval

In [None]:
rqa_self.invoke("Who are the student representatives?")

## 2. Memory and Sourcing 

### 2.1. Memory 

In [None]:
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

# Buffer memory exposed to the chain under the "chat_history" key, holding
# message objects rather than a single string.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Conversational chain: retriever over `vectorstore` plus the shared memory.
retriever = vectorstore.as_retriever()
qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

In [None]:
# New type of chain: on top of retrieval it keeps the chat history and turns
# the history plus the new question into a new standalone question.
from langchain.chains import ConversationalRetrievalChain

retriever = vectorstore.as_retriever()
# Reuses the `memory` object created in the previous cell.
qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

In [None]:
# First turn (Italian: "What amenities are available inside the rooms?").
question = "Quali sono le dotazioni disponibili all'interno delle camere? "
# Use `invoke`, as the rest of the notebook does, instead of calling the chain
# object directly (the direct call is deprecated in LangChain 0.1).
result = qa.invoke({"question": question})

In [None]:
result["answer"] 

In [None]:
# Follow-up turn (Italian: "What about the kitchen?"); relies on the chat history.
question = "Per quanto riguarda la cucina?"
# Use `invoke`, as the rest of the notebook does, instead of calling the chain
# object directly (the direct call is deprecated in LangChain 0.1).
result = qa.invoke({"question": question})

In [None]:
result["answer"] 

In [None]:
# Follow-up turn (Italian: "So are they shared?"); relies on the chat history.
question = "Sono quindi comuni?"
# Use `invoke`, as the rest of the notebook does, instead of calling the chain
# object directly (the direct call is deprecated in LangChain 0.1).
result = qa.invoke({"question": question})

In [None]:
result["answer"] 

In [None]:
# Comparison with model with no memory 


In [None]:
qa_chain1.invoke(question)

### 2.2. Sourcing 
https://python.langchain.com/docs/use_cases/question_answering/sources

In [None]:
# Inputs for the prompt: "context" is the retriever output passed through
# `format_docs`; "question" is the user input forwarded unchanged.
_rag_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# prompt -> LLM -> plain-string answer.
rag_chain = _rag_inputs | prompt | llm | StrOutputParser()

In [None]:
# Inputs for the prompt: "context" is the retriever output passed through
# `format_docs`; "question" is the user input forwarded unchanged.
_rag_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# prompt -> LLM -> plain-string answer.
rag_chain = _rag_inputs | prompt | llm | StrOutputParser()

In [None]:
from langchain_core.runnables import RunnableParallel

# Answering sub-chain: expects a dict whose "context" entry holds the
# retrieved documents, formats them, then runs prompt -> LLM -> string.
_format_context = RunnablePassthrough.assign(
    context=(lambda x: format_docs(x["context"]))
)
rag_chain_from_docs = _format_context | prompt | llm | StrOutputParser()

# Outer chain: keeps the retrieved documents ("context") and the question in
# the output, and adds the generated "answer" next to them.
_retrieve_and_forward = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)
rag_chain_with_source = _retrieve_and_forward.assign(answer=rag_chain_from_docs)

rag_chain_with_source.invoke("Cosa troverà nella stanza in residenza? ")

## 3. Initialize Feedback Function(s)
For iterations over different models
N.B. in case of problems refer to the langchain_quickstart in this folder, or to: [Optimize RAG application - Trulens](https://colab.research.google.com/drive/1bjplY8jIUYtkiKzM4tXmZ5U5U10BaiCd)

In [None]:
# Re-import the TruLens entry points and obtain the `Tru` handle.
from trulens_eval import TruChain, Feedback, Huggingface, Tru
from trulens_eval.schema import FeedbackResult
tru = Tru()
# NOTE(review): reset_database() presumably wipes previously logged records —
# confirm before re-running this cell on a database you want to keep.
tru.reset_database()

In [None]:
import numpy as np

from trulens_eval.app import App
from trulens_eval.feedback import Groundedness
# NOTE(review): this `OpenAI` rebinds the name imported from
# `langchain_community.llms` at the top of the notebook.
from trulens_eval.feedback.provider import OpenAI

# Provider used by the relevance feedback functions below.
openai = OpenAI()

# Selector for the context used in feedback; its location is app specific, so
# it is derived from the app being evaluated.
context = App.select_context(rqa_base)

# Groundedness: evaluated on the collected list of context chunks and the app
# output, then aggregated with the provider's statement aggregator.
grounded = Groundedness(groundedness_provider=OpenAI())
f_groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons)
    .on(context.collect())
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

# Answer relevance: overall question versus overall answer.
f_qa_relevance = Feedback(openai.relevance).on_input_output()

# Context relevance: question versus each context chunk, averaged.
f_context_relevance = (
    Feedback(openai.qs_relevance).on_input().on(context).aggregate(np.mean)
)

### 3.1 Instrument chain for logging with TruLens


In [None]:
# OK — recorder wrapping `rqa_base`, logged under app id
# 'Chain1_ChatApplication' with the three feedback functions attached.
tru_recorder = TruChain(
    rqa_base,
    app_id='Chain1_ChatApplication',
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness],
)

In [None]:
# Invoke the chain inside the recorder context so the call is captured in
# `recording`, then show the chain's response.
with tru_recorder as recording:
    llm_response = rqa_base.invoke("Come funziona l'ingresso in residenza")

print(llm_response)

In [None]:
# Same question as above, invoked outside any recorder context.
rqa_base.invoke("Come funziona l'ingresso in residenza")

In [None]:
# Recorder wrapping `rqa_compressed`, logged under app id
# 'Chain2_ChatApplication' with the same three feedback functions.
tru_recorder2 = TruChain(
    rqa_compressed,
    app_id='Chain2_ChatApplication',
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness],
)

# Invoke the chain once inside the recorder context and show the response.
with tru_recorder2 as recording:
    llm_response = rqa_compressed.invoke("What is the purpose of the source?")

display(llm_response)

In [None]:


# Recorder wrapping `self_retriever`.
tru_recorder3 = TruChain(
    self_retriever,
    app_id='ChainSelf_ChatApplication',
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness],
)

# Recorder wrapping `multi_retriever`.
tru_recorder4 = TruChain(
    multi_retriever,
    app_id='Chainmulti_ChatApplication',
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness],
)



### 3.2 Retrieve records and feedback (single question) 

In [None]:
# The record of the app invocation can be retrieved from the `recording`.
# NOTE: `recording` is rebound by every `with ... as recording` block, so this
# reads from whichever recorder context ran last.

rec = recording.get() # use .get if only one record
#recs = recording.records # use .records if multiple

#display(rec)

In [None]:
# The results of the feedback functions can be retrieved from the record. These
# are `Future` instances (see `concurrent.futures`). You can use `as_completed`
# to wait until they have finished evaluating.

from concurrent.futures import as_completed

for feedback_future in as_completed(rec.feedback_results):
    # Each future resolves to a (feedback definition, feedback result) pair.
    feedback, feedback_result = feedback_future.result()

    # Bare annotations, for editor/type-checker support only.
    feedback: Feedback
    feedback_result: FeedbackResult

    display(feedback.name, feedback_result.result)


In [None]:
# Fetch the logged records for the first app; `feedback` is used further down
# as the list of feedback column names of `records`.
records, feedback = tru.get_records_and_feedback(app_ids=["Chain1_ChatApplication"])

records.head()

In [None]:
# Fetch the logged records for the second app; this overwrites the `records`
# and `feedback` variables of the previous cell.
records, feedback = tru.get_records_and_feedback(app_ids=["Chain2_ChatApplication"])

records.head()

In [None]:
# Show the leaderboard; an empty `app_ids` list presumably selects every
# logged app — confirm against the TruLens documentation.
tru.get_leaderboard(app_ids=[])

### 3.3. Multiple questions evaluations

In [None]:
# Load the evaluation questions from 'eval_questions.txt', one question per
# line. Blank lines are skipped so that no empty question is sent to the chains.
eval_questions = []
with open('eval_questions.txt', 'r') as file:
    for line in file:
        # Strip the trailing newline and surrounding whitespace.
        item = line.strip()
        if not item:
            continue
        print(item)
        eval_questions.append(item)

In [None]:
# Run every evaluation question through the app wrapped by `tru_recorder`.
# The recorder was built around `rqa_base`, so that is the chain that must be
# invoked inside its context (the original invoked `rag_chain` instead).
for question in eval_questions:
    with tru_recorder as recording:
        rqa_base.invoke(question)

In [None]:
# Single manual query against `self_retriever`, outside any recorder context.
self_retriever.invoke("Vorrei prenotare un alloggio a tariffa intera per l'a.a. 2023-24. Come posso procedere?")

In [None]:
# Run every evaluation question through `self_retriever` inside the context of
# `tru_recorder3`, the recorder that wraps it.
for question in eval_questions:
    with tru_recorder3 as recording:
        self_retriever.invoke(question)
        
        #__record__.app.first.steps.context.first.get_relevant_documents

In [None]:
# Run every evaluation question through the app wrapped by `tru_recorder4`.
# That recorder was built around `multi_retriever`; the original loop invoked
# `self_retriever` here, which looks like a copy-paste slip from the previous
# cell.
for question in eval_questions:
    with tru_recorder4 as recording:
        multi_retriever.invoke(question)

In [None]:
# Run every evaluation question through the app wrapped by `tru_recorder2`.
# The recorder was built around `rqa_compressed`, so that is the chain invoked
# inside its context (the original invoked `rag_chain_compressed` instead).
for question in eval_questions:
    with tru_recorder2 as recording:
        rqa_compressed.invoke(question)

In [None]:
# Fetch logged records and feedback column names; an empty `app_ids` list
# presumably selects every app — confirm against the TruLens documentation.
records, feedback = tru.get_records_and_feedback(app_ids=[])
records.head()

In [None]:
import pandas as pd

# Remove the column-width limit so long inputs/outputs are shown in full.
pd.set_option("display.max_colwidth", None)
# Show each input and output next to its feedback columns.
records[["input", "output"] + feedback]

In [None]:
# Show the leaderboard; an empty `app_ids` list presumably selects every
# logged app — confirm against the TruLens documentation.
tru.get_leaderboard(app_ids=[])

### 3.4. Explore in a Dashboard
For reference see the following [link](https://www.trulens.org/trulens_eval/api/tru/#trulens_eval.trulens_eval.tru.Tru)
```python
def run_dashboard(
        self,
        port: Optional[int] = 8501,
        address: Optional[str] = None,
        force: bool = False,
        _dev: Optional[Path] = None
    ) -> Process:
        """
        Run a streamlit dashboard to view logged results and apps.

        Args:
            - port: int: port number to pass to streamlit through server.port.
```

In [None]:
# Obtain the Tru handle and open a local Streamlit app on port 8503 to explore
# the logged results.
tru = Tru()
# tru.reset_database()
tru.run_dashboard(port=8503)

# tru.stop_dashboard()  # stop if needed

In [None]:
conda activate aienv
cd Finetuning/BOT_V3_Langchain  
# PORT problem solved by changing the port number in tru.py

Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard.

Note: Feedback functions evaluated in the deferred manner can be seen in the "Progress" page of the TruLens dashboard.


In [None]:
# The recorder is initialized as prebuilt; we will need some more lessons to understand how to actually implement it.


In [None]:
# Obtain the Tru handle and open a local Streamlit app on port 8503 to explore
# the logged results.
tru = Tru()
# tru.reset_database()
tru.run_dashboard(port=8503)

# tru.stop_dashboard()  # stop if needed

In [None]:
conda activate aienv
cd Finetuning/BOT_V3_Langchain  
# PORT problem solved by changing the port number in tru.py

Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard.

In [None]:
# The recorder is initialized as prebuilt; we will need some more lessons to understand how to actually implement it.


In [None]:
# The recorder is initialized as prebuilt; we will need some more lessons to understand how to actually implement it.


In [None]:
conda activate aienv
cd Finetuning/BOT_V3_Langchain  
# PORT problem solved by changing the port number in tru.py

Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard.

Note: Feedback functions evaluated in the deferred manner can be seen in the "Progress" page of the TruLens dashboard.


In [None]:
# The recorder is initialized as prebuilt; we will need some more lessons to understand how to actually implement it.


In [None]:
# Obtain the Tru handle and open a local Streamlit app on port 8503 to explore
# the logged results.
tru = Tru()
# tru.reset_database()
tru.run_dashboard(port=8503)

# tru.stop_dashboard()  # stop if needed

In [None]:
conda activate aienv
cd Finetuning/BOT_V3_Langchain  
# PORT problem solved by changing the port number in tru.py

Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard.

In [None]:
# The recorder is initialized as prebuilt; we will need some more lessons to understand how to actually implement it.


In [None]:
# The recorder is initialized as prebuilt; we will need some more lessons to understand how to actually implement it.


In [None]:
conda activate aienv
cd Finetuning/BOT_V3_Langchain  
# PORT problem solved by changing the port number in tru.py

Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard.

In [None]:
# The recorder is initialized as prebuilt; we will need some more lessons to understand how to actually implement it.


In [None]:
# The recorder is initialized as prebuilt; we will need some more lessons to understand how to actually implement it.


In [None]:
conda activate aienv
cd Finetuning/BOT_V3_Langchain  
# PORT problem solved by changing the port number in tru.py

Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard.

Note: Feedback functions evaluated in the deferred manner can be seen in the "Progress" page of the TruLens dashboard.


In [None]:
# The recorder is initialized as prebuilt; we will need some more lessons to understand how to actually implement it.


In [None]:
# Obtain the Tru handle and open a local Streamlit app on port 8503 to explore
# the logged results.
tru = Tru()
# tru.reset_database()
tru.run_dashboard(port=8503)

# tru.stop_dashboard()  # stop if needed

In [None]:
conda activate aienv
cd Finetuning/BOT_V3_Langchain  
# PORT problem solved by changing the port number in tru.py

Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard.

In [None]:
# The recorder is initialized as prebuilt; we will need some more lessons to understand how to actually implement it.


In [None]:
# The recorder is initialized as prebuilt; we will need some more lessons to understand how to actually implement it.
