# TruBot

This is the first part of the TruBot example notebook. It does not use Hugging Face-based feedback functions.

In [None]:
%load_ext autoreload
%autoreload 2
from pathlib import Path
import sys

# When running from a checkout of the GitHub repo, add the directory four
# levels above the current working directory to the import path:
sys.path.append(str(Path.cwd().parent.parent.parent.parent.resolve()))

## API keys setup

In [None]:
from trulens.core import TruSession

# Instantiate a TruSession and call its migrate_database() before the rest of
# the notebook uses the database.
# NOTE(review): presumably this upgrades an existing TruLens database to the
# schema of the installed version — confirm against the TruSession docs.
TruSession().migrate_database()

In [None]:
from trulens.core.utils.keys import check_keys

# The LLM, the embeddings, and the feedback provider constructed later in this
# notebook are all OpenAI-based, so check for the key up front.
check_keys("OPENAI_API_KEY")

In [None]:
from pprint import PrettyPrinter

# Imports from LangChain to build app:
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.memory import ConversationSummaryBufferMemory
from langchain_community.llms import OpenAI
import numpy as np
from trulens.core import Feedback
from trulens.core import FeedbackMode
from trulens.core import Select
from trulens.core import TruSession
from trulens.dashboard import run_dashboard

# Imports main tools:
from trulens.providers.openai import OpenAI as fOpenAI

# Pretty-printer instance (handy for inspecting results interactively).
pp = PrettyPrinter()

# The TruSession object manages the database of apps, records, and feedbacks,
# and the dashboard that displays them.
session = TruSession()

# Start the dashboard. If you are running from the GitHub repo, you will need to
# adjust the path the dashboard Streamlit app starts in by providing the _dev
# argument.
run_dashboard(session, force=True, _dev=Path().cwd().parent.parent.resolve())

# If needed, you can reset the TruLens dashboard database by running the line
# below:

# session.reset_database()

In [None]:
# Select the vector DB provider. Pinecone requires setting up a Pinecone
# database first, while the hnsw database is included with trulens.
# db_host = "pinecone"
db_host = "hnsw"

# NOTE(review): model_name is not referenced by any of the cells visible here —
# confirm whether a later cell uses it.
model_name = "gpt-3.5-turbo"
# Name under which the app is registered when it is wrapped in TruChain.
app_name = "TruBot"

# Embedding for the vector DB. The hnsw store below is configured with the same
# dimensionality (n_dim=1536).
embedding = OpenAIEmbeddings(model="text-embedding-ada-002")  # 1536 dims

# Define `get_doc_search()` for the selected vector DB provider. Both variants
# share the same zero-argument interface and return a LangChain vector store
# built on the module-level `embedding`.
if db_host == "pinecone":
    check_keys("PINECONE_API_KEY", "PINECONE_ENV")

    # Pinecone configuration if using pinecone.

    import os

    from langchain_community.vectorstores import Pinecone
    import pinecone

    pinecone.init(
        api_key=os.environ.get("PINECONE_API_KEY"),  # find at app.pinecone.io
        environment=os.environ.get(
            "PINECONE_ENV"
        ),  # next to api key in console
    )

    # If using pinecone, make sure you create your index under name 'llmdemo' or
    # change the below.

    def get_doc_search():
        """Return a Pinecone vector store for the existing 'llmdemo' index."""
        docsearch = Pinecone.from_existing_index(
            index_name="llmdemo", embedding=embedding
        )

        return docsearch

elif db_host == "hnsw":
    # Local pinecone alternative. Requires precomputed 'hnswlib_trubot' folder
    # (the `work_dir` passed below).

    # Imported from langchain_community for consistency with the Pinecone
    # branch and the LLM import; `langchain.vectorstores` is a legacy shim.
    from langchain_community.vectorstores import DocArrayHnswSearch

    def get_doc_search():
        """Return a DocArrayHnswSearch store over the 'hnswlib_trubot' folder."""
        # We need to create this object in the thread in which it is used so we
        # wrap it in this function for later usage.

        docsearch = DocArrayHnswSearch.from_params(
            embedding=embedding,
            work_dir="hnswlib_trubot",
            n_dim=1536,  # must agree with the embedding model's 1536 dims
            max_elements=1024,
        )

        return docsearch

else:
    raise RuntimeError("Unhandled db_host, select either 'pinecone' or 'hnsw'.")

# LLM for completing prompts, and other tasks. This is the LangChain `OpenAI`
# LLM, not the TruLens provider created below.
llm = OpenAI(temperature=0, max_tokens=256)

# Construct feedback functions.

# API endpoints for models used in feedback functions. `fOpenAI` is the TruLens
# OpenAI provider (imported under an alias so it does not clash with the
# LangChain `OpenAI` class above).
# hugs = Huggingface()
openai = fOpenAI()

# Language match between question/answer (disabled in this part of the
# notebook, which does not use huggingface-based feedback functions).
# f_lang_match = Feedback(hugs.language_match).on_input_output()
# By default this will evaluate feedback on main app input and main app output.

# Question/answer relevance between overall question and answer.
# `on_input_output()` evaluates the feedback on the main app input and the main
# app output.
f_qa_relevance = Feedback(openai.relevance).on_input_output()

# Question/statement relevance between question and each context chunk.
# The first feedback argument is set to the main app input, and the second is
# taken from the context sources as passed to an internal
# `combine_docs_chain._call`. The per-chunk results are aggregated with
# `np.min`, i.e. the reported value is the minimum over the chunks.
f_context_relevance = (
    Feedback(openai.context_relevance)
    .on_input()
    .on(
        Select.Record.app.combine_docs_chain._call.args.inputs.input_documents[
            :
        ].page_content
    )
    .aggregate(np.min)
)

# Feedback functions attached to every app version created in this notebook.
all_feedbacks = [
    # f_lang_match,
    f_qa_relevance,
    f_context_relevance,
]

# TruBot Version 1

In [None]:
from trulens.apps.langchain import TruChain


def v1_new_conversation(feedback_mode=FeedbackMode.WITH_APP):
    """
    Build a TruChain-instrumented question-answering bot for a new conversation.

    A _LangChain_ `ConversationalRetrievalChain` is assembled from the
    module-level `llm`, a retriever obtained via `get_doc_search()`, and blank
    conversation memory. The chain is then wrapped in a `TruChain` registered
    as `app_name`, version "v1", with `all_feedbacks` attached.

    `feedback_mode` controls when feedback is evaluated:

    - FeedbackMode.WITH_APP -- app will wait until feedback is evaluated before
      returning from calls.

    - FeedbackMode.WITH_APP_THREAD -- app will return from calls and evaluate
      feedback in a new thread.

    - FeedbackMode.DEFERRED -- app will return and a separate runner thread (see
      usage later in this notebook) will evaluate feedback.

    Returns the `TruChain` wrapping the newly created chain.
    """

    # Every conversation starts from empty summary-buffer memory.
    fresh_memory = ConversationSummaryBufferMemory(
        max_token_limit=650,
        llm=llm,
        memory_key="chat_history",
        output_key="answer",
    )

    # Context retriever over the configured vector store.
    context_retriever = get_doc_search().as_retriever()

    # The conversational chain ties the LLM, retriever, and memory together.
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=context_retriever,
        return_source_documents=True,
        memory=fresh_memory,
        get_chat_history=lambda a: a,
        max_tokens_limit=4096,
    )

    # Hand back the chain wrapped in TruLens instrumentation.
    return TruChain(
        app_name=app_name,
        app_version="v1",
        chain=qa_chain,
        feedbacks=all_feedbacks,
        feedback_mode=feedback_mode,
    )

In [None]:
# Instantiate the app with fresh memory:

tc1 = v1_new_conversation()

# Call the app. `with_record` is given the wrapped chain (`tc1.app`) and its
# argument, and returns the call's result together with a record; the record is
# used for the manual feedback check in the next cell.

res, record = tc1.with_record(tc1.app, "Who is Shayak?")
res

# Notice that the `source_documents` returned include chunks about Shameek, and
# the answer includes bits about Shameek as a result.

In [None]:
# The feedback should already be present in the dashboard, but we can check the
# context_relevance here manually as well by running the feedback function on
# the record captured above and displaying the result as a dict:
feedback = f_context_relevance.run(record=record, app=tc1)
feedback.dict()

In [None]:
# Now ask a question about QII (quantitative input influence, a base technology
# employed in TruEra's products), but in a non-English language:

# Start a new conversation, since the app keeps prior questions in its memory,
# which may cause you some testing woes.
tc1 = v1_new_conversation()

# res, record = tc1.with_record(tc1.app, "Co jest QII?") # Polish
res, record = tc1.with_record(tc1.app, "Was ist QII?")  # German
res

# Note that the response here is in English. The app sometimes does match the
# question's language, so other variants may need to be tested.