# TruBot

In [1]:
%load_ext autoreload
%autoreload 2
from pathlib import Path
import sys

# Put the directory two levels above the current working directory on the
# module search path so that packages located there can be imported by the
# cells below.
sys.path.append(
    str(Path.cwd().parent.parent.resolve())
)

In [2]:
from pprint import PrettyPrinter
from typing import Sequence

from IPython.display import JSON
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.memory import ConversationSummaryBufferMemory
from langchain.vectorstores import DocArrayHnswSearch
import numpy as np

from trulens_eval import Tru
from trulens_eval import tru_feedback
from trulens_eval.keys import *
from trulens_eval.tru_db import Query
from trulens_eval.tru_db import Record
from trulens_eval.tru_feedback import Feedback
from trulens_eval.tru_feedback import Huggingface
from trulens_eval.utils.langchain import WithFilterDocuments

# if using Pinecone vectordb:
# from langchain.vectorstores import Pinecone
# import pinecone

# Shared pretty-printer; used further down to display the chain's result dict.
pp = PrettyPrinter()

KEY SET: OPENAI_API_KEY
KEY SET: PINECONE_API_KEY
KEY SET: PINECONE_ENV
KEY SET: HUGGINGFACE_API_KEY
KEY SET: SLACK_TOKEN
KEY SET: SLACK_SIGNING_SECRET
KEY SET: COHERE_API_KEY


In [3]:
# Identifiers for this demo.
# NOTE(review): neither `model_name` nor `chain_id` is referenced again in the
# visible cells; in particular `model_name` is not passed to `OpenAI(...)`
# below, so the LLM uses whatever model that constructor defaults to.
model_name = "gpt-3.5-turbo"
chain_id = "TruBot"

# Pinecone configuration if using pinecone.
# NOTE(review): the snippet below uses an `embedding` name that, in this
# notebook, is only created inside `new_conversation`; define it before
# re-enabling these lines.
# pinecone.init(
#    api_key=PINECONE_API_KEY,  # find at app.pinecone.io
#    environment=PINECONE_ENV  # next to api key in console
#)
#docsearch = Pinecone.from_existing_index(
#    index_name="llmdemo", embedding=embedding
#)

# LLM for completing prompts, and other tasks. This single instance is shared
# by the conversation memory and the retrieval chain built in
# `new_conversation`. temperature=0 and max_tokens=256 are passed as-is to the
# langchain `OpenAI` wrapper.
llm = OpenAI(temperature=0, max_tokens=256)

def new_conversation(
    lang_prompt_fix: bool = False,
    context_prompt_fix: bool = False,
    context_filter_fix: bool = False,
    feedbacks: Sequence[Feedback] = None
):
    """
    Create a TruLens-instrumented chain for a new conversation (blank memory).

    The chain is a `ConversationalRetrievalChain` built on the notebook-level
    `llm`, a `DocArrayHnswSearch` retriever and a summary-buffer memory. The
    flags enable adjustments to the prompts or add context filtering.

    Args:
        lang_prompt_fix: replace the answer prompt with one that asks for the
            answer in the same language as the question.
        context_prompt_fix: replace the answer prompt (and the per-document
            prompt) with versions that tell the model some contexts may be
            irrelevant. Mutually exclusive with `lang_prompt_fix`.
        context_filter_fix: wrap the retriever so that only documents whose
            question/statement relevance score is above 0.5 are kept.
        feedbacks: feedback functions forwarded to `Tru().Chain`; may be None.

    Returns:
        The wrapper returned by `Tru().Chain(chain, feedbacks=feedbacks)`.

    Raises:
        AssertionError: if both `lang_prompt_fix` and `context_prompt_fix`
            are set.
    """

    assert not(lang_prompt_fix and context_prompt_fix), "Cannot use both prompt fixes at the same time."

    # Embedding needed for the vector db lookups.
    embedding = OpenAIEmbeddings(model='text-embedding-ada-002')  # 1536 dims

    # Conversation memory. `output_key` selects which chain output is stored,
    # since the chain also returns source documents.
    memory = ConversationSummaryBufferMemory(
        max_token_limit=650,
        llm=llm,
        memory_key="chat_history",
        output_key='answer'
    )

    # Pinecone alternative. Requires precomputed 'hnswlib_trubot' folder.
    # n_dim matches the 1536-dimensional embedding above.
    docsearch = DocArrayHnswSearch.from_params(
        embedding=embedding,
        work_dir='hnswlib_trubot',
        n_dim=1536,
        max_elements=1024
    )
    retriever = docsearch.as_retriever()

    # Better contexts fix, filter contexts with relevance:
    if context_filter_fix:
        # Build the relevance provider here instead of reading a notebook
        # global that is only assigned in a later cell; this keeps the
        # function usable regardless of which other cells have been run.
        relevance_provider = tru_feedback.OpenAI()

        def filter_by_relevance(query, doc):
            # Keep a document only if its relevance score exceeds 0.5.
            return relevance_provider.qs_relevance(
                question=query, statement=doc.page_content
            ) > 0.5

        retriever = WithFilterDocuments.of_retriever(
            retriever=retriever, filter_func=filter_by_relevance
        )

    # Conversational chain puts it all together. The chat history coming from
    # memory is passed through unchanged (identity `get_chat_history`).
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        return_source_documents=True,
        memory=memory,
        get_chat_history=lambda a: a,
        max_tokens_limit=4096
    )

    # Need to copy these otherwise various chains will feature templates that
    # point to the same objects, and the template edits below would leak into
    # other conversations.
    chain.combine_docs_chain.llm_chain.prompt = \
        chain.combine_docs_chain.llm_chain.prompt.copy()
    chain.combine_docs_chain.document_prompt = \
        chain.combine_docs_chain.document_prompt.copy()

    # Language mismatch fix:
    if lang_prompt_fix:
        chain.combine_docs_chain.llm_chain.prompt.template = \
            "Use the following pieces of context to answer the question at the end " \
            "in the same language as the question. If you don't know the answer, " \
            "just say that you don't know, don't try to make up an answer.\n\n" \
            "{context}\n\n" \
            "Question: {question}\n" \
            "Helpful Answer: "

    # Poor contexts fix using prompts:
    elif context_prompt_fix:
        chain.combine_docs_chain.llm_chain.prompt.template = \
            "Use only the relevant contexts to answer the question at the end " \
            ". Some pieces of context may not be relevant. If you don't know the answer, " \
            "just say that you don't know, don't try to make up an answer.\n\n" \
            "Contexts: \n{context}\n\n" \
            "Question: {question}\n" \
            "Helpful Answer: "
        # Label each retrieved document so it reads as a separate context.
        chain.combine_docs_chain.document_prompt.template = "\tContext: {page_content}"

    # Trulens instrumentation.
    tc = Tru().Chain(chain, feedbacks=feedbacks)

    return tc

In [4]:
# Setup some feedback functions.

# Feedback providers. Note that `openai` here is a trulens feedback provider
# instance (`tru_feedback.OpenAI()`), not the `openai` package.
hugs = tru_feedback.Huggingface()
openai = tru_feedback.OpenAI()

# Toxicity (of the response):
f_toxic = tru_feedback.Feedback(hugs.not_toxic).on_response()

# Language match (between prompt and response):
f_lang_match = tru_feedback.Feedback(hugs.language_match).on(
    text1="prompt", text2="response"
)

# Question to answer relevance:
f_relevance = tru_feedback.Feedback(openai.relevance).on(
    prompt="input", response="output"
)

# Question to context piece relevance: `statement` is bound to the
# `input_documents` recorded for the combine-docs call, then the function is
# applied per item (selecting `page_content` from each) and the per-item
# scores are aggregated with `np.min`.
# NOTE(review): presumably this means the reported score is that of the least
# relevant retrieved document -- confirm against the trulens_eval docs.
f_qs_relevance = tru_feedback.Feedback(openai.qs_relevance).on(
    question="input",
    statement=Record.chain.combine_docs_chain._call.args.inputs.input_documents
).on_multiple(
    multiarg="statement", each_query=Record.page_content, agg=np.min
)

# Feedback functions attached to new conversations. Only language match and
# question/context relevance are active; `f_toxic` and `f_relevance` are
# defined above but currently left out of the list.
feedbacks=[
    # f_toxic, 
    f_lang_match, 
    # f_relevance, 
    f_qs_relevance
]


huggingface api: 0requests [00:00, ?requests/s]

openai api: 0requests [00:00, ?requests/s]

In [6]:
# Build an instrumented chain with empty memory and none of the fix flags set,
# attaching the feedback functions selected above.
tc = new_conversation(feedbacks=feedbacks)

✅ chain chain_hash_a3bbf04ab4f88cf3ab470c786ee3f2e7 -> default.sqlite
✅ feedback def. feedback_hash_26c84c76d907d808c36028e024af63c0 -> default.sqlite
✅ feedback def. feedback_hash_b9475c84f84aee5e117ba7ec7aab7e25 -> default.sqlite


In [7]:
# Normal langchain usage: the instrumented chain is called directly with the
# question, and the returned result is pretty-printed.

res = tc("Who is Shayak?")
pp.pprint(res)

{'answer': ' Shayak Sen is a researcher who has been building systems and '
           'leading research to make machine learning and big data systems '
           'more explainable, privacy compliant, and fair. He obtained his PhD '
           'in Computer Science from Carnegie Mellon University and BTech in '
           'Computer Science from the Indian Institute of Technology, Delhi. '
           'Most recently, he was Group Chief Data Officer at Standard '
           'Chartered Bank.',
 'chat_history': '',
 'question': 'Who is Shayak?',
 'source_documents': [Document(page_content='When Shayak started building production grade machine learning models for algorithmic trading 10 years ago, he realized the need for putting the ‘science’ back in ‘data science’. Since then, he has been building systems and leading research to make machine learning and big data systems more explainable, privacy compliant, and fair. Shayak’s research at Carnegie Mellon University introduced a number of pio

In [8]:
# Also retrieve trulens records if needed for inspection or manual feedback
# evaluation: `call_with_record` returns the chain result together with the
# record of the call.

res, record = tc.call_with_record("Who is Shayak?")

✅ record record_hash_6a05079940559fd6e399a59ba6a945ff from chain_hash_a3bbf04ab4f88cf3ab470c786ee3f2e7 -> default.sqlite
✅ feedback feedback_hash_26c84c76d907d808c36028e024af63c0 on record_hash_6a05079940559fd6e399a59ba6a945ff -> default.sqlite
✅ feedback feedback_hash_b9475c84f84aee5e117ba7ec7aab7e25 on record_hash_6a05079940559fd6e399a59ba6a945ff -> default.sqlite


✅ record record_hash_97e02355b8c0eab238a09b2cfe140330 from chain_hash_a3bbf04ab4f88cf3ab470c786ee3f2e7 -> default.sqlite
✅ feedback feedback_hash_26c84c76d907d808c36028e024af63c0 on record_hash_97e02355b8c0eab238a09b2cfe140330 -> default.sqlite
✅ feedback feedback_hash_b9475c84f84aee5e117ba7ec7aab7e25 on record_hash_97e02355b8c0eab238a09b2cfe140330 -> default.sqlite


In [12]:
# Run the feedback functions manually on the record captured by
# `call_with_record` above, using the same `feedbacks` list that was attached
# to the chain.

feedback = Tru().run_feedback_functions(
    record_json=record,
    feedback_functions=feedbacks
)

# Bare expression so the notebook displays the feedback results.
feedback

[{'_success': True,
  'feedback_id': 'feedback_hash_26c84c76d907d808c36028e024af63c0',
  'record_id': 'record_hash_97e02355b8c0eab238a09b2cfe140330',
  'language_match': 0.7016496420837939},
 {'_success': True,
  'feedback_id': 'feedback_hash_b9475c84f84aee5e117ba7ec7aab7e25',
  'record_id': 'record_hash_97e02355b8c0eab238a09b2cfe140330',
  'qs_relevance': 0.1}]

ERROR:root:openai request failed <class 'openai.error.RateLimitError'>=That model is currently overloaded with other requests. You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 9cd5fb5453980ff66cd66d72e39dc29e in your message.). Retries=3.
