# Slackbot-related work

In [1]:
# Enable IPython's autoreload extension in mode 2 (reload all modules before
# running each cell). Keep comments on their own lines: text after a line
# magic is passed to the magic as arguments.
%load_ext autoreload
%autoreload 2

In [2]:
from trulens_evalchain.tru_db import Record, LocalTinyDB, TruDB, LocalSQLite
from trulens_evalchain import tru
from trulens_evalchain import tru_feedback
from IPython.display import JSON
from ipywidgets import widgets
import json

got OPENAI_API_KEY
got PINECONE_API_KEY
got PINECONE_ENV
got HUGGINGFACE_API_KEY
got SLACK_TOKEN
got SLACK_SIGNING_SECRET
got COHERE_API_KEY


In [3]:
from trulens_evalchain.provider_apis import Endpoint

In [4]:
# Request the Endpoint named "openai"; the log output below reports that an
# instance is created for this first request.
e1 = Endpoint("openai")

creating <class 'trulens_evalchain.provider_apis.Endpoint'> instance with name = openai


openai: 0request [00:00, ?request/s]

creating <class 'trulens_evalchain.provider_apis.TP'> instance with name = None


In [5]:
# Request the "openai" Endpoint a second time. No "creating ... instance" line
# is logged for this call, and the identity check further down shows that e2
# is the same object as e1.
e2 = Endpoint("openai")

openai: 0request [00:00, ?request/s]

In [6]:
# Request an Endpoint under a different name; the log output below shows a
# separate instance being created for "huggingface".
e3 = Endpoint("huggingface")

creating <class 'trulens_evalchain.provider_apis.Endpoint'> instance with name = huggingface


huggingface: 0request [00:00, ?request/s]

In [8]:
# Both names were obtained with Endpoint("openai"); check they refer to the
# very same object.
e1 is e2

True

In [None]:
# Manually advance the huggingface endpoint's progress indicator by 10.
# NOTE(review): presumably e3.tqdm is a tqdm progress bar (the output of the
# cell that created e3 looks like one) — confirm.
e3.tqdm.update(10)

In [12]:
from langchain.callbacks import get_openai_callback
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms.base import BaseLLM
from langchain.llms import OpenAI
from langchain.memory import ConversationSummaryBufferMemory
from langchain.vectorstores import Pinecone
import pinecone

from trulens_evalchain import tru
from trulens_evalchain import tru_chain
from trulens_evalchain.keys import *
from trulens_evalchain.keys import PINECONE_API_KEY
from trulens_evalchain.keys import PINECONE_ENV

# Set up GPT-3 model
# NOTE(review): model_name is assigned here but is not passed to OpenAI(...)
# below; the record dump later in this notebook reports the LLM's model_name
# as 'text-davinci-003'. Confirm which model is intended.
model_name = "gpt-3.5-turbo"

# Identifier handed to TruChain when the chain is wrapped at the end of this cell.
chain_id = "TruBot_relevance"

# Pinecone configuration.
pinecone.init(
    api_key=PINECONE_API_KEY,  # find at app.pinecone.io
    environment=PINECONE_ENV  # next to api key in console
)

# Pass-through function; supplied below as get_chat_history, so whatever
# history value the chain hands over is returned unchanged.
identity = lambda h: h

# Embedding needed for Pinecone vector db.
embedding = OpenAIEmbeddings(model='text-embedding-ada-002')  # 1536 dims
# Attach to the already-populated "llmdemo" index rather than building one.
docsearch = Pinecone.from_existing_index(
    index_name="llmdemo", embedding=embedding
)
retriever = docsearch.as_retriever()

# LLM for completing prompts, and other tasks.
# The same llm object is shared by the memory and by the chain below.
llm = OpenAI(temperature=0, max_tokens=128)

# Conversation memory.
# memory_key / output_key name the entries the memory reads and writes;
# 'answer' is also the key later used to read the chain result.
memory = ConversationSummaryBufferMemory(
    max_token_limit=650,
    llm=llm,
    memory_key="chat_history",
    output_key='answer'
)

# Conversational chain puts it all together.
chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
    memory=memory,
    get_chat_history=identity,
    max_tokens_limit=4096
)

# The bare string literal below is a disabled alternative prompt override
# (answer in the question's language). As an expression statement in the
# middle of the cell it has no effect.
"""
# Language mismatch fix:
chain.combine_docs_chain.llm_chain.prompt.template = \
    "Use the following pieces of context to answer the question at the end " \
    "in the same language as the question. If you don't know the answer, " \
    "just say that you don't know, don't try to make up an answer.\n\n" \
    "{context}\n\n" \
    "Question: {question}\n" \
    "Helpful Answer: "
"""

# Contexts fix: tell the model to rely only on the relevant retrieved
# contexts, since some of them may be unrelated to the question.
# The first sentence now ends with "at the end." — the literals previously
# joined into "at the end . Some pieces", with a stray space before the period.
chain.combine_docs_chain.llm_chain.prompt.template = (
    "Use only the relevant contexts to answer the question at the end. "
    "Some pieces of context may not be relevant. If you don't know the answer, "
    "just say that you don't know, don't try to make up an answer.\n\n"
    "Contexts: \n{context}\n\n"
    "Question: {question}\n"
    "Helpful Answer: "
)

# Per-document template: each document's page_content is prefixed with a tab
# and a "Context:" label.
chain.combine_docs_chain.document_prompt.template = "\tContext: {page_content}"

# Trulens instrumentation.
# tc wraps the chain under chain_id; later cells call tc(...) and unpack the
# result into a (result, record) pair.
tc = tru_chain.TruChain(chain, chain_id=chain_id)

In [None]:
import numpy as np


In [None]:
# Preview of how four retrieved documents look once rendered with the
# "\tContext: {page_content}" document template and joined by blank lines.
# The line break inside the last document is written as an explicit "\n":
# a raw newline inside a single-line string literal is a SyntaxError.
print(
    "\tContext: When Shayak started building production grade machine learning models for algorithmic trading 10 years ago, he realized the need for putting the ‘science’ back in ‘data science’. Since then, he has been building systems and leading research to make machine learning and big data systems more explainable, privacy compliant, and fair. Shayak’s research at Carnegie Mellon University introduced a number of pioneering breakthroughs to the field of explainable AI. Shayak obtained his PhD in Computer Science from Carnegie Mellon University and BTech in Computer Science from the Indian Institute of Technology, Delhi.\n\n"
    "\tContext: When Shayak started building production grade machine learning models for algorithmic trading 10 years ago, he realized the need for putting the ‘science’ back in ‘data science’. Since then, he has been building systems and leading research to make machine learning and big data systems more explainable, privacy compliant, and fair. Shayak’s research at Carnegie Mellon University introduced a number of pioneering breakthroughs to the field of explainable AI. Shayak obtained his PhD in Computer Science from Carnegie Mellon University and BTech in Computer Science from the Indian Institute of Technology, Delhi.\n\n"
    "\tContext: Most recently, Shameek was Group Chief Data Officer at Standard Chartered Bank, where he helped the bank explore and adopt AI in multiple areas (e.g., credit, financial crime compliance, customer analytics, surveillance), and shaped the bank’s internal approach to responsible AI\n\n"
    "\tContext: Shameek has spent most of his career in driving responsible adoption of data analytics/ AI in the financial services industry. Most recently, Shameek was Group Chief Data Officer at Standard Chartered Bank, where he helped the bank explore and adopt AI in multiple areas and shaped the bank’s internal approach to responsible AI. \n"
    "He plays an active role in the future of AI as a member of the Bank of England’s AI Public-Private Forum and the OECD Global Partnership on AI."
)

In [None]:
# Same preview as the "Context:" variant, but with a "Context Piece:" label
# and a trailing period appended after each document.
# The line break inside the last document is written as an explicit "\n":
# a raw newline inside a single-line string literal is a SyntaxError.
print(
    "\tContext Piece: When Shayak started building production grade machine learning models for algorithmic trading 10 years ago, he realized the need for putting the ‘science’ back in ‘data science’. Since then, he has been building systems and leading research to make machine learning and big data systems more explainable, privacy compliant, and fair. Shayak’s research at Carnegie Mellon University introduced a number of pioneering breakthroughs to the field of explainable AI. Shayak obtained his PhD in Computer Science from Carnegie Mellon University and BTech in Computer Science from the Indian Institute of Technology, Delhi..\n\n"
    "\tContext Piece: When Shayak started building production grade machine learning models for algorithmic trading 10 years ago, he realized the need for putting the ‘science’ back in ‘data science’. Since then, he has been building systems and leading research to make machine learning and big data systems more explainable, privacy compliant, and fair. Shayak’s research at Carnegie Mellon University introduced a number of pioneering breakthroughs to the field of explainable AI. Shayak obtained his PhD in Computer Science from Carnegie Mellon University and BTech in Computer Science from the Indian Institute of Technology, Delhi..\n\n"
    "\tContext Piece: Most recently, Shameek was Group Chief Data Officer at Standard Chartered Bank, where he helped the bank explore and adopt AI in multiple areas (e.g., credit, financial crime compliance, customer analytics, surveillance), and shaped the bank’s internal approach to responsible AI.\n\n"
    "\tContext Piece: Shameek has spent most of his career in driving responsible adoption of data analytics/ AI in the financial services industry. Most recently, Shameek was Group Chief Data Officer at Standard Chartered Bank, where he helped the bank explore and adopt AI in multiple areas and shaped the bank’s internal approach to responsible AI. \n"
    "He plays an active role in the future of AI as a member of the Bank of England’s AI Public-Private Forum and the OECD Global Partnership on AI.."
)

In [None]:
# Display the chain_def attribute of the instrumented chain wrapper.
tc.chain_def

In [14]:
# Send one question through the instrumented chain. The call returns the
# chain's result together with the trulens record of the run.
res, record = tc({"question": "Who is Shayak?"})

Calling wrapped chain.


In [15]:
# Show only the answer text from the chain result; 'answer' matches the
# output_key configured on the conversation memory.
print(res['answer'])

 Shayak is a computer scientist who obtained his PhD in Computer Science from Carnegie Mellon University and BTech in Computer Science from the Indian Institute of Technology, Delhi. He has been building systems and leading research to make machine learning and big data systems more explainable, privacy compliant, and fair for the past 10 years.


In [16]:
# Display the record as nested plain dictionaries. Objects that cannot be
# serialised appear under a '_NON_SERIALIZED_OBJECT' key, as the output shows.
TruDB.dictify(record)

{'memory': None,
 'callback_manager': {'_NON_SERIALIZED_OBJECT': {'class': 'SharedCallbackManager',
   'module': 'langchain.callbacks.shared',
   'bases': ['Singleton', 'BaseCallbackManager']}},
 'verbose': False,
 'chain': {'memory': {'human_prefix': 'Human',
   'ai_prefix': 'AI',
   'llm': {'cache': None,
    'verbose': False,
    'callback_manager': {'_NON_SERIALIZED_OBJECT': {'class': 'SharedCallbackManager',
      'module': 'langchain.callbacks.shared',
      'bases': ['Singleton', 'BaseCallbackManager']}},
    'client': {'_NON_SERIALIZED_OBJECT': {'class': 'type',
      'module': 'builtins',
      'bases': ['object']}},
    'model_name': 'text-davinci-003',
    'temperature': 0.0,
    'max_tokens': 128,
    'top_p': 1,
    'frequency_penalty': 0,
    'presence_penalty': 0,
    'n': 1,
    'best_of': 1,
    'model_kwargs': {},
    'openai_api_key': None,
    'openai_api_base': None,
    'openai_organization': None,
    'batch_size': 20,
    'request_timeout': None,
    'logit_bias

In [19]:
# Feedback providers.
hugs = tru_feedback.Huggingface()
openai = tru_feedback.OpenAI()

# Feedback functions bound to the parts of the record they evaluate.
# f_lang_match is defined once; an identical second definition was removed.
f_toxic = tru_feedback.Feedback(hugs.not_toxic).on_response()
f_lang_match = tru_feedback.Feedback(hugs.language_match).on(
    text1="prompt", text2="response"
)
f_relevance = tru_feedback.Feedback(openai.qs_relevance).on(
    question="input", statement="output"
)

# Question/statement relevance where the statement comes from the documents
# passed to the combine-docs step, one evaluation per document's page_content.
f_qs_relevance = (
    tru_feedback.Feedback(openai.qs_relevance)
    .on(
        question="input",
        statement=Record.chain.combine_docs_chain._call.args.inputs.input_documents,
    )
    .on_multiple(multiarg="statement", each_query=Record.page_content)
)

# NOTE(review): f_relevance and f_qs_relevance both wrap openai.qs_relevance,
# and the result displayed in the next cell has a single 'qs_relevance' entry
# for the four functions passed here. Confirm whether results are keyed by
# function name (one overwriting the other) and whether f_relevance was meant
# to use a different provider function.
feedbacks = tru.run_feedback_functions(
    chain=tc,
    record=record,
    feedback_functions=[f_qs_relevance, f_toxic, f_lang_match, f_relevance],
)

huggingface: 0request [00:00, ?request/s]

openai: 0request [00:00, ?request/s]

In [20]:
# Display the mapping returned by tru.run_feedback_functions.
feedbacks

{'qs_relevance': 1.0,
 'not_toxic': 0.0008220960735343397,
 'language_match': 0.6795655123423785}

In [None]:
# Walk over the documents that were passed to the combine-docs step of the
# recorded run, printing each document and then its page content.
input_documents_query = Record.chain.combine_docs_chain._call.args.inputs.input_documents
for doc in TruDB.project(query=input_documents_query, obj=record):
    print(doc)
    content = TruDB.project(query=Record.page_content, obj=doc)
    print(content)

In [None]:
# Display the feedback results again.
feedbacks

In [None]:
# e = Endpoint(name="openai", rpm=120)
# print(e.pace.qsize())

In [None]:
# Manual pacing check: show the openai endpoint's progress bar, then call
# pace_me() over and over.
# NOTE(review): the loop has no exit condition, so this cell runs until the
# kernel is interrupted and will stall a "Restart & Run All".
tru.endpoint_openai.tqdm.display()
i = 0  # number of pace_me() calls completed so far
while True:
    # print(e.pace.qsize())
    tru.endpoint_openai.pace_me()
    # print(i)
    i+=1

In [None]:
# tru_feedback.huggingface_language_match(prompt="Hello there?", response="How are you?")

In [None]:
# Alternative database back ends / locations tried earlier, kept for reference:
# db = LocalTinyDB("slackbot.json")
#tru.init_db("slackbot.sql")
#db = LocalSQLite("slackbot.sql.db")
# Open the SQLite-backed database with its default arguments.
db = LocalSQLite()

In [None]:
# Fetch the stored records and feedback; the call returns two objects.
# NOTE(review): presumably chain_ids=[] means "no filter on chain id" — confirm.
df, dff = db.get_records_and_feedback(chain_ids=[])

In [None]:
# Display the first object returned by db.get_records_and_feedback.
df

In [None]:
import ast
from pprint import PrettyPrinter
pp = PrettyPrinter(compact=True)

# Render every stored record as HTML widgets: the question, the answer, the
# prompt template in use, and each context chunk given to the combine-docs step.
for i, row in df.iterrows():
    display(widgets.HTML(f"<b>Question:</b> {row.input}"))
    display(widgets.HTML(f"<b>Answer:</b> {row.output}"))

    # row.details is expected to be the text of a Python string literal that
    # wraps a JSON document. ast.literal_eval unwraps the literal without
    # executing arbitrary code (unlike eval); json.loads then decodes it.
    details = json.loads(ast.literal_eval(row.details))

    combine_docs = details['chain']['combine_docs_chain']
    display(widgets.HTML(str(combine_docs['llm_chain']['prompt']['template'])))

    for doc in combine_docs['_call']['args']['inputs']['input_documents']:
        # The <div> is closed inside the rendered markup. Only page_content is
        # shown; doc['metadata']['source'] is not part of the rendered HTML.
        display(widgets.HTML(f"""
        <div style="border: 1px solid black; padding: 5px;">
        <b>Context chunk</b>: {doc['page_content']}
        </div>
        """))

    print()


In [None]:
# Select, for each stored record: the whole record, its ids, and the question
# and answer of the top-level chain call.
# NOTE(review): this rebinds df, which earlier held the result of
# db.get_records_and_feedback; cells run after this one see the new value.
df = db.select(
    Record,
    Record.record_id,
    Record.chain_id,
    Record.chain._call.args.inputs.question,
    Record.chain._call.rets.answer)

In [None]:
# Display the result of the db.select(...) call.
df

In [None]:
# For every selected record, run the huggingface language-match feedback on
# its question/answer pair and store the result in the database.
# NOTE(review): the loop rebinds the notebook-level names chain_id and
# model_name set in the chain-setup cell; re-running earlier cells afterwards
# will see the loop's last values.
# NOTE(review): this cell calls tru.run_feedback_function (singular, positional
# question/answer plus a list of functions), while the feedback cell earlier in
# the notebook calls tru.run_feedback_functions(chain=..., record=...,
# feedback_functions=...). Confirm both entry points exist in the installed
# version.
for row_id, row in df.iterrows():
    record_id = row.record_id
    chain_id = row.chain_id

    # Columns are addressed by the dotted query paths used in db.select(...).
    main_question = row['Record.chain._call.args.inputs.question']
    main_answer = row['Record.chain._call.rets.answer']

    print(chain_id, record_id, main_question)
    # Disabled code kept as a bare string literal (never executed): overall
    # hate / sentiment / relevance feedback through the openai provider.
    """
    
    print(question, answer)

    # Run feedback function and get value

    feedback = tru.run_feedback_function(
        main_question, main_answer, [
            tru_feedback.get_not_hate_function(
                evaluation_choice='prompt',
                provider='openai',
                model_engine='moderation'
            ),
            tru_feedback.get_sentimentpositive_function(
                evaluation_choice='response',
                provider='openai',
                model_engine='gpt-3.5-turbo'
            ),
            tru_feedback.get_relevance_function(
                evaluation_choice='both',
                provider='openai',
                model_engine='gpt-3.5-turbo'
            )
        ]
    )
    print(f"will insert overall feedback for chain {chain_id}, record {record_id}")
    db.insert_feedback(record_id=record_id, chain_id=chain_id, feedback=feedback)
    """
    
    # display(JSON(row.Record))
    # print(row.Record['chain'])

    # Only referenced by the disabled per-document block below.
    model_name = "gpt-3.5-turbo"

    # Disabled code kept as a bare string literal (never executed): per-document
    # question/statement relevance feedback.
    """
    for page in TruDB.project(query=Record.chain.combine_docs_chain._call.args.inputs.input_documents, obj=row.Record):
        answer = page['page_content']
        feedback = tru.run_feedback_function(
            main_question,
            answer,
	        [
            tru_feedback.get_qs_relevance_function(
                evaluation_choice='prompt',
                provider='openai',
                model_engine=model_name
            )]
        )
        db.insert_feedback(record_id=record_id, chain_id=chain_id, feedback=feedback)

    """

    # The only feedback actually computed: language match between the
    # question and the answer.
    feedback = tru.run_feedback_function(
        main_question, main_answer, [
            tru_feedback.get_language_match_function(
                provider='huggingface'
            )
        ]
    )
    print(f"will insert language match feedback for chain {chain_id}, record {record_id}")
    db.insert_feedback(record_id=record_id, chain_id=chain_id, feedback=feedback)

    # feedback = tru.run_feedback_function(

    #for leaf in TruDB.leafs(row.Record):
    #    print(leaf)


In [None]:
# Display the record id of `row`, i.e. whichever row the most recently run
# loop over df left bound to that name.
row.record_id

In [None]:
# Hand-written feedback result, one entry per feedback function name.
feedback = dict(
    openai_hate_function=1.849137515819166e-05,
    openai_sentimentpositive_feedback_function=1,
    openai_relevance_function=10,
)

In [None]:
# Store the hand-written feedback dict with 2 as the first positional argument.
# NOTE(review): the loop cell above calls insert_feedback with record_id=,
# chain_id= and feedback= keywords; here only two positional arguments are
# given. Confirm against the method signature that feedback lands in the
# intended parameter.
db.insert_feedback(2, feedback)

In [None]:
# Run a select of Record against db.feedbacks instead of the default table
# and display the result.
db.select(Record, table=db.feedbacks)

In [None]:
# One-off check: question/statement relevance of a fixed question and a fixed
# biography text, evaluated through the openai provider.
model_name = "gpt-3.5-turbo"
qs_relevance_functions = [
    tru_feedback.get_qs_relevance_function(
        evaluation_choice='prompt',
        provider='openai',
        model_engine=model_name,
    ),
]
feedback = tru.run_feedback_function(
    "Who is Piotr?",
    "Piotr Mardziel works on transparency and accountability in machine learning with applications to security, privacy, and fairness. He holds Bachelor’s and Master’s degrees from the Worcester Polytechnic Institute and a PhD in computer science from University of Maryland, College Park. He has conducted post-doctoral research at Carnegie Mellon University, as well as taught classes in trustworthy machine learning at Stanford University and machine learning privacy and security at Carnegie Mellon University.",
    qs_relevance_functions,
)

In [None]:
# Display the most recently assigned feedback value.
feedback