# llama_index async Example

In [None]:
%load_ext autoreload
%autoreload 2
from pathlib import Path
import sys

# If running from github repo, can use this:
sys.path.append(str(Path().cwd().parent.parent.parent.resolve()))

# Uncomment for more debugging printouts.
"""
import logging
root = logging.getLogger()
root.setLevel(logging.DEBUG)

handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
root.addHandler(handler)
"""

In [None]:
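# SimpleWebPageReader(html_to_text=True) below needs the html2text package;
# uncomment the next line to install it:
# ! pip install html2text==2020.1.16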

In [None]:
# Imports main tools:
from trulens_eval import TruLlama, Feedback, Tru, feedback, Select

tru = Tru()

from llama_index import VectorStoreIndex, SimpleWebPageReader

documents = SimpleWebPageReader(html_to_text=True).load_data(
    ["http://paulgraham.com/worked.html"]
)
index = VectorStoreIndex.from_documents(documents)

query_engine = index.as_chat_engine(streaming=True)

In [None]:
response = await query_engine.achat("What did the author do growing up?")

print(response)

In [None]:
import numpy as np

# Feedback function collections: one Huggingface-based, one OpenAI-based.
hugs = feedback.Huggingface()
openai = feedback.OpenAI()

# Language match, via HuggingFace. `on_input_output()` makes it compare the
# main app input against the main app output.
f_lang_match = Feedback(hugs.language_match).on_input_output()

# Question/answer relevance between the overall question and the overall
# answer.
f_qa_relevance = Feedback(openai.relevance).on_input_output()

# Question/statement relevance between the question and each context chunk,
# averaged over the chunks with `np.mean`. The chunk text is selected
# explicitly from the return value of `aquery`: the shortcut
# `TruLlama.select_source_nodes().node.text` cannot be used here because it
# assumes the main callable of the engine is `query`, not `aquery`.
f_qs_relevance = (
    Feedback(openai.qs_relevance)
    .on_input()
    .on(Select.Record.app.aquery.rets.source_nodes[:].node.text)
    .aggregate(np.mean)
)

In [None]:
# Wrap the engine with TruLlama and attach the three feedback functions
# defined earlier in the notebook.
tru_query_engine = TruLlama(
    query_engine,
    feedbacks=[f_lang_match, f_qa_relevance, f_qs_relevance],
)

In [None]:
# Show what has been instrumented in the wrapped engine (judging by the method
# name; see the TruLlama documentation for the exact output).
tru_query_engine.print_instrumented()

In [None]:
# Instrumented query engine can operate like the original:
llm_response_async, rec_async = await tru_query_engine.aquery_with_record("What did the author do growing up?")

print(llm_response_async)

# Streaming test

Note that in llama_index streaming mode, records are filled in before the stream has completed, so a record may not capture the full streamed response. This is ongoing work.

In [None]:
# Rebuild the query engine in streaming mode and wrap it with the same three
# feedback functions.
query_engine = index.as_query_engine(streaming=True)
tru_query_engine = TruLlama(
    query_engine,
    feedbacks=[f_lang_match, f_qa_relevance, f_qs_relevance],
)

In [None]:
# Synchronous query through the instrumented streaming engine; keeps both the
# response and the second returned value, `record`.
response, record = tru_query_engine.query_with_record("What did the author do growing up?")

# Consume the response generator, printing each piece on its own line.
for chunk in response.response_gen:
    print(chunk)

In [None]:
# Display the record returned by `query_with_record` (a bare expression on the
# last line of a notebook cell is shown as the cell's output).
record

In [None]:
# Start the dashboard. `_dev` is given the directory three levels above the
# current working directory, i.e. the same directory the first cell adds to
# `sys.path` when running from the github repo.
# NOTE(review): `force=True` presumably replaces an already-running dashboard;
# confirm against the `Tru.start_dashboard` documentation.
proc = Tru().start_dashboard(
    force=True,
    _dev=Path.cwd().parent.parent.parent,
)