# TruLens-Canopy Quickstart

Canopy is an open-source framework and context engine built on top of the Pinecone vector database so you can build and host your own production-ready chat assistant at any scale. By integrating TruLens into your Canopy assistant, you can quickly iterate on and gain confidence in the quality of your chat assistant.

## Set Keys

In [1]:
import os
from getpass import getpass

# Credentials must never be committed in a notebook. Each key is read from
# the environment; if it is missing, prompt for it without echoing the value.
for key_name in ("PINECONE_API_KEY", "OPENAI_API_KEY"):
    if not os.environ.get(key_name):
        os.environ[key_name] = getpass(f"Enter {key_name}: ")

## Load data

In [2]:
import warnings

import pandas as pd

# Suppress all warnings for the rest of the notebook session.
warnings.filterwarnings('ignore')

# Parquet file holding the raw documents used throughout this quickstart.
DOCS_PARQUET_URL = "https://storage.googleapis.com/pinecone-datasets-dev/pinecone_docs_ada-002/raw/file1.parquet"

data = pd.read_parquet(DOCS_PARQUET_URL)

# Preview the first rows.
data.head()

Unnamed: 0,id,text,source,metadata
0,728aeea1-1dcf-5d0a-91f2-ecccd4dd4272,# Scale indexes\n\n[Suggest Edits](/edit/scali...,https://docs.pinecone.io/docs/scaling-indexes,"{'created_at': '2023_10_25', 'title': 'scaling..."
1,2f19f269-171f-5556-93f3-a2d7eabbe50f,# Understanding organizations\n\n[Suggest Edit...,https://docs.pinecone.io/docs/organizations,"{'created_at': '2023_10_25', 'title': 'organiz..."
2,b2a71cb3-5148-5090-86d5-7f4156edd7cf,# Manage datasets\n\n[Suggest Edits](/edit/dat...,https://docs.pinecone.io/docs/datasets,"{'created_at': '2023_10_25', 'title': 'datasets'}"
3,1dafe68a-2e78-57f7-a97a-93e043462196,# Architecture\n\n[Suggest Edits](/edit/archit...,https://docs.pinecone.io/docs/architecture,"{'created_at': '2023_10_25', 'title': 'archite..."
4,8b07b24d-4ec2-58a1-ac91-c8e6267b9ffd,# Moving to production\n\n[Suggest Edits](/edi...,https://docs.pinecone.io/docs/moving-to-produc...,"{'created_at': '2023_10_25', 'title': 'moving-..."


## Setup Tokenizer

In [3]:
from canopy.tokenizer import Tokenizer

# Set up the tokenizer class, then obtain an instance to work with.
Tokenizer.initialize()
tokenizer = Tokenizer()

# Sanity check: tokenize a short sample sentence and display the tokens.
sample_text = "Hello world!"
tokenizer.tokenize(sample_text)

['Hello', ' world', '!']

## Create and Load Index

In [4]:
from tqdm.auto import tqdm

from canopy.knowledge_base import KnowledgeBase, list_canopy_indexes
from canopy.models.data_models import Document

INDEX_NAME = "my-index"
batch_size = 10

# Create the Canopy index only if no existing index name ends with INDEX_NAME.
kb = KnowledgeBase(index_name=INDEX_NAME)
index_exists = any(
    existing_name.endswith(INDEX_NAME) for existing_name in list_canopy_indexes()
)
if not index_exists:
    kb.create_canopy_index()

# Build a fresh KnowledgeBase handle for the index and connect to it.
kb = KnowledgeBase(index_name=INDEX_NAME)
kb.connect()

# One Document per dataframe row; the row's columns are passed as keyword arguments.
documents = [Document(**record) for _, record in data.iterrows()]

# Upsert the documents in slices of `batch_size`, with a progress bar.
for start in tqdm(range(0, len(documents), batch_size)):
    kb.upsert(documents[start: start + batch_size])

  0%|          | 0/6 [00:00<?, ?it/s]

## Create context and chat engine

In [5]:
import json

from canopy.chat_engine import ChatEngine
from canopy.context_engine import ContextEngine
from canopy.models.data_models import Query

# The context engine is built on the knowledge base; the chat engine is
# built on the context engine.
context_engine = ContextEngine(kb)
chat_engine = ChatEngine(context_engine)

## Instrument static methods used by the engine with TruLens

In [6]:
from trulens_eval.tru_custom_app import instrument

from canopy.context_engine import ContextEngine
from canopy.chat_engine import ChatEngine
from canopy.chat_engine.query_generator.base import QueryGenerator

# (class, method name) pairs to instrument, registered in this order.
for engine_cls, method_name in (
    (ContextEngine, "query"),
    (ChatEngine, "chat"),
    (QueryGenerator, "generate"),
):
    instrument.method(engine_cls, method_name)

## Create feedback functions using instrumented methods

In [12]:
import numpy as np

from trulens_eval import Feedback, Select
from trulens_eval.feedback import Groundedness
from trulens_eval.feedback.provider.openai import OpenAI as fOpenAI

# Initialize provider class
fopenai = fOpenAI()

grounded = Groundedness(groundedness_provider=fopenai)

# Selectors into the recorded calls of the instrumented methods:
#   user_input - content of the first message passed to `chat`
#   context    - text of every snippet returned by `context_engine.query`
#   output     - content of the first choice returned by `chat`
user_input = Select.RecordCalls.chat.args.messages[0].content
intput = user_input  # backward-compatible alias for the original, misspelled name
context = Select.RecordCalls.context_engine.query.rets.content.root[:].snippets[:].text
output = Select.RecordCalls.chat.rets.choices[0].message.content

# Define a groundedness feedback function
# (`context.collect()` passes the selected context texts as one collected argument).
f_groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons, name = "Groundedness", higher_is_better=True)
    .on(context.collect())
    .on(output)
    .aggregate(grounded.grounded_statements_aggregator)
)

# Question/answer relevance between overall question and answer.
f_qa_relevance = (
    Feedback(fopenai.relevance_with_cot_reasons, name = "Answer Relevance", higher_is_better=True)
    .on(user_input)
    .on(output)
)

# Question/statement relevance between question and each context chunk,
# averaged over the chunks.
f_context_relevance = (
    Feedback(fopenai.qs_relevance_with_cot_reasons, name = "Context Relevance", higher_is_better=True)
    .on(user_input)
    .on(context)
    .aggregate(np.mean)
)

✅ In Groundedness, input source will be set to __record__.app.context_engine.query.rets.content.root[:].snippets[:].text.collect() .
✅ In Groundedness, input statement will be set to __record__.app.chat.rets.choices[0].message.content .
✅ In Answer Relevance, input prompt will be set to __record__.app.chat.args.messages[0].content .
✅ In Answer Relevance, input response will be set to __record__.app.chat.rets.choices[0].message.content .
✅ In Context Relevance, input question will be set to __record__.app.chat.args.messages[0].content .
✅ In Context Relevance, input statement will be set to __record__.app.context_engine.query.rets.content.root[:].snippets[:].text .


In [13]:
from trulens_eval import TruCustomApp

# Wrap the chat engine in a recorder that carries the three feedback functions.
feedback_functions = [f_groundedness, f_qa_relevance, f_context_relevance]
tru_recorder = TruCustomApp(chat_engine, feedbacks=feedback_functions)

In [14]:
from canopy.models.data_models import Messages, UserMessage

# A single-message conversation containing the user's question.
question = "How can you get started with Pinecone and TruLens?"
query = [UserMessage(content=question)]

# Run the chat call inside the recorder context.
with tru_recorder as recording:
    chat_engine.chat(query)

Unsure what the main input string is for the call to chat with args [[UserMessage(role=<Role.USER: 'user'>, content='How can you get started with Pinecone and TruLens?')]].
Unsure what the main output string is for the call to chat with return type <class 'canopy.models.api_models.ChatResponse'>.


In [10]:
# Create a Tru handle and start the TruLens dashboard; the cell output
# shows the URL at which the dashboard is served.
from trulens_eval import Tru
tru = Tru()
tru.run_dashboard()

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://172.20.10.2:8501 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

In [11]:
# Display the per-app leaderboard (feedback scores, latency, total cost).
# NOTE(review): an empty `app_ids` list appears to select every recorded app,
# judging by the two rows in the output — confirm against the TruLens docs.
tru.get_leaderboard(app_ids=[])

Unnamed: 0_level_0,Context Relevance,Answer Relevance,Groundedness,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
app_hash_d31d0d4278e016266a7bab4f0b34e069,0.7,0.83,0.394444,5.7,0.002671
app_hash_727ea3fe2eefa2d7c8db72e2a75f0ca4,,,,8.0,0.003431
