In [None]:
# !pip install trulens trulens-providers-openai chromadb openai

In [None]:
# Ensure we use OTEL tracing.
# The flag is written to the process environment in the first executable cell,
# ahead of every trulens import made by the cells below.
# NOTE(review): presumably TruLens reads this flag when it is imported or
# initialised, which is why it is set first — confirm against the TruLens docs.

import os

os.environ["TRULENS_OTEL_TRACING"] = "1"

In [None]:
# Set up python resolution paths.

from pathlib import Path
import sys

# The directory two levels above the current working directory is the base
# dir; it goes on sys.path (once) so that the test folder can be imported.
base_dir = Path.cwd().parent.parent.resolve()
base_dir_entry = str(base_dir)
base_dir_missing = base_dir_entry not in sys.path
if base_dir_missing:
    print(f"Adding {base_dir} to sys.path")
    sys.path.append(base_dir_entry)

In [None]:
# Set up logging.

import logging

TRULENS_HANDLER_NAME = "trulens-notebook-stdout"


def install_trulens_stdout_handler(level=logging.DEBUG):
    """Attach exactly one stdout handler for ``trulens`` records to the root logger.

    The root logger is switched to ``level`` and a ``StreamHandler`` writing to
    ``sys.stdout`` is installed on it. The handler carries a
    ``logging.Filter("trulens")``, so it only emits records from the
    ``trulens`` logger hierarchy. The handler is tagged with
    ``TRULENS_HANDLER_NAME``; handlers carrying that tag from an earlier call
    are removed first, so re-running the cell does not duplicate log lines.

    Args:
        level: Threshold applied to both the root logger and the new handler.

    Returns:
        The newly installed ``logging.StreamHandler``.
    """
    root_logger = logging.getLogger()
    root_logger.setLevel(level)

    # Iterate over a copy: removeHandler mutates root_logger.handlers.
    for existing in list(root_logger.handlers):
        if existing.get_name() == TRULENS_HANDLER_NAME:
            root_logger.removeHandler(existing)

    stream_handler = logging.StreamHandler(sys.stdout)
    stream_handler.set_name(TRULENS_HANDLER_NAME)
    stream_handler.setLevel(level)
    stream_handler.addFilter(logging.Filter("trulens"))
    stream_handler.setFormatter(
        logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    )
    root_logger.addHandler(stream_handler)
    return stream_handler


# Keep the module-level names this cell has always defined.
root = logging.getLogger()
handler = install_trulens_stdout_handler()
formatter = handler.formatter

In [None]:
# Snowflake connection settings, published as environment variables for the
# session-creation cell. The "..." values are placeholders to be filled in.
os.environ.update({
    "SNOWFLAKE_ACCOUNT": "mlplatformtest.qa6.us-west-2.aws",
    "SNOWFLAKE_USER": "...",
    "SNOWFLAKE_USER_PASSWORD": "...",
    "SNOWFLAKE_DATABASE": "...",
    "SNOWFLAKE_SCHEMA": "...",
    "SNOWFLAKE_WAREHOUSE": "...",
    "SNOWFLAKE_ROLE": "ENGINEER",
})

In [None]:
# Create snowpark session.
import os

from snowflake.snowpark import Session
from trulens.connectors.snowflake import SnowflakeConnector

# Connection option -> name of the environment variable that holds its value.
connection_option_env_vars = {
    "account": "SNOWFLAKE_ACCOUNT",
    "user": "SNOWFLAKE_USER",
    "password": "SNOWFLAKE_USER_PASSWORD",
    "database": "SNOWFLAKE_DATABASE",
    "schema": "SNOWFLAKE_SCHEMA",
    "role": "SNOWFLAKE_ROLE",
    "warehouse": "SNOWFLAKE_WAREHOUSE",
}
# Indexing os.environ directly means a missing variable fails here with a
# KeyError rather than later inside the Snowflake client.
snowflake_connection_parameters = {
    option: os.environ[env_var]
    for option, env_var in connection_option_env_vars.items()
}
session_builder = Session.builder.configs(snowflake_connection_parameters)
snowpark_session = session_builder.create()

# TruSession is no longer required as long as snowflake connector exists
sf_connector = SnowflakeConnector(snowpark_session=snowpark_session)

In [None]:
# Reference passage about the University of Washington (stored in the vector
# store under the id "uw_info" further down).
uw_info = """
The University of Washington, founded in 1861 in Seattle, is a public research university
with over 45,000 students across three campuses in Seattle, Tacoma, and Bothell.
As the flagship institution of the six public universities in Washington state,
UW encompasses over 500 buildings and 20 million square feet of space,
including one of the largest library systems in the world.
"""

# Reference passage about Washington State University (id "wsu_info").
wsu_info = """
Washington State University, commonly known as WSU, founded in 1890, is a public research university in Pullman, Washington.
With multiple campuses across the state, it is the state's second largest institution of higher education.
WSU is known for its programs in veterinary medicine, agriculture, engineering, architecture, and pharmacy.
"""

# Reference passage about the city of Seattle (id "seattle_info").
seattle_info = """
Seattle, a city on Puget Sound in the Pacific Northwest, is surrounded by water, mountains and evergreen forests, and contains thousands of acres of parkland.
It's home to a large tech industry, with Microsoft and Amazon headquartered in its metropolitan area.
The futuristic Space Needle, a legacy of the 1962 World's Fair, is its most iconic landmark.
"""

# Reference passage about Starbucks (id "starbucks_info").
starbucks_info = """
Starbucks Corporation is an American multinational chain of coffeehouses and roastery reserves headquartered in Seattle, Washington.
As the world's largest coffeehouse chain, Starbucks is seen to be the main representation of the United States' second wave of coffee culture.
"""
# Reference passage about New Zealand (stored in the vector store under the id
# "newzealand_info" further down); the only passage not about Washington state.
newzealand_info = """
New Zealand is an island country located in the southwestern Pacific Ocean. It comprises two main landmasses—the North Island and the South Island—and over 700 smaller islands.
The country is known for its stunning landscapes, ranging from lush forests and mountains to beaches and lakes. New Zealand has a rich cultural heritage, with influences from 
both the indigenous Māori people and European settlers. The capital city is Wellington, while the largest city is Auckland. New Zealand is also famous for its adventure tourism,
including activities like bungee jumping, skiing, and hiking.
"""

## Create Vector Store

Create an in-memory ChromaDB vector store and load the reference passages defined above into it.

In [None]:
# Placeholder value — substitute a real OpenAI API key before running this
# cell, and keep real keys out of version control. The embedding function
# created in the next cell reads this variable.
os.environ["OPENAI_API_KEY"] = "sk-..."

In [None]:
import chromadb
from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction

# Embed documents with OpenAI; the API key is taken from the environment.
embedding_function = OpenAIEmbeddingFunction(
    api_key=os.environ.get("OPENAI_API_KEY"),
    model_name="text-embedding-ada-002",
)


# get_or_create_collection lets the cell be re-run against the same client
# without failing because the "Washington" collection already exists.
chroma_client = chromadb.Client()
vector_store = chroma_client.get_or_create_collection(
    name="Washington", embedding_function=embedding_function
)

# Document id -> passage text, in insertion order.
documents_by_id = {
    "uw_info": uw_info,
    "wsu_info": wsu_info,
    "seattle_info": seattle_info,
    "starbucks_info": starbucks_info,
    "newzealand_info": newzealand_info,
}

# One batched add embeds all passages in a single request instead of issuing
# one embedding request per passage; ids and documents are passed by keyword
# so they cannot be mixed up positionally.
vector_store.add(
    ids=list(documents_by_id.keys()),
    documents=list(documents_by_id.values()),
)

In [None]:
from openai import OpenAI

# Chat-completions client used by TestApp.generation below.
# NOTE(review): no api_key is passed, so the client presumably picks up
# OPENAI_API_KEY from the environment set earlier — confirm.
oai_client = OpenAI()

In [None]:
# Debug aid: show the `uv pip list` lines that mention "trulens".
!uv  pip list | grep trulens

In [None]:
# Define app.


from trulens.core.otel.instrument import instrument
from trulens.otel.semconv.trace import SpanAttributes


class TestApp:
    """Small retrieve-then-generate app whose methods are instrumented as spans.

    ``query`` is the entry point. It looks up passages in the module-level
    ``vector_store`` and asks the module-level ``oai_client`` to answer the
    question using them.
    """

    @instrument(
        span_type=SpanAttributes.SpanType.RECORD_ROOT,
        attributes={
            SpanAttributes.RECORD_ROOT.INPUT: "query",
            SpanAttributes.RECORD_ROOT.OUTPUT: "return",
        },
    )
    def query(self, query: str) -> str:
        """Answer ``query``: retrieve contexts, then generate a reply from them."""
        contexts = self.get_contexts(query)
        answer = self.generation(query, contexts)
        return answer

    @instrument(
        span_type=SpanAttributes.SpanType.RETRIEVAL,
        attributes={
            SpanAttributes.RETRIEVAL.QUERY_TEXT: "query",
            SpanAttributes.RETRIEVAL.RETRIEVED_CONTEXTS: "return",
        },
    )
    def get_contexts(self, query: str) -> list[str]:
        """Query the vector store with ``n_results=4`` and return the documents flattened."""
        matches = vector_store.query(query_texts=query, n_results=4)
        # matches["documents"] is a list of lists; concatenate the inner
        # lists in their original order.
        flattened = []
        for documents in matches["documents"]:
            flattened.extend(documents)
        return flattened

    @instrument(
        span_type=SpanAttributes.SpanType.GENERATION,
    )
    def generation(self, query: str, contexts: list[str]) -> str:
        """Ask the chat model to answer ``query`` given ``contexts``.

        Returns a fixed apology when ``contexts`` is empty, and a fixed
        fallback message when the model returns no content.
        """
        if len(contexts) == 0:
            return "Sorry, I couldn't find an answer to your question."

        # The contexts list is interpolated as-is (its Python repr) into the prompt.
        prompt = (
            f"We have provided context information below. \n"
            f"---------------------\n"
            f"{contexts}"
            f"\n---------------------\n"
            f"First, say hello and that you're happy to help. \n"
            f"\n---------------------\n"
            f"Then, given this information, please answer the question: {query}"
        )
        response = oai_client.chat.completions.create(
            model="gpt-4.1",
            temperature=0,
            messages=[{"role": "user", "content": prompt}],
        )
        answer = response.choices[0].message.content
        return answer if answer else "Did not find an answer."

In [None]:
# Debug aid: show the `uv pip list` lines that mention "trulens".
!uv pip list | grep trulens

In [None]:
# Create TruLens instrumented app from custom app.

import uuid

from trulens.apps.app import TruApp

import getpass

# os.getlogin() requires a controlling terminal and raises OSError when the
# process has none (typical for Jupyter kernels, containers and CI).
# getpass.getuser() resolves the user name from the environment / password
# database instead, so this cell also works in those settings.
# The UUID suffix gives each execution of the cell a distinct app name.
APP_NAME = f"{getpass.getuser()} pupr e2e {uuid.uuid4()}"
APP_VERSION = "V1"

# Wrap the custom app so that it is recorded through the Snowflake connector.
test_app = TestApp()
tru_app = TruApp(
    test_app, app_name=APP_NAME, app_version=APP_VERSION, connector=sf_connector
)

## Add a run to the app

In [None]:
from trulens.core.run import Run
from trulens.core.run import RunConfig

# A fresh UUID suffix gives every execution of this cell a distinct run name.
run_name = f"test_run_0623_{uuid.uuid4()}"

# source_type="DATAFRAME": the inputs are handed over as a pandas DataFrame
# when the run is started (run.start(input_df=...) further down).
# NOTE(review): dataset_spec presumably maps the RECORD_ROOT.INPUT span
# attribute to the DataFrame column named "query" — confirm.
# NOTE(review): dataset_name="table_name" looks like a placeholder label — confirm.
run_config = RunConfig(
    run_name=run_name,
    dataset_name="table_name",
    source_type="DATAFRAME",
    dataset_spec={"RECORD_ROOT.INPUT": "query"},
)  # type: ignore

# Register the run on the instrumented app; the returned Run object is what
# the following cells start, poll and describe.
run: Run = tru_app.add_run(run_config=run_config)

### Start the run: invoke the app on each input row (from a pandas DataFrame or from rows in a user table) and begin ingestion

#### Here we use a user-provided test DataFrame

In [None]:
import pandas as pd

# Evaluation questions; each one becomes one input row of the run.
test_queries = [
    "What wave of coffee culture is Starbucks seen to represent in the United States?",
    "What is the largest city in New Zealand?",
    "What is the main campus of the University of Washington located?",
    "What is the capital city of New Zealand?",
    "What is the largest institution of higher education in Washington state?",
    "What wave of coffee culture is Starbucks seen to represent in the New Zealand?",
    "What year was Washington State University founded?",
    "Which university has a strong focus on veterinary medicine and agriculture?",
    "Which landmark in Seattle was built for the 1962 World’s Fair?",
    "How many campuses does the University of Washington have?",
    "Where is Starbucks headquartered?",
    "Which city is surrounded by water, mountains, and forests in the Pacific Northwest?",
    "What is the oldest public research university in Washington state?",
    "Which university has over 500 buildings and 20 million square feet of space?",
    "What natural features make New Zealand a popular destination for adventure tourism?",
    "Which companies have headquarters in the Seattle metropolitan area?",
    "What are some popular adventure activities in New Zealand?",
    "Which university is located in Pullman, Washington?",
    "What are the three campuses of the University of Washington?",
    "Which Washington university is known for pharmacy and engineering programs?",
]

# Wrap each question in a single-key record so that the DataFrame ends up with
# exactly one column, "query".
test_data_entries = [{"query": question} for question in test_queries]


user_input_data_df = pd.DataFrame(data=test_data_entries)

In [None]:
# Start the run, passing the test DataFrame built above as its input.
# NOTE(review): presumably this calls TestApp.query once per row — confirm.
run.start(input_df=user_input_data_df)

In [None]:
# Show the run's current status as the cell output.
run.get_status()

In [None]:
# run.describe()['run_metadata']

In [None]:
# Request three metrics for the run, identified by name.
# NOTE(review): the status is polled again in the next cell, which suggests
# the computation may not be finished when this call returns — confirm.
run.compute_metrics([
    "answer_relevance",
    "context_relevance",
    "groundedness",
])

In [None]:
# Check the run's status again now that metric computation has been requested.
run.get_status()

In [None]:
# Display the run's description as the cell output.
run.describe()

In [None]:
# run.cancel()
# run.delete()