In [None]:
# !pip install trulens trulens-providers-openai trulens-connectors-snowflake chromadb openai

In [None]:
# Switch TruLens over to OTEL tracing. This cell sits ahead of the cells that
# import trulens.

import os

os.environ.update({"TRULENS_OTEL_TRACING": "1"})

In [None]:
# Make the repository's base directory importable.

from pathlib import Path
import sys

# The base directory is two levels above the current working directory; it is
# put on sys.path (once) so that the test folder can be imported.
base_dir = Path.cwd().parent.parent.resolve()
if sys.path.count(str(base_dir)) == 0:
    print(f"Adding {base_dir} to sys.path")
    sys.path += [str(base_dir)]

In [None]:
# Set up logging: send DEBUG-and-above records from "trulens" loggers to stdout.

import logging
import sys

# Name used to recognise the handler installed by this cell on later re-runs.
TRULENS_LOG_HANDLER_NAME = "trulens_stdout"

root = logging.getLogger()
root.setLevel(logging.DEBUG)

# Re-running this cell would otherwise stack another StreamHandler on the root
# logger and print every message once per execution, so drop earlier copies.
for stale_handler in [
    h for h in root.handlers if h.get_name() == TRULENS_LOG_HANDLER_NAME
]:
    root.removeHandler(stale_handler)

handler = logging.StreamHandler(sys.stdout)
handler.set_name(TRULENS_LOG_HANDLER_NAME)
handler.setLevel(logging.DEBUG)
formatter = logging.Formatter(
    "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
# Only let through records from the "trulens" logger hierarchy.
handler.addFilter(logging.Filter("trulens"))
handler.setFormatter(formatter)
root.addHandler(handler)

In [None]:
# os.environ["SNOWFLAKE_ACCOUNT"] = "..."
# os.environ["SNOWFLAKE_USER"] = "..."
# os.environ["SNOWFLAKE_USER_PASSWORD"] = "..."
# os.environ["SNOWFLAKE_DATABASE"] = "..."
# os.environ["SNOWFLAKE_SCHEMA"] = "..."
# os.environ["SNOWFLAKE_WAREHOUSE"] = "..."
# os.environ["SNOWFLAKE_ROLE"] = "..."

In [None]:
# Open a Snowpark session configured from the SNOWFLAKE_* environment variables.
import os

from snowflake.snowpark import Session
from trulens.connectors.snowflake import SnowflakeConnector

# Every variable is read with os.environ[...], so a missing one raises KeyError
# here, before any session is created.
snowflake_connection_parameters = dict(
    account=os.environ["SNOWFLAKE_ACCOUNT"],
    user=os.environ["SNOWFLAKE_USER"],
    password=os.environ["SNOWFLAKE_USER_PASSWORD"],
    database=os.environ["SNOWFLAKE_DATABASE"],
    schema=os.environ["SNOWFLAKE_SCHEMA"],
    role=os.environ["SNOWFLAKE_ROLE"],
    warehouse=os.environ["SNOWFLAKE_WAREHOUSE"],
)
snowpark_session = (
    Session.builder
    .configs(snowflake_connection_parameters)
    .create()
)

# TruSession is no longer required as long as snowflake connector exists
sf_connector = SnowflakeConnector(snowpark_session=snowpark_session)

In [None]:
# Define app.
from trulens.core.otel.instrument import instrument
from trulens.otel.semconv.trace import SpanAttributes


class TestApp:
    """Toy retrieval-then-generation app used to exercise live tracing.

    Both stages return canned values, so no external service is called.
    """

    def query(self, input_entry) -> str:
        """Answer one input entry.

        Args:
            input_entry: Either the query itself (e.g. a plain string) or a
                container with a ``"query"`` key holding it.

        Returns:
            The string produced by :meth:`generation`.
        """
        # Strings are excluded explicitly: `"query" in some_string` is a
        # substring test, so a question that merely contains the word "query"
        # would be indexed with a string key and raise TypeError.
        if not isinstance(input_entry, str) and "query" in input_entry:
            query = input_entry["query"]
        else:
            query = input_entry
        retrieved_contexts = self.get_contexts(query)
        return self.generation(query, retrieved_contexts)

    @instrument(
        span_type=SpanAttributes.SpanType.RETRIEVAL,
        attributes={
            SpanAttributes.RETRIEVAL.QUERY_TEXT: "query",
            SpanAttributes.RETRIEVAL.RETRIEVED_CONTEXTS: "return",
        },
    )
    def get_contexts(self, query: str) -> list[str]:
        """Stub retrieval step: return fixed placeholder contexts.

        Args:
            query: The query text; not used by this stub.

        Returns:
            Always ``["a", "b", "c", "d"]``.
        """
        return ["a", "b", "c", "d"]

    @instrument(
        span_type=SpanAttributes.SpanType.GENERATION,
    )
    def generation(self, query: str, contexts: list[str]) -> str:
        """Stub generation step: return a canned completion.

        Args:
            query: The query text; not used by this stub.
            contexts: Retrieved contexts; only checked for emptiness.

        Returns:
            An apology string when ``contexts`` is empty, otherwise the canned
            completion.
        """
        if not contexts:
            return "Sorry, I couldn't find an answer to your question."
        completion = "This is a test completion."
        # The fallback can only trigger once the canned completion above is
        # replaced by something that may come back empty.
        return completion or "Did not find an answer."

In [None]:
# Create TruLens instrumented app from custom app.

import getpass
import os
import uuid

from trulens.apps.app import TruApp
from trulens.core.app import trace_with_run

# getpass.getuser() is used instead of os.getlogin(): the latter raises OSError
# when the process has no controlling terminal, which is common for notebook
# kernels, containers and CI jobs.
APP_NAME = f"{getpass.getuser()} test live tracing run {uuid.uuid4()}"
APP_VERSION = "V1"

test_app = TestApp()

# Wrap the custom app, with `query` as its main method and the Snowflake
# connector created earlier as the connector.
tru_app = TruApp(
    test_app,
    app_name=APP_NAME,
    app_version=APP_VERSION,
    connector=sf_connector,
    main_method=test_app.query,
)

### Start the run (over a pandas DataFrame or rows in the user's table) to invoke the user's app directly and start ingestion

#### Here we use a user-provided test dataset (a plain list of query strings)

In [None]:
# Plain-string queries that are fed to the app one by one.
test_queries = [
    "What is the capital city of New Zealand?",
    "What is the largest institution of higher education in Washington state?",
    "What wave of coffee culture is Starbucks seen to represent in the New Zealand?",
    "What year was Washington State University founded?",
    "Which university has a strong focus on veterinary medicine and agriculture?",
    "Which landmark in Seattle was built for the 1962 World’s Fair?",
]

In [None]:
run_name = "test_live_tracing_run_" + str(uuid.uuid4())


# Per the original author's note, the decorator accepts either a TruApp
# instance or None for `app`; with None (tracing disabled) it passes calls
# through to the undecorated function.
@trace_with_run(app=tru_app, run_name=run_name)
def run_queries_with_test_app(test_data):
    """Send every entry of ``test_data`` through ``test_app.query``."""
    for question in test_data:
        test_app.query(question)


run_queries_with_test_app(test_queries)

In [None]:
# Look up the run by the name that was passed to trace_with_run and show its
# description as this cell's output.
run = tru_app.get_run(run_name=run_name)
run.describe()

#### We can also map an arbitrary part of the input to the input attribute of the Record Root span, so that users can choose which fields are displayed as the main input in the run view when using the `trace_with_run` decorator.

This is useful when the app is passed complex objects or dictionaries as inputs, while users might only be interested in showing certain fields as the main input in the UI for readability.

In [None]:
run_name = "test_live_tracing_run_" + str(uuid.uuid4())

# Dict-shaped inputs: each entry carries a user id alongside the query text.
test_input_set = [
    dict(
        user_id="123",
        query="What is the capital city of New Zealand?",
    ),
    dict(
        user_id="234",
        query="What is the largest institution of higher education in Washington state?",
    ),
]


# Input selector: reduces the traced call's input to its "query" field(s) so
# that only those are used as the input of the record root span.
def extract_query_key_as_input(args, kwargs):
    """Extract only the query from the traced function's input data.

    Args:
        args: Positional arguments of the traced call.
        kwargs: Keyword arguments of the traced call.

    Returns:
        * for a list input: a list of ``{"query": ...}`` dicts, one per dict
          item (non-dict items are skipped);
        * for a dict input: a single ``{"query": ...}`` dict;
        * for any other input: ``str(data)``;
        * ``""`` when the call carried no arguments at all.
    """
    if args:
        data = args[0]
    elif kwargs:
        # The data may have been passed by keyword (e.g. ``test_data=...``);
        # prefer that name, otherwise fall back to the first keyword value.
        data = kwargs.get("test_data", next(iter(kwargs.values())))
    else:
        # Nothing to select from; avoid an IndexError inside the selector.
        return ""

    if isinstance(data, list):
        return [
            {
                "query": item.get("query"),
            }
            for item in data
            if isinstance(item, dict)
        ]
    elif isinstance(data, dict):
        return {
            "query": data.get("query"),
        }
    else:
        return str(data)


@trace_with_run(
    app=tru_app,
    run_name=run_name,
    input_selector=extract_query_key_as_input,
)
def run_queries_with_test_app_using_input_selector(test_data):
    """Send every entry of ``test_data`` through ``test_app.query``."""
    for entry in test_data:
        # The whole entry is handed over; TestApp.query picks out the "query"
        # field itself.
        test_app.query(entry)

In [None]:
# Run the traced function over the dict-shaped inputs in test_input_set.
run_queries_with_test_app_using_input_selector(test_input_set)