In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# Ensure we use OTEL tracing.
# NOTE(review): presumably this flag has to be set before any trulens module is
# imported, which would explain why this cell sits ahead of every trulens
# import in the notebook — confirm.

import os

os.environ["TRULENS_OTEL_TRACING"] = "1"

In [None]:
# Set up python resolution paths.

from pathlib import Path
import sys

# Put the directory two levels above the current working directory on
# sys.path so that the test folder can be imported. Path.cwd() is a
# classmethod, so it is called on the class rather than on a throwaway
# Path() instance.
base_dir = Path.cwd().parent.parent.resolve()
if str(base_dir) not in sys.path:
    print(f"Adding {base_dir} to sys.path")
    sys.path.append(str(base_dir))

In [None]:
# Set up logging.
#
# Records at DEBUG and above from the "trulens" logger hierarchy are written
# to stdout. The handler carries a fixed name so that re-running this cell
# replaces the handler installed by the previous execution instead of adding
# a second one (which would print every record twice).

import logging

root = logging.getLogger()
root.setLevel(logging.DEBUG)

# Remove any handler left behind by an earlier execution of this cell.
for stale_handler in [
    h
    for h in root.handlers
    if getattr(h, "name", None) == "trulens-notebook-stdout"
]:
    root.removeHandler(stale_handler)

handler = logging.StreamHandler(sys.stdout)
handler.name = "trulens-notebook-stdout"
handler.setLevel(logging.DEBUG)
formatter = logging.Formatter(
    "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
# Only pass records whose logger name is "trulens" or a descendant of it.
handler.addFilter(logging.Filter("trulens"))
handler.setFormatter(formatter)
root.addHandler(handler)

In [None]:
# Create snowpark session.
import os

from dotenv import load_dotenv
from snowflake.snowpark import Session
from trulens.connectors.snowflake import SnowflakeConnector

# load_dotenv() is expected to populate os.environ from a local .env file
# before the credentials are read below.
load_dotenv()

# Each connection parameter comes from its own environment variable; the
# lookup uses os.environ[...] so a missing variable raises KeyError.
snowflake_connection_parameters = {
    param: os.environ[env_var]
    for param, env_var in (
        ("account", "SNOWFLAKE_ACCOUNT"),
        ("user", "SNOWFLAKE_USER"),
        ("password", "SNOWFLAKE_USER_PASSWORD"),
        ("database", "SNOWFLAKE_DATABASE"),
        ("schema", "SNOWFLAKE_SCHEMA"),
        ("role", "SNOWFLAKE_ROLE"),
        ("warehouse", "SNOWFLAKE_WAREHOUSE"),
    )
}

# Open the Snowpark session and wrap it in the TruLens Snowflake connector.
snowpark_session = (
    Session.builder
    .configs(snowflake_connection_parameters)
    .create()
)

sf_connector = SnowflakeConnector(snowpark_session=snowpark_session)

In [None]:
# Create the TruSession, passing it the Snowflake connector (sf_connector)
# created from the Snowpark session.

from trulens.core.session import TruSession

tru_session = TruSession(connector=sf_connector)

# Create Test App

## Create vector store

In [None]:
from trulens.benchmark.benchmark_frameworks.dataset.beir_loader import (
    TruBEIRDataLoader,
)

# Load the "hotpotqa" dataset through the BEIR loader (download=True) and keep
# n_samples rows; the fixed random_state makes the sample repeatable.
n_samples = 10
beir_data_loader = TruBEIRDataLoader(dataset_name="hotpotqa", data_folder="./")
hotpotqa = beir_data_loader.load_dataset_to_df(download=True).sample(
    n=n_samples, random_state=42
)

In [None]:
# Collect every distinct context passage referenced by the sampled questions.
# dict.fromkeys() de-duplicates while keeping first-seen order, so the list
# (and therefore the positional ids given to each passage when it is indexed)
# is identical on every run. A plain set() would order the strings differently
# from one kernel to the next because string hashing is randomized.
all_contexts = list(
    dict.fromkeys(
        context["text"]
        for expected_chunks in hotpotqa["expected_chunks"]
        for context in expected_chunks
    )
)

In [None]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model used both to embed the context passages and to
# embed incoming queries in TestApp.get_contexts.
# NOTE(review): trust_remote_code=True presumably permits running code shipped
# with the model repository — confirm it is actually required for this model.
embed_model = SentenceTransformer(
    "Snowflake/snowflake-arctic-embed-m", trust_remote_code=True
)

In [None]:
# Embed all context passages; the bare last expression displays the shape of
# the resulting embeddings.
ctx_embeddings = embed_model.encode(all_contexts)
ctx_embeddings.shape

In [None]:
import chromadb

# Index the passages in a Chroma collection. Each passage's id is its position
# in all_contexts, rendered as a string.
chroma_client = chromadb.Client()
vector_store = chroma_client.get_or_create_collection(name="hotpotqa_contexts")

vector_store.upsert(
    ids=list(map(str, range(len(all_contexts)))),
    embeddings=ctx_embeddings,
    documents=all_contexts,
)

In [None]:
from snowflake.cortex import complete
from trulens.core.otel.instrument import instrument
from trulens.otel.semconv.trace import SpanAttributes


class TestApp:
    """Small retrieve-then-generate app whose methods are instrumented.

    ``query`` is the entry point: it fetches contexts from the vector store
    via ``get_contexts`` and hands them to ``generation``. The class also
    relies on two notebook-level globals: ``embed_model`` (query embedding)
    and ``snowpark_session`` (passed to ``complete``).

    Attributes:
        vector_store: Object exposing ``query(query_embeddings=...,
            n_results=...)`` whose result is indexed as
            ``response["documents"][0]``.
        generation_model: Model name forwarded to ``complete``.
    """

    # TODO Not technically the right way to pass ground truth output, but using it as a workaround for this bug bash.
    # NOTE(review): nothing in this class handles ground-truth output — confirm
    # whether the TODO above is still relevant.
    def __init__(self, generation_model: str, vector_store):
        self.vector_store = vector_store
        self.generation_model = generation_model

    @instrument()
    def query(self, query: str) -> str:
        """Answer ``query`` using contexts retrieved for it."""
        retrieved_contexts = self.get_contexts(query)
        return self.generation(query, retrieved_contexts)

    @instrument(
        span_type=SpanAttributes.SpanType.RETRIEVAL,
        attributes=lambda ret, exception, *args, **kwargs: {
            # Presumably args mirrors the call's positional arguments with
            # self first, making args[1] the query text — confirm. Fall back
            # to the keyword form so get_contexts(query=...) does not raise
            # IndexError while the span attributes are being built.
            SpanAttributes.RETRIEVAL.QUERY_TEXT: (
                args[1] if len(args) > 1 else kwargs.get("query")
            ),
            SpanAttributes.RETRIEVAL.NUM_CONTEXTS: len(ret),
            SpanAttributes.RETRIEVAL.RETRIEVED_CONTEXTS: ret,
        },
    )
    def get_contexts(self, query: str, n_results: int = 3) -> list[str]:
        """Return up to ``n_results`` documents from the vector store for ``query``.

        Retrieval is best-effort: if embedding or the vector-store lookup
        raises, the failure is logged and an empty list is returned so the
        caller can still proceed.
        """
        try:
            response = self.vector_store.query(
                query_embeddings=embed_model.encode(
                    [query], prompt_name="query"
                ),
                n_results=n_results,
            )
            # Only one query was submitted, so take the first result list.
            return response["documents"][0]
        except Exception:
            # Keep the original fallback, but make the failure visible. The
            # logger name sits under "trulens" so the record passes the name
            # filter on the notebook's stdout handler.
            logging.getLogger("trulens.examples.test_app").warning(
                "Context retrieval failed for query %r; continuing with no contexts.",
                query,
                exc_info=True,
            )
            return []

    @instrument(
        span_type=SpanAttributes.SpanType.GENERATION,
    )
    def generation(self, query: str, contexts: list[str]) -> str:
        """Build a prompt from ``query`` and ``contexts`` and return the completion.

        The contexts are joined with newlines; an empty list yields an empty
        context section.
        """
        prompt_template = "Generate a response to the following question: \n\n{}\n\nContext: \n\n{}"
        prompt = prompt_template.format(query, "\n".join(contexts))
        resp = complete(self.generation_model, prompt, session=snowpark_session)
        # The method is declared to return str; fail loudly if complete()
        # handed back anything else.
        assert isinstance(resp, str)
        return resp

In [None]:
# Create TruLens instrumented app from custom app.

from datetime import datetime
import getpass

from trulens.apps.app import TruApp

# The app name combines the current user name with a timestamp, upper-cased.
# getpass.getuser() replaces os.getlogin(): the latter needs a controlling
# terminal and raises OSError in kernels started without one (containers,
# hosted Jupyter), whereas getuser() consults the environment / user database.
APP_NAME = f"{getpass.getuser()} pupr e2e {datetime.now().strftime('%Y%m%d%H%M%S')}".upper()
APP_VERSION = "V3"

test_app = TestApp(generation_model="llama3.1-70b", vector_store=vector_store)
# Wrap the custom app; test_app.query is declared as the main method.
tru_app = TruApp(
    test_app,
    app_name=APP_NAME,
    app_version=APP_VERSION,
    connector=sf_connector,
    main_method=test_app.query,
)

In [None]:
# Print the Snowflake object type, name and version recorded on tru_app; the
# trailing comments give the value each line is expected to show.
print(tru_app.snowflake_object_type)  # EXTERNAL AGENT
print(tru_app.snowflake_object_name)  # APP_NAME
print(tru_app.snowflake_object_version)  # APP_VERSION

# Look up the agent versions listed under APP_NAME and print them.
version_df = tru_app.snowflake_app_dao.list_agent_versions(APP_NAME)
print(version_df)

## Add runs to agent

In [None]:
from trulens.core.run import Run
from trulens.core.run import RunConfig

# Upper-cased run name; reused later to fetch the run and to filter events.
run_name = "test_run_0220".upper()

run_config = RunConfig(
    run_name=run_name,
    description="desc",
    dataset_name="My test dataframe name",
    source_type="DATAFRAME",
    label="label",
    # Presumably maps each logical field to a column of the input DataFrame
    # ("query" and "expected_response" are the columns selected for input_df)
    # — confirm against the RunConfig documentation.
    dataset_spec={
        "input": "query",
        "ground_truth_output": "expected_response",
    },
)  # type: ignore

# Add the run to the instrumented app and keep the returned Run object.
run: Run = tru_app.add_run(run_config=run_config)

In [None]:
# Fetch the run again by name; this rebinds `run`.
run = tru_app.get_run(run_name=run_name)

In [None]:
run.describe()

In [None]:
tru_app.list_runs()

### Start the run on an input dataset (a pandas DataFrame or rows in the user's table) to invoke the user's app directly and start ingestion

#### Here we use a user-provided test DataFrame

In [None]:
# Keep only the two columns named in the run's dataset_spec ("query" and
# "expected_response"); the bare last expression displays the frame.
input_df = hotpotqa[["query", "expected_response"]]
input_df

In [None]:
run.start(input_df=input_df)

In [None]:
# Read the event table.

import time


def wait_for_nonzero_results(
    num_retries: int = 20, retry_cooldown_in_seconds: int = 5
):
    """Poll the AI observability events until this run has at least one row.

    Runs a query against ``snowflake.local.GET_AI_OBSERVABILITY_EVENTS`` for
    the session's current database and schema, the notebook-level
    ``APP_NAME`` and object type ``'EXTERNAL AGENT'``, keeping only rows whose
    ``snow.ai.observability.run.name`` record attribute equals the
    notebook-level ``run_name``.

    Args:
        num_retries: Maximum number of times the query is executed.
        retry_cooldown_in_seconds: Pause between two consecutive attempts.
            No pause is taken after the final attempt.

    Returns:
        The first non-empty query result, as returned by ``to_pandas()``.

    Raises:
        ValueError: If every attempt returned zero rows.
    """
    q = """
        SELECT
            *
        FROM
            table(snowflake.local.GET_AI_OBSERVABILITY_EVENTS(
                ?,
                ?,
                ?,
                'EXTERNAL AGENT'
            ))
        WHERE
            RECORD_ATTRIBUTES:"snow.ai.observability.run.name" = ?
        """
    # The bind parameters do not change between attempts, so build them once.
    # [1:-1] drops the first and last character of the names reported by the
    # session — presumably the surrounding double quotes; verify.
    params = [
        snowpark_session.get_current_database()[1:-1],
        snowpark_session.get_current_schema()[1:-1],
        APP_NAME,
        run_name,
    ]
    for attempt in range(num_retries):
        ret = snowpark_session.sql(q, params=params).to_pandas()
        if len(ret) > 0:
            return ret
        # Only wait when another attempt will follow; sleeping after the last
        # one would just delay the error.
        if attempt < num_retries - 1:
            time.sleep(retry_cooldown_in_seconds)
    raise ValueError("No results found!")


res = wait_for_nonzero_results()
res

In [None]:
res.iloc[-1]["RECORD_ATTRIBUTES"]

In [None]:
# Ask the run to compute the four metrics listed below.
run.compute_metrics([
    "coherence",
    "answer_relevance",
    "context_relevance",
    "groundedness",
])