# 📊 Client-Side Custom Metrics with TruLens

This notebook demonstrates how to create and use client-side custom metrics with TruLens. Client-side custom metrics allow you to define your own evaluation functions that run locally on the client instead of on the server (Snowflake).

## Key Features

- **Custom Metric Decorator**: Use `@custom_metric` to convert any function into a metric
- **Flexible Selectors**: Map metric parameters to span attributes using selectors
- **Client-Side Computation**: Metrics are computed locally and results uploaded as OTEL spans
- **Seamless Integration**: Works with existing TruLens apps and runs

## Prerequisites

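- OTEL tracing enabled (for example, set `TRULENS_OTEL_TRACING=1` in your environment or `.env` file before creating the session)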
- TruLens feedback package installed
- Access to a TruLens app with instrumented methods


In [None]:
from dotenv import load_dotenv
from trulens.core import TruSession
from trulens.core.feedback.custom_metric import custom_metric
from trulens.core.otel.instrument import instrument

# Load environment variables (python-dotenv reads them from a .env file when
# one is found) before the session is created.
load_dotenv()


# Create the TruLens session with default arguments.
# NOTE(review): nothing in this cell switches OTEL tracing on, although the
# message below says it is enabled - presumably it is configured through the
# environment loaded above; confirm.
session = TruSession()
print("TruLens session initialized with OTEL tracing enabled")

## Step 1: Define Custom Metrics

Let's create some custom metrics using the `@custom_metric` decorator. These metrics will evaluate the quality of text-to-SQL generation.


In [None]:
# Define custom metrics using the decorator
@custom_metric(
    name="text2sql_accuracy", higher_is_better=True, metric_type="text2SQL"
)
def text_to_sql_accuracy(query: str, sql: str) -> float:
    """
    Score how well a generated SQL statement matches a natural-language query.

    This is a simplified, heuristic example - in practice you'd have more
    sophisticated logic. Points are awarded independently and summed:

    - 0.3 if the SQL contains the keyword SELECT
    - 0.2 if the SQL contains the keyword FROM
    - 0.3 if the SQL contains the keyword WHERE and the query mentions
      "filter" or "where"
    - 0.2 if both the query and the SQL mention "movies"

    SQL keywords are matched case-insensitively as whole words, so
    identifiers such as ``from_date``, ``selected`` or ``nowhere`` are not
    mistaken for keywords.

    Args:
        query: The natural-language question.
        sql: The SQL statement generated for that question.

    Returns:
        A score between 0.0 and 1.0; 0.0 if either argument is empty.
    """
    # Imported locally so the function does not rely on the cell's imports.
    import re

    if not sql or not query:
        return 0.0

    def has_keyword(keyword: str) -> bool:
        # \b keeps the keyword from matching inside a longer identifier.
        pattern = rf"\b{keyword}\b"
        return re.search(pattern, sql, flags=re.IGNORECASE) is not None

    # Lowercase once instead of on every check.
    query_lower = query.lower()
    sql_lower = sql.lower()

    # Simple heuristics for demonstration
    score = 0.0

    if has_keyword("SELECT"):
        score += 0.3
    if has_keyword("FROM"):
        score += 0.2
    # A WHERE clause only earns points when the question asked for filtering.
    if has_keyword("WHERE") and (
        "filter" in query_lower or "where" in query_lower
    ):
        score += 0.3
    if "movies" in query_lower and "movies" in sql_lower:
        score += 0.2

    return min(score, 1.0)


@custom_metric(
    name="sql_syntax_check", higher_is_better=True, metric_type="syntax"
)
def sql_syntax_checker(sql: str) -> tuple[float, dict]:
    """
    Run basic (simplified) syntax checks on a SQL statement.

    Three checks are applied, each costing 0.3 when it fails:

    - the statement starts with the keyword SELECT (leading whitespace is
      ignored, matching is case-insensitive and on a whole word)
    - the statement contains the keyword FROM as a whole word
    - parentheses are balanced and properly nested

    Args:
        sql: The SQL statement to check.

    Returns:
        A ``(score, metadata)`` tuple. ``score`` is between 0.0 and 1.0.
        ``metadata`` always has an ``"explanation"`` string; for non-empty
        input it also has ``"issues_count"``, the number of failed checks.
    """
    # Imported locally so the function does not rely on the cell's imports.
    import re

    if not sql:
        return 0.0, {"explanation": "Empty SQL query"}

    issues = []

    # re.match anchors at the start; \s* tolerates indentation or a leading
    # newline, and \b rejects words that merely begin with "select".
    if re.match(r"\s*SELECT\b", sql, flags=re.IGNORECASE) is None:
        issues.append("Query should start with SELECT")

    # Whole-word match so identifiers like "from_date" do not count.
    if re.search(r"\bFROM\b", sql, flags=re.IGNORECASE) is None:
        issues.append("Missing FROM clause")

    # Track nesting depth rather than comparing counts, so a ")" that comes
    # before its "(" is reported as well.
    depth = 0
    for char in sql:
        if char == "(":
            depth += 1
        elif char == ")":
            depth -= 1
            if depth < 0:
                break
    if depth != 0:
        issues.append("Unmatched parentheses")

    # Calculate score
    score = 1.0 - (len(issues) * 0.3)
    score = max(0.0, score)

    explanation = (
        "Valid SQL syntax" if not issues else f"Issues: {', '.join(issues)}"
    )

    return score, {"explanation": explanation, "issues_count": len(issues)}


# You can also create metrics without the decorator
def query_complexity_scorer(query: str) -> float:
    """Rate a query's complexity from its word count and complex keywords.

    The score is the word count divided by 10 (capped at 1.0) plus 0.1 for
    each complex keyword found in the query, with the total capped at 1.0.
    Keyword matching is a case-insensitive substring test. An empty query
    scores 0.0.
    """
    if not query:
        return 0.0

    lowered_query = query.lower()

    # Length component: ten or more words already reach the cap.
    word_total = len(query.split())
    length_score = min(word_total / 10.0, 1.0)

    # Keyword component: a flat 0.1 per keyword that appears.
    bonus = 0
    for marker in ("JOIN", "GROUP BY", "ORDER BY", "HAVING", "SUBQUERY"):
        if marker.lower() in lowered_query:
            bonus += 0.1

    return min(length_score + bonus, 1.0)


# Confirmation message so the cell shows visible output once the definitions
# above have run.
print("Custom metrics defined successfully!")

## Step 2: Create a Sample Application

Let's create a simple text-to-SQL application that we can evaluate with our custom metrics.


In [None]:
# Create a simple text-to-SQL application
class Text2SQLApp:
    """Simple text-to-SQL conversion app for demonstration.

    SQL is produced by keyword matching on the question: the first table
    name from ``templates`` found in the question selects the base query,
    and an optional WHERE clause is appended when one of that table's
    trigger words is also present.
    """

    # Per-table filter rules, checked in order; the first rule with a trigger
    # word contained in the question wins. Each rule is
    # (trigger words, WHERE clause appended to the base query).
    # NOTE: triggers are substring tests, so e.g. "inactive" also triggers
    # the "active" rule - acceptable for a demo, not for real use.
    _FILTER_RULES = {
        "movies": (
            (("released", "date"), " WHERE release_date >= '2020-01-01'"),
            (("genre",), " WHERE genre = 'Action'"),
        ),
        "users": ((("active",), " WHERE status = 'active'"),),
        "orders": (
            (
                ("recent",),
                " WHERE order_date >= CURRENT_DATE - INTERVAL '30 days'",
            ),
        ),
    }

    # Returned when the question mentions none of the known tables.
    _FALLBACK_SQL = "SELECT * FROM information_schema.tables"

    def __init__(self):
        # Table keyword -> base query. Insertion order is the lookup order
        # used by generate_sql.
        self.templates = {
            "movies": "SELECT * FROM movies",
            "users": "SELECT * FROM users",
            "orders": "SELECT * FROM orders",
        }

    @instrument()  # This makes the method traceable
    def generate_sql(self, question: str) -> str:
        """
        Generate SQL query from natural language question.
        This is a simplified implementation for demonstration.

        Args:
            question: The natural-language question.

        Returns:
            The base query of the first table in ``templates`` mentioned in
            the question, with a WHERE clause appended when a filter trigger
            word is present; a query on ``information_schema.tables`` when
            no known table is mentioned.
        """
        question_lower = question.lower()

        # Simple keyword-based SQL generation
        for table, base_query in self.templates.items():
            if table not in question_lower:
                continue
            for triggers, where_clause in self._FILTER_RULES.get(table, ()):
                if any(word in question_lower for word in triggers):
                    return base_query + where_clause
            return base_query

        # Default fallback
        return self._FALLBACK_SQL

    @instrument()
    def query(self, question: str) -> dict:
        """Main query method that generates SQL and returns result.

        Args:
            question: The natural-language question.

        Returns:
            A dict with the original ``"question"``, the generated ``"sql"``
            and a ``"status"`` that is always ``"success"``.
        """
        sql = self.generate_sql(question)
        return {"question": question, "sql": sql, "status": "success"}


# Create the app instance (the constructor takes no arguments) and print a
# confirmation so the cell shows visible output.
app = Text2SQLApp()
print("Text2SQL app created successfully!")