In [6]:
# 1. Setup and initialization
import weaviate
from weaviate.classes.init import Auth
from weaviate.classes.query import MetadataQuery

from openai import OpenAI
from langsmith.wrappers import wrap_openai
from langsmith import traceable, evaluate

import os
from dotenv import load_dotenv

load_dotenv()

# Initialize clients
wcd_url = os.environ["WCD_URL"]
wcd_api_key = os.environ["WCD_API_KEY"]

client = weaviate.connect_to_weaviate_cloud(
    cluster_url=wcd_url,                                    
    auth_credentials=Auth.api_key(wcd_api_key),
)
openai_client = wrap_openai(OpenAI(api_key=os.getenv('OPENAI_API_KEY')))

            Please make sure to close the connection using `client.close()`.


In [9]:
@traceable
def tesla_agent(inputs: dict) -> dict:

    query = inputs["messages"][-1]["content"]

    print (query)
    response = openai_client.embeddings.create(
        model="text-embedding-3-large",
        input=query
    )
    query_embedding = response.data[0].embedding

    collection_name = "Vladimir"

    collection = client.collections.get(collection_name)
    similar_texts = collection.query.near_vector(
        near_vector=query_embedding,
        limit=3,
        return_properties=["text"],
        return_metadata=MetadataQuery(distance=True)
    )

    context_str = "\n\n---\n\n".join([doc.properties["text"] for doc in similar_texts.objects])

    prompt = f"""Answer the question using ONLY the information provided in the context below. 
    Do not add any general knowledge or information not contained in the context."

    Context:
    {context_str}

    Question: {query}

    Answer:"""

    print(prompt)
    
    response = openai_client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a helpful assistant that answers questions based on the provided context."},
            {"role": "user", "content": prompt}
        ],
        temperature=0
    )

    return {
        "message": {
            "role": "assistant",
            "content": response.choices[0].message.content
        }
    }

In [10]:
# The name or UUID of the LangSmith dataset to evaluate on.
data = "rag_evaluation_dataset"

# A string to prefix the experiment name with.
experiment_prefix = "Tesla agent tests"

def correctness_evaluator(run, example) -> dict:
    # Extract the original query problem from inputs
    user_query = run.inputs["inputs"]["messages"][-1]["content"]
    
    # Extract the model's generated tests
    agent_response = run.outputs["message"]["content"]
    
    # Rest of the evaluation logic remains the same
    evaluation_prompt = f"""
    Given this question from an owner of a Tesla:
    {user_query}

    Evaluate this repsopnse from the Tesla support agent:
    {agent_response}
    
    Score from 0-4:
    4 = The response is accurate and complete. It provides all the necessary information and is well-structured. It is succint and clear.
    3 = The response is accurate and complete. It provides all the necessary information but may not be well-structured or succint. It is clear.
    2 = The solution is partially correct. It provides some of the necessary information but is missing some key details. It may be poorly structured or unclear.
    1 = The solution is partially correct. It provides some of the necessary information but is missing key details. It is poorly structured or unclear.
    0 = The solution is incorrect, irrelevant, or missing key details. It is poorly structured or unclear.
    
    Return only the number (0-4).
    """
    
    response = openai_client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a support agent evaluation assistant. Respond only with a number 0-4."},
            {"role": "user", "content": evaluation_prompt}
        ],
        temperature=0
    )
    
    try:
        score = int(response.choices[0].message.content.strip())
        return {
            "key": "response score",
            "score": score / 4,  # Normalize to 0-1
            "explanation": f"Test response score: {score}/4"
        }
    except ValueError:
        return {
            "key": "response score",
            "score": 0,
            "explanation": "Failed to parse score"
        }

In [11]:
# List of evaluators to score the outputs of target task
evaluators = [
    correctness_evaluator
]

# Evaluate the target task
results = evaluate(
    tesla_agent,
    data=data,
    evaluators=evaluators,
    experiment_prefix=experiment_prefix
)

View the evaluation results for experiment: 'Tesla agent tests-6d3b08af' at:
https://smith.langchain.com/o/d0bb7181-4c71-4754-8ed4-d7f6829374d1/datasets/57491992-4dcd-409e-9b2f-561b8031e281/compare?selectedSessions=913dc88d-5761-4604-8ce9-4f7dbd166f35




0it [00:00, ?it/s]

any tips for connecting accessories to power feed safely?
Answer the question using ONLY the information provided in the context below. 
    Do not add any general knowledge or information not contained in the context."

    Context:
    Accessing the Power Feed on the Roof

1. Ensure that Cybertruck is powered off (see Powering Off on page 72).


2. Locate the strip of applique on the right side of the roof and use a flat, non-marring tool to gently lift under the edge and release the five clips that hold it in place. Lift upwards to remove the applique.

   [Image showing the location of the applique and the process of lifting it]

3. Locate the shrink-wrapped loop of wires. This is the power feed.

   [Image showing the location of the shrink-wrapped loop of wires]

4. Remove the shrink wrap from the power feed, then splice or solder the accessory's wire harness to it.

5. Ensure your accessory or attachment is properly secured to the vehicle.

6. Replace the roof applique by pressi