# Notebook Example
Use a simple in-memory vector store for RAG with Arctic-Embed and Optimized Inference

In [None]:
# Temporary: load variables from a local .env file into the environment.
# Presumably these are the credentials for the hosted feedback LLM configured
# later in the notebook -- TODO confirm. Replace that provider with some other
# LLM for feedback so this cell is no longer needed.
from dotenv import load_dotenv

load_dotenv()


In [None]:
from src.observability import start_observability

# Start the observability session through the project helper.
session = start_observability()

# NOTE(review): presumably wipes previously recorded data so every run of the
# notebook starts from a clean slate -- this looks destructive; confirm before
# pointing the session at a shared database.
session.reset_database()

## Create and Load Vector Store

In [None]:
from langchain_community.document_loaders import WebBaseLoader

# Page used as the knowledge source for this example.
SOURCE_URL = "https://www.trulens.org/"

# Fetch the page and load it as documents for the vector store.
loader = WebBaseLoader(SOURCE_URL)
docs = loader.load()

In [None]:
from src.vector_store import VectorStore

# Create the store, split the loaded documents into chunks, and add the chunks
# to the store.
vector_store = VectorStore()
chunks = vector_store.split_documents(documents=docs)
vector_store.add_chunks(chunks)

## Set LLM

In [None]:
from src.generation import ChatModel

# TODO: Replace with arctic-training optimized model
GENERATION_MODEL_NAME = "meta-llama/Llama-3.2-3B-Instruct"

# Model that writes the final answer.
chat_model = ChatModel(generation_model_name=GENERATION_MODEL_NAME)

## Create Evals, Register App

In [None]:
from src.observability import create_evals
from trulens.providers.litellm import LiteLLM

# TODO: Replace with arctic-training optimized model
FEEDBACK_MODEL_NAME = "mistral/open-mistral-7b"

# Provider handed to the project helper that builds the evaluations.
provider = LiteLLM(FEEDBACK_MODEL_NAME)
evals = create_evals(provider=provider)

In [None]:
from src.rag import Rag
from trulens.apps.app import TruApp

# Combine the vector store and the chat model into one RAG application.
rag = Rag(vector_store=vector_store, chat_model=chat_model)

# Register the application with TruLens and attach the evaluations.
tru_rag = TruApp(rag, app_name="RAG", app_version="snowflake-oss", feedbacks=evals)

## Generate an Answer

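Next, we define a query and run it through the RAG pipeline, which retrieves relevant documents and generates an answer from them. The call runs inside the `tru_rag` context so that it is recorded for evaluation.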

In [None]:
# Question to put to the RAG pipeline.
query = "What evaluations should be used to assess the quality of a RAG system?"

# Make the call inside the TruApp context manager so it happens under `tru_rag`.
with tru_rag:
    llm_response = rag.retrieve_and_generate(query)

## Display

In [None]:
import html

from IPython.display import HTML, display

# The answer is model-generated text, not trusted markup. Escape it so that
# characters such as <, > and & are shown literally instead of being parsed as
# HTML, which would garble the output or let the response inject markup.
# str() is applied first because html.escape only accepts strings.
safe_response = html.escape(str(llm_response))

# Render the answer in a bordered box; `white-space: pre-wrap` keeps the
# line breaks of the original text.
html_content = f"<div style='white-space: pre-wrap; border: 1px solid #ccc; padding: 10px; background-color: white; color: black; font-size: 16px;'>{safe_response}</div>"
display(HTML(html_content))

In [None]:
from trulens.dashboard import run_dashboard

# Launch the TruLens dashboard; presumably this is where the recorded call and
# its evaluation results can be inspected -- TODO confirm.
run_dashboard()