# Streamlit example development tuning

In this notebook, we load a few configurations of the Streamlit core app and run a test set through TruLens evaluations. This shows how to test an LLM app and find its optimal configuration while carefully keeping track of configuration metadata.

In [None]:
import os
from dotenv import load_dotenv
from common_ui import TruCustomApp, generator, ModelConfig, feedbacks_rag
import streamlit as st

from trulens_eval import Tru

# Load variables from a local .env file into the process environment; the API
# keys read via os.environ in later cells are expected to come from there.
load_dotenv()

# Create the TruLens handle and launch its dashboard so the runs recorded below
# can be inspected side by side.
tru = Tru()
tru.run_dashboard()

## Create the test set

In [None]:
from schema import Conversation, Message

def create_conversation_from_user_message(user_message: str) -> Conversation:
    """Start a new ``Conversation`` seeded with one user message.

    Args:
        user_message: Text used as the ``content`` of a ``Message`` whose
            role is ``"user"``.

    Returns:
        A newly constructed ``Conversation`` to which that message has been
        added via ``add_message``.
    """
    fresh = Conversation()
    opening_turn = Message(role="user", content=user_message)
    fresh.add_message(opening_turn)
    return fresh

In [None]:
# Test set: one question per entry. Each question becomes its own
# single-message conversation below.
prompts = [
    # Questions about Streamlit itself.
    "How do I launch a streamlit app?",
    "How can I capture the state of my session in streamlit?",
    "How do I install streamlit?",
    "How do I change the background color of a streamlit app?",
    "What's the advantage of using a streamlit form?",
    "What are some ways I should use checkboxes?",
    "How can I conserve space and hide away content?",
    "Can you recommend some resources for learning Streamlit?",
    "What are some common use cases for Streamlit?",
    "How can I deploy a streamlit app on the cloud?",
    "How do I add a logo to streamlit?",
    "What is the best way to deploy a Streamlit app?",
    "How should I use a streamlit toggle?",
    "How do I add new pages to my streamlit app?",
    "How do I write a dataframe to display in my dashboard?",
    "Can I plot a map in streamlit? If so, how?",
    # Questions that are not about Streamlit -- presumably included to see how
    # the app handles off-topic input (TODO confirm intent).
    "How do vector stores enable efficient similarity search?",
    "How do I prevent my child from using the internet?",
    "What should I pack for a camping trip this weekend?",
    "How do I defend myself against bear attacks?",
]

# One Conversation per prompt, in the same order as `prompts`.
conversations = list(map(create_conversation_from_user_message, prompts))


## Test the first app

In [None]:

# Settings for the first configuration, defined once so that the metadata
# logged with the recorder and the ModelConfig actually used cannot drift apart.
app_1_settings = {"model": "Snowflake Arctic", "use_rag": True, "temperature": 0.2, "top_p": 0.1}

# Pass a copy as metadata so the recorder never shares the dict used to build
# the ModelConfig below.
tru_recorder_1 = TruCustomApp(generator, app_id="app-dev-1", metadata=dict(app_1_settings), feedbacks=feedbacks_rag)
generator.set_retriever_api_key(os.environ['PINECONE_API_KEY'])

with tru_recorder_1:
    for question in prompts:
        # Build a fresh conversation for every prompt instead of reusing the
        # shared `conversations` objects: this loop assigns `model_config` on
        # the conversation, and the generator is handed the conversation as
        # well (it may add to it -- TODO confirm). Reusing the shared objects
        # would carry that state into any later run and make re-running this
        # cell non-idempotent.
        conversation = create_conversation_from_user_message(question)
        model_config_1 = ModelConfig(**app_1_settings)
        chat = st.chat_message("assistant")
        conversation.model_config = model_config_1
        user_message, prompt = generator.prepare_prompt(conversation)
        if conversation.model_config.use_rag:
            # The return value is not needed here; the call is made inside the
            # recorder context above.
            generator.retrieve_and_generate_response(user_message, prompt, conversation, chat)

## Test the second app

In [None]:
# Recorder for the second configuration (app id "app-dev-2"); the metadata
# records the settings this run is meant to use.
tru_recorder_2 = TruCustomApp(
    generator,
    app_id="app-dev-2",
    metadata={
        "model": "Snowflake Arctic",
        "use_rag": True,
        "temperature": 0.8,
        "top_p": 0.9,
    },
    feedbacks=feedbacks_rag,
)

In [None]:
# NOTE(review): this run reads NEW_PINECONE_API_KEY, whereas the first app's
# run reads PINECONE_API_KEY. If the two keys reach different retriever
# backends, retrieval differs between runs in addition to temperature/top_p --
# confirm this is intended.
generator.set_retriever_api_key(os.environ['NEW_PINECONE_API_KEY'])

with tru_recorder_2:
    for question in prompts:
        # Build a fresh conversation for every prompt instead of reusing the
        # shared `conversations` objects: this loop assigns `model_config` on
        # the conversation, and the generator is handed the conversation as
        # well (it may add to it -- TODO confirm). Starting from a clean
        # conversation keeps this run independent of any earlier run and makes
        # the cell safe to re-execute.
        conversation = create_conversation_from_user_message(question)
        model_config_2 = ModelConfig(model="Snowflake Arctic", use_rag=True, temperature=0.8, top_p=0.9)
        chat = st.chat_message("assistant")
        conversation.model_config = model_config_2
        user_message, prompt = generator.prepare_prompt(conversation)
        if conversation.model_config.use_rag:
            generator.retrieve_and_generate_response(user_message, prompt, conversation, chat)