# Welcome to Tonic Tru Eval!

Here we use a custom library with Hugging Face support to run Hugging Face evaluations, call Hugging Face endpoints, and build a dashboard.

# Download TruEval Branch
This notebook only works if you install the TruEval fork from here:
https://github.com/Josephrp/trulens 



In [None]:
!pip install git+https://github.com/Josephrp/trulens.git
!pip install openai
!pip install gradio-client

# Importing Necessary Libraries
Here we import all the necessary libraries and modules required for our application.


In [None]:
import os
import openai
from trulens_eval.feedback.provider.hugs import FalconChatBot, GradioTextApp 
from IPython.display import JSON
from trulens_eval import TruBasicApp, Feedback, Tru, OpenAI
from gradio_client import Client


# Setting Up Environment Variables
Set the API keys for OpenAI, Hugging Face, and Replicate as environment variables.


In [None]:
# Set environment variables for API keys.
# setdefault keeps any key that is already exported in the environment and only
# falls back to the "..." placeholder, so running this cell never overwrites a
# real credential. Replace "..." with your own keys, or export them before
# starting the notebook.
os.environ.setdefault("OPENAI_API_KEY", "...")
os.environ.setdefault("HUGGINGFACE_API_KEY", "...")
os.environ.setdefault("REPLICATE_API_TOKEN", "...")

# The openai module-level key is taken from the environment value chosen above.
openai.api_key = os.environ["OPENAI_API_KEY"]


# Define Feedback Functions and Initialize Feedback Providers
Here we define various feedback functions and initialize the feedback providers for our application.

In [None]:
# Huggingface and FeedbackMode are used below but are missing from the
# notebook's import cell, so bring them into scope here.
from trulens_eval import FeedbackMode, Huggingface

# Feedback providers: one backed by Hugging Face, one backed by OpenAI.
hugs = Huggingface()
openai_provider = OpenAI()

# Each Feedback pairs a provider method with the part of the record it reads:
# on_output() -> model response only, on_input_output() -> prompt and response.
# NOTE(review): trulens_eval documents feedback_mode as a recorder/app option;
# confirm that Feedback itself honors it in this fork.
f_sentiment = Feedback(hugs.positive_sentiment, feedback_mode=FeedbackMode.DEFERRED).on_output()
f_relevance = Feedback(openai_provider.relevance, feedback_mode=FeedbackMode.DEFERRED).on_input_output()
f_conciseness = Feedback(openai_provider.conciseness, feedback_mode=FeedbackMode.DEFERRED).on_output()
f_stereotypes = Feedback(openai_provider.stereotypes, feedback_mode=FeedbackMode.DEFERRED).on_input_output()

# Shared by every recorder created later in the notebook.
feedbacks = [f_sentiment, f_relevance, f_conciseness, f_stereotypes]


# Defining GPT-3.5 Turbo and GPT-4 Functions
These functions will be used to interact with the GPT-3.5 Turbo and GPT-4 models.


In [None]:
def _chat_completion(model, prompt):
    """Send *prompt* to the OpenAI chat model *model* and return the reply text.

    Works with both generations of the ``openai`` package: the 1.x interface
    (``openai.chat.completions.create``) and the legacy 0.x interface
    (``openai.ChatCompletion.create``), which was removed in 1.0.

    Args:
        model: OpenAI chat model name, e.g. ``"gpt-4"``.
        prompt: The user message to answer.

    Returns:
        The content of the first choice's message.
    """
    messages = [
        {"role": "system", "content": "You are a question and answer bot. Answer upbeat."},
        {"role": "user", "content": prompt},
    ]
    # The OpenAI client class only exists in openai>=1.0; the old
    # ChatCompletion name still resolves there but raises when used.
    if hasattr(openai, "OpenAI"):
        response = openai.chat.completions.create(model=model, messages=messages)
        return response.choices[0].message.content
    response = openai.ChatCompletion.create(model=model, messages=messages)
    return response["choices"][0]["message"]["content"]


def gpt35_turbo(prompt):
    """Answer *prompt* with gpt-3.5-turbo and return the reply text."""
    return _chat_completion("gpt-3.5-turbo", prompt)


def gpt4(prompt):
    """Answer *prompt* with gpt-4 and return the reply text."""
    return _chat_completion("gpt-4", prompt)



# Initialize Recorders for GPT-3.5 and GPT-4
We are initializing recorders for both GPT-3.5 Turbo and GPT-4 models with the defined feedbacks.


In [None]:

# Wrap each OpenAI-backed function in a TruBasicApp recorder. Both recorders
# receive the same feedback list and are told apart by app_id.
gpt35_turbo_recorder = TruBasicApp(
    gpt35_turbo,
    app_id="gpt-3.5-turbo",
    feedbacks=feedbacks,
)
gpt4_recorder = TruBasicApp(
    gpt4,
    app_id="gpt-4",
    feedbacks=feedbacks,
)


# Initialize FalconChatBot and GradioTextApp
Setting up FalconChatBot and GradioTextApp with their respective URLs and prediction functions.


In [None]:
# Initialize FalconChatBot and GradioTextApp.
# Each client is constructed exactly once. The URL is a placeholder: replace it
# with the address of your own Gradio app.

# FalconChatBot
falcon_chat_bot = FalconChatBot("https://your-gradio-api-url.hf.space/", "/custom_predict")


def falcon_chat_bot_predict(prompt):
    """Return the FalconChatBot reply for *prompt*.

    The system instruction matches the one used for the OpenAI models. The
    trailing numeric arguments are presumably generation settings -- confirm
    their meaning against FalconChatBot.predict.
    """
    return falcon_chat_bot.predict(prompt, "You are a question and answer bot. Answer upbeat.", 50, 0.7, 0.9, 1.2)


# GradioTextApp
gradio_text_app = GradioTextApp("https://your-gradio-api-url.hf.space/", "/custom_predict")


def gradio_text_app_predict(prompt):
    """Return the GradioTextApp reply for *prompt*."""
    return gradio_text_app.predict(prompt)


# Create Recorders for FalconChatBot and GradioTextApp
Here we create recorders for both FalconChatBot and GradioTextApp.


In [None]:
# Recorders for the two Gradio-backed predict functions; they reuse the same
# feedback list and are told apart by app_id.
falcon_chat_bot_recorder = TruBasicApp(
    falcon_chat_bot_predict,
    app_id="falcon_chat_bot",
    feedbacks=feedbacks,
)
gradio_text_app_recorder = TruBasicApp(
    gradio_text_app_predict,
    app_id="gradio_text_app",
    feedbacks=feedbacks,
)


# Define Prompts
List of prompts that will be used for the models.


In [None]:
# Prompts that the recording loops iterate over. The list is an empty
# placeholder: add your own prompt strings, otherwise those loops run zero
# iterations and nothing is recorded.
prompts = [
    # ... [List of prompts]
]


# Recording Loop for Each Model
Executing the recording loop for each model with the defined prompts.


In [None]:

# Run every prompt through each recorder in turn, inside that recorder's
# recording context. The order matches the models' definition order.
for recorder in (
    gpt35_turbo_recorder,
    gpt4_recorder,
    falcon_chat_bot_recorder,
    gradio_text_app_recorder,
):
    with recorder as recording:
        for prompt in prompts:
            print(prompt)
            recorder.app(prompt)


# Exploring Results in a Dashboard or Notebook


In [None]:
# `tru` is used below but is never created anywhere else in this notebook,
# so instantiate the Tru workspace object here (Tru comes from the import cell).
tru = Tru()

# Explore in a Dashboard
tru.run_dashboard()  # open a local streamlit app to explore

# tru.stop_dashboard()  # stop if needed

# Or view results directly in your notebook
tru.get_records_and_feedback(app_ids=[])[0]  # pass an empty list of app_ids to get all


In [None]:

import os
import openai
from trulens_eval.feedback.provider.hugs import FalconChatBot, GradioTextApp 
from IPython.display import JSON
from trulens_eval import TruBasicApp, Feedback, Tru, OpenAI
from gradio_client import Client

# Set environment variables for API keys.
# setdefault keeps any key that is already exported in the environment and only
# falls back to the "..." placeholder, so running this cell never overwrites a
# real credential. Replace "..." with your own keys, or export them before
# starting the notebook.
os.environ.setdefault("OPENAI_API_KEY", "...")
os.environ.setdefault("HUGGINGFACE_API_KEY", "...")
os.environ.setdefault("REPLICATE_API_TOKEN", "...")

# The openai module-level key is taken from the environment value chosen above.
openai.api_key = os.environ["OPENAI_API_KEY"]

# Define feedback functions and initialize feedback providers.
# Huggingface and FeedbackMode are used below but are missing from this cell's
# imports, so bring them into scope here.
from trulens_eval import FeedbackMode, Huggingface

# Feedback providers: one backed by Hugging Face, one backed by OpenAI.
hugs = Huggingface()
openai_provider = OpenAI()

# Each Feedback pairs a provider method with the part of the record it reads:
# on_output() -> model response only, on_input_output() -> prompt and response.
# NOTE(review): trulens_eval documents feedback_mode as a recorder/app option;
# confirm that Feedback itself honors it in this fork.
f_sentiment = Feedback(hugs.positive_sentiment, feedback_mode=FeedbackMode.DEFERRED).on_output()
f_relevance = Feedback(openai_provider.relevance, feedback_mode=FeedbackMode.DEFERRED).on_input_output()
f_conciseness = Feedback(openai_provider.conciseness, feedback_mode=FeedbackMode.DEFERRED).on_output()
f_stereotypes = Feedback(openai_provider.stereotypes, feedback_mode=FeedbackMode.DEFERRED).on_input_output()

# Shared by every recorder created later in this cell.
feedbacks = [f_sentiment, f_relevance, f_conciseness, f_stereotypes]

def _chat_completion(model, prompt):
    """Send *prompt* to the OpenAI chat model *model* and return the reply text.

    Works with both generations of the ``openai`` package: the 1.x interface
    (``openai.chat.completions.create``) and the legacy 0.x interface
    (``openai.ChatCompletion.create``), which was removed in 1.0.

    Args:
        model: OpenAI chat model name, e.g. ``"gpt-4"``.
        prompt: The user message to answer.

    Returns:
        The content of the first choice's message.
    """
    messages = [
        {"role": "system", "content": "You are a question and answer bot. Answer upbeat."},
        {"role": "user", "content": prompt},
    ]
    # The OpenAI client class only exists in openai>=1.0; the old
    # ChatCompletion name still resolves there but raises when used.
    if hasattr(openai, "OpenAI"):
        response = openai.chat.completions.create(model=model, messages=messages)
        return response.choices[0].message.content
    response = openai.ChatCompletion.create(model=model, messages=messages)
    return response["choices"][0]["message"]["content"]


def gpt35_turbo(prompt):
    """Answer *prompt* with gpt-3.5-turbo and return the reply text."""
    return _chat_completion("gpt-3.5-turbo", prompt)


def gpt4(prompt):
    """Answer *prompt* with gpt-4 and return the reply text."""
    return _chat_completion("gpt-4", prompt)

# Wrap each OpenAI-backed function in a TruBasicApp recorder. Both recorders
# receive the same feedback list and are told apart by app_id.
gpt35_turbo_recorder = TruBasicApp(
    gpt35_turbo,
    app_id="gpt-3.5-turbo",
    feedbacks=feedbacks,
)
gpt4_recorder = TruBasicApp(
    gpt4,
    app_id="gpt-4",
    feedbacks=feedbacks,
)

# Initialize FalconChatBot and GradioTextApp.
# Each client is constructed exactly once. The URL is a placeholder: replace it
# with the address of your own Gradio app.

# FalconChatBot
falcon_chat_bot = FalconChatBot("https://your-gradio-api-url.hf.space/", "/custom_predict")


def falcon_chat_bot_predict(prompt):
    """Return the FalconChatBot reply for *prompt*.

    The system instruction matches the one used for the OpenAI models. The
    trailing numeric arguments are presumably generation settings -- confirm
    their meaning against FalconChatBot.predict.
    """
    return falcon_chat_bot.predict(prompt, "You are a question and answer bot. Answer upbeat.", 50, 0.7, 0.9, 1.2)


# GradioTextApp
gradio_text_app = GradioTextApp("https://your-gradio-api-url.hf.space/", "/custom_predict")


def gradio_text_app_predict(prompt):
    """Return the GradioTextApp reply for *prompt*."""
    return gradio_text_app.predict(prompt)


# Recorders for the two Gradio-backed predict functions; they reuse the same
# feedback list and are told apart by app_id.
falcon_chat_bot_recorder = TruBasicApp(
    falcon_chat_bot_predict,
    app_id="falcon_chat_bot",
    feedbacks=feedbacks,
)
gradio_text_app_recorder = TruBasicApp(
    gradio_text_app_predict,
    app_id="gradio_text_app",
    feedbacks=feedbacks,
)

# Define prompts.
# The list is an empty placeholder: add your own prompt strings, otherwise the
# recording loops that iterate over it run zero times and nothing is recorded.
prompts = [
    # ... [List of prompts]
]

# Run every prompt through each recorder in turn, inside that recorder's
# recording context. The order matches the models' definition order.
for recorder in (
    gpt35_turbo_recorder,
    gpt4_recorder,
    falcon_chat_bot_recorder,
    gradio_text_app_recorder,
):
    with recorder as recording:
        for prompt in prompts:
            print(prompt)
            recorder.app(prompt)

# `tru` is used below but is never created anywhere else in this notebook,
# so instantiate the Tru workspace object here (Tru is imported at the top of
# this cell).
tru = Tru()

# Explore in a Dashboard
tru.run_dashboard()  # open a local streamlit app to explore

# tru.stop_dashboard()  # stop if needed

# Or view results directly in your notebook
tru.get_records_and_feedback(app_ids=[])[0]  # pass an empty list of app_ids to get all
