In [1]:
%load_ext autoreload
%autoreload 2

import sys
from typing import Any
from sqlalchemy import func
from sqlmodel import Session, select
import logging
import json
import random


# Put the parent directory on the import search path before the project imports
# below; presumably that is where the `ypl` package lives — TODO confirm.
sys.path.append("..")

from ypl.backend.config import settings
from ypl.backend.db import get_engine
from ypl.db.chats import ChatMessage, LanguageCode, MessageType, Turn 
from ypl.db.payments import PaymentTransaction
from ypl.backend.llm.chat import ModelInfo, get_chat_model
from ypl.backend.llm.constants import ChatProvider
from ypl.backend.llm.judge import YuppPromptDifficultyLabeler, YuppPromptDifficultyWithCommentLabeler
# Make the HTTP requests less chatty.
logging.getLogger("httpx").setLevel(logging.WARNING)

PyTorch version 2.4.1 available.


In [2]:
# Judge model: OpenAI "gpt-4o", created with temperature=0.0.
judge_model_info = ModelInfo(
    provider=ChatProvider.OPENAI,
    model="gpt-4o",
    api_key=settings.OPENAI_API_KEY,
)
judge_llm = get_chat_model(judge_model_info, temperature=0.0)

# Both labelers wrap the same judge model and use the same `timeout_secs` value.
labeler_timeout_secs = 10
comment_labeler = YuppPromptDifficultyWithCommentLabeler(
    judge_llm,
    timeout_secs=labeler_timeout_secs,
)
full_labeler = YuppPromptDifficultyLabeler(
    judge_llm,
    timeout_secs=labeler_timeout_secs,
)

In [3]:
# Get turns to label.
#
# Picks up to `max_num_turns` turns in random order and, for every turn that has
# a user prompt and at least two assistant responses, collects one
# (prompt, response1, response2) triple into `prompts_responses`.

num_parallel = 5
max_num_turns = 60
max_response_chars = 1300  # Responses longer than this are cut here and suffixed with '...'.
prompts_responses = []
message_ids = []

with Session(get_engine()) as session:
    turn_ids = session.exec(
        select(Turn.turn_id)
        .order_by(func.random())
        .limit(max_num_turns)
    ).all()

    for turn_id in turn_ids:
        # All messages of the turn, oldest first.
        messages = session.exec(
            select(ChatMessage.message_id, ChatMessage.content, ChatMessage.message_type)
            .where(ChatMessage.turn_id == turn_id)
            .order_by(ChatMessage.created_at)
        ).all()

        # The prompt is the earliest user message of the turn; skip turns without one.
        prompt = next(
            (content for _, content, message_type in messages if message_type == MessageType.USER_MESSAGE),
            None,
        )
        if prompt is None:
            continue

        # Truncate every assistant response, then keep only the first two.
        responses = [
            content[:max_response_chars] + '...' if len(content) > max_response_chars else content
            for _, content, message_type in messages
            if message_type == MessageType.ASSISTANT_MESSAGE
        ][:2]

        if len(responses) == 2:
            prompts_responses.append((prompt, responses[0], responses[1]))
            # NOTE(review): this records the id of the turn's most recent message,
            # whatever its type — not specifically the prompt's or a response's id.
            # Confirm that is what consumers of `message_ids` expect.
            last_message_id, _, _ = messages[-1]
            message_ids.append(last_message_id)

print(f"Collected {len(prompts_responses)} prompt-response pairs")

# Preview up to two collected examples; slicing (rather than indexing [0] and [1])
# avoids an IndexError when the random sample yields fewer than two usable turns.
for example in prompts_responses[:2]:
    print(example)


Collected 36 prompt-response pairs
('Write me a sunglass ad in which if you buy 2 pairs of sunglasses you get a free pair of sunglasses, The sunglasses are from an unknown brand and are unique', '\n\n*The ad opens on a world bathed in a hazy, neon-lit cityscape. Rain streaks down the windows of a skyscraper office. Inside, a lone figure stands at the window, gazing out at the glowing metropolis.*\n\n**(Voiceover, cool and smooth):**  Tired of the same old shades?  The same boring styles, the same predictable lenses?  \n\n**(The figure turns, revealing a striking pair of sunglasses with a geometric, almost alien design. They are a vibrant, metallic blue)**\n\n**(Voiceover):**  Introducing **Aether Optics**.  Sunglasses designed for the pioneers, the dreamers, the ones who see beyond the ordinary.\n\n**(Cut to quick cuts of people wearing different Aether Optics designs: a pair with iridescent lenses reflecting the city lights, a pair with sleek, minimalist frames, a pair with wings exte

In [4]:
comment_results = await comment_labeler.abatch_label_full(prompts_responses, num_parallel=num_parallel)
print(comment_results[0])

100%|██████████| 36/36 [00:09<00:00,  3.60it/s]

(4, '{\n  "overall": 4,\n  "comment": "Creative task with clear promotional elements.",\n  "positive_notes": ["Engaging 🎯", "Creative 🎨"],\n  "negative_notes": ["Generic 😐", "Unclear 🤔"]\n}\n')





In [5]:
# Pair each (prompt, response1, response2) triple with the labeler result at the same index.
examples = [*zip(prompts_responses, comment_results)]

def short_str(s: str, max_len: int = 140) -> str:
    """Return a single-line preview of ``s``.

    Text longer than ``max_len`` characters is cut to its first ``max_len``
    characters followed by ``'...'``; afterwards every newline is replaced
    by a single space.
    """
    if len(s) > max_len:
        clipped = s[:max_len] + '...'
    else:
        clipped = s
    return ' '.join(clipped.split('\n'))

# Print a random sample of labeled examples for manual inspection.
num_to_show = 20
# random.sample raises ValueError when asked for more items than exist, so cap
# the sample size at the number of available examples.
for (prompt, response1, response2), (res, expl) in random.sample(examples, k=min(num_to_show, len(examples))):
    # `expl` is parsed as JSON. If it cannot be parsed, or is not a JSON object,
    # fall back to an empty dict so the "???" placeholders below are printed
    # instead of aborting the whole loop.
    try:
        d = json.loads(expl)
    except (TypeError, ValueError):
        d = {}
    if not isinstance(d, dict):
        d = {}
    print("=" * 100)
    print(f'prompt:   \t{short_str(prompt)}')
    print("-" * 50)
    print(f'response1:\t{short_str(response1)}')
    print(f'response2:\t{short_str(response2)}')
    print("-" * 50)
    print(f'comment:  \t{d.get("comment", "???")}')
    print(f'positive_notes:\t{d.get("positive_notes", "???")}')
    print(f'negative_notes:\t{d.get("negative_notes", "???")}')
    print("=" * 100)
    print()


prompt:   	Write a full 1800s style poem about the word “synthesis”
--------------------------------------------------
response1:	**Synthesis**  In realms where thought and reason reign,   Where science weaves her subtle chain,   There lies a word, both rare and bright,...
response2:	**Synthesis: A Poem of Union**  In twilight’s glow, where knowledge sways,   Amidst the realms of thought’s embrace,   A whispered word, lik...
--------------------------------------------------
comment:  	Crafting period-specific poetry requires creativity and historical insight.
positive_notes:	['Authentic 🎩', 'Creative 🌟']
negative_notes:	['Inaccurate ❌', 'Uninspired 😐']

prompt:   	Do you have a list of the top 30 richest people as of 2023?
--------------------------------------------------
response1:	  I do not have access to real-time information, including constantly fluctuating data like stock prices and net worths.   To get the most u...
response2:	As of my knowledge cutoff in October 2023, here's