In [45]:
# Enable IPython's autoreload extension in mode 2 so edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

from collections import defaultdict
from sqlalchemy import select, func
from sqlmodel import Session

from ypl.backend.db import get_engine
from ypl.db.ratings import Category
from ypl.db.chats import Chat, Turn, MessageType
from ypl.db.users import User


def get_prompts_per_user(num_prompts: int) -> dict[str, list[str]]:
    """Sample user-written prompts from the database, grouped by user name.

    Users whose name starts with "YF" are skipped. For every other user, up
    to ``3 * num_prompts`` randomly ordered (chat, turn) rows are fetched, and
    the first message of each turn is kept when it is a user message.

    Args:
        num_prompts: Per-user sampling budget. The query fetches three times
            this many rows and the result is not truncated afterwards, so a
            user may contribute anywhere from 0 to ``3 * num_prompts`` prompts.

    Returns:
        Mapping from user name to the list of sampled prompt texts. Users
        without any qualifying prompt are absent. Prompts of users that share
        the same name end up in the same list. The returned object is a
        ``defaultdict(list)``, so looking up a missing name inserts an empty
        list.
    """
    name_to_prompt = defaultdict(list)

    with Session(get_engine()) as session:
        users = session.exec(select(User)).all()

        # Each result row is unpacked as a single-element sequence, hence the
        # trailing comma after `user`.
        for user, in users:
            if user.name.startswith("YF"):
                continue

            # if not user.name.startswith("Pankaj"):
            #     continue

            # Random order + limit gives a random sample of this user's turns.
            results = session.exec((
                select(Chat, Turn)
                .where(Chat.creator_user_id == user.user_id)
                .order_by(func.random())
                .limit(3 * num_prompts)
                .join(Turn, Turn.chat_id == Chat.chat_id)
            )).all()

            for _chat, turn in results:
                messages = turn.chat_messages
                # A turn without any message would make the [0] lookup raise
                # IndexError and abort the whole export; skip it instead.
                if not messages:
                    continue

                first_message = messages[0]
                if first_message.message_type == MessageType.USER_MESSAGE:
                    name_to_prompt[user.name].append(first_message.content)

    return name_to_prompt

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [46]:
# Sample prompts for every user with num_prompts=20 (the query inside
# get_prompts_per_user limits each user to 3 * num_prompts rows).
name_to_prompt = get_prompts_per_user(20)

In [47]:
# Flatten the per-user mapping into one {"name", "prompt"} record per prompt.
prompt_data = [
    {"name": name, "prompt": prompt}
    for name, prompts in name_to_prompt.items()
    for prompt in prompts
]


In [48]:
import pandas as pd

# One row per (user name, prompt) record. Naming the columns explicitly keeps
# "name" and "prompt" present even when prompt_data is empty, so later cells
# that select those columns do not fail on a column-less frame.
df = pd.DataFrame(prompt_data, columns=["name", "prompt"])

In [49]:
# Drop rows whose user name starts with "YF-". Bracket access avoids relying
# on attribute-style column lookup, and the vectorized .str accessor replaces
# the per-row lambda; na=False treats missing names as "does not match".
df = df[~df["name"].str.startswith("YF-", na=False)]

In [50]:
# Write one tab-separated file per user into the current working directory.
for name, gdf in df.groupby("name"):
    # The group key is used as a file name; replace path separators so a name
    # containing "/" or "\" cannot point into another (or missing) directory.
    safe_name = str(name).replace("/", "_").replace("\\", "_")
    # reset_index() without drop=True keeps the original row label as an
    # "index" column in the exported file.
    gdf.reset_index().to_csv(f"{safe_name}.tsv", sep="\t", index=False)

In [59]:
import os

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from ypl.backend.llm.chat import get_chat_model, ModelInfo

# Two OpenAI configurations that differ only in the model name; both read the
# API key from the OPENAI_API_KEY environment variable.
info = ModelInfo(provider="openai", model="gpt-4o", api_key=os.environ["OPENAI_API_KEY"])
info_mini = ModelInfo(provider="openai", model="gpt-4o-mini", api_key=os.environ["OPENAI_API_KEY"])

# "old" runs on gpt-4o-mini and "new" on gpt-4o, with identical sampling
# settings. NOTE(review): timeout=10 is presumably in seconds -- confirm
# against get_chat_model.
old_model = get_chat_model(info_mini, temperature=0.0, timeout=10)
new_model = get_chat_model(info, temperature=0.0, timeout=10)

# System prompt for the "old" pipeline (consumed by template_old): asks for
# very concise answers under 160 characters and shows four
# "Question:/Answer:" few-shot examples.
SYS_PROMPT_OLD = """You are a model that will give very concise responses.
IMPORTANT: don't add any explanations on the answer;
don't write full sentences, unless the user is very specifically asking you for a long answer;
for answers that are non-factual, make it witty or funny, but still brief;
if the user asks to output markdown or any markup, return the cleaned text only;
do not use newlines;
NEVER prompt for more information, feedback, or responses.
Respond in fewer than 160 characters, in the language of the user's message.

Here are some examples: 

Question: Why is the sky blue? 
Answer: Rayleigh scattering of sunlight by the atmosphere.

Question: what is the meaning of life?
Answer: 42

Question: How many people are there in the US? 
Answer: 333.3 million

Question: Should I buy Elden Ring?
Answer: Only if you enjoy gorgeous landscapes and repeatedly dying in them"""

# System prompt for the "new" pipeline (consumed by template_new): asks for
# very concise answers under 140 characters, allows "[NULL]" as a rare
# fallback, and shows "Prompt:/Response:" few-shot examples.
# The "how fast can you respond?" example previously began its response with
# a stray, unbalanced double quote; it is removed so the examples do not teach
# the model to emit a leading quote character.
SYS_PROMPT_NEW = """You are a model that will give very concise responses. IMPORTANT: don't add any explanations on the answer; don't write full sentences; do not output markdown or any markup, return the cleaned text only; do not use newline characters; NEVER prompt for more information, feedback, or responses. If the prompt is not serious or seems like gibberish, you can be slightly witty. If you still can't satisfy the prompt, or if the prompt requires realtime data, respond with "[NULL]", but use this rarely. Respond in fewer than 140 characters, in the language of the user's message.

Here are some examples. Match the style as best you can: 

Prompt: Why is the sky blue? 
Response: Rayleigh scattering of sunlight by the atmosphere

Prompt: How many people are there in the US? 
Response: 333.3 million

Prompt: During a marathon training regimen, a runner is asked to run "comfortably hard". What does that mean?
Response: Challenging but manageable

Prompt: whats 4*5*6*....1000
Response: A very large number

Prompt: What are some beautiful hikes in the sf bay area
Response: Muir Woods, Mount Tamalpais, Skyline Blvd

Prompt: whats up
Response: Life's good, and you?

Prompt: wassup
Response: Life's great, and you?

Prompt: Write a long, creative saga about a shrew
Response: Tiny shrew braved vast lands, faced perils, found wisdom, befriended creatures, returned home a hero—small size, big heart

Prompt: give me a random battle cat
Response: Crazed Cat

Prompt: how fast can you respond?
Response: Pretty fast! 🚀 What do you need next?

Prompt: What are the most common subjects in MMLU becnhmark?
Response: Math, science, humanities, social sciences, and professional fields

Prompt: suggest an alternate name for leaderboard
Response: Hall of Fame

Prompt: how's it going
Response: Life's good, and you?

Prompt: where do birds hatch
Response: Nests

Prompt: long cat is ...
Response: Long!

Prompt: What does the fox say?
Response: Ring-ding-ding-ding-dingeringeding!

Prompt: Draw a whale
Response: 🐳

Prompt: Draw a picture of a cat
Response: 🐱

Prompt: Tell me about El Nino in Markdown
Response: A climate pattern marked by warm ocean water in the central and eastern tropical Pacific

Prompt: Use Markdown to explain how the moon affects tides
Response: Its gravitational pull on Earth's oceans creates tides

Prompt: njkwejk wlafje
Response: Whoa there, everything alright?

Prompt: webjnkkjbwer
Response: Looks like a keyboard sneezed!
"""

# Think step by step and output your reasoning briefly (less than 50 characters), starting with "Reasoning: ", then start your response (less than 140 characters) with "Response: "


# Old pipeline: SYS_PROMPT_OLD as the system message and the raw input as the
# user message, piped through old_model and parsed to a plain string.
template_old = ChatPromptTemplate.from_messages(
    [("system", SYS_PROMPT_OLD), ("user", "{input}")]
)
old_llm = template_old | old_model | StrOutputParser()

# New pipeline: SYS_PROMPT_NEW as the system message; the user message wraps
# the input in the "Prompt:" format and repeats the length reminder.
template_new = ChatPromptTemplate.from_messages(
    [
        ("system", SYS_PROMPT_NEW),
        (
            "user",
            "Prompt: {input}\n\nMake sure to respond in fewer than 140 characters and follow the instructions carefully.",
        ),
    ]
)
new_llm = template_new | new_model | StrOutputParser()


In [None]:
from tqdm import tqdm
import asyncio

new_col = []
old_col = []

# df = df.iloc[:30]

for _, row in tqdm(df.iterrows()):
    try:
        new_res, old_res = await asyncio.gather(new_llm.ainvoke(dict(input=row.prompt)), old_llm.ainvoke(dict(input=row.prompt)))
        new_col.append(new_res)
        old_col.append(old_res)
    except Exception as e:
        new_col.append("[TIMED OUT]")
        old_col.append("[TIMED OUT]")

df["new_res"] = new_col
df["old_res"] = old_col
# df.to_csv("pgupta.tsv", sep="\t", index=False)


In [61]:
# Display the frame with both response columns (pandas truncates long output).
df

Unnamed: 0,name,prompt,new_res,old_res
0,Kelly Colon,summarize all key things I should learn to pas...,"Data structures, algorithms, system design, co...","Data structures, algorithms, system design, co..."
1,Kelly Colon,What is there to do today?,"Explore, relax, read, exercise, cook, or catch...",How about a thrilling adventure in your living...
2,Kelly Colon,hello,Hi there! How can I assist you today?,Hi there!
3,Kelly Colon,I see,Got it! Anything else you need?,Glad to hear it!
4,Kelly Colon,Why did you assume it was for a SWE role?,Context or keywords suggested software enginee...,Just a hunch! Maybe I sensed your coding vibes.
...,...,...,...,...
439,Carmen Lai,how is it going?,"Life's good, and you?","Like a rollercoaster—ups, downs, and a few scr..."
440,Carmen Lai,Help me come up with customer scenarios to ill...,"""I'm a developer optimizing our LLM and need t...","1. ""As a PM at a tech firm, I need to showcase..."
441,Carmen Lai,Are you sure?,Absolutely!,"Absolutely, like a cat on a hot tin roof!"
442,Carmen Lai,find a mapping from langdetect language codes ...,[NULL],"langdetect codes: 'en' -> 'english', 'fr' -> '..."


In [62]:
# Shuffle all rows (frac=1.0 without replacement is a permutation) and export
# them as TSV. A fixed random_state makes the shuffled order reproducible when
# the notebook is re-run.
df.sample(frac=1.0, replace=False, random_state=0).to_csv("random.tsv", sep="\t", index=False)