# Debug Curator

In [1]:
%reload_ext autoreload
%autoreload 2

In [8]:
import os, json
from datasets import load_dataset
from pydantic import BaseModel

In [5]:
from speechless.reasoning.utils.code_execution_taco import process_dataset_parallel
from speechless.reasoning.utils.prompt import SKY_T1_SYSTEM_PROMPT, generate_prompt

from bespokelabs import curator

In [6]:
class TACOCurator(curator.LLM):
    """Curator LLM wrapper for TACO coding problems.

    Builds a two-message chat prompt from each problem record and copies the
    model's reasoning trace and final answer back onto that record.
    """

    # ``parse`` indexes the response as a raw completions payload
    # (``choices[0].message``), hence this flag.
    return_completions_object = True

    def prompt(self, problem):
        """Build the chat messages sent to the LLM for one problem.

        Args:
            problem: Mapping with an ``"input_output"`` entry (a JSON string),
                a ``"question"`` entry and a ``"starter_code"`` entry.

        Returns:
            A list of two chat messages: the system prompt followed by the
            user prompt produced by ``generate_prompt``.
        """
        io_spec = json.loads(problem["input_output"])
        starter = problem["starter_code"]
        user_content = generate_prompt(io_spec, problem["question"], starter)

        system_message = {"role": "system", "content": SKY_T1_SYSTEM_PROMPT}
        user_message = {"role": "user", "content": user_content}
        return [system_message, user_message]

    def parse(self, input, response):
        """Attach the model's reasoning and solution to the input record.

        Args:
            input: The problem record; it is modified in place.
            response: Completions payload; only the first choice is read.

        Returns:
            The same ``input`` object, now carrying ``"reasoning"`` and
            ``"deepseek_solution"`` entries.
        """
        message = response["choices"][0]["message"]
        input["reasoning"] = message["reasoning_content"]
        input["deepseek_solution"] = message["content"]
        return input

In [7]:

class APPSCurator(curator.LLM):
    """Curator LLM wrapper for APPS coding problems.

    Turns each problem record into a system + user chat prompt and stores the
    model's reasoning trace and final answer on the record.
    """

    # ``parse`` reads ``choices[0].message`` from the raw completions payload.
    return_completions_object = True

    def prompt(self, problem):
        """Build the chat messages sent to the LLM for one problem.

        Args:
            problem: Mapping with an ``"input_output"`` entry (a JSON string),
                a ``"question"`` entry and a ``"starter_code"`` entry.

        Returns:
            A list of two chat messages: the system prompt followed by the
            user prompt produced by ``generate_prompt``.
        """
        io_spec = json.loads(problem["input_output"])
        starter = problem["starter_code"]
        user_content = generate_prompt(io_spec, problem["question"], starter)

        return [
            {"role": "system", "content": SKY_T1_SYSTEM_PROMPT},
            {"role": "user", "content": user_content},
        ]

    def parse(self, input, response):
        """Attach the model's reasoning and solution to the input record.

        Args:
            input: The problem record; it is modified in place.
            response: Completions payload; only the first choice is read.

        Returns:
            The same ``input`` object, now carrying ``"reasoning"`` and
            ``"deepseek_solution"`` entries.
        """
        first_message = response["choices"][0]["message"]
        input["reasoning"] = first_message["reasoning_content"]
        input["deepseek_solution"] = first_message["content"]
        return input

In [9]:
class JudgeResult(BaseModel):
    """Structured verdict returned by the judge model.

    ``Numina162KJudge`` sets this model as its ``response_format`` and reads
    both fields back in its ``parse`` method.
    """
    # True when the judge deems the evaluated solution correct.
    correct: bool
    # Free-text reasoning returned alongside the verdict.
    reasoning: str


class Numina162KJudge(curator.LLM):
    """Curator LLM wrapper that asks a judge model to grade a solution.

    The judge compares a generated solution with the ground-truth solution and
    replies in the ``JudgeResult`` structure.
    """

    # The response handed to ``parse`` exposes ``correct`` and ``reasoning``.
    response_format = JudgeResult

    def prompt(self, input):
        """Build the grading prompt for one record.

        Args:
            input: Mapping with ``"deepseek_solution"`` and
                ``"ground_truth_solution"`` entries.

        Returns:
            The prompt text, as a single string.
        """
        candidate = input["deepseek_solution"]
        reference = input["ground_truth_solution"]
        return f"""
        You are a judge that evaluates the correctness of a solution.
        You will be given a solution and a ground truth solution.
        You will need to determine if the solution is correct.
        Answers are in the format of \\boxed{{}}.

        SOLUTION: {candidate}
        GROUND TRUTH SOLUTION: {reference}
        """

    def parse(self, input, response):
        """Merge the judge's verdict into a copy of the input record.

        Args:
            input: The record that was judged; it is not modified.
            response: Object with ``correct`` and ``reasoning`` attributes.

        Returns:
            A new dict with every entry of ``input`` plus ``"correct"`` and
            ``"judge_reasoning"``.
        """
        judged = dict(input)
        judged["correct"] = response.correct
        judged["judge_reasoning"] = response.reasoning
        return judged


def extract_boxed_answer(text):
    """Return the answer that ``extract_answer`` finds in ``text``.

    The text is first passed through ``strip_answer_string``; both helpers
    live in ``speechless.reasoning.testing.math``.
    """
    # Imported at call time, inside the function, rather than at module level.
    from speechless.reasoning.testing.math import extract_answer, strip_answer_string

    return extract_answer(strip_answer_string(text))

class Numina162K(curator.LLM):
    """Curator LLM wrapper that generates reasoning for Numina problems.

    Sends each problem statement to the model and returns a record pairing the
    model's output with the ground-truth solution and both extracted answers.
    """

    # ``parse`` reads ``choices[0].message`` from the raw completions payload.
    return_completions_object = True

    def prompt(self, input):
        """Build the chat messages for one problem.

        Args:
            input: Mapping with a ``"problem"`` entry.

        Returns:
            A list of two chat messages: the system prompt followed by the
            problem statement as the user message.
        """
        system_message = {"role": "system", "content": SKY_T1_SYSTEM_PROMPT}
        user_message = {"role": "user", "content": input["problem"]}
        return [system_message, user_message]

    def parse(self, input, response):
        """Assemble the output record for one problem.

        Args:
            input: Mapping with ``"problem"`` and ``"solution"`` entries.
            response: Completions payload; only the first choice is read.

        Returns:
            A single-element list containing a dict with the problem, the
            model's reasoning and solution, the ground-truth solution, and the
            answers extracted from both solutions by ``extract_boxed_answer``.
        """
        problem = input["problem"]
        message = response["choices"][0]["message"]
        reasoning = message["reasoning_content"]
        model_solution = message["content"]
        reference_solution = input["solution"]

        record = {
            "problem": problem,
            "reasoning": reasoning,
            "deepseek_solution": model_solution,
            "ground_truth_solution": reference_solution,
            "deepseek_final_answer": extract_boxed_answer(model_solution),
            "ground_truth_final_answer": extract_boxed_answer(reference_solution),
        }
        return [record]
