In [17]:
# Destination JSONL path; the final cell of this notebook writes the correctly
# answered teacher records to this file.
OUTPUT_FILE = "teacher_finetune_tweak_dataset.jsonl"

In [18]:
import os
import pandas as pd
import glob
import json
import re
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from dotenv import load_dotenv
import requests

In [19]:
# Load environment variables from .env file.
# GROQ_API_KEY is read later via os.getenv by the raw-HTTP request helper.
load_dotenv()

True

## Load Questions similar to ta

In [20]:
def load_all_jsonl_to_dataframe(file_path):
    """Read a single JSON Lines file into a pandas DataFrame.

    Despite the function name, exactly one file is read (not a folder).

    Args:
        file_path: Location of the JSONL file to read.

    Returns:
        The loaded DataFrame, or None when loading failed. Failures are
        reported with a printed message instead of being raised.
    """
    try:
        frame = pd.read_json(file_path, lines=True)
        source_name = os.path.basename(file_path)
        print(f"Loaded {len(frame)} rows from {source_name}")
    except Exception as err:
        # Best-effort loader: report the problem and hand back None.
        print(f"Error loading {file_path}: {err}")
        return None
    return frame

# Usage: load the source questions from a single JSONL file.
# raw_data is None if the loader reported an error.
file_path = "../top_600_ripe_0_similar/ripe-0-similar-ripe-epochs-30/grpo.jsonl"
raw_data = load_all_jsonl_to_dataframe(file_path)

Loaded 211 rows from grpo.jsonl


## Display sample data

In [21]:
# Display up to the first 100 rows
raw_data.head(100)

Unnamed: 0,problem,answer
0,\nConsider all possible quadratic polynomials ...,50
1,\nDecide whether there is an integer \( n > 1 ...,561
2,\nWe wrote the numbers from 1 to 2009 on a pie...,2009
3,$(1)$ $f(n)$ is a function defined on the set ...,364
4,$(1)$ State the operation laws or rules used i...,-\frac{1}{2}
...,...,...
95,"For the set $M$, define the function $f_M(x) =...",16
96,For what is the smallest $n$ such that there e...,44
97,"Four different natural numbers, of which one i...",40
98,"Four people, A, B, C, and D, stand on a stairc...",2394


In [22]:
# Print the complete, untruncated text of the first problem.
print(raw_data.iloc[0]['problem'])


Consider all possible quadratic polynomials $x^2 + px + q$ with a positive discriminant, where the coefficients $p$ and $q$ are integers divisible by 5. Find the largest natural number $n$ such that for any polynomial with the described properties, the sum of the hundredth powers of the roots is an integer divisible by $5^n$. Let's think step by step and output the final answer within \boxed{}.


## Solution Extraction Utilities

Functions to extract solutions from model outputs and compute correctness.

In [23]:
def remove_boxed(s):
    """Strip the LaTeX box wrapper from a boxed expression.

    Accepts every shape that `last_boxed_only_string` can return:
    ``\\boxed{...}``, ``\\fbox{...}`` and the brace-less ``\\boxed ...``
    form. Previously only ``\\boxed{...}`` was recognised, so the other
    two shapes always yielded None.

    Args:
        s: The boxed expression (wrapper included), or None.

    Returns:
        The content inside the wrapper, or None when `s` is None or is not
        a well-formed boxed expression.
    """
    if s is None:
        return None

    # Brace-less form: there is no closing delimiter, so everything after
    # the prefix is the answer; surrounding whitespace is not significant.
    spaced_prefix = "\\boxed "
    if s.startswith(spaced_prefix):
        return s[len(spaced_prefix):].strip()

    # Braced forms must both open with the wrapper and close with "}".
    for left in ("\\boxed{", "\\fbox{"):
        if s.startswith(left) and s.endswith("}"):
            return s[len(left):-1]

    return None


def last_boxed_only_string(string):
    """Return the last boxed expression found in `string`, wrapper included.

    Looks for the last ``\\boxed`` occurrence (falling back to the last
    ``\\fbox`` when there is no ``\\boxed`` at all) and returns it together
    with its brace-balanced argument.

    The brace-less ``\\boxed <answer>`` form is only used when it is the
    *last* ``\\boxed`` in the text; in that case the answer runs up to the
    next ``$`` (or the end of the text). Previously an earlier brace-less
    occurrence took precedence over a later ``\\boxed{...}`` and produced a
    garbled result.

    Args:
        string: Text to search, typically a full model response.

    Returns:
        The boxed expression as a string, or None when the input is not a
        string, contains no box, or the braces never balance.
    """
    # Guard against missing responses instead of raising AttributeError.
    if not isinstance(string, str):
        return None

    idx = string.rfind("\\boxed")
    if idx >= 0:
        spaced_prefix = "\\boxed "
        if string.startswith(spaced_prefix, idx):
            tail = string[idx + len(spaced_prefix):]
            return spaced_prefix + tail.split("$")[0]
    else:
        idx = string.rfind("\\fbox")
        if idx < 0:
            return None

    # Walk forward from the wrapper and stop at the brace that closes the
    # first opened group.
    depth = 0
    for pos in range(idx, len(string)):
        ch = string[pos]
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return string[idx:pos + 1]

    # Braces never balanced (or no braces at all).
    return None

def extract_solution(text):
    """Return the content of the last boxed expression in `text`.

    Combines `last_boxed_only_string` (locate the box) with `remove_boxed`
    (strip the wrapper) and returns whatever `remove_boxed` yields.
    """
    boxed_expression = last_boxed_only_string(text)
    return remove_boxed(boxed_expression)

def correctness_reward_func(response: str, actual_answers: str) -> float:
    """Score a response by exact match of its extracted answer.

    Args:
        response: Model output to extract the answer from.
        actual_answers: Reference answer to compare against.

    Returns:
        1.0 when the answer extracted from `response` equals
        `actual_answers` exactly, otherwise 0.0.
    """
    if extract_solution(response) == actual_answers:
        return 1.0
    return 0.0

## Get Teacher Reasoning Traces

In [67]:
# Preview the rewritten prompt of the first record.
# NOTE(review): tweak_dataset is built in a cell further down, so this cell
# fails with NameError on a fresh Restart & Run All.
print(tweak_dataset[0]['problem'])


Consider all possible quadratic polynomials $x^2 + px + q$ with a positive discriminant, where the coefficients $p$ and $q$ are integers divisible by 5. Find the largest natural number $n$ such that for any polynomial with the described properties, the sum of the hundredth powers of the roots is an integer divisible by $5^n$. Plan your answer. Check your answer against everything in the question and if it is not correct, go back to your last checked answer. Think step by step and output the final answer within \boxed{}.


In [44]:
# Create the Groq SDK client that the get_reasoning_trace* helpers call.
# NOTE(review): presumably the client picks up its API key from the
# environment loaded by load_dotenv() — confirm.
from groq import Groq
client = Groq()

In [65]:
# Smoke test: stream one completion for the first tweaked problem and echo
# the text as it arrives.
# NOTE(review): this re-imports Groq and re-creates `client`, duplicating the
# previous cell.
from groq import Groq

client = Groq()

model="qwen-qwq-32b"

stream = client.chat.completions.create(
    #
    # Required parameters
    #
    messages=[
        # Set a user message for the assistant to respond to.
        {
            "role": "user",
            "content": tweak_dataset[0]['problem'],
        }
    ],

    # The language model which will generate the completion.
    model=model,

    #
    # Optional parameters
    #

    # Controls randomness: lowering results in less random completions.
    # As the temperature approaches zero, the model will become deterministic
    # and repetitive.
    temperature=0.8,

    # Upper bound on the number of tokens to generate for this completion.
    max_completion_tokens=32767,
    # NOTE(review): presumably nucleus-sampling cutoff — confirm against the API docs.
    top_p=0.95,

    # A stop sequence is a predefined or user-specified text string that
    # signals an AI to stop generating content, ensuring its responses
    # remain focused and concise. Examples include punctuation marks and
    # markers like "[end]".
    stop=None,

    # Request incremental chunks; they are consumed by the loop below.
    stream=True,
)

# Print the incremental deltas returned by the LLM.
for chunk in stream:
    next_text = chunk.choices[0].delta.content
    # Skip chunks whose delta carries no text.
    if next_text:
        print(next_text, end="")


<think>
Okay, so I need to find the largest natural number n such that for any quadratic polynomial of the form x² + px + q, where p and q are integers divisible by 5, and the discriminant is positive, the sum of the hundredth powers of the roots is divisible by 5ⁿ. Hmm, let's break this down step by step.

First, let me recall that for a quadratic equation x² + px + q = 0, the sum of the roots is -p and the product is q. The discriminant is D = p² - 4q. The problem states that D must be positive, so the roots are real and distinct. Since p and q are divisible by 5, let me denote p = 5a and q = 5b where a and b are integers. Then the discriminant becomes D = (5a)² - 4*(5b) = 25a² - 20b. Since D must be positive, 25a² > 20b, or 5a² > 4b. Not sure if I need that yet.

The key part is to find the sum of the hundredth powers of the roots. Let me denote the roots as r₁ and r₂. Then we need to compute S₁₀₀ = r₁¹⁰⁰ + r₂¹⁰⁰. The problem says that this S₁₀₀ must be divisible by 5ⁿ for all such

In [None]:
def get_reasoning_trace_stream(problem, model="qwen-qwq-32b", timeout=300):  # 5 minutes default timeout
    """Get a reasoning trace for a single problem by streaming the completion.

    The completion is requested with ``stream=True`` and the incremental
    content deltas are concatenated, so callers still receive one string.
    (The previous body sent ``stream=False`` and was therefore identical to
    ``get_reasoning_trace`` despite its name.)

    Args:
        problem: Prompt text sent as the single user message.
        model: Model identifier passed to the chat-completions call.
        timeout: Timeout forwarded to the client call.

    Returns:
        The concatenated generated text, or None when the request or the
        stream failed (the error is printed, not raised).
    """
    try:
        stream = client.chat.completions.create(
            messages=[
                {"role": "user", "content": problem}
            ],
            model=model,
            temperature=0.8,
            top_p=0.95,
            max_completion_tokens=32767,
            stream=True,  # receive the completion as incremental chunks
            timeout=timeout,
        )

        pieces = []
        for chunk in stream:
            # Defensive: skip chunks that carry no choices or no text delta.
            if not chunk.choices:
                continue
            delta_text = chunk.choices[0].delta.content
            if delta_text:
                pieces.append(delta_text)

        return "".join(pieces)

    except Exception as e:
        print(f"Error making API request: {str(e)}")
        return None


In [37]:
def get_reasoning_trace(problem, model="qwen-qwq-32b", timeout=300):  # 5 minutes default timeout
    """Get a reasoning trace for a single problem with one blocking request.

    Sends `problem` as the only user message through the module-level
    Groq `client` with streaming disabled.

    Args:
        problem: Prompt text sent as the single user message.
        model: Model identifier passed to the chat-completions call.
        timeout: Timeout forwarded to the client call.

    Returns:
        The message content of the first choice, or None when the call
        raised (the error is printed, not raised).
    """
    request_kwargs = dict(
        messages=[{"role": "user", "content": problem}],
        model=model,
        temperature=0.8,
        top_p=0.95,
        max_completion_tokens=32767,
        stream=False,  # wait for the complete response; no streaming
        timeout=timeout,
    )

    try:
        completion = client.chat.completions.create(**request_kwargs)
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as err:
        print(f"Error making API request: {str(err)}")
        return None


In [38]:
def get_reasoning_trace_request(problem, model="qwen-qwq-32b", timeout=300):  # 5 minutes default timeout
    """Request a completion for a single problem over raw HTTP.

    POSTs to the Groq OpenAI-compatible chat-completions endpoint with the
    "on_demand" service tier, authenticating with the GROQ_API_KEY
    environment variable.

    Args:
        problem: Prompt text sent as the single user message.
        model: Model identifier placed in the request payload.
        timeout: Timeout in seconds passed to `requests.post`.

    Returns:
        The parsed JSON response body (the whole payload, not just the
        generated text), or None on a non-200 status or any exception.
        Failures are printed, not raised.
    """
    endpoint = "https://api.groq.com/openai/v1/chat/completions"

    try:
        auth_headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {os.getenv('GROQ_API_KEY')}"
        }
        payload = {
            "service_tier": "on_demand",
            "model": model,
            "messages": [
                {"role": "user", "content": problem}
            ],
            "temperature": 0.8,  # relatively high temperature for more diverse reasoning paths
            "top_p": 0.95,
            "max_tokens": 32767
        }

        response = requests.post(
            endpoint,
            headers=auth_headers,
            json=payload,
            timeout=timeout,
        )

        # Happy path first; anything else is reported and mapped to None.
        if response.status_code == 200:
            return response.json()

        print(f"Error: API returned status code {response.status_code}")
        print(f"Response: {response.text}")
        return None
    except Exception as err:
        print(f"Error making API request: {str(err)}")
        return None

In [39]:
# Instruction text present in the source problems, and the replacement
# instruction that is swapped in for the teacher prompts.
ORIG_SYSTEM_PROMPT = "Let's think step by step and output the final answer within \\boxed{}."
NEW_SYSTEM_PROMPT = 'Plan your answer. Check your answer against everything in the question and if it is not correct, go back to your last checked answer. Think step by step and output the final answer within \\boxed{}.'

# One record per source row: the problem text with the instruction swapped
# (problems without the original instruction are left untouched) and the
# gold answer converted to a string.
tweak_dataset = [
    {
        'problem': problem_text.replace(ORIG_SYSTEM_PROMPT, NEW_SYSTEM_PROMPT),
        'answer': str(gold_answer),
    }
    for problem_text, gold_answer in zip(raw_data['problem'], raw_data['answer'])
]

# len(tweak_dataset)
# tweak_dataset[:3]

In [63]:
# Inspect the rewritten prompt of the first record.
tweak_dataset[0]['problem']

{'problem': '\nConsider all possible quadratic polynomials $x^2 + px + q$ with a positive discriminant, where the coefficients $p$ and $q$ are integers divisible by 5. Find the largest natural number $n$ such that for any polynomial with the described properties, the sum of the hundredth powers of the roots is an integer divisible by $5^n$. Plan your answer. Check your answer against everything in the question and if it is not correct, go back to your last checked answer. Think step by step and output the final answer within \\boxed{}.',
 'answer': '50',
 'teacher_trace': None}

In [42]:
# Cap how many problems are sent to the teacher model (1 = single-example dry run).
# Note: this rebinds tweak_dataset to the truncated list, so the dropped
# records are only recoverable by re-running the cell that builds it.
limit = 1
tweak_dataset = tweak_dataset[:limit]

In [43]:
# Ask the teacher model for a trace on every record and store the result
# (None when the request failed) under 'teacher_trace'.
for position, record in enumerate(tweak_dataset):
    prompt = record['problem']
    print(f"{position}: {prompt}")
    try:
        trace = get_reasoning_trace(prompt, model="qwen-qwq-32b", timeout=600)
    except Exception as err:
        print(f"Error making API request: {str(err)}")
        trace = None
    record['teacher_trace'] = trace

tweak_dataset

0: 
Consider all possible quadratic polynomials $x^2 + px + q$ with a positive discriminant, where the coefficients $p$ and $q$ are integers divisible by 5. Find the largest natural number $n$ such that for any polynomial with the described properties, the sum of the hundredth powers of the roots is an integer divisible by $5^n$. Plan your answer. Check your answer against everything in the question and if it is not correct, go back to your last checked answer. Think step by step and output the final answer within \boxed{}.
Error making API request: upstream request timeout


[{'problem': '\nConsider all possible quadratic polynomials $x^2 + px + q$ with a positive discriminant, where the coefficients $p$ and $q$ are integers divisible by 5. Find the largest natural number $n$ such that for any polynomial with the described properties, the sum of the hundredth powers of the roots is an integer divisible by $5^n$. Plan your answer. Check your answer against everything in the question and if it is not correct, go back to your last checked answer. Think step by step and output the final answer within \\boxed{}.',
  'answer': '50',
  'teacher_trace': None}]

In [None]:
# Score every record that has a teacher trace: store the extracted answer
# (as a string) and the exact-match reward; records without a trace are skipped.
for position, record in enumerate(tweak_dataset):
    trace = record['teacher_trace']
    if trace is None:
        print(f"{position} skipped")
        continue
    gold_answer = record['answer']
    record['extracted_solution'] = str(extract_solution(trace))
    record['reward'] = correctness_reward_func(trace, gold_answer)
    print(f"{position} Gold answer {gold_answer} extracted answer {record['extracted_solution']} reward {record['reward']}")

In [None]:
# Keep only the records whose reward is exactly 1.0 (unscored records have no 'reward' key).
correct = list(filter(lambda record: record.get('reward') == 1.0, tweak_dataset))
len(correct)

In [None]:
# Ensure the target directory exists (nothing to create when OUTPUT_FILE has no directory part)
output_dir = os.path.dirname(OUTPUT_FILE)
if output_dir.strip():
    os.makedirs(output_dir, exist_ok=True)

# Write the kept records as JSON Lines: one JSON object per line
with open(OUTPUT_FILE, 'w') as out:
    out.writelines(json.dumps(item) + '\n' for item in correct)

print(f"Saved {len(correct)} records to {OUTPUT_FILE}")