In [1]:
OUTPUT_FILE = "top_87_teacher_finetune_tweak_dataset.jsonl"

In [2]:
import os
import pandas as pd
import glob
import json
import re
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
from dotenv import load_dotenv
import requests

In [3]:
# Load environment variables from .env file
load_dotenv()

True

## Load Questions similar to target question

In [4]:
def load_all_jsonl_to_dataframe(file_path):
    """Load all JSONL files from a folder into a pandas DataFrame."""
    
    try:
        # Read file into dataframe
        df = pd.read_json(file_path, lines=True)
        # df['source_file'] = os.path.basename(file_path)  # Optional: track source file
        print(f"Loaded {len(df)} rows from {os.path.basename(file_path)}")
        return df
    except Exception as e:
        print(f"Error loading {file_path}: {e}")

# Usage
file_path = "../top_87_ripe_0_similar/ripe-0-similar-ripe-epochs-30/grpo.jsonl"
raw_data = load_all_jsonl_to_dataframe(file_path)

Loaded 16 rows from grpo.jsonl


## Display sample data

In [5]:
# Display the first few rows
raw_data.head(100)

Unnamed: 0,problem,answer
0,A list of five positive integers has all of th...,15
1,A list of five positive integers has mean $12$...,6
2,A list of integers has mode 32 and mean 22. Th...,20
3,A list of seven positive integers has a median...,52
4,A list of seven positive integers has a median...,87
5,"A repunit is a positive integer, all of whose ...",1223456
6,Carolyn and Paul are playing a game starting w...,12
7,"Daniel writes over a board, from top to down, ...",10
8,Find the largest positive integer $k{}$ for wh...,2
9,"Let $(b_1, b_2, ... b_{12})$ be a list of the ...",2048


In [6]:
print(raw_data.iloc[0]['problem'])

A list of five positive integers has all of the following properties:

$\bullet$  The only integer in the list that occurs more than once is $8,$

$\bullet$  its median is $9,$ and

$\bullet$  its average (mean) is $10.$

What is the largest possible integer that could appear in the list? Let's think step by step and output the final answer within \boxed{}.


## Solution Extraction Utilities

Functions to extract solutions from model outputs and compute correctness.

In [7]:
def remove_boxed(s):
    if s is None:
        return None

    left = "\\boxed{"

    if s[:len(left)] != left:
        return None
    if s[-1] != "}":
        return None

    return s[len(left):-1]


def last_boxed_only_string(string):
    idx = string.rfind("\\boxed")
    if "\\boxed " in string:
        return "\\boxed " + string.split("\\boxed ")[-1].split("$")[0]
    if idx < 0:
        idx = string.rfind("\\fbox")
        if idx < 0:
            return None

    i = idx
    right_brace_idx = None
    num_left_braces_open = 0
    while i < len(string):
        if string[i] == "{":
            num_left_braces_open += 1
        if string[i] == "}":
            num_left_braces_open -= 1
            if num_left_braces_open == 0:
                right_brace_idx = i
                break
        i += 1

    if right_brace_idx is None:
        retval = None
    else:
        retval = string[idx:right_brace_idx + 1]

    return retval

def extract_solution(text):
    return remove_boxed(last_boxed_only_string(text))

def correctness_reward_func(response: str, actual_answers: str) -> float:
    extracted_answer = extract_solution(response)
    return 1.0 if extracted_answer == actual_answers else 0.0

## Get Teacher Reasoning Traces

## Example

In [8]:
# Simply change the API Key to get started

from openai import OpenAI

client = OpenAI(
    api_key=os.environ['BASETEN_API_KEY'],
    base_url="https://model-rwn19e03.api.baseten.co/environments/production/sync/v1",
    timeout=600.0
)

In [9]:
def get_reasoning_trace(problem):  # 5 minutes default timeout
    
    # try:
    response = client.chat.completions.create(
        model="placeholder",
        messages=[{"role":"user","content":problem}],
        max_tokens=24576
    )
    
    return response.choices[0].message.content
        
    # except Exception as e:
    #     print(f"Error making API request: {str(e)}")
    #     return None


# get_reasoning_trace_baseten("Tell me everything you know about optimized inference.")

In [10]:
# import os
# from azure.ai.inference import ChatCompletionsClient
# from azure.ai.inference.models import SystemMessage, UserMessage
# from azure.core.credentials import AzureKeyCredential

In [11]:
# def get_reasoning_trace(problem):  # 5 minutes default timeout
#     """Get a reasoning trace for a single problem using Groq Flex API"""
    
#     try:
#         endpoint = "https://ai-xiiiiiiiiii4559ai500267034787.services.ai.azure.com/models"
#         model_name = "DeepSeek-R1"
        
#         client = ChatCompletionsClient(
#             endpoint=endpoint,
#             credential=AzureKeyCredential(os.getenv('AZURRE_API_KEY')),
#         )
        
#         response = client.complete(
#             messages=[
#                 UserMessage(content=problem)
#             ],
#             max_tokens=24576,
#             model=model_name,
#             timeout=600  # 10 minutes in seconds
#         )
        
#         # print(response.choices[0].message.content)
#         # return out
        
#         return response.choices[0].message.content

#     except Exception as e:
#         print(f"Error making API request: {str(e)}")
#         return None


In [12]:
# def get_reasoning_trace_request(problem, model="qwen-qwq-32b", timeout=300):  # 5 minutes default timeout
#     """Get a reasoning trace for a single problem using Groq Flex API"""
    
#     try:
#         response = requests.post(
#             "https://api.groq.com/openai/v1/chat/completions",
#             headers={
#                 "Content-Type": "application/json",
#                 "Authorization": f"Bearer {os.getenv('GROQ_API_KEY')}"
#             },
#             json={
#                 "service_tier": "on_demand",
#                 "model": model,
#                 "messages": [
#                     {"role": "user", "content": problem}
#                 ],
#                 "temperature": 0.8,  # Increased temperature for more diverse reasoning paths
#                 "top_p": 0.95,
#                 "max_tokens": 24576
#             },
#             timeout=timeout  # Add timeout parameter here (in seconds)
#         )
        
#         if response.status_code != 200:
#             print(f"Error: API returned status code {response.status_code}")
#             print(f"Response: {response.text}")
#             return None
            
#         return response.json()
#     except Exception as e:
#         print(f"Error making API request: {str(e)}")
#         return None

In [13]:
ORIG_SYSTEM_PROMPT = "Let's think step by step and output the final answer within \\boxed{}."
NEW_SYSTEM_PROMPT = 'Plan your answer. Check your answer against everything in the question and if it is not correct, go back to your last checked answer. Think step by step and output the final answer within \\boxed{}.'

tweak_dataset = [
    {
        'problem': r['problem'],
        'answer': str(r['answer']),
    }
    for index, r in raw_data.reset_index().iterrows()
]

for r in tweak_dataset:
    if ORIG_SYSTEM_PROMPT in r['problem']:
        r['problem'] = r['problem'].replace(ORIG_SYSTEM_PROMPT, NEW_SYSTEM_PROMPT)

# len(tweak_dataset)
# tweak_dataset[:3]

In [14]:
print(tweak_dataset[0]['problem'])

A list of five positive integers has all of the following properties:

$\bullet$  The only integer in the list that occurs more than once is $8,$

$\bullet$  its median is $9,$ and

$\bullet$  its average (mean) is $10.$

What is the largest possible integer that could appear in the list? Plan your answer. Check your answer against everything in the question and if it is not correct, go back to your last checked answer. Think step by step and output the final answer within \boxed{}.


In [15]:
tweak_dataset[0]['answer']

'15'

In [16]:
# response = get_reasoning_trace(tweak_dataset[0]['problem'])
# print(f"Extracted solution: {str(extract_solution(response))}")
# print("Full response: ")
# print(response)

## Get All Teacher Reasoning Traces

In [17]:
# Debug
# limit = 1
# tweak_dataset = tweak_dataset[:limit]

In [18]:
len(tweak_dataset)

16

In [19]:
for i, r in enumerate(tweak_dataset):
    print(f"{i}: {r['problem']}")
    try:
        if r.get('reward', 0.0) != 1.0:
            # print(f"{i}: {r.get('reward', 0.0)}")
            r['teacher_trace'] = get_reasoning_trace(r['problem'])
    except Exception as e:
        print(f"Error making API request: {str(e)}")
        # r['teacher_trace'] = None

# tweak_dataset

0: A list of five positive integers has all of the following properties:

$\bullet$  The only integer in the list that occurs more than once is $8,$

$\bullet$  its median is $9,$ and

$\bullet$  its average (mean) is $10.$

What is the largest possible integer that could appear in the list? Plan your answer. Check your answer against everything in the question and if it is not correct, go back to your last checked answer. Think step by step and output the final answer within \boxed{}.
1: A list of five positive integers has mean $12$ and range $18$. The mode and median are both $8$. How many different values are possible for the second largest element of the list? Plan your answer. Check your answer against everything in the question and if it is not correct, go back to your last checked answer. Think step by step and output the final answer within \boxed{}.
2: A list of integers has mode 32 and mean 22. The smallest number in the list is 10. The median  m of the list is a member of t

In [20]:
for i, r in enumerate(tweak_dataset):
    if tweak_dataset[i].get('teacher_trace', None) is None:
        print(f"{i} skipped")
        continue
    r['extracted_solution'] = str(extract_solution(tweak_dataset[i]['teacher_trace']))
    r['reward'] = correctness_reward_func(tweak_dataset[i]['teacher_trace'], tweak_dataset[i]['answer'])
    print(f"{i} Gold answer {tweak_dataset[i]['answer']} extracted answer {r['extracted_solution']} reward {r['reward']}")

0 Gold answer 15 extracted answer 15 reward 1.0
1 Gold answer 6 extracted answer 6 reward 1.0
2 Gold answer 20 extracted answer 20 reward 1.0
3 Gold answer 52 extracted answer 55 reward 0.0
4 Gold answer 87 extracted answer 87 reward 1.0
5 Gold answer 1223456 extracted answer 1223456 reward 1.0
6 Gold answer 12 extracted answer 10 reward 0.0
7 Gold answer 10 extracted answer None reward 0.0
8 Gold answer 2 extracted answer 2 reward 1.0
9 Gold answer 2048 extracted answer 2048 reward 1.0
10 Gold answer 64 extracted answer 64 reward 1.0
11 Gold answer 83 extracted answer 83 reward 1.0
12 Gold answer \frac{206}{3} extracted answer \dfrac{206}{3} reward 0.0
13 Gold answer 16 extracted answer 28 reward 0.0
14 Gold answer 32 extracted answer 8 reward 0.0
15 Gold answer 1976.5 extracted answer 1976.5 reward 1.0


In [21]:
correct = [r for r in tweak_dataset if r.get('reward', None) == 1.0]
len(correct)

10

In [22]:
# Create directory if it doesn't exist
dirname = os.path.dirname(OUTPUT_FILE)
if len(dirname.strip()) > 0:
    os.makedirs(dirname, exist_ok=True)

# Save to JSONL file
with open(OUTPUT_FILE, 'w') as f:
    for item in correct:
        f.write(json.dumps(item) + '\n')

print(f"Saved {len(correct)} records to {OUTPUT_FILE}")

Saved 10 records to top_87_teacher_finetune_tweak_dataset.jsonl


In [26]:
print(correct[0]['problem'])

A list of five positive integers has all of the following properties:

$\bullet$  The only integer in the list that occurs more than once is $8,$

$\bullet$  its median is $9,$ and

$\bullet$  its average (mean) is $10.$

What is the largest possible integer that could appear in the list? Plan your answer. Check your answer against everything in the question and if it is not correct, go back to your last checked answer. Think step by step and output the final answer within \boxed{}.


In [25]:
print(correct[0]['teacher_trace'])

Okay, so I need to figure out the largest possible integer in this list of five positive integers given the constraints. Let me start by breaking down the problem and see how each condition affects the possibility.

First, the problem states:

- The only integer in the list that occurs more than once is 8. So, 8 is repeated at least twice, and all other numbers are unique.

- The median is 9. Since there are five numbers, the third number when arranged in order must be 9.

- The average is 10. Therefore, the total sum of all five numbers must be 50 (since 5 times 10 is 50).

Our goal is to maximize the largest number in the list. To do that, I should try to minimize the other numbers as much as possible, while still adhering to the constraints.

Let me structure the list in order: let's call the list [a, b, c, d, e]. Since the median is 9, the middle term c must be 9. So, c = 9.

Now, since the list is in non-decreasing order, a ≤ b ≤ 9 ≤ d ≤ e.

Also, the only repeated number is 8. Th

In [28]:
a_problem = tweak_dataset[0]['problem']
# print(a_problem)
response = get_reasoning_trace(a_problem)
print(f"Extracted solution: {str(extract_solution(response))}")
print("Full response: ")
print(response)

Extracted solution: 15
Full response: 
Okay, so there's this problem here about a list of five positive integers, right? And it has some specific conditions. Let me see if I can figure this out step by step. 

First, let's list out the requirements again so I don't miss anything:

1. The only number that occurs more than once is 8. So that means all the other numbers are unique, and 8 is the mode? Hmm. So in the five numbers, 8 must appear at least twice, right?

2. The median is 9. Since it's a list of five numbers, the middle one when arranged in order must be 9. So the third number is 9.

3. The average is 10. Since the average is 10 for five numbers, the total sum of the numbers has to be 50. Because 5 times 10 is 50.

And the question is asking for the largest possible integer that could be in this list. So we need to maximize one of the numbers, but still meet all those conditions. 

Alright, let's break this down.

Since the median is 9, the list arranged in non-decreasing order