In [1]:
# Extract AMC Data from numina
from concurrent.futures import as_completed
import json
from pprint import pprint
from tqdm import tqdm

from datasets import load_dataset
from rllm.grading.grader import extract_boxed_answer


ds = load_dataset("AI-MO/NuminaMath-CoT")
# Filter for amc_aime problems
olympiad = ds['train'].filter(lambda x: x['source'] == 'olympiads')

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Filter for non-proof problems.
import concurrent.futures
from rllm.utils import call_gemini_llm
from rllm.system_prompts import PROOF_PROMPT

def process_entry(entry):
    output_dict = {}
    # 1) Get the problem text
    problem_text = entry['problem']
    solution_text = entry['solution']
    # 2) Call Gemini LLM
    output_str = call_gemini_llm(f'Problem: {problem_text} \n\n Solution: {solution_text}', system_prompt=PROOF_PROMPT)
    if not output_str:
        print("Gemini not happy.")
        return None
    if '2' in output_str:
        output_dict['problem'] = entry['problem']
        output_dict['solution'] = entry['solution']
        return {
            'problem': entry['problem'],
            'solution': entry['solution'],
        }
    else:
        if '3' in output_str:
            print(output_str, entry['problem'])
    return output_dict

# Suppose `olympiad` is your list of dictionaries
subset = olympiad  
results = []

with concurrent.futures.ProcessPoolExecutor(max_workers=48) as executor:
    # 1) Submit all jobs to the executor
    futures = [executor.submit(process_entry, entry) for entry in subset]

# 2) Process them as they complete, using tqdm for a progress bar
for future in tqdm(as_completed(futures), total=len(futures), desc="Processing entries"):
    # Get the result for each completed future
    result = future.result()
    if result:
        results.append(result)


# Save final list as json
with open("olympiad_processed.json", "w") as f:
    json.dump(results, f, indent=2)

3
 You see a square resembling a chessboard that is divided into 4 parts. Can you arrange these parts in such a way that the new figure has one less cell, i.e., 63 cells?
3
 How to easily and accurately remember the formulas for the sine and cosine of angles:

$$
\frac{\pi}{2} \pm \alpha ; \pi \pm \alpha ; \frac{3 \pi}{2} \pm \alpha, 2 \pi - \alpha ?
$$
3
 In one glass, there was milk, and in another - the same amount of coffee. A spoonful was transferred from the glass of milk to the glass of coffee and mixed. Then, the same spoonful of the mixture was transferred back to the glass with milk. What is there more of now: coffee in the glass with milk or milk in the glass with coffee?
3
 Cut a triangle into two parts, which can be reassembled into a 20-sided polygon.
Retry due to rate limit:  429 Resource exhausted. Please try again later. Please refer to https://cloud.google.com/vertex-ai/generative-ai/docs/error-code-429 for more details.
Retry due to rate limit:  429 Resource exhauste

Processing entries: 100%|██████████| 102270/102270 [00:00<00:00, 104646.63it/s]


In [1]:
from rllm.utils import RAG
import json

with open("numina_olympiad.json", 'r', encoding='utf-8') as f:
    olympiad_data = json.load(f)

with open("../raw/train/omni_math.json", 'r', encoding='utf-8') as f:
    omni_data = json.load(f)

rag_searcher = RAG(docs=[d["problem"] for d in omni_data])

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Filter for olympiad problems that are not in the omni dataset
from tqdm import tqdm

filter_olympiad = []
num_problems = 0

counter = 0
# Wrap olympiad_data with tqdm, optionally adding a description and total
for d in tqdm(olympiad_data, desc="Filtering olympiad data", total=len(olympiad_data)):
    search_result = rag_searcher.top_k(d["problem"], k=1)[0]
    score = search_result["score"]
    if score > 0.93:
        num_problems += 1
    else:
        filter_olympiad.append(d)
    counter += 1
    if counter %1000 == 0:
        print(counter)
# Save final list as json
with open("olympiad_filter.json", "w") as f:
    json.dump(filter_olympiad, f, indent=2)
num_problems

Filtering olympiad data:   1%|          | 1016/102125 [00:07<12:49, 131.44it/s]

1000


Filtering olympiad data:   2%|▏         | 2021/102125 [00:15<12:46, 130.56it/s]

2000


Filtering olympiad data:   3%|▎         | 3015/102125 [00:23<12:37, 130.76it/s]

3000


Filtering olympiad data:   4%|▍         | 4023/102125 [00:30<12:26, 131.48it/s]

4000


Filtering olympiad data:   5%|▍         | 5017/102125 [00:38<12:16, 131.78it/s]

5000


Filtering olympiad data:   6%|▌         | 6025/102125 [00:46<12:07, 132.02it/s]

6000


Filtering olympiad data:   7%|▋         | 7019/102125 [00:53<11:59, 132.16it/s]

7000


Filtering olympiad data:   8%|▊         | 8027/102125 [01:01<11:46, 133.16it/s]

8000


Filtering olympiad data:   9%|▉         | 9021/102125 [01:08<11:41, 132.69it/s]

9000


Filtering olympiad data:  10%|▉         | 10015/102125 [01:16<11:33, 132.81it/s]

10000


Filtering olympiad data:  11%|█         | 11023/102125 [01:24<11:25, 132.84it/s]

11000


Filtering olympiad data:  12%|█▏        | 12017/102125 [01:31<11:19, 132.60it/s]

12000


Filtering olympiad data:  13%|█▎        | 13025/102125 [01:39<11:14, 132.12it/s]

13000


Filtering olympiad data:  14%|█▎        | 14019/102125 [01:46<11:07, 132.05it/s]

14000


Filtering olympiad data:  15%|█▍        | 15013/102125 [01:54<11:00, 131.93it/s]

15000


Filtering olympiad data:  16%|█▌        | 16021/102125 [02:01<10:51, 132.10it/s]

16000


Filtering olympiad data:  17%|█▋        | 17015/102125 [02:09<10:42, 132.38it/s]

17000


Filtering olympiad data:  18%|█▊        | 18023/102125 [02:17<10:38, 131.76it/s]

18000


Filtering olympiad data:  19%|█▊        | 19017/102125 [02:24<10:28, 132.30it/s]

19000


Filtering olympiad data:  20%|█▉        | 20025/102125 [02:32<10:24, 131.38it/s]

20000


Filtering olympiad data:  21%|██        | 21019/102125 [02:39<10:13, 132.11it/s]

21000


Filtering olympiad data:  22%|██▏       | 22013/102125 [02:47<10:06, 132.10it/s]

22000


Filtering olympiad data:  23%|██▎       | 23021/102125 [02:55<09:59, 131.91it/s]

23000


Filtering olympiad data:  24%|██▎       | 24015/102125 [03:02<09:51, 132.12it/s]

24000


Filtering olympiad data:  25%|██▍       | 25023/102125 [03:10<09:43, 132.22it/s]

25000


Filtering olympiad data:  25%|██▌       | 26017/102125 [03:17<09:37, 131.78it/s]

26000


Filtering olympiad data:  26%|██▋       | 27025/102125 [03:25<09:29, 131.80it/s]

27000


Filtering olympiad data:  27%|██▋       | 28019/102125 [03:32<09:19, 132.56it/s]

28000


Filtering olympiad data:  28%|██▊       | 29027/102125 [03:40<09:07, 133.61it/s]

29000


Filtering olympiad data:  29%|██▉       | 30021/102125 [03:48<09:08, 131.48it/s]

30000


Filtering olympiad data:  30%|███       | 31015/102125 [03:55<09:00, 131.59it/s]

31000


Filtering olympiad data:  31%|███▏      | 32023/102125 [04:03<08:50, 132.27it/s]

32000


Filtering olympiad data:  32%|███▏      | 33017/102125 [04:10<08:42, 132.27it/s]

33000


Filtering olympiad data:  33%|███▎      | 34025/102125 [04:18<08:36, 131.95it/s]

34000


Filtering olympiad data:  34%|███▍      | 35019/102125 [04:25<08:26, 132.51it/s]

35000


Filtering olympiad data:  35%|███▌      | 36013/102125 [04:33<08:19, 132.47it/s]

36000


Filtering olympiad data:  36%|███▋      | 37021/102125 [04:41<08:11, 132.45it/s]

37000


Filtering olympiad data:  37%|███▋      | 38015/102125 [04:48<08:08, 131.15it/s]

38000


Filtering olympiad data:  38%|███▊      | 39023/102125 [04:56<07:57, 132.08it/s]

39000


Filtering olympiad data:  39%|███▉      | 40017/102125 [05:03<07:48, 132.60it/s]

40000


Filtering olympiad data:  40%|████      | 41025/102125 [05:11<07:44, 131.52it/s]

41000


Filtering olympiad data:  41%|████      | 42018/102125 [05:18<08:36, 116.48it/s]

42000


Filtering olympiad data:  42%|████▏     | 43026/102125 [05:26<07:29, 131.38it/s]

43000


Filtering olympiad data:  43%|████▎     | 44020/102125 [05:34<07:19, 132.30it/s]

44000


Filtering olympiad data:  44%|████▍     | 45014/102125 [05:41<07:11, 132.23it/s]

45000


Filtering olympiad data:  45%|████▌     | 46022/102125 [05:49<07:02, 132.73it/s]

46000


Filtering olympiad data:  46%|████▌     | 47016/102125 [05:56<06:56, 132.21it/s]

47000


Filtering olympiad data:  47%|████▋     | 48024/102125 [06:04<06:48, 132.57it/s]

48000


Filtering olympiad data:  48%|████▊     | 49018/102125 [06:11<06:40, 132.57it/s]

49000


Filtering olympiad data:  49%|████▉     | 50026/102125 [06:19<06:33, 132.37it/s]

50000


Filtering olympiad data:  50%|████▉     | 51020/102125 [06:27<06:25, 132.47it/s]

51000


Filtering olympiad data:  51%|█████     | 52014/102125 [06:34<06:17, 132.67it/s]

52000


Filtering olympiad data:  52%|█████▏    | 53022/102125 [06:42<06:11, 132.13it/s]

53000


Filtering olympiad data:  53%|█████▎    | 54016/102125 [06:49<06:00, 133.35it/s]

54000


Filtering olympiad data:  54%|█████▍    | 55024/102125 [06:57<05:54, 132.78it/s]

55000


Filtering olympiad data:  55%|█████▍    | 56018/102125 [07:04<05:48, 132.48it/s]

56000


Filtering olympiad data:  56%|█████▌    | 57026/102125 [07:12<05:39, 132.88it/s]

57000


Filtering olympiad data:  57%|█████▋    | 58020/102125 [07:19<05:30, 133.55it/s]

58000


Filtering olympiad data:  58%|█████▊    | 59014/102125 [07:27<05:24, 132.95it/s]

59000


Filtering olympiad data:  59%|█████▉    | 60022/102125 [07:35<05:18, 132.22it/s]

60000


Filtering olympiad data:  60%|█████▉    | 61016/102125 [07:42<05:10, 132.35it/s]

61000


Filtering olympiad data:  61%|██████    | 62024/102125 [07:50<05:04, 131.74it/s]

62000


Filtering olympiad data:  62%|██████▏   | 63018/102125 [07:57<04:55, 132.40it/s]

63000


Filtering olympiad data:  63%|██████▎   | 64026/102125 [08:05<04:47, 132.71it/s]

64000


Filtering olympiad data:  64%|██████▎   | 65020/102125 [08:12<04:40, 132.33it/s]

65000


Filtering olympiad data:  65%|██████▍   | 66014/102125 [08:20<04:31, 132.97it/s]

66000


Filtering olympiad data:  66%|██████▌   | 67021/102125 [08:28<04:24, 132.94it/s]

67000


Filtering olympiad data:  67%|██████▋   | 68015/102125 [08:35<04:20, 130.81it/s]

68000


Filtering olympiad data:  68%|██████▊   | 69023/102125 [08:43<04:09, 132.66it/s]

69000


Filtering olympiad data:  69%|██████▊   | 70017/102125 [08:50<04:02, 132.46it/s]

70000


Filtering olympiad data:  70%|██████▉   | 71025/102125 [08:58<03:50, 134.84it/s]

71000


Filtering olympiad data:  71%|███████   | 72019/102125 [09:05<03:47, 132.09it/s]

72000


Filtering olympiad data:  71%|███████▏  | 73013/102125 [09:13<03:40, 131.85it/s]

73000


Filtering olympiad data:  72%|███████▏  | 74021/102125 [09:20<03:31, 132.70it/s]

74000


Filtering olympiad data:  73%|███████▎  | 75015/102125 [09:28<03:26, 131.53it/s]

75000


Filtering olympiad data:  74%|███████▍  | 76023/102125 [09:36<03:18, 131.47it/s]

76000


Filtering olympiad data:  75%|███████▌  | 77017/102125 [09:43<03:09, 132.65it/s]

77000


Filtering olympiad data:  76%|███████▋  | 78025/102125 [09:51<03:01, 132.65it/s]

78000


Filtering olympiad data:  77%|███████▋  | 79019/102125 [09:58<02:53, 132.80it/s]

79000


Filtering olympiad data:  78%|███████▊  | 80013/102125 [10:06<02:47, 132.22it/s]

80000


Filtering olympiad data:  79%|███████▉  | 81021/102125 [10:13<02:40, 131.63it/s]

81000


Filtering olympiad data:  80%|████████  | 82015/102125 [10:21<02:31, 132.79it/s]

82000


Filtering olympiad data:  81%|████████▏ | 83023/102125 [10:29<02:23, 132.75it/s]

83000


Filtering olympiad data:  82%|████████▏ | 84017/102125 [10:36<02:16, 132.29it/s]

84000


Filtering olympiad data:  83%|████████▎ | 85025/102125 [10:44<02:09, 132.54it/s]

85000


Filtering olympiad data:  84%|████████▍ | 86019/102125 [10:51<02:01, 132.41it/s]

86000


Filtering olympiad data:  85%|████████▌ | 87013/102125 [10:59<01:54, 132.03it/s]

87000


Filtering olympiad data:  86%|████████▌ | 88021/102125 [11:06<01:46, 132.97it/s]

88000


Filtering olympiad data:  87%|████████▋ | 89015/102125 [11:14<01:38, 132.54it/s]

89000


Filtering olympiad data:  88%|████████▊ | 90023/102125 [11:21<01:32, 130.94it/s]

90000


Filtering olympiad data:  89%|████████▉ | 91017/102125 [11:29<01:24, 132.20it/s]

91000


Filtering olympiad data:  90%|█████████ | 92025/102125 [11:37<01:16, 131.85it/s]

92000


Filtering olympiad data:  91%|█████████ | 93019/102125 [11:44<01:09, 131.74it/s]

93000


Filtering olympiad data:  92%|█████████▏| 94013/102125 [11:52<01:01, 131.98it/s]

94000


Filtering olympiad data:  93%|█████████▎| 95021/102125 [11:59<00:53, 132.43it/s]

95000


Filtering olympiad data:  94%|█████████▍| 96015/102125 [12:07<00:46, 130.87it/s]

96000


Filtering olympiad data:  95%|█████████▌| 97023/102125 [12:14<00:38, 132.18it/s]

97000


Filtering olympiad data:  96%|█████████▌| 98017/102125 [12:22<00:30, 132.71it/s]

98000


Filtering olympiad data:  97%|█████████▋| 99025/102125 [12:30<00:23, 130.72it/s]

99000


Filtering olympiad data:  98%|█████████▊| 100019/102125 [12:37<00:15, 132.58it/s]

100000


Filtering olympiad data:  99%|█████████▉| 101013/102125 [12:44<00:08, 132.34it/s]

101000


Filtering olympiad data: 100%|█████████▉| 102021/102125 [12:52<00:00, 132.25it/s]

102000


Filtering olympiad data: 100%|██████████| 102125/102125 [12:53<00:00, 132.05it/s]


2491

In [3]:
import json
# Process dataset to prpoduce answers
# with open("olympiad_filter.json", 'r', encoding='utf-8') as f:
#     olympiad = json.load(f)

with open("../raw/train/olympiad.json", 'r', encoding='utf-8') as f:
    olympiad = json.load(f)


In [None]:
# Have gemini add the solutions to the numina dataset.
# Filter for non-proof problems.
import concurrent.futures
from concurrent.futures import as_completed
from rllm.utils import call_gemini_llm
from rllm.system_prompts import SOLUTION_PROMPT

def get_answer(entry):
    # 1) Get the problem text
    problem_text = entry['problem']
    solution_text = entry['solution']
    # 2) Call Gemini LLM
    output_list = call_gemini_llm(f'Problem: {problem_text} \n----\n Solution: {solution_text}', system_prompt=SOLUTION_PROMPT, n=3)
    output_list = [o for o in output_list if 
                   'error' not in o and 'Error' not in o and 'Solution not found' not in o]
    if not output_list:
        print(problem_text)
        print(solution_text)
        return None

    for output_str in output_list:
        if 'answer' in entry:
            if output_str in entry['answer']:
                continue
            entry['answer'].append(output_str)
        else:
            entry['answer'] = [output_str]
    return entry

results = []
idx = 0
with concurrent.futures.ThreadPoolExecutor(max_workers=32) as executor:
    # 1) Submit all jobs to the executor
    futures = [executor.submit(get_answer, entry) for entry in olympiad]
    # 2) Process them as they complete, using tqdm for a progress bar
    for future in as_completed(futures):
        # Get the result for each completed future
        result = future.result()
        if result:
            results.append(result)
        if idx%1000 == 0:
            print(idx)
            with open("olympiad_solutions.json", "w") as f:
                json.dump(results, f, indent=2)
        idx += 1

# Save final list as json
with open("olympiad_solutions.json", "w") as f:
    json.dump(results, f, indent=2)

  from .autonotebook import tqdm as notebook_tqdm


0
1000
2000
3000
Retry due to rate limit:  429 Resource exhausted. Please try again later. Please refer to https://cloud.google.com/vertex-ai/generative-ai/docs/error-code-429 for more details.
Retry due to rate limit:  429 Resource exhausted. Please try again later. Please refer to https://cloud.google.com/vertex-ai/generative-ai/docs/error-code-429 for more details.
Retry due to rate limit:  429 Resource exhausted. Please try again later. Please refer to https://cloud.google.com/vertex-ai/generative-ai/docs/error-code-429 for more details.
4000
5000
There are 5 piers around a circular lake, each with a person, and one of them has a single-person boat. The people on neighboring piers are in a conflict and do not want to meet each other. How can each person move to the next pier in a clockwise direction, given that movement is only allowed on the lake?

1. **Problem Setup**:
    - We have 5 docks arranged in a circle around a lake.
    - At each dock, there is one person.
    - One doc