In [1]:
# Base number of BM25 matches; the retrieval cell asks for this many plus 100.
K_CLOSEST_MATCHES_RAW = 100
# Path of the JSONL file the final records are written to.
OUTPUT_FILE = "../mod_microskills/val_dataset.jsonl"
# Answer of the selected AIME problem (1993, problem 6); generated problems
# are filtered against this value before the AIME problem itself is appended.
GOLDEN_ANSWER = 495
# Target number of generated problems; used to derive the per-generator count.
TOTAL_PROBLEMS_DESIRED = 30

In [2]:
import json
import os
import bm25s
from datasets import load_dataset, Dataset
import pandas as pd

In [3]:
def get_di_AIME_1983_2024_questions(split="train", num_samples=None) -> Dataset:
    """Load the 'di-zhang-fdu/AIME_1983_2024' dataset, optionally subsampled.

    Args:
        split: Dataset split passed to ``load_dataset``.
        num_samples: When not None, the dataset is shuffled with a fixed
            seed (42) and truncated to at most this many rows.

    Returns:
        The loaded (and possibly subsampled) dataset.
    """
    questions = load_dataset('di-zhang-fdu/AIME_1983_2024', split=split)

    # Nothing to subsample: hand back the full split.
    if num_samples is None:
        return questions

    # Never request more rows than the split actually holds.
    keep = min(num_samples, len(questions))
    shuffled = questions.shuffle(seed=42)
    return shuffled.select(range(keep))

In [4]:
# Load the whole dataset (default "train" split, no subsampling).
data_raw = get_di_AIME_1983_2024_questions()
# data_raw = get_di_AIME_1983_2024_questions(num_samples=10) # Debug

In [5]:
# Keep only the records from before 2024; these feed the search index built later.
data_1983_2023 = list(filter(lambda record: record['Year'] < 2024, data_raw))

In [6]:
# Take the first 2024 record with problem number 2 and show its question text.
records_2024_p2 = [
    record
    for record in data_raw
    if record['Year'] == 2024 and record['Problem Number'] == 2
]
aime_2024_median_mode_question = records_2024_p2[0]['Question']
print(aime_2024_median_mode_question)

A list of positive integers has the following properties: $\bullet$ The sum of the items in the list is $30$ . $\bullet$ The unique mode of the list is $9$ . $\bullet$ The median of the list is a positive integer that does not appear in the list itself. Find the sum of the squares of all the items in the list.


### Index Records

In [7]:
# Key each pre-2024 record by its position; every value is a shallow copy of the record.
data = dict(enumerate(dict(record) for record in data_1983_2023))
data[0]

{'ID': '1983-1',
 'Year': 1983,
 'Problem Number': 1,
 'Question': 'Let $x$ , $y$ and $z$ all exceed $1$ and let $w$ be a positive number such that $\\log_xw=24$ , $\\log_y w = 40$ and $\\log_{xyz}w=12$ . Find $\\log_zw$ .',
 'Answer': '60',
 'Part': None}

### Toy BM25 Search

In [8]:
### Toy BM25 Search
# Minimal end-to-end example of the bm25s index/retrieve calls on a four-sentence corpus.

# Create your corpus here
corpus = [
    "a cat is a feline and likes to purr",
    "a dog is the human's best friend and loves to play",
    "a bird is a beautiful animal that can fly",
    "a fish is a creature that lives in water and swims",
]

# Pair every document with its position so retrieval can report ids instead of text.
corpus_with_ids = list(enumerate(corpus))
for idx_and_doc in corpus_with_ids:
    print(idx_and_doc)
corpus_ids = [idx for idx, _ in corpus_with_ids]

# Create the BM25 model and index the corpus
retriever = bm25s.BM25(corpus=corpus, method="robertson")  # With original method from Robertson.
retriever.index(bm25s.tokenize(corpus))

# Query the corpus and get top-k results
query = "does the fish purr like a cat?"
tokenized_toy_query = bm25s.tokenize(query)
results, scores = retriever.retrieve(tokenized_toy_query, k=2, corpus=corpus_ids)

# Let's see what we got!
print(f"\nquery: {query}")
for score, result in zip(scores[0], results[0]):
    print(f"{score:.4f}: {result}")

(0, 'a cat is a feline and likes to purr')
(1, "a dog is the human's best friend and loves to play")
(2, 'a bird is a beautiful animal that can fly')
(3, 'a fish is a creature that lives in water and swims')


Split strings:   0%|          | 0/4 [00:00<?, ?it/s]

BM25S Count Tokens:   0%|          | 0/4 [00:00<?, ?it/s]

BM25S Compute Scores:   0%|          | 0/4 [00:00<?, ?it/s]

Split strings:   0%|          | 0/1 [00:00<?, ?it/s]

BM25S Retrieve:   0%|          | 0/1 [00:00<?, ?it/s]


query: does the fish purr like a cat?
0.7449: 0
0.3389: 3


### Actual BM25 Search

In [9]:
# Build the search corpus from the indexed pre-2024 records: the question texts
# are what gets indexed, and the keys of `data` serve as the document ids.
corpus_ids = list(data.keys())
corpus_docs = [record['Question'] for record in data.values()]

# Create the BM25 model and index the corpus
retriever = bm25s.BM25(corpus=corpus_docs, method="robertson")  # With original method from Robertson.
tokenized_corpus = bm25s.tokenize(corpus_docs)
retriever.index(tokenized_corpus)

Split strings:   0%|          | 0/919 [00:00<?, ?it/s]

BM25S Count Tokens:   0%|          | 0/919 [00:00<?, ?it/s]

BM25S Compute Scores:   0%|          | 0/919 [00:00<?, ?it/s]

In [10]:
# Test
query = 'integer sum' # aime_2024_median_mode_question
# gold_answer = '236' # to filter out anything with same answer.
tokenized_query = bm25s.tokenize(query)

results, scores = retriever.retrieve(
    tokenized_query,
    k=(K_CLOSEST_MATCHES_RAW + 100),
    corpus=corpus_ids,
    show_progress=False,
)

# Let's see what we got!
print(f"\nquery: {query}\n")
# Only the first 51 hits are printed (same cutoff as a counter that stops once it exceeds 50).
for score, result in zip(scores[0][:51], results[0][:51]):
    hit = data[result]
    print(f"{score:.4f}: {hit['Question']} (solution:{hit['Answer']}, year:{hit['Year']}, part:{hit['Part']}, problem number:{hit['Problem Number']})\n")

Split strings:   0%|          | 0/1 [00:00<?, ?it/s]


query: integer sum

2.2089: For any positive integer $a,$ $\sigma(a)$ denotes the sum of the positive integer divisors of $a$ . Let $n$ be the least positive integer such that $\sigma(a^n)-1$ is divisible by $2021$ for all positive integers $a$ . Find the sum of the prime factors in the prime factorization of $n$ . (solution:125, year:2021, part:I, problem number:14)

2.1416: What is the smallest positive integer than can be expressed as the sum of nine consecutive integers, the sum of ten consecutive integers, and the sum of eleven consecutive integers?  (solution:495, year:1993, part:None, problem number:6)

2.1049: What is the largest positive integer that is not the sum of a positive integral multiple of 42 and a positive composite integer? (solution:215, year:1995, part:None, problem number:10)

2.0730: For positive integer $n$ , let $s(n)$ denote the sum of the digits of $n$ .  Find the smallest positive integer satisfying $s(n) = s(n+864) = 20$ . (solution:695, year:2015, part:

### Selected Question

In [11]:
# (solution:495, year:1993, part:None, problem number:6)
# First record from 1993 with problem number 6.
records_1993_p6 = [r for r in data_raw if r['Year'] == 1993 and r['Problem Number'] == 6]
aime_p = records_1993_p6[0]

# Instruction appended after every problem statement.
SYSTEM_PROMPT = "Let's think step by step and output the final answer within \\boxed{}."

# Single-element list holding the selected problem in the problem/answer record format.
aime_ps = [
    dict(
        problem=f"{aime_p['Question']} {SYSTEM_PROMPT}",
        answer=str(aime_p['Answer']),
    )
]
aime_ps

[{'problem': "What is the smallest positive integer than can be expressed as the sum of nine consecutive integers, the sum of ten consecutive integers, and the sum of eleven consecutive integers?  Let's think step by step and output the final answer within \\boxed{}.",
  'answer': '495'}]

### Extra similar problems with different numbers

In [12]:
import random
import math
import re

# Mapping for numbers between 2 and 50
number_words = {
    2: "two", 3: "three", 4: "four", 5: "five", 6: "six", 7: "seven",
    8: "eight", 9: "nine", 10: "ten", 11: "eleven", 12: "twelve",
    13: "thirteen", 14: "fourteen", 15: "fifteen", 16: "sixteen",
    17: "seventeen", 18: "eighteen", 19: "nineteen", 20: "twenty",
    21: "twenty-one", 22: "twenty-two", 23: "twenty-three", 24: "twenty-four",
    25: "twenty-five", 26: "twenty-six", 27: "twenty-seven", 28: "twenty-eight",
    29: "twenty-nine", 30: "thirty", 31: "thirty-one", 32: "thirty-two",
    33: "thirty-three", 34: "thirty-four", 35: "thirty-five", 36: "thirty-six",
    37: "thirty-seven", 38: "thirty-eight", 39: "thirty-nine", 40: "forty",
    41: "forty-one", 42: "forty-two", 43: "forty-three", 44: "forty-four",
    45: "forty-five", 46: "forty-six", 47: "forty-seven", 48: "forty-eight",
    49: "forty-nine", 50: "fifty"
}

# Inverse mapping from word to int
word_to_int = {v: k for k, v in number_words.items()}


def _consecutive_sums_statement(k_list):
    """Build the question text for the given k values, in the order given."""
    parts = [
        f"the sum of {number_words.get(k, str(k))} consecutive integers"
        for k in k_list
    ]
    if len(parts) == 1:
        statement_body = parts[0]
    elif len(parts) == 2:
        # Two items take a plain "and"; the serial comma is only used for 3+ items.
        statement_body = f"{parts[0]} and {parts[1]}"
    else:
        statement_body = ", ".join(parts[:-1]) + ", and " + parts[-1]
    return f"What is the smallest positive integer that can be expressed as {statement_body}?"


def _smallest_consecutive_sum_solution(k_list):
    """Return the smallest positive n that is a sum of k consecutive integers
    for every k in k_list, or None when the congruences contradict each other."""
    # For each k, the condition is n ≡ (k*(k-1)//2) (mod k)
    congruences = [((k * (k - 1) // 2) % k, k) for k in k_list]

    # A safe upper bound for the search is the product of the moduli: the
    # solutions (if any) repeat with period lcm(k_list), which divides it.
    upper_bound = 1
    for _, k in congruences:
        upper_bound *= k

    # Only numbers matching the congruence of the largest modulus can be
    # solutions, so step through exactly those instead of every integer.
    r_max, k_max = max(congruences, key=lambda rk: rk[1])
    first_candidate = r_max if r_max > 0 else k_max
    for n in range(first_candidate, upper_bound + 1, k_max):
        if all(n % k == r for r, k in congruences):
            return n
    return None


def generate_problem_consecutive_sums(k_values=None, num_conditions=None, min_k=2, max_k=50, max_attempts=1000):
    """
    Generates a problem of the form:

      "What is the smallest positive integer that can be expressed as the sum of
       k1 consecutive integers, the sum of k2 consecutive integers, ..., and
       the sum of kn consecutive integers?"

    For each k, an integer n can be expressed as the sum of k consecutive integers
      n = k*a + k*(k-1)//2    for some integer a,
    which is equivalent to:
      n ≡ (k*(k-1)//2) (mod k).

    Args:
      k_values: Optional explicit collection of positive k values. They are used
        as given (order preserved, no resampling).
      num_conditions: Number of k values to sample when k_values is None. If it
        is None as well, 2 or 3 is picked at random once per call.
      min_k, max_k: Inclusive range the random k values are sampled from
        (distinct values, sorted in ascending order).
      max_attempts: Maximum number of random k sets to try. Some sets have
        contradictory congruences (and hence no solution), so sampling is
        repeated until a solvable set is found. Only applies to random sampling.

    Returns:
      tuple: (problem_statement, solution)

    Raises:
      ValueError: if k_values is given but is empty, contains a value below 1,
        or describes congruences that no integer satisfies.
      RuntimeError: if no solvable random set is found within max_attempts.
    """
    if k_values is not None:
        fixed_k = list(k_values)
        if not fixed_k:
            raise ValueError("k_values must contain at least one value.")
        if any(k < 1 for k in fixed_k):
            raise ValueError("Every value in k_values must be a positive integer.")
        solution = _smallest_consecutive_sum_solution(fixed_k)
        if solution is None:
            # Retrying cannot help: the same k values give the same contradiction.
            raise ValueError(
                "No integer is a sum of consecutive integers for every k in {}.".format(fixed_k)
            )
        return _consecutive_sums_statement(fixed_k), solution

    if num_conditions is None:
        num_conditions = random.choice([2, 3])
    available = list(range(min_k, max_k + 1))

    for _ in range(max_attempts):
        k_values_candidate = sorted(random.sample(available, num_conditions))
        solution = _smallest_consecutive_sum_solution(k_values_candidate)
        if solution is not None:
            return _consecutive_sums_statement(k_values_candidate), solution

    raise RuntimeError("Unable to find a valid problem after {} attempts.".format(max_attempts))

def test_generate_problem_consecutive_sums(num_tests=100):
    """
    Tests the generate_problem_consecutive_sums function using num_tests random cases.

    For each generated problem it extracts the k values from the problem statement
    using a regular expression (which matches written-out numbers as well as
    digits) and verifies that:
      * the solution satisfies  n ≡ (k*(k-1)//2) (mod k)  for every k, and
      * the solution is positive and no larger than lcm(k values). The solutions
        of the combined congruences repeat with exactly that period, so a
        solution inside [1, lcm] is necessarily the smallest positive one.

    Args:
      num_tests: Number of random problems to generate and check.

    Raises:
      AssertionError: if any generated problem fails one of the checks.
    """
    # Regex to capture number words in the phrase "the sum of {word} consecutive integers"
    pattern = r"the sum of (\w+(?:-\w+)?) consecutive integers"
    for _ in range(num_tests):
        problem, solution = generate_problem_consecutive_sums()
        k_values = [
            word_to_int[word] if word in word_to_int else int(word)
            for word in re.findall(pattern, problem)
        ]
        assert k_values, f"No k values could be parsed from problem: {problem}"

        period = 1  # running lcm of the k values
        for k in k_values:
            r = (k * (k - 1) // 2) % k
            assert solution % k == r, f"Test failed for problem: {problem}\nSolution: {solution}\nk: {k}, expected remainder {r}, got {solution % k}"
            period = period * k // math.gcd(period, k)

        assert 0 < solution <= period, (
            f"Solution is not the smallest positive one for problem: {problem}\n"
            f"Solution: {solution}, solutions repeat every {period}"
        )
    print(f"All {num_tests} consecutive sum tests passed!")

# --- Example usage: Generate and print a few sample problems ---
for _ in range(10):
    problem, solution = generate_problem_consecutive_sums()
    print("Problem:", problem)
    print("Solution:", solution, "\n")

# --- Run tests ---
test_generate_problem_consecutive_sums(num_tests=1000)

# Add to list of problem generators (if one is used)
try:
    problem_gen_functions
except NameError:
    problem_gen_functions = []
# Re-running this cell redefines the generator as a new function object. Drop
# any earlier registration with the same name first, so the generator is
# registered exactly once no matter how often the cell is executed.
problem_gen_functions = [
    gen for gen in problem_gen_functions
    if getattr(gen, "__name__", None) != generate_problem_consecutive_sums.__name__
]
problem_gen_functions.append(generate_problem_consecutive_sums)
print("Total number of problem generators:", len(problem_gen_functions))


Problem: What is the smallest positive integer that can be expressed as the sum of seven consecutive integers, and the sum of thirty-one consecutive integers?
Solution: 217 

Problem: What is the smallest positive integer that can be expressed as the sum of forty-one consecutive integers, and the sum of forty-eight consecutive integers?
Solution: 984 

Problem: What is the smallest positive integer that can be expressed as the sum of twenty consecutive integers, the sum of thirty-six consecutive integers, and the sum of thirty-seven consecutive integers?
Solution: 3330 

Problem: What is the smallest positive integer that can be expressed as the sum of eleven consecutive integers, the sum of seventeen consecutive integers, and the sum of thirty-three consecutive integers?
Solution: 561 

Problem: What is the smallest positive integer that can be expressed as the sum of sixteen consecutive integers, and the sum of forty-seven consecutive integers?
Solution: 376 

Problem: What is the sm

In [13]:
# Number of problems requested from each registered generator.
cnt_per_gen = int(TOTAL_PROBLEMS_DESIRED / len(problem_gen_functions)) + 1
print(f"len(problem_generators): {len(problem_gen_functions)}")
print(f"cnt_per_gen: {cnt_per_gen}")

# Instruction appended after every problem statement.
SYSTEM_PROMPT = "Let's think step by step and output the final answer within \\boxed{}."

# Call every generator cnt_per_gen times (round-robin) and store each
# (statement, solution) pair as a problem/answer record with a string answer.
tweak_dataset = []
for _ in range(cnt_per_gen):
    for generate in problem_gen_functions:
        generated = generate()
        tweak_dataset.append(
            {
                'problem': f"{generated[0]} {SYSTEM_PROMPT}",
                'answer': str(generated[1]),
            }
        )

print(len(tweak_dataset))
# tweak_dataset
# tweak_dataset

len(problem_generators): 1
cnt_per_gen: 31
31


In [14]:
# Drop generated problems whose answer equals the golden answer. The 'answer'
# field holds strings while GOLDEN_ANSWER is an int, so the constant must be
# converted before comparing; a str-vs-int comparison is always unequal and
# would filter nothing.
tweak_dataset = [p for p in tweak_dataset if p['answer'] != str(GOLDEN_ANSWER)]
len(tweak_dataset)

31

In [15]:
# Append the selected AIME problem. Entries identical to it are removed first,
# so re-running this cell does not add the same problem a second time.
tweak_dataset = [p for p in tweak_dataset if p not in aime_ps] + aime_ps
len(tweak_dataset)

32

In [16]:
# tweak_dataset

In [17]:
# Make sure the target directory is there (skipped when the path has no directory part).
output_dir = os.path.dirname(OUTPUT_FILE)
if output_dir.strip():
    os.makedirs(output_dir, exist_ok=True)

# Write the records as JSON Lines: one JSON object per line.
with open(OUTPUT_FILE, 'w') as jsonl_file:
    jsonl_file.writelines(json.dumps(record) + '\n' for record in tweak_dataset)

print(f"Saved {len(tweak_dataset)} records to {OUTPUT_FILE}")

Saved 32 records to ../mod_microskills/val_dataset.jsonl
