# Omni-Math Preprocessing

## Filter for non-proof problems.

In [1]:
import json
with open('../train/omni_math.json', 'r') as f:
    omni = json.load(f)
len(omni)

4422

In [2]:
# Filter for non-proof problems.
from copy import deepcopy
from rllm.utils import call_gemini_llm
from rllm.system_prompts import FILTER_PROOF_PROMPT
from tqdm import tqdm
from concurrent.futures import ProcessPoolExecutor, as_completed
from pprint import pprint

def filter_proofs(idx, entry):
    # 1) Get the problem text
    problem_text = entry['problem']
    solution_text = entry['answer']
    # 2) Call Gemini LLM
    output_str = call_gemini_llm(f'Problem: {problem_text} \n\n Answer: {solution_text}', 
                                 system_prompt=FILTER_PROOF_PROMPT, temperature=0.8, n=4)
    if not output_str:
        return idx, entry
    for output in output_str:
        if '[[1]]' in output:
            return idx, entry
    pprint(problem_text)
    pprint(solution_text)
    pprint(output_str[0])
    return idx, {}

data = deepcopy(omni)

with ProcessPoolExecutor(max_workers=32) as executor:
    # 1) Submit all jobs to the executor
    futures = [executor.submit(filter_proofs, f_idx, entry) for f_idx, entry in enumerate(data)]

# 2) Process them as they complete, using tqdm for a progress bar
for future in tqdm(as_completed(futures), total=len(futures), desc="Processing entries"):
    # Get the result for each completed future
    idx, result = future.result()
    data[idx] = result
data = [d for d in data if d]
# Save final list as json
with open("omni_math.json", "w") as f:
    json.dump(data, f, indent=2)

  from .autonotebook import tqdm as notebook_tqdm


('A physicist encounters $2015$ atoms called usamons. Each usamon either has '
 "one electron or zero electrons, and the physicist can't tell the "
 "difference.  The physicist's only tool is a diode. The physicist may connect "
 'the diode from any usamon $A$ to any other usamon $B$. (This connection is '
 'directed.) When she does so, if usamon $A$ has an electron and usamon $B$ '
 'does not, then the electron jumps from $A$ to $B$. In any other case, '
 'nothing happens. In addition, the physicist cannot tell whether an electron '
 "jumps during any given step.  The physicist's goal is to isolate two usamons "
 'that she is  sure are currently in the same state. Is there any series of '
 'diode usage that makes this possible?')
'\\text{No}'
('The problem asks whether there exists a series of diode usage that allows '
 'the physicist to isolate two usamons in the same state. The answer provided '
 'is "No." This indicates that the problem is asking for a proof to a yes/no '
 'questio

Processing entries: 100%|██████████| 4422/4422 [00:00<00:00, 150473.49it/s]


In [5]:
len(data)

4158