In [1]:
import re
import numpy as np
import sys
from pathlib import Path

sys.path.append(str(Path("/home/yann/ssd_storage/python/arcprize2025/tests/")))
sys.path.append(str(Path("/home/yann/ssd_storage/python/arcprize2025/sources/")))

from test_dsl_symbolic_executor import TEST_CASES
from core.dsl_symbolic_interpreter import SYMBOL_RULES

In [2]:
ATOMIC_PATTERNS = []
CONDITIONALLY_ATOMIC_PATTERNS_CHECKERS = {}
NON_ATOMIC_PATTERNS = []

for rule_key, rule_definition in SYMBOL_RULES.items():
    sigil = rule_definition.get("sigil")
    pattern = rule_definition.get("pattern")
    nested_commands = rule_definition.get("nested_commands")

    if sigil and not pattern and not nested_commands:
        ATOMIC_PATTERNS.append(re.compile(rf"^{re.escape(sigil)}$"))
        continue

    if not pattern:
        continue

    try:
        compiled_pattern = re.compile(pattern)
    except re.error:
        continue

    if nested_commands is None or nested_commands == {}:
        ATOMIC_PATTERNS.append(compiled_pattern)
    elif rule_key in ["flip_h", "flip_v", "flatten_grid", "extract_bounding_box", "reverse_row"]:
        def make_checker(rule_key_inner):
            def checker(match):
                arg_content = match.group("arg_content") if match.lastindex and match.lastgroup == "arg_content" else None
                return arg_content is None or arg_content.strip() == "⌂"
            return checker
        CONDITIONALLY_ATOMIC_PATTERNS_CHECKERS[compiled_pattern] = make_checker(rule_key)
    else:
        NON_ATOMIC_PATTERNS.append(compiled_pattern)

block_builder_def = SYMBOL_RULES.get("block_grid_builder")
if block_builder_def and "pattern" in block_builder_def:
    try:
        ATOMIC_PATTERNS.append(re.compile(block_builder_def["pattern"]))
    except re.error:
        pass

In [3]:
def is_atomic_rule(rule_str: str) -> bool:
    rule_str = rule_str.strip()

    for non_atomic_pattern in NON_ATOMIC_PATTERNS:
        if non_atomic_pattern.match(rule_str):
            return False

    for atomic_pattern in ATOMIC_PATTERNS:
        if atomic_pattern.match(rule_str):
            return True

    for conditional_pattern, checker in CONDITIONALLY_ATOMIC_PATTERNS_CHECKERS.items():
        match = conditional_pattern.match(rule_str)
        if match and checker(match):
            return True

    return False

In [4]:
atomic_training_data = []

for rule_str, input_grid_np, _ in TEST_CASES:
    if not is_atomic_rule(rule_str):
        continue

    if input_grid_np is not None:
        input_grid_list = input_grid_np.tolist()
    else:
        if "▦(" in rule_str:
             input_grid_list = []
        else:
             continue

    atomic_training_data.append({
        "input_grid": input_grid_list,
        "dsl_rule": rule_str
    })

print(f"Found {len(atomic_training_data)} atomic training examples.")


Found 119 atomic training examples.


In [5]:
import numpy as np
import sys
from pathlib import Path

def execute_dsl_rule_on_grid(input_grid_list: list[list[int]], dsl_rule_str: str) -> np.ndarray:
    
    current_dir = Path.cwd()
    project_root_candidates = [
        current_dir,
        current_dir.parent,
        current_dir.parent.parent
    ]
    
    src_path = None
    for root_candidate in project_root_candidates:
        if (root_candidate / "sources").exists():
            src_path = root_candidate / "sources"
            break
            
    if src_path is None:
        raise FileNotFoundError("Could not find the 'sources' directory. Please ensure your working directory is correctly set relative to 'sources' or manually adjust the 'src_path' variable in this function.")

    if str(src_path) not in sys.path:
        sys.path.append(str(src_path))

    try:
        from core.dsl_symbolic_interpreter import SymbolicRuleParser
        from core.dsl_symbolic_executor import DSLExecutor
    except ImportError as e:
        raise ImportError(f"Failed to import DSL components. Ensure 'core/dsl_symbolic_interpreter.py' and 'core/dsl_symbolic_executor.py' exist in your '{src_path}' directory. Error: {e}")

    input_grid_np = np.array(input_grid_list, dtype=int)
    parser = SymbolicRuleParser()

    try:
        command = parser.parse_rule(dsl_rule_str)
        executor = DSLExecutor(
            root_command=command,
            initial_puzzle_input=input_grid_np,
        )
        result_grid_np = executor.execute_program()
        return result_grid_np
    except Exception as e:
        print(f"Error executing rule '{dsl_rule_str}' on input grid: {e}")
        return np.array([]) 

In [6]:
import numpy as np
import random

def mutate_flip_rule(item: dict, num_variants: int = 3, max_dim: int = 15, num_range: int = 10) -> list[dict]:
    mutated_items = []
    original_grid = np.array(item['input_grid'], dtype=int)
    original_rule = item['dsl_rule']

    for _ in range(num_variants - 1):
        new_grid_candidate = original_grid.copy()

        if random.random() < 0.7:
            new_rows = random.randint(1, max_dim + 1)
            new_cols = random.randint(1, max_dim + 1)
            new_values = np.random.choice(range(1, num_range + 1), size=new_rows * new_cols, replace=True)
            new_grid = new_values.reshape(new_rows, new_cols)
        else:
            while True:
                if original_grid.size > 0:
                    new_values = np.random.choice(range(1, num_range + 1), size=original_grid.size, replace=True)
                    temp_grid = new_values.reshape(original_grid.shape)
                else:
                    temp_grid = np.array([[random.randint(1, num_range)]])
                
                if not np.array_equal(temp_grid, original_grid):
                    new_grid = temp_grid
                    break
        
        if new_grid.size == 0:
            new_grid = np.array([[random.randint(1, num_range)]])

        mutated_items.append({'input_grid': new_grid.tolist(), 'dsl_rule': original_rule})

    return mutated_items

In [7]:
# 1. Sort the list by the first character of the 'dsl_rule'
atomic_training_data.sort(key=lambda item: item['dsl_rule'][0])

# 2. Group the sorted list by the first character of the 'dsl_rule'
from itertools import groupby
grouped_data = groupby(atomic_training_data, key=lambda item: item['dsl_rule'][0])
groups = {key: list(group) for key, group in grouped_data}

In [8]:
outputed_dataset = []

In [9]:
mutation_functions_map = {
    # '↢': lambda item, num_variants, **kwargs: []
    '↔': [mutate_flip_rule, 2],
    '↕': [mutate_flip_rule, 2],
}

for dsl_symbol, initial_rules_list in groups.items():
    mutate_function , num_variants_per_rule = mutation_functions_map.get(dsl_symbol, (None, None))

    if mutate_function is None:
        print(f"Warning: No mutation function defined for DSL symbol '{dsl_symbol}'. Skipping this group.")
        continue

    for rule_input_pair in initial_rules_list:
        mutated_rules: list = mutate_function(rule_input_pair, num_variants=num_variants_per_rule)
    
        rule_input_pair["output_grid"] = execute_dsl_rule_on_grid(
                input_grid_list=rule_input_pair['input_grid'],
                dsl_rule_str=rule_input_pair['dsl_rule']
            ).tolist()
        outputed_dataset.append(rule_input_pair)
        
        for mutated_pair in mutated_rules:
            mutated_pair["output_grid"] = execute_dsl_rule_on_grid(
                input_grid_list=mutated_pair['input_grid'],
                dsl_rule_str=mutated_pair['dsl_rule']
            ).tolist()
            outputed_dataset.append(mutated_pair)

2025-07-25 22:44:51,464 - core.dsl_symbolic_executor - INFO - Executor initialization started.
2025-07-25 22:44:51,467 - core.dsl_symbolic_executor - INFO - Executor initialization complete.
2025-07-25 22:44:51,468 - core.dsl_symbolic_executor - INFO - Starting DSL program execution.
2025-07-25 22:44:51,469 - core.dsl_symbolic_executor - INFO - DSL program execution completed successfully.
2025-07-25 22:44:51,470 - core.dsl_symbolic_executor - INFO - Executor initialization started.
2025-07-25 22:44:51,470 - core.dsl_symbolic_executor - INFO - Executor initialization complete.
2025-07-25 22:44:51,471 - core.dsl_symbolic_executor - INFO - Starting DSL program execution.
2025-07-25 22:44:51,471 - core.dsl_symbolic_executor - INFO - DSL program execution completed successfully.
2025-07-25 22:44:51,473 - core.dsl_symbolic_executor - INFO - Executor initialization started.
2025-07-25 22:44:51,474 - core.dsl_symbolic_executor - INFO - Executor initialization complete.
2025-07-25 22:44:51,474

2025-07-25 22:44:51,493 - core.dsl_symbolic_executor - INFO - Starting DSL program execution.
2025-07-25 22:44:51,494 - core.dsl_symbolic_executor - INFO - DSL program execution completed successfully.
2025-07-25 22:44:51,495 - core.dsl_symbolic_executor - INFO - Executor initialization started.
2025-07-25 22:44:51,498 - core.dsl_symbolic_executor - INFO - Executor initialization complete.
2025-07-25 22:44:51,501 - core.dsl_symbolic_executor - INFO - Starting DSL program execution.
2025-07-25 22:44:51,502 - core.dsl_symbolic_executor - INFO - DSL program execution completed successfully.
2025-07-25 22:44:51,504 - core.dsl_symbolic_executor - INFO - Executor initialization started.
2025-07-25 22:44:51,505 - core.dsl_symbolic_executor - INFO - Executor initialization complete.
2025-07-25 22:44:51,505 - core.dsl_symbolic_executor - INFO - Starting DSL program execution.
2025-07-25 22:44:51,506 - core.dsl_symbolic_executor - INFO - DSL program execution completed successfully.
2025-07-25 2



In [10]:
outputed_dataset

[{'input_grid': [[1]], 'dsl_rule': '↔', 'output_grid': [[1]]},
 {'input_grid': [[7, 4, 2, 5, 1, 6, 10, 2]],
  'dsl_rule': '↔',
  'output_grid': [[2, 10, 6, 1, 5, 2, 4, 7]]},
 {'input_grid': [[1, 2, 3]], 'dsl_rule': '↔', 'output_grid': [[3, 2, 1]]},
 {'input_grid': [[1, 9, 10, 2, 3, 3, 7, 4, 10],
   [8, 7, 6, 1, 7, 7, 2, 4, 5],
   [8, 7, 10, 6, 1, 8, 1, 4, 1],
   [6, 5, 6, 2, 1, 6, 2, 8, 8],
   [2, 1, 5, 4, 10, 2, 7, 2, 7],
   [5, 5, 7, 10, 5, 9, 7, 6, 10],
   [2, 1, 8, 2, 4, 4, 3, 1, 8]],
  'dsl_rule': '↔',
  'output_grid': [[10, 4, 7, 3, 3, 2, 10, 9, 1],
   [5, 4, 2, 7, 7, 1, 6, 7, 8],
   [1, 4, 1, 8, 1, 6, 10, 7, 8],
   [8, 8, 2, 6, 1, 2, 6, 5, 6],
   [7, 2, 7, 2, 10, 4, 5, 1, 2],
   [10, 6, 7, 9, 5, 10, 7, 5, 5],
   [8, 1, 3, 4, 4, 2, 8, 1, 2]]},
 {'input_grid': [[1], [2], [3]],
  'dsl_rule': '↔',
  'output_grid': [[1], [2], [3]]},
 {'input_grid': [[4, 1, 6, 9, 10, 5, 9, 8, 3],
   [3, 1, 1, 1, 7, 2, 4, 10, 1],
   [1, 9, 8, 5, 1, 6, 1, 9, 9],
   [10, 6, 9, 9, 1, 10, 7, 6, 8],
   [5, 

In [11]:
import json
from pprint import pprint
# print(json.dumps(outputed_dataset, indent=4))
pprint(outputed_dataset, width=500)

[{'dsl_rule': '↔', 'input_grid': [[1]], 'output_grid': [[1]]},
 {'dsl_rule': '↔', 'input_grid': [[7, 4, 2, 5, 1, 6, 10, 2]], 'output_grid': [[2, 10, 6, 1, 5, 2, 4, 7]]},
 {'dsl_rule': '↔', 'input_grid': [[1, 2, 3]], 'output_grid': [[3, 2, 1]]},
 {'dsl_rule': '↔', 'input_grid': [[1, 9, 10, 2, 3, 3, 7, 4, 10], [8, 7, 6, 1, 7, 7, 2, 4, 5], [8, 7, 10, 6, 1, 8, 1, 4, 1], [6, 5, 6, 2, 1, 6, 2, 8, 8], [2, 1, 5, 4, 10, 2, 7, 2, 7], [5, 5, 7, 10, 5, 9, 7, 6, 10], [2, 1, 8, 2, 4, 4, 3, 1, 8]], 'output_grid': [[10, 4, 7, 3, 3, 2, 10, 9, 1], [5, 4, 2, 7, 7, 1, 6, 7, 8], [1, 4, 1, 8, 1, 6, 10, 7, 8], [8, 8, 2, 6, 1, 2, 6, 5, 6], [7, 2, 7, 2, 10, 4, 5, 1, 2], [10, 6, 7, 9, 5, 10, 7, 5, 5], [8, 1, 3, 4, 4, 2, 8, 1, 2]]},
 {'dsl_rule': '↔', 'input_grid': [[1], [2], [3]], 'output_grid': [[1], [2], [3]]},
 {'dsl_rule': '↔', 'input_grid': [[4, 1, 6, 9, 10, 5, 9, 8, 3], [3, 1, 1, 1, 7, 2, 4, 10, 1], [1, 9, 8, 5, 1, 6, 1, 9, 9], [10, 6, 9, 9, 1, 10, 7, 6, 8], [5, 7, 9, 8, 3, 6, 10, 4, 1], [3, 3, 2, 9, 6, 5

In [17]:
groups_list = list(groups.items())
groups_list[2]

('⇄',
 [{'input_grid': [[1, 2], [3, 4]], 'dsl_rule': '⇄(II,I)'},
  {'input_grid': [[1, 2], [3, 4], [5, 6]], 'dsl_rule': '⇄(II,III)'},
  {'input_grid': [[1, 2], [3, 4]], 'dsl_rule': '⇄(I,II)'},
  {'input_grid': [[1, 2], [3, 4], [5, 6]], 'dsl_rule': '⇄(I,III)'},
  {'input_grid': [[1, 2], [3, 4]], 'dsl_rule': '⇄(I,I)'}])

In [14]:
groups['↕']

[{'input_grid': [[1]], 'dsl_rule': '↕', 'output_grid': [[1]]},
 {'input_grid': [[1, 2]], 'dsl_rule': '↕', 'output_grid': [[1, 2]]},
 {'input_grid': [[1], [2], [3]],
  'dsl_rule': '↕',
  'output_grid': [[3], [2], [1]]},
 {'input_grid': [[1, 2], [3, 4]],
  'dsl_rule': '↕',
  'output_grid': [[3, 4], [1, 2]]},
 {'input_grid': [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
  'dsl_rule': '↕',
  'output_grid': [[7, 8, 9], [4, 5, 6], [1, 2, 3]]},
 {'input_grid': [[1, 2], [3, 4]],
  'dsl_rule': '↕(⌂)',
  'output_grid': [[3, 4], [1, 2]]},
 {'input_grid': [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
  'dsl_rule': '↕(⌂)',
  'output_grid': [[7, 8, 9], [4, 5, 6], [1, 2, 3]]}]