In [2]:
import sys
import os
from pathlib import Path
from dotenv import load_dotenv

# Load environment variables from .env
load_dotenv()
# Add src folder to path
project_root = Path.cwd().parent  # assumes notebook is in child folder
src_path = project_root / "sources"
sys.path.append(str(src_path))
from core.llm import OpenRouterClient
from assets.symbols import SYMBOL_SETS_JSON

  from .autonotebook import tqdm as notebook_tqdm


In [33]:
import json
from typing import List, Dict


def decode_double_encoded_utf8(s: str) -> str:
    """
    Converts strings like "\\u00e3\\u0082\\u00b1" back to "ケ".
    """
    try:
        # Step 1: Convert the string to bytes via 'latin-1'
        byte_like = s.encode("latin-1")
        # Step 2: Decode as UTF-8
        return byte_like.decode("utf-8")
    except Exception:
        return s  # fallback if decoding fails
    

def prompt_all_glyph_sets(
    glyphs_dict,
    llm,
    operation_types: List[str]
) -> List[Dict]:
    """
    Loops through all glyph sets in SYMBOL_SETS_JSON and asks an LLM to assign 
    likely transformation meanings to each symbol.
    
    Args:
        llm: An initialized LLM object (e.g., OpenRouterClient or ChatOpenAI instance)
        operation_types: List of allowed operation names
    
    Returns:
        A list of dicts with:
            - id
            - description
            - mapping: {symbol: inferred_operation}
    """
    results = []

    # Loop over all glyph sets in the dictionary
    for category in glyphs_dict.keys():
        print(f"Processing category: {category}")
        glyph_sets = glyphs_dict[category]

        for glyph_set in glyph_sets:
            try:
                # Validate structure
                if not isinstance(glyph_set, dict):
                    raise ValueError("Each glyph set must be a dictionary")

                glyph_id = glyph_set.get("id")
                description = glyph_set.get("description", "")
                symbols = glyph_set.get("symbols", [])

                if not glyph_id or not symbols:
                    raise ValueError(f"Invalid glyph set: missing 'id' or 'symbols' in {glyph_id}")

                # Decode any Unicode escape sequences in symbols
                decoded_symbols = [
                    decode_double_encoded_utf8(symbol) if isinstance(symbol, str) else symbol
                    for symbol in symbols
                ]

                prompt = f"""
                You are an expert in visual language and symbolic representation.

                You will be given a set of abstract symbols. Based on their appearance alone, your task is to categorize each symbol.

                Choose if a symbol suggests:
                - **sigil**: an action or movement that describes a transformation within a visual puzzle (e.g., a symbol that implies growth, rotation, or a change of state).
                - **gridglyph**: a static element designed to fit as a component on a grid (e.g., a marker, a fixed label, or a boundary element within a structured layout).

                Symbols: {', '.join(symbols)}

                For each set of symbols, respond with  a score (sum should be 100%):
                "symbolset: gridglyph: 60%, sigil 40%"


                """

                # Call LLM
                response = llm(prompt).strip()
                symbol_to_op = response
                # Parse LLM response into symbol → operation mapping
                # symbol_to_op = {}
                # for line in response.splitlines():
                #     if ":" in line:
                #         symbol, op = line.split(":", 1)
                #         symbol = symbol.strip()
                #         op = op.strip()
                #         if op in operation_types:
                #             symbol_to_op[symbol] = op  # Already using real symbols here

                # Append result
                results.append({
                    "id": glyph_id,
                    "description": description,
                    "mapping": symbol_to_op
                })

            except Exception as e:
                print(f"Error processing glyph set '{glyph_id}': {str(e)}")
                continue

    return results

In [34]:
api_key = os.getenv("OPENROUTER_API_KEY")


llm = OpenRouterClient(
    api_key=api_key,
    model=r"qwen/qwen2.5-vl-32b-instruct",
    infra="deepinfra",
    max_tokens=1200,
    proxy=None,
    temperature=0.0,
    top_p=0.1,
    top_k=1,
    repetition_penalty=1.0,
    stream=False,
    seed=None
)

In [35]:
operation_types = OPERATION_TYPES = [
    "shift_right", "shift_left", "shift_up", "shift_down",
    "flip_h", "flip_v", "swap_rows", "swap_columns",
    "repeat_grid", "alternate", "sequence", "mirror_row", "mirror_column"
]

In [36]:
sample_glyph_set = SYMBOL_SETS_JSON

In [37]:
result = prompt_all_glyph_sets(sample_glyph_set, llm, operation_types)

print(json.dumps(result, indent=2, ensure_ascii=False))

Processing category: grid_glyphs
Processing category: operation_glyphs
[
  {
    "id": "katakana",
    "description": "Clean Japanese syllabary; monospaced, structured, and ideal for pattern recognition.",
    "mapping": "To categorize the given symbols based on their appearance, we will analyze each symbol to determine whether it suggests an action or transformation (sigil) or if it appears as a static element designed for a grid (gridglyph). Here's the breakdown:\n\n### Symbols: ツ, レ, ハ, ア, ヤ, ユ, ヨ, キ, ク, ケ\n\n#### 1. **ツ**\n   - **Appearance**: This symbol resembles a fork or a branching structure.\n   - **Analysis**: It suggests a splitting or diverging action, which implies transformation or movement.\n   - **Category**: Sigil\n\n#### 2. **レ**\n   - **Appearance**: This symbol looks like a rotated \"L\" shape.\n   - **Analysis**: It appears static and could serve as a boundary or marker in a grid.\n   - **Category**: Gridglyph\n\n#### 3. **ハ**\n   - **Appearance**: This symbol res

In [39]:
print(json.dumps(result, indent=2, ensure_ascii=False))

[
  {
    "id": "katakana",
    "description": "Clean Japanese syllabary; monospaced, structured, and ideal for pattern recognition.",
    "mapping": "To categorize the given symbols based on their appearance, we will analyze each symbol to determine whether it suggests an action or transformation (sigil) or if it appears as a static element designed for a grid (gridglyph). Here's the breakdown:\n\n### Symbols: ツ, レ, ハ, ア, ヤ, ユ, ヨ, キ, ク, ケ\n\n#### 1. **ツ**\n   - **Appearance**: This symbol resembles a fork or a branching structure.\n   - **Analysis**: It suggests a splitting or diverging action, which implies transformation or movement.\n   - **Category**: Sigil\n\n#### 2. **レ**\n   - **Appearance**: This symbol looks like a rotated \"L\" shape.\n   - **Analysis**: It appears static and could serve as a boundary or marker in a grid.\n   - **Category**: Gridglyph\n\n#### 3. **ハ**\n   - **Appearance**: This symbol resembles a horizontal line with vertical extensions, like a \"H.\"\n   - 

In [31]:
a = llm("""
You are given several pairs of 5x5 grids. Each pair shows how an input grid transforms into an output grid.

Study the pattern carefully and describe the logical transformation in plain English. Then, apply that same transformation to the final input grid and write out the resulting output grid.

---


Input (flattened):
['イ', 'ロ', 'ハ', 'ニ', 'ホ', 'ヘ', 'ト', 'チ', 'リ', 'ヌ', 'ス', 'セ', 'ソ', 'タ', 'カ', 'キ', 'ク', 'ケ', 'コ', 'サ', 'マ', 'ミ', 'ム', 'メ', 'モ']

Output (flattened):
['イ', 'ロ', 'ハ', 'ニ', 'ホ', 'ロ', 'ハ', 'ニ', 'ホ', 'ヘ', 'ハ', 'ニ', 'ホ', 'ヘ', 'ト', 'ニ', 'ホ', 'ヘ', 'ト', 'チ', 'ホ', 'ヘ', 'ト', 'チ', 'リ']

Apply the same transformation to this input:

Test Input:
[
    ['ア', 'イ', 'ウ', 'エ', 'オ'],
    ['カ', 'キ', 'ク', 'ケ', 'コ'],
    ['サ', 'シ', 'ス', 'セ', 'ソ'],
    ['タ', 'チ', 'ツ', 'テ', 'ト'],
    ['ナ', 'ニ', 'ヌ', 'ネ', 'ノ']
]

Describe the transformation first, then write the transformed output.
                """)

In [32]:
print(a)

### Step 1: Analyze the Transformation Pattern

#### Input Grid:
The input grid is a 5x5 grid of characters. Flattening it gives the sequence:
```
['イ', 'ロ', 'ハ', 'ニ', 'ホ', 'ヘ', 'ト', 'チ', 'リ', 'ヌ', 'ス', 'セ', 'ソ', 'タ', 'カ', 'キ', 'ク', 'ケ', 'コ', 'サ', 'マ', 'ミ', 'ム', 'メ', 'モ']
```

#### Output Grid:
The output grid is also a 5x5 grid of characters. Flattening it gives the sequence:
```
['イ', 'ロ', 'ハ', 'ニ', 'ホ', 'ロ', 'ハ', 'ニ', 'ホ', 'ヘ', 'ハ', 'ニ', 'ホ', 'ヘ', 'ト', 'ニ', 'ホ', 'ヘ', 'ト', 'チ', 'ホ', 'ヘ', 'ト', 'チ', 'リ']
```

#### Observations:
1. **First Row Remains Unchanged**: The first row of the input grid (`['イ', 'ロ', 'ハ', 'ニ', 'ホ']`) is identical in the output grid.
2. **Subsequent Rows Are Shifted**: Each subsequent row in the output grid is a **cyclic shift** of the corresponding row in the input grid.
   - The second row (`['ロ', 'ハ', 'ニ', 'ホ', 'ヘ']`) is a cyclic shift of the second row in the input (`['ヘ', 'ト', 'チ', 'リ', 'ヌ']`).
   - The third row (`['ハ', 'ニ', 'ホ', 'ヘ', 'ト']`) is a cyclic shi

In [11]:
glyph = "\u1310"
print(glyph)

ጐ


In [25]:
print(llm(
prompt = f"""
Below are several examples of input-output pairs. Each input is a list of chess pieces (`♙` = pawn, `♘` = knight), and each output is the result after some kind of transformation.

Your task is to:

1. **Identify the pattern or rule** used in the transformations.
2. **Apply that same rule** to the final input and provide the correct output.

Think carefully about what changes between the inputs and outputs — items may move, shift, rotate, wrap around, or change position based on direction or frequency.
be consitent the transformation is from the input to the output

Could be a combination of moves — try to apply one move at a time and score it, like a chess player evaluating possibilities.

If you're unsure at first, look at multiple examples and compare them to spot the logic.
i want clear explaination, how many pieces move, treat each pieces indidually
---

Here are the transformation examples:

1. ["♙", "♙", "♘", "♙", "♙", "♙"] ➡ ["♙", "♙", "♙", "♘", "♙", "♙"]
2. ["♘", "♙", "♙", "♙", "♙"] ➡ ["♙", "♘", "♙", "♙", "♙"]
3. ["♙", "♘", "♙", "♘", "♙"] ⬅ ["♘", "♙", "♘", "♙", "♙"]
4. ["♙", "♙", "♙", "♘"] ⬅⬅⬅ ["♘", "♙", "♙", "♙"]
5. ["♙", "♘", "♙", "♙", "♘"] ⬅ ["♘", "♙", "♙", "♘", "♙"]
6. ["♙", "♙", "♘", "♙", "♙", "♙", "♙"] ⬅ ?

---

### Task:

Based on the previous patterns, what should replace the `?` in the last line?

👉 Replace the `?` with the correct array in this format:

["♙", "♙", "♘", "♙", "♙", "♙", "♙"] ⬅ ["?", "?", "?", "?", "?", "?", "?"]
"""
))

To solve this problem, let's first identify the pattern in the transformations. We'll analyze each example to see how the pieces move or change positions.

### Analysis of the Examples:

1. **Input:** `["♙", "♙", "♘", "♙", "♙", "♙"]`
   **Output:** `["♙", "♙", "♙", "♘", "♙", "♙"]`
   - The knight (`♘`) moves from the third position to the fourth position.

2. **Input:** `["♘", "♙", "♙", "♙", "♙"]`
   **Output:** `["♙", "♘", "♙", "♙", "♙"]`
   - The knight (`♘`) moves from the first position to the second position.

3. **Input:** `["♙", "♘", "♙", "♘", "♙"]`
   **Output:** `["♘", "♙", "♘", "♙", "♙"]`
   - The knight (`♘`) moves from the second position to the first position.

4. **Input:** `["♙", "♙", "♙", "♘"]`
   **Output:** `["♘", "♙", "♙", "♙"]`
   - The knight (`♘`) moves from the fourth position to the first position.

5. **Input:** `["♙", "♘", "♙", "♙", "♘"]`
   **Output:** `["♘", "♙", "♙", "♘", "♙"]`
   - The knight (`♘`) moves from the second position to the first position.

6. 

In [None]:
print(llm(
prompt = f"""You are a puzzle interpreter that understands Egyptian Hieroglyphs as symbolic actions.

Each hieroglyph represents a transformation rule. Here are some examples:


Example 1:
Input: [ . . # . . . ]
Rule: 𓁘
Output: [ . . . # . . ]

Example 2:
Input: [ # . . . . ]
Rule: 𓁘
Output: [ . # . . . ]

Example 3:
Input: [ . # . # . ]
Rule: 𓁙
Output: [ # . # . . ]

Now solve this puzzle:
Input: [ . . # . . . . ]
Rules: 𓁘 𓁘
What is the output?
"""
))

To solve the puzzle, we need to apply the given rules sequentially to the input array. The rules provided are:

- 𓁘 (8): Shift Right
- 𓁘 (8): Shift Right

Let's start with the initial input and apply each rule step by step.

### Initial Input:
\[ [ . . # . . . . ] \]

### Step 1: Apply the first rule (Shift Right)
When we shift right, each element moves one position to the right, and the last element wraps around to the first position.

\[ [ . . # . . . . ] \rightarrow [ . . . # . . . ] \]

### Step 2: Apply the second rule (Shift Right)
Now we take the result from the first step and apply another shift right.

\[ [ . . . # . . . ] \rightarrow [ . . . . # . . ] \]

### Final Output:
After applying both shift right rules, the final output is:

\[ [ . . . . # . . ] \]


In [14]:
print(f"""

[ . . 🟦 . . . ]
Rule: ⇝
[ . . . 🟦 . . ] 

[ 🟦 . . . . ]
Rule: ⇝
[ . 🟦 . . . ] 

[ . 🟦 . 🟦 . ]
Rule: ⇜
[ 🟦 . 🟦 . . ] 

[ . . . 🟦 ]
Rule: ⇜⇜⇜
[ 🟦 . . . ] 

[ . 🟦 . . 🟦 ]
Rule: ⇜
[ 🟦 . . 🟦 . ] 

[ . . 🟦 . . . . ]
Rules: 𓁘 𓁘⇜
? 
"""

)



[ . . 🟦 . . . ]
Rule: ⇝
[ . . . 🟦 . . ] 

[ 🟦 . . . . ]
Rule: ⇝
[ . 🟦 . . . ] 

[ . 🟦 . 🟦 . ]
Rule: ⇜
[ 🟦 . 🟦 . . ] 

[ . . . 🟦 ]
Rule: ⇜⇜⇜
[ 🟦 . . . ] 

[ . 🟦 . . 🟦 ]
Rule: ⇜
[ 🟦 . . 🟦 . ] 

[ . . 🟦 . . . . ]
Rules: 𓁘 𓁘⇜
? 



In [52]:
import json

# Define the CORRECT Unicode range for Indus Valley Script
INDUS_VALLEY_SCRIPT_START = 0x110D0
INDUS_VALLEY_SCRIPT_END = 0x110FF # This covers the entire assigned Indus Valley Script block

# Configuration for the sets
CHARS_PER_SET = 10  # How many symbols in each set
NUM_SETS_TO_CREATE = 4 # How many sets you want

# Generate a master list of all valid Indus Valley Script characters
all_valid_indus_chars = []
for code_point in range(INDUS_VALLEY_SCRIPT_START, INDUS_VALLEY_SCRIPT_END + 1):
    try:
        char = chr(code_point)
        all_valid_indus_chars.append(char)
    except ValueError:
        pass # Skip unassigned/invalid code points

# Dynamically build the sets
indus_script_sets_dynamic = []
current_char_index = 0

for i in range(NUM_SETS_TO_CREATE):
    end_index = current_char_index + CHARS_PER_SET
    current_set_chars = all_valid_indus_chars[current_char_index : min(end_index, len(all_valid_indus_chars))]

    if not current_set_chars:
        break

    set_id = f"indus_script_set_dynamic_{i + 1}"
    description = f"Dynamically generated set {i + 1} of {len(current_set_chars)} Indus Valley Script symbols."
    
    indus_script_sets_dynamic.append({
        "id": set_id,
        "description": description,
        "symbols": current_set_chars
    })
    
    current_char_index = end_index

# Print the dynamically built sets
print("--- Dynamically Built Indus Valley Script Symbol Sets ---")

for set_data in indus_script_sets_dynamic:
    print(f"\nID: {set_data['id']}")
    print(f"Description: {set_data['description']}")
    print("Symbols:")
    print(" ".join(set_data['symbols']))

print("\n--- End of Dynamically Built Symbol Sets ---")

--- Dynamically Built Indus Valley Script Symbol Sets ---

ID: indus_script_set_dynamic_1
Description: Dynamically generated set 1 of 10 Indus Valley Script symbols.
Symbols:
𑃐 𑃑 𑃒 𑃓 𑃔 𑃕 𑃖 𑃗 𑃘 𑃙

ID: indus_script_set_dynamic_2
Description: Dynamically generated set 2 of 10 Indus Valley Script symbols.
Symbols:
𑃚 𑃛 𑃜 𑃝 𑃞 𑃟 𑃠 𑃡 𑃢 𑃣

ID: indus_script_set_dynamic_3
Description: Dynamically generated set 3 of 10 Indus Valley Script symbols.
Symbols:
𑃤 𑃥 𑃦 𑃧 𑃨 𑃩 𑃪 𑃫 𑃬 𑃭

ID: indus_script_set_dynamic_4
Description: Dynamically generated set 4 of 10 Indus Valley Script symbols.
Symbols:
𑃮 𑃯 𑃰 𑃱 𑃲 𑃳 𑃴 𑃵 𑃶 𑃷

--- End of Dynamically Built Symbol Sets ---


In [53]:



SyntaxError: invalid non-printable character U+E1B3 (775235539.py, line 1)