# First steps with o1

## Goal

Let's see if OpenAI's new o1 model can write Python code to solve the challenges.

## Imports

In [None]:
import sys
import os
import glob
import json
import random
from itertools import islice
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
from tqdm.auto import tqdm

# add path to python path
sys.path.append(os.path.realpath('../scripts/'))


from arc24.data import load_arc_data_with_solutions
from evaluation import plot_grids, plot_task, plot_grid

# Create and immediately close an empty figure before changing the defaults.
# NOTE(review): presumably a workaround so the notebook backend is initialised
# before the rcParams below are set — confirm it is still needed.
plt.plot()
plt.close('all')
# Notebook-wide plotting defaults: wide and short figures, thick lines, large font.
plt.rcParams["figure.figsize"] = (25, 4)
mpl.rcParams['lines.linewidth'] = 3
mpl.rcParams['font.size'] = 16

## PQA



https://github.com/neoneye/arc-dataset-collection/tree/main/dataset/PQA

### Visualize tasks from the same gestalt law

In [None]:
def visualize_tasks_from_same_gestalt_law(folder, n=5):
    """Plot up to `n` distinct, randomly chosen task files from `folder`.

    Every ``*.json`` file directly inside `folder` is a candidate. Each chosen
    file is loaded, drawn with `plot_task` and titled with its file name
    (without extension).

    Args:
        folder: Directory that contains the task JSON files.
        n: Maximum number of tasks to show. Fewer are shown when the folder
            holds fewer than `n` files; nothing is shown for an empty folder.
    """
    filepaths = sorted(glob.glob(os.path.join(folder, '*.json')))
    # Sample without replacement so the same task is never plotted twice, and
    # cap k so that small or empty folders do not raise.
    k = max(0, min(n, len(filepaths)))
    filepaths = random.sample(filepaths, k=k)
    for filepath in filepaths:
        with open(filepath, 'r') as f:
            task = json.load(f)
        plot_task(task)
        plt.suptitle(os.path.splitext(os.path.basename(filepath))[0])
        plt.show()

In [None]:
# Walk every entry of the PQA dataset directory and show a few random tasks
# from each one, printing the entry name first.
for folder in sorted(glob.glob('/mnt/hdd0/Kaggle/arc24/data/PQA/PQA-dataset_10k/pqa-dataset/*')):
    print(os.path.basename(folder))
    visualize_tasks_from_same_gestalt_law(folder, n=5)
    # Blank lines to visually separate the output of consecutive folders.
    print('\n'*5)

I don't see any difference between tasks of the same folder. We only have 7 different tasks, so we could group all the tasks of each folder together, in the same style as I do with RE-ARC.

All the tasks have 6 train samples and 3 test.

### Group all tasks together

In [None]:
def group_pqa_tasks(folder, max_tasks_per_category=100):
    """Merge the task files of every category folder into one task per category.

    For each entry of `folder`, the first `max_tasks_per_category` JSON files
    (in sorted order) are read and both their ``train`` and ``test`` samples
    are appended to a single ``train`` list.

    Args:
        folder: Directory whose entries are the category folders.
        max_tasks_per_category: Maximum number of JSON files read per category.

    Returns:
        Dict mapping the category name to ``{'train': [...], 'n_train': 6}``.
    """
    grouped = {}
    category_dirs = sorted(glob.glob(os.path.join(folder, '*')))
    for category_dir in tqdm(category_dirs):
        category = os.path.basename(category_dir)
        samples = []
        grouped[category] = {'train': samples, 'n_train': 6}
        json_paths = sorted(glob.glob(os.path.join(category_dir, '*.json')))
        for json_path in islice(json_paths, max_tasks_per_category):
            with open(json_path, 'r') as handle:
                content = json.load(handle)
            # Train samples first, then test samples, all in the same list.
            for split in ('train', 'test'):
                samples.extend(content[split])
        print(f'{category} tasks: {len(samples)}')
    return grouped

In [None]:
# Smaller variant: read at most 100 files per category and save the grouped
# tasks as a single JSON file in the parent directory of the dataset.
tasks = group_pqa_tasks('/mnt/hdd0/Kaggle/arc24/data/PQA/PQA-dataset_10k/pqa-dataset', max_tasks_per_category=100)
with open('/mnt/hdd0/Kaggle/arc24/data/PQA/PQA-dataset_10k/pqa-dataset-1k.json', 'w') as f:
    json.dump(tasks, f)

In [None]:
# Larger variant: read at most 1000 files per category. This rebinds the
# notebook-level `tasks` variable to the larger grouping.
tasks = group_pqa_tasks('/mnt/hdd0/Kaggle/arc24/data/PQA/PQA-dataset_10k/pqa-dataset', max_tasks_per_category=1000)
with open('/mnt/hdd0/Kaggle/arc24/data/PQA/PQA-dataset_10k/pqa-dataset-10k.json', 'w') as f:
    json.dump(tasks, f)

In [None]:
# Sanity check on whichever `tasks` dict is currently in the notebook
# namespace: every value of every input and output grid must lie in [0, 9].
for task in tasks.values():
    for sample in tqdm(task['train']):
        assert np.min(sample['input']) >= 0
        assert np.min(sample['output']) >= 0
        assert np.max(sample['input']) <= 9
        assert np.max(sample['output']) <= 9

This proves that there are no weird colors: all the grid values are between 0 and 9.

## Extra kaggle datasets

In [None]:
def load_extra_kaggle_datasets():
    """Load every JSON file found one level below the extra kaggle data folder.

    Returns:
        Dict mapping each file name (without extension) to its parsed JSON
        content. Files are visited in sorted path order, so when two files
        share a name the later one replaces the earlier one.
    """
    pattern = '/mnt/hdd0/Kaggle/arc24/data/kaggle/*/*.json'
    loaded = {}
    for json_path in sorted(glob.glob(pattern)):
        task_id, _ = os.path.splitext(os.path.basename(json_path))
        with open(json_path, 'r') as handle:
            loaded[task_id] = json.load(handle)
    return loaded

In [None]:
# Rebind the notebook-level `tasks` to the extra kaggle tasks, keyed by file name.
tasks = load_extra_kaggle_datasets()

In [None]:
# Draw every loaded task, using its id as the figure title.
for task_id, task in tasks.items():
    plot_task(task)
    plt.suptitle(task_id)
    plt.show()

In [None]:
# Save all the tasks in a single file. The file is written directly inside
# the kaggle folder, one level above the `*/*.json` files that
# load_extra_kaggle_datasets reads, so it is not picked up on a re-run.
with open('/mnt/hdd0/Kaggle/arc24/data/kaggle/kaggle.json', 'w') as f:
    json.dump(tasks, f)

## Neoneye datasets

### ARC dataset tama

https://github.com/neoneye/arc-dataset-tama/tree/main

#### Visualize tasks from the same folder

In [None]:
def visualize_tasks_from_same_folder(folder, n=5):
    """Plot up to `n` distinct, randomly chosen task files from `folder`.

    Every ``*.json`` file directly inside `folder` is a candidate. Each chosen
    file is loaded, drawn with `plot_task` and titled with its file name
    (without extension).

    Args:
        folder: Directory that contains the task JSON files.
        n: Maximum number of tasks to show. Fewer are shown when the folder
            holds fewer than `n` files; nothing is shown for an empty folder.
    """
    filepaths = sorted(glob.glob(os.path.join(folder, '*.json')))
    # Sample without replacement so the same task is never plotted twice, and
    # cap k so that small or empty folders do not raise.
    k = max(0, min(n, len(filepaths)))
    filepaths = random.sample(filepaths, k=k)
    for filepath in filepaths:
        with open(filepath, 'r') as f:
            task = json.load(f)
        plot_task(task)
        plt.suptitle(os.path.splitext(os.path.basename(filepath))[0])
        plt.show()

In [None]:
# Walk every entry of the tama dataset directory and show a few random tasks
# from each one, printing the entry name first.
for folder in sorted(glob.glob('/mnt/hdd0/Kaggle/arc24/data/arc-dataset-tama/dataset/*')):
    print(os.path.basename(folder))
    visualize_tasks_from_same_folder(folder, n=5)
    # Blank lines to visually separate the output of consecutive folders.
    print('\n'*5)

The tasks are repeated, but they use different colors, so I cannot merge them into a single task. Instead, I'm thinking of grouping them in a list and selecting a random element from the list when training.

#### Create dataset

In [None]:
def curate_tama_dataset(folder):
    """Collect the task variations of every subfolder into one list per subfolder.

    Each JSON file of a subfolder is loaded, its ``metadata`` entry is removed
    (a file without that key raises ``KeyError``) and the remaining task is
    appended to the list of that subfolder.

    Args:
        folder: Directory whose entries are the per-task subfolders.

    Returns:
        Dict mapping the subfolder name to the list of tasks found in it.
    """
    curated = {}
    for subfolder in tqdm(sorted(glob.glob(os.path.join(folder, '*')))):
        name = os.path.basename(subfolder)
        variations = []
        curated[name] = variations
        json_paths = sorted(glob.glob(os.path.join(subfolder, '*.json')))
        for json_path in json_paths:
            with open(json_path, 'r') as handle:
                variation = json.load(handle)
            # Only the task samples are kept; the metadata is discarded.
            del variation['metadata']
            variations.append(variation)
        print(f'{name} tasks: {len(variations)}')
    return curated

In [None]:
# Rebind the notebook-level `tasks` to the curated tama dataset
# (subfolder name -> list of task variations).
tasks = curate_tama_dataset('/mnt/hdd0/Kaggle/arc24/data/arc-dataset-tama/dataset')

There are 50 different tasks, each with 100 variations. It's a good dataset.

In [None]:
# Save the curated dataset as a single JSON file.
with open('/mnt/hdd0/Kaggle/arc24/data/neoeye_tama.json', 'w') as f:
    json.dump(tasks, f)

### Old attempt

In [None]:
from typing import List, Optional
import numpy as np

class DecodeRLEError(ValueError):
    """Signal that an RLE-encoded string could not be decoded.

    The exception message is the short description of the problem. The
    optional `details` attribute holds extra context, or None when none
    was given.
    """

    def __init__(self, message: str, details: Optional[str] = None):
        self.details = details
        ValueError.__init__(self, message)

def decode_rle_row_inner(row: str) -> List[int]:
    """Expand one run-length-encoded row into a flat list of colors.

    A digit ``0``-``9`` emits that color. A lowercase letter placed right
    before a digit is the repeat count of that digit: ``a`` means 2, ``b``
    means 3, and so on up to ``z`` meaning 27.

    Args:
        row: Non-empty encoded row, e.g. ``"a7c38"``.

    Returns:
        The decoded colors, e.g. ``[7, 7, 3, 3, 3, 3, 8]``.

    Raises:
        DecodeRLEError: If the row is empty, contains a character other than
            ASCII ``0``-``9`` or ``a``-``z``, has two letters in a row, or
            ends with a letter.
    """
    if not row:
        raise DecodeRLEError("Invalid row: row cannot be empty")

    decoded_row = []
    repeat = 1  # how many times the next digit is emitted
    pending_letter = False  # True while a count letter still waits for its digit

    for ch in row:
        # Explicit ASCII bounds instead of str.isdigit(): isdigit() is also
        # true for non-ASCII digits, which were either silently accepted or
        # made int() raise a bare ValueError instead of DecodeRLEError.
        if '0' <= ch <= '9':
            decoded_row.extend([int(ch)] * repeat)
            repeat = 1
            pending_letter = False
            continue

        if not ('a' <= ch <= 'z'):
            raise DecodeRLEError("Invalid character inside row", details=f"Character: {ch}")
        if pending_letter:
            raise DecodeRLEError("No adjacent a-z characters are allowed", details=f"Character: {ch}")
        pending_letter = True
        repeat = ord(ch) - ord('a') + 2

    if pending_letter:
        raise DecodeRLEError("Last character must not be a-z character", details=f"Character: {ch}")

    return decoded_row

def decode_rle_row(row: str, width: int) -> List[int]:
    """Decode one RLE row and check that it is `width` pixels wide.

    Args:
        row: Encoded row. A single digit is shorthand for a row filled with
            that color; longer rows are decoded by `decode_rle_row_inner`.
        width: Expected number of pixels in the decoded row.

    Returns:
        The decoded colors, or an empty list when `row` is empty.

    Raises:
        DecodeRLEError: If a single-character row is not an ASCII digit, if
            the row content is invalid, or if the decoded length differs
            from `width`.
    """
    if not row:
        return []

    if len(row) == 1:
        ch = row[0]
        # Explicit ASCII bounds instead of str.isdigit(): isdigit() is also
        # true for non-ASCII digits, which were either silently accepted or
        # made int() raise a bare ValueError instead of DecodeRLEError.
        if '0' <= ch <= '9':
            return [int(ch)] * width
        raise DecodeRLEError("Invalid character for full row", details=f"Character: {ch}")

    decoded_row = decode_rle_row_inner(row)
    length_decoded_row = len(decoded_row)
    if length_decoded_row != width:
        raise DecodeRLEError("Mismatch between width and the number of RLE columns",
                             details=f"Expected width: {width}, Decoded width: {length_decoded_row}")

    return decoded_row

def deserialize(input_str: str, *, verbose: bool = False) -> np.ndarray:
    """Decode a ``"<width> <height> <rows>"`` RLE string into an image.

    `<rows>` is a comma-separated list with one entry per image row. An empty
    entry means "same as the previous row"; every other entry is decoded by
    `decode_rle_row`.

    Args:
        input_str: The three space-separated parts described above.
        verbose: When True, print every row index and its encoded text
            while decoding.

    Returns:
        Array of shape ``(height, width)`` and dtype ``uint8``.

    Raises:
        DecodeRLEError: If the string does not have exactly 3 parts, the
            width or height is not an integer or is negative, the number of
            rows differs from the height, the first row is empty, or a row
            cannot be decoded.
    """
    parts = input_str.split(' ')
    count_parts = len(parts)
    if count_parts != 3:
        raise DecodeRLEError("Expected 3 parts", details=f"But got {count_parts} parts")

    width_str, height_str, rows_str = parts
    rows = rows_str.split(',')

    # Validate width and height strings
    try:
        width = int(width_str)
        height = int(height_str)
    except ValueError as e:
        # Chain the original error so the traceback shows the offending value.
        raise DecodeRLEError(
            "Cannot parse width and height",
            details=str(e)
        ) from e

    # Images with negative dimensions cannot be created
    if width < 0 or height < 0:
        raise DecodeRLEError("Width and height must non-negative")

    count_rows = len(rows)
    if count_rows != height:
        raise DecodeRLEError("Mismatch between height and the number of RLE rows",
                             details=f"Expected height: {height}, Number of rows: {count_rows}")

    image = np.zeros((height, width), dtype=np.uint8)

    for y, row in enumerate(rows):
        if verbose:
            print(f"y: {y} row: {row}")
        if not row:
            if y == 0:
                raise DecodeRLEError("First row is empty")
            # An empty entry repeats the row above, which is itself either a
            # decoded row or a copy of the last decoded one.
            image[y, :] = image[y - 1, :]
            continue
        image[y, :] = decode_rle_row(row, width)

    return image

In [None]:
# Scratch test: a whole serialized task (several labelled images, each as
# "<label> <width> <height> <rows>").
# NOTE(review): decode_rle_row requires a `width` argument and expects a
# single row, so this call raises TypeError as written — confirm what the
# intended call was.
text = """I0 2 7 90,9,30,09,0,03,0 O0 2 7 0,03,0,09,30,9,90 I1 7 3 07c18,a7c38,7 O1 7 3 7,a7c38,07c18 I2 3 11 6,616,626,,,6a2,6,,,, O2 3 11 6,,,,,6a2,626,,,616,6 I3T 11 9 37a3a7376a3,3a232a73262,7a37362a373,27a2323a6a3,a26b73a723,a2373a2a323,27a36726a32,73273a727a2,7a367b3237 O3T 11 9 37a373a76a3,3a232a73262,73273632373,2732323a6a3,a26b7a3273,a27a3a27b3,23736a763a2,a32737a27a2,a7367b3237"""
decode_rle_row(text)

In [None]:
# Scratch test of deserialize.
# NOTE(review): the string holds two images (7 space-separated parts), while
# deserialize accepts exactly 3 parts, so this raises
# DecodeRLEError("Expected 3 parts") as written — confirm the intended input.
text = """2 7 90,9,30,09,0,03,0 O0 2 7 0,03,0,09,30,9,90"""
deserialize(text)

## Datasets summary

In [None]:
def print_dataset_summary(filepath):
    """Print the number of tasks of a dataset and its mean samples per task.

    A task is either a dict with ``train`` and ``test`` lists, or a list of
    such dicts, in which case the samples of all its elements are added up.

    Args:
        filepath: Path of the dataset, read with `load_arc_data_with_solutions`.

    Raises:
        ValueError: If a task is neither a dict nor a list.
    """
    def count_samples(entry):
        return len(entry['train']) + len(entry['test'])

    tasks = load_arc_data_with_solutions(filepath)
    print(f"Number of tasks: {len(tasks)}")

    samples_per_task = []
    for task in tasks.values():
        if isinstance(task, dict):
            n_samples = count_samples(task)
        elif isinstance(task, list):
            n_samples = sum([count_samples(subtask) for subtask in task])
        else:
            raise ValueError(f"Invalid task type: {type(task)}")
        samples_per_task.append(n_samples)

    print(f"Mean number of samples per task: {np.mean(samples_per_task):.1f}")


In [None]:
# Summary of the MINI-ARC dataset file.
print_dataset_summary('../data/external_data/MINI-ARC.json')

In [None]:
# Summary of the kaggle dataset file.
print_dataset_summary('../data/external_data/kaggle.json')

In [None]:
# Summary of the tama dataset file (each task is a list of variations).
print_dataset_summary('../data/external_data/neoeye_tama.json')