# Create reverse ARC dataset

## Goal

Loop over all the ARC tasks and tag the reversible tasks. Then create a new version of the dataset for training.

## Imports

In [None]:
import sys
import os
import json
import time
import textwrap
import matplotlib.pyplot as plt
import matplotlib as mpl

import ipywidgets as widgets
from IPython.display import display, clear_output
from ipywidgets import HBox, VBox

sys.path.append(os.path.realpath('../scripts/'))

from evaluation import plot_task

# Draw and immediately discard an empty figure before changing the defaults.
# NOTE(review): presumably this forces the notebook backend to initialise so
# the settings below are not overwritten later — confirm.
plt.plot()
plt.close('all')

# Notebook-wide plotting defaults: wide figures, thicker lines, larger text.
plt.rcParams["figure.figsize"] = (25, 4)
mpl.rcParams.update({'lines.linewidth': 3, 'font.size': 16})

## Task labelling

Let's find all the tasks that can be reversed.

In [None]:
# JSON file holding the ids of the tasks already labelled as reversible.
labels_filepath = '/mnt/hdd0/Kaggle/arc24/data/reversed_arc/reversible_tasks.json'

# The labelling widget and its click handlers read and mutate a module-level
# set named `reversible_tasks`, so the saved labels have to be bound to that
# name; otherwise the first call to `update_display` fails with a NameError.
if os.path.exists(labels_filepath):
    with open(labels_filepath, 'r') as f:
        reversible_tasks = set(json.load(f))
    print(f'Loaded {len(reversible_tasks)} labels')
else:
    print('No labels found')
    reversible_tasks = set()

# Keep the previous name available as an alias of the very same set object.
labels = reversible_tasks

In [None]:
# Read the task partition that is going to be labelled; `tasks` is whatever
# the JSON file decodes to (the cells below use it as a mapping keyed by task id).
with open('/mnt/hdd0/Kaggle/arc24/data/new_partitions/train_rs7.json', 'r') as f:
    tasks = json.loads(f.read())

In [None]:
# Ordered task ids to walk through, and the position of the task on screen
keys = list(tasks)
index = 0

# Placeholder image plotter (stand-in for a custom plotting function)
def plot_image(image):
    """Print a one-line message naming the image that would be displayed."""
    # Swap in the real image plotting code here
    message = f"Displaying {image}"
    print(message)

# Refresh the task plot, the status label and the buttons
def update_display():
    """Redraw the task at the current position together with the widgets.

    Reads the module-level `index`, `keys`, `tasks` and `reversible_tasks`
    and updates the label text and the look of the reversible button; none
    of those globals is reassigned here.
    """
    clear_output(wait=True)  # Wipe whatever the cell was showing before
    current_id = keys[index]
    plot_task(tasks[current_id])
    label_text.value = (
        f"Current task: {current_id} ({index + 1}/{len(keys)}) "
        f"Already labelled tasks as reversible: {len(reversible_tasks)}"
    )
    # The same button toggles the label, so its caption and colour depend on
    # whether the current task is already in the reversible set.
    already_labelled = current_id in reversible_tasks
    reversible_button.description = (
        'Unlabel as reversible' if already_labelled else 'Label as reversible'
    )
    reversible_button.button_style = 'danger' if already_labelled else 'success'
    # The output was cleared above, so the widgets have to be shown again
    display(VBox([label_text, HBox([prev_button, next_button, reversible_button])]))


# Click handler for the "Next" button
def on_next_button_clicked(b):
    """Move to the following task, unless already on the last one, and redraw.

    `b` is the argument passed by the click callback; it is not used.
    """
    global index
    last_position = len(keys) - 1
    if index < last_position:
        index = index + 1
    update_display()

# Click handler for the "Previous" button
def on_prev_button_clicked(b):
    """Move to the preceding task, unless already on the first one, and redraw.

    `b` is the argument passed by the click callback; it is not used.
    """
    global index
    has_previous_task = index > 0
    if has_previous_task:
        index = index - 1
    update_display()

# Click handler for the "Label as reversible" / "Unlabel as reversible" button
def on_reversible_button_clicked(b):
    """Toggle the reversible label of the current task, save, and move on.

    The whole label set is rewritten to `labels_filepath` on every click.
    Afterwards the position advances by one (unless already on the last
    task) and the display is redrawn. `b` is not used.
    """
    global index
    current_id = keys[index]
    # Toggle membership of the current task in the reversible set
    if current_id not in reversible_tasks:
        reversible_tasks.add(current_id)
    else:
        reversible_tasks.remove(current_id)
    # Sets are not JSON serialisable, hence the conversion to a list
    with open(labels_filepath, 'w') as labels_file:
        json.dump(list(reversible_tasks), labels_file)
    has_next_task = index < len(keys) - 1
    if has_next_task:
        index += 1
    update_display()

# Status line that reports the current task and the labelling progress
label_text = widgets.Label()

# Each button is created and immediately wired to its click handler
prev_button = widgets.Button(description="Previous")
prev_button.on_click(on_prev_button_clicked)

next_button = widgets.Button(description="Next")
next_button.on_click(on_next_button_clicked)

reversible_button = widgets.Button(description="Label as Reversible")
reversible_button.on_click(on_reversible_button_clicked)

# Show the first task
update_display()

- 12:14 started
- 13:04 249 tasks viewed
- 13:41 485 tasks viewed

- 14:33 restarted
- 14:54 600 tasks viewed
- 15:08 700 tasks viewed

Thus I labelled all 700 tasks in around 2 hours of work, which is roughly 6 tasks per minute.
338 of the 700 tasks were reversible.

In general I believe the reversed task is easier than the original task.

## Create reversed dataset

In [None]:
def create_reversed_dataset(input_filepath, output_filepath):
    """Save the reversed version of every reversible task to a new JSON file.

    Tasks are read from `input_filepath`. Only those whose id appears in the
    list stored at the module-level `labels_filepath` are kept; each of them
    is passed through `reverse_task` and the resulting mapping is written to
    `output_filepath`.
    """
    with open(input_filepath, 'r') as tasks_file:
        all_tasks = json.load(tasks_file)
    with open(labels_filepath, 'r') as labels_file:
        reversible_ids = set(json.load(labels_file))
    reversed_tasks = {
        task_id: reverse_task(task)
        for task_id, task in all_tasks.items()
        if task_id in reversible_ids
    }
    with open(output_filepath, 'w') as output_file:
        json.dump(reversed_tasks, output_file)

def reverse_task(task):
    """Return a new task in which every sample has input and output swapped.

    `task` maps each key to a list of samples, every sample being a dict with
    'input' and 'output' entries. The returned dict has the same keys and the
    same number of samples per key; each new sample holds only the two swapped
    entries. The swapped values are the original objects, not copies.
    """
    return {
        key: [
            {'input': sample['output'], 'output': sample['input']}
            for sample in samples
        ]
        for key, samples in task.items()
    }

In [None]:
# Build the reversed training partition, then load it back for a visual check
reversed_filepath = '/mnt/hdd0/Kaggle/arc24/data/reversed_arc/reversed_train_rs7.json'
create_reversed_dataset('/mnt/hdd0/Kaggle/arc24/data/new_partitions/train_rs7.json', reversed_filepath)
with open(reversed_filepath, 'r') as f:
    reversed_tasks = json.load(f)

# Plot one arbitrary reversed task (position 200 in the saved order)
sample_task = list(reversed_tasks.values())[200]
plot_task(sample_task)