# Write Python code for training tasks

## Goal

Write Python code that implements the training tasks and also generates their input distributions.

## Imports

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import sys
import os
import glob
import json
import random
import inspect
from itertools import islice
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
from tqdm.auto import tqdm

# add path to python path
sys.path.append(os.path.realpath('../scripts/'))
from arc24.data import load_arc_data_with_solutions
from evaluation import plot_grids, plot_grid
from arc24.logging import logging

logger = logging.getLogger(__name__)

sys.path.append(os.path.realpath('../arc'))
import training_inputs
import training_tasks

# NOTE(review): drawing and immediately closing an empty figure looks like a
# workaround so that the rcParams set below are not reset when the notebook
# plotting backend initializes -- confirm it is still needed.
plt.plot()
plt.close('all')
# Default figure size (width, height): wide and short, suited to a single row
# of grids plotted side by side.
plt.rcParams["figure.figsize"] = (25, 2)
mpl.rcParams['lines.linewidth'] = 3
mpl.rcParams['font.size'] = 16

## Code

In [None]:
# Load the evaluation and training challenges with the project loader.
# `train_data` is indexed below as train_data[task_id]['train' | 'test'], where
# every sample is read through its 'input' and 'output' keys.
# NOTE(review): both paths are absolute and machine-specific; adjust them when
# running this notebook elsewhere.
eval_data = load_arc_data_with_solutions('/mnt/hdd0/Kaggle/arc24/data/arc-agi_evaluation_challenges.json')
train_data = load_arc_data_with_solutions('/mnt/hdd0/Kaggle/arc24/data/arc-agi_training_challenges.json')

In [None]:
def plot_grids_with_shape(grids, suptitle=None, facecolor='white'):
    """Plot several grids side by side, each titled with its size.

    Args:
        grids: Sequence of grids; each grid is drawn with `plot_grid` in its
            own subplot and titled `len(grid)` x `len(grid[0])`.
        suptitle: Optional figure-level title. When given, the layout is also
            tightened after setting it.
        facecolor: Background color of the new figure.
    """
    plt.figure(facecolor=facecolor)
    for column, grid in enumerate(grids, start=1):
        # One row of subplots, one column per grid.
        plt.subplot(1, len(grids), column)
        plot_grid(grid)
        height, width = len(grid), len(grid[0])
        plt.title(f'{height}x{width}')
    if suptitle is not None:
        plt.suptitle(suptitle)
        plt.tight_layout(pad=0.2)
    plt.show()

In [None]:
def visualize_train_task(task_id):
    """Show a training task next to its generated inputs and outputs.

    Plots the task's own inputs and ground truth outputs (train and test
    samples together), then five inputs produced by
    `training_inputs.task_<task_id>` and the outputs that
    `training_tasks.task_<task_id>` computes for them. If input generation
    fails, the task function is applied to the task's own inputs instead.
    A missing or incomplete function is reported with a warning rather than
    raising.

    Args:
        task_id: Key of the task in `train_data`.
    """
    print(task_id)

    samples = train_data[task_id]['train'] + train_data[task_id]['test']
    inputs = [sample['input'] for sample in samples]
    ground_truth = [sample['output'] for sample in samples]
    plot_grids_with_shape(inputs, 'Task Inputs')
    plot_grids_with_shape(ground_truth, 'Ground truth Outputs', facecolor='gray')

    function_name = f'task_{task_id}'

    try:
        generate_input = getattr(training_inputs, function_name)
        # `inputs` is only rebound when all five generations succeed; otherwise
        # the task's own inputs remain in place for the next step.
        inputs = [generate_input() for _ in range(5)]
        plot_grids_with_shape(inputs, 'Generated Inputs')
    except AttributeError:
        logger.warning('Input generation function not found')
    except NameError:
        logger.warning('Input generation is implemented, but it is calling not implemented functions')

    try:
        generated = [getattr(training_tasks, function_name)(grid) for grid in inputs]
        plot_grids_with_shape(generated, 'Generated Outputs', facecolor='gray')
    except AttributeError:
        logger.warning('Task function not found')
    except NameError:
        logger.warning('Task function is implemented, but it is calling not implemented functions')

In [None]:
from matplotlib.colors import LinearSegmentedColormap

# Progress colormap: RGB anchor colors and where each one sits on the 0..1
# scale (0 -> red, 0.5 -> yellow, 1 -> green).
colors = [(1, 0, 0), (1, 1, 0), (0, 1, 0)]
positions = [0, 0.5, 1]
# Pair every position with its color and let matplotlib interpolate between them.
custom_cmap = LinearSegmentedColormap.from_list(
    "custom_cmap",
    [(position, color) for position, color in zip(positions, colors)],
)

def measure_progress(module):
    """Report how many training tasks `module` implements and plot the result.

    For every task id in `train_data`, the function `task_<task_id>` is looked
    up in `module` and called once, with a dummy grid when its signature has a
    `grid` parameter. Each task gets one of three statuses:

    - 'not implemented': `module` has no such function.
    - 'implemented but not functional': the function exists but raises
      `NameError` or `AttributeError` when called.
    - 'done': the call completes.

    The lookup is kept separate from the call so that an `AttributeError`
    raised inside an existing function is not mistaken for a missing function.
    Any other exception raised by a task function propagates to the caller.

    Prints the share and count of fully functional and of implemented tasks,
    and draws a one-row image (red / yellow / green) with one cell per task.

    Args:
        module: Module (or any object) exposing `task_<task_id>` callables.
    """
    status_scores = {'done': 1, 'not implemented': 0, 'implemented but not functional': 0.5}
    progress = []
    for task_id in train_data:
        try:
            task_function = getattr(module, f'task_{task_id}')
        except AttributeError:
            progress.append('not implemented')
            continue
        try:
            function_parameters = inspect.signature(task_function).parameters
            # An empty kwargs dict is the same as calling without arguments.
            task_function(**create_dummy_parameters(function_parameters))
        except (NameError, AttributeError):
            progress.append('implemented but not functional')
        else:
            progress.append('done')
    numeric_progress = np.array([status_scores[status] for status in progress])
    print(f'Fully functional tasks: {np.mean(numeric_progress == 1):.1%} ({np.sum(numeric_progress == 1)})')
    print(f'Implemented tasks: {np.mean(numeric_progress > 0):.1%} ({np.sum(numeric_progress > 0)})')
    plt.imshow(numeric_progress.reshape(1, -1), cmap=custom_cmap, aspect='auto')

def create_dummy_parameters(function_parameters):
    """Build keyword arguments for a trial call of a task function.

    Args:
        function_parameters: Container of parameter names, e.g. the
            `parameters` mapping of an `inspect.Signature`.

    Returns:
        `{'grid': <3x3 nested list of zeros>}` when a `grid` parameter is
        present, otherwise an empty dict.
    """
    if 'grid' not in function_parameters:
        return {}
    # A fresh 3x3 grid of zeros as plain nested lists.
    return {'grid': [[0] * 3 for _ in range(3)]}

## Visualize tasks

In [None]:
# Plot a one-row grid holding the values 0..9.
# NOTE(review): presumably `write_numbers=True` overlays each cell's value so
# the value-to-color mapping can be read off -- confirm against `plot_grid`.
plot_grid([np.arange(10).tolist()], write_numbers=True)

In [None]:
# Visualize the task at index 1 of the training set keys; change the index to
# inspect a different task.
visualize_train_task(task_id=list(train_data.keys())[1])

## Measure progress

In [None]:
# Progress of the task implementations; the trailing semicolon keeps the
# notebook from echoing the return value of the last expression.
measure_progress(training_tasks); plt.title('Task implementation progress');

In [None]:
# Progress of the input generators; the trailing semicolon keeps the notebook
# from echoing the return value of the last expression.
measure_progress(training_inputs); plt.title('Input generation progress');

## TODO

- [ ] Create statistics about the progress of the task implementations