# Generate new inputs

## Goal

Check if we can use a fine-tuned LLM to generate new inputs for the ARC tasks.

## Configuration

In [None]:
class cfg:
    """Notebook configuration: model locations and the context length given to vLLM."""
    # Base model path; it is the first argument of the (currently commented-out) merge_lora call.
    base_model_path: str = '/home/gbarbadillo/data/Qwen2-0.5B-Instruct'
    # Fine-tuned checkpoint to merge. Exactly one assignment must be active: a second
    # active assignment silently overrides the first, so retired candidates stay commented out.
    # model_checkpoint: str = '/mnt/hdd0/Kaggle/arc24/models/20240910_debug_input_from_inputs/01_baseline/checkpoint-1000'
    # model_checkpoint: str = '/mnt/hdd0/Kaggle/arc24/models/20240910_debug_input_from_inputs/03_input-from-inputs-continuation/checkpoint-3000'
    # model_checkpoint: str = '/mnt/hdd0/Kaggle/arc24/models/20240910_debug_input_from_inputs/04_input-from-inputs-continuation/checkpoint-8000'
    model_checkpoint: str = '/mnt/hdd0/MEGA/projects/temp/20240903_submission_models/08_inputs-RE-ARC-task-augmentation-050-1111_Qwen2-0.5B-Instruct_lr1e-4_r128_4e4steps_10240msl/checkpoint-40000'
    # Location of the merged model; both the vLLM engine and the tokenizer are loaded from here.
    merged_model_path: str = '/home/gbarbadillo/data/temp_model'
    # Forwarded to vLLM as max_model_len.
    max_model_len: int = 10240

## Imports

In [None]:
import sys
import os
import glob
import json
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
from tqdm.auto import tqdm

from vllm import LLM
from transformers import AutoTokenizer

# add path to python path
sys.path.append(os.path.realpath('../scripts/'))

from merge_lora import merge_lora
from arc24.data import load_arc_data_with_solutions
from arc24.prompting import create_prompts_from_task, parse_grid_from_response
from arc24.encoders import create_grid_encoder
from inference import get_sampling_params
from evaluation import plot_grids

# Draw an empty figure and close every figure before changing the defaults.
# NOTE(review): presumably this forces the notebook backend to initialise so the
# rcParams set below are not overwritten afterwards — confirm before removing.
plt.plot()
plt.close('all')
# Global plotting defaults for every figure in this notebook.
# plt.rcParams and mpl.rcParams refer to the same settings object.
plt.rcParams["figure.figsize"] = (25, 4)
mpl.rcParams['lines.linewidth'] = 3
mpl.rcParams['font.size'] = 16

## Generate

### Create model

In [None]:
# merge_lora(cfg.base_model_path, cfg.model_checkpoint, cfg.merged_model_path)
# raise

In [None]:
# Create the vLLM engine from the merged model directory.
llm = LLM(
    model=cfg.merged_model_path,
    trust_remote_code=True,
    dtype='half',
    tensor_parallel_size=2, # split the model across 2 GPUs
    max_model_len=cfg.max_model_len,
    # kv_cache_dtype='fp8_e5m2' is deliberately left disabled: KV-cache quantization was found to hurt results.
    enforce_eager=True, # measured 13.3GB of memory per GPU with this flag versus 13.9GB without it
    disable_log_stats=True,
    max_num_seqs=255, # the default is supposed to be 256; 255 was used to work around a weird illegal memory error
    )

In [None]:
# The tokenizer is loaded from the same merged model directory as the vLLM engine.
tokenizer = AutoTokenizer.from_pretrained(cfg.merged_model_path)

### Load data

In [None]:
# Load the train and validation partition files. Both results are indexed by
# task id further down (via .keys() and tasks[task_id]).
train_data = load_arc_data_with_solutions('/mnt/hdd0/Kaggle/arc24/data/new_partitions/train_rs7.json')
val_data = load_arc_data_with_solutions('/mnt/hdd0/Kaggle/arc24/data/new_partitions/val_rs7.json')

In [None]:
# Grid encoder built from a nested spec string. The same encoder is used to
# build the prompts and to parse grids back out of the model responses.
grid_encoder = create_grid_encoder("GridShapeEncoder(RowNumberEncoder(MinimalGridEncoder()))")

### Generate new samples

In [None]:
def generate_new_samples(task_id, tasks, n_generate=10, n_show=5, temperature=0.9):
    """Sample new input grids for one task and plot them next to the originals.

    The prompt is built with the 'input-from-inputs-v0' prompt version from a
    copy of the task whose train split holds the train and test samples
    together. Relies on the module-level `llm`, `tokenizer` and `grid_encoder`.

    Args:
        task_id: key of the task inside `tasks`.
        tasks: mapping from task id to a task with 'train' and 'test' samples.
        n_generate: number of completions requested from the model.
        n_show: number of completions (taken from the front) that are parsed
            and plotted.
        temperature: sampling temperature.

    Returns:
        None. The function prints the cumulative log-probability of each
        shown completion and displays two figures.
    """
    original_task = tasks[task_id]
    prompt_task = {
        'train': original_task['train'] + original_task['test'],
        'test': original_task['test'],
    }
    prompts = create_prompts_from_task(
        prompt_task,
        grid_encoder,
        tokenizer,
        is_train_prompt=False,
        prompt_version='input-from-inputs-v0',
    )
    first_prompt = prompts[0]
    sampling_params = get_sampling_params(
        best_of=1,
        temperature=temperature,
        n=n_generate,
        max_output_tokens=1224,
    )
    request_outputs = llm.generate(first_prompt, sampling_params=sampling_params, use_tqdm=True)

    generated_grids = []
    shown_completions = request_outputs[0].outputs[:n_show]
    for completion in shown_completions:
        try:
            print(completion.cumulative_logprob)
            parsed_grid = parse_grid_from_response(completion.text, grid_encoder)
            generated_grids.append(parsed_grid)
        except Exception as error:
            # Best effort: report the failure and the raw text, then keep going.
            print(error)
            print(completion.text)

    original_inputs = [sample['input'] for sample in original_task['train'] + original_task['test']]
    figures = (
        (original_inputs, f'Original inputs from task {task_id}'),
        (generated_grids, f'Generated samples from task {task_id}'),
    )
    for figure_grids, figure_title in figures:
        plot_grids(figure_grids)
        plt.suptitle(figure_title)
        plt.show()

In [None]:
# Generate and plot samples for up to the first 10 training tasks.
# Slicing the key list builds it once (not once per iteration) and does not
# raise IndexError when fewer than 10 tasks are loaded.
for task_id in list(train_data.keys())[:10]:
    generate_new_samples(task_id, train_data)

In [None]:
# Generate and plot samples for up to the first 10 validation tasks.
# Slicing the key list builds it once (not once per iteration) and does not
# raise IndexError when fewer than 10 tasks are loaded.
for task_id in list(val_data.keys())[:10]:
    generate_new_samples(task_id, val_data)