In [164]:
import json
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import random

def load_json(file_path):
    """Read a JSON file and return the decoded Python object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's content (dict, list, ...).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not rely on the platform's default
    # text encoding (which is not UTF-8 on every OS/locale).
    with open(file_path, encoding="utf-8") as f:
        data = json.load(f)
    return data

class ARC_Dataset(Dataset):
    """Dataset yielding one item per test case of every task in a challenge file.

    Each item is a tuple ``(task_input, task_output, example_input,
    example_output)``:

    * ``task_input`` / ``task_output``: the test grid and its solution, each
      shifted by +1 and zero-padded to ``(1, 30, 30)``.
    * ``example_input`` / ``example_output``: the task's train pairs, shifted
      and padded the same way, topped up to ``NUM_EXAMPLES`` entries and
      stacked to ``(NUM_EXAMPLES, 30, 30)``.

    The +1 shift keeps the value 0 free for padding, so every real cell is
    non-zero after loading.
    """

    # Number of example pairs every item is padded/truncated to.
    NUM_EXAMPLES = 50

    def __init__(self, challenges, solution):
        """Load both JSON files and flatten them into one row per test case.

        Args:
            challenges: Path to the challenges JSON (task id -> ``train``/``test``).
            solution: Path to the solutions JSON (task id -> list of outputs,
                indexed like the task's ``test`` list).
        """
        challenges = load_json(challenges)
        solution = load_json(solution)
        self.data = []

        for key, value in challenges.items():
            for i in range(len(value['test'])):
                task_input = value['test'][i]['input']
                task_output = solution[key][i]
                example_input = [ex['input'] for ex in value['train']]
                example_output = [ex['output'] for ex in value['train']]

                # Collect the rows that will become the DataFrame
                self.data.append({
                    'id': key,
                    'input': task_input,
                    'output': task_output,
                    'ex_input': example_input,
                    'ex_output': example_output
                })

        # Convert the collected rows into a DataFrame
        self.df = pd.DataFrame(self.data)

    def pad_to_30x30(self, tensor):
        """Zero-pad a grid to 30x30, centring the content.

        A 2-D tensor first gets a leading channel dimension. Dimensions that
        are already 30 or larger receive no padding (they are not cropped).

        Args:
            tensor: 2-D ``(H, W)`` or 3-D ``(C, H, W)`` tensor.

        Returns:
            3-D tensor padded with zeros on the last two dimensions.
        """
        if tensor.dim() == 2:
            tensor = tensor.unsqueeze(0)
        c, h, w = tensor.shape
        pad_h = max(0, 30 - h)
        pad_w = max(0, 30 - w)

        # Split the padding half/half between left-right and top-bottom;
        # the extra cell of an odd amount goes to the right/bottom.
        padding = (pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2)
        tensor = F.pad(tensor, padding, mode='constant', value=0)

        return tensor

    def augment_example_input(self, tensor):
        """Return a randomly recoloured copy of an example input grid.

        Every value ``k`` in 1..10 is replaced by a random integer drawn from
        its own band ``[10*(k-1)+1, 10*k]``. Other values (e.g. the padding
        value 0) are left unchanged.

        The source tensor is never modified, and all replacements are decided
        from the *original* values. Writing into the tensor while still
        comparing against it would let a value that was just remapped onto a
        later key be remapped a second time, merging distinct colours.

        Args:
            tensor: Grid tensor to recolour.

        Returns:
            A new tensor of the same shape and dtype.
        """
        mapping = {k: random.randint(10 * (k - 1) + 1, 10 * k) for k in range(1, 11)}

        augmented = tensor.clone()
        for k, v in mapping.items():
            # Mask is computed on the untouched source, so writes into
            # `augmented` can never influence later lookups.
            augmented[tensor == k] = v
        return augmented

    def augment_example_output(self, tensor):
        """Output-side augmentation hook; currently returns the tensor as is.

        TODO: not implemented yet. NOTE(review): because inputs are recoloured
        and outputs are not, colours of an augmented pair no longer correspond;
        confirm whether the same mapping should be applied here.
        """
        return tensor

    def __getitem__(self, idx):
        """Build the tensors for the test case stored at row ``idx``.

        Returns:
            Tuple ``(task_input, task_output, example_input, example_output)``
            as described in the class docstring.

        Raises:
            ValueError: If the task has no train examples to duplicate.
        """
        task = self.df.iloc[idx]
        task_input = self.pad_to_30x30(torch.tensor(task['input'], dtype=torch.float32) + 1)
        task_output = self.pad_to_30x30(torch.tensor(task['output'], dtype=torch.float32) + 1)

        example_input = [self.pad_to_30x30(torch.tensor(ex, dtype=torch.float32) + 1) for ex in task['ex_input']]
        example_output = [self.pad_to_30x30(torch.tensor(ex, dtype=torch.float32) + 1) for ex in task['ex_output']]

        num_original = len(example_input)
        if num_original == 0:
            raise ValueError(f"task {task['id']!r} has no train examples to augment")

        # Top the list up to NUM_EXAMPLES with augmented duplicates. Only the
        # original examples are used as sources so recolouring never compounds.
        while len(example_input) < self.NUM_EXAMPLES:
            index_to_duplicate = random.randint(0, num_original - 1)
            augmented_input = self.augment_example_input(example_input[index_to_duplicate])
            augmented_output = self.augment_example_output(example_output[index_to_duplicate])
            example_input.append(augmented_input)
            example_output.append(augmented_output)

        # Never return more than NUM_EXAMPLES examples
        example_input = example_input[:self.NUM_EXAMPLES]
        example_output = example_output[:self.NUM_EXAMPLES]

        # Stack into one tensor and drop the per-example channel dimension
        example_input = torch.stack(example_input).squeeze(1)
        example_output = torch.stack(example_output).squeeze(1)

        return task_input, task_output, example_input, example_output

    def __len__(self):
        """Number of test cases across all loaded tasks."""
        return len(self.df)

# Usage example: build the training dataset and look at one shuffled batch.
train_challenge = './kaggle/input/arc-prize-2024/arc-agi_training_challenges.json'
train_solution = "./kaggle/input/arc-prize-2024/arc-agi_training_solutions.json"

train_dataset = ARC_Dataset(challenges=train_challenge, solution=train_solution)
train_loader = DataLoader(dataset=train_dataset, batch_size=11, shuffle=True)

# Fetch a single batch, keep its four parts under their short names
# (later cells read t_i / e_i), and print each part's shape on one line.
first_batch = next(iter(train_loader))
t_i, t_o, e_i, e_o = first_batch
print(*(part.shape for part in first_batch))


torch.Size([11, 1, 30, 30]) torch.Size([11, 1, 30, 30]) torch.Size([11, 50, 30, 30]) torch.Size([11, 50, 30, 30])


In [240]:
import torch
import random

tensor = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print("Original Tensor:")
print(tensor)

# Build the random mapping: value k gets a replacement drawn from its own
# band [10*(k-1)+1, 10*k], for k = 1..9 (drawn in ascending key order).
mapping = {k: random.randint(10 * (k - 1) + 1, 10 * k) for k in range(1, 10)}

# Work on a copy so the original tensor stays intact.
temp_tensor = tensor.clone()

# Pass 1: park every key at its negative, a value no replacement can take,
# so a freshly written replacement is never mistaken for a later key.
for k in mapping.keys():
    temp_tensor[temp_tensor == k] = -k

# Pass 2: swap each parked placeholder for its final replacement.
for k, v in mapping.items():
    temp_tensor[temp_tensor == -k] = v

print("Augmented Tensor:")
print(temp_tensor)


Original Tensor:
tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
Augmented Tensor:
tensor([[59, 19, 25],
        [35, 50, 59],
        [66, 71, 90]])
Augmented Tensor:
tensor([[59, 19, 25],
        [35, 50, 59],
        [66, 71, 90]])
Augmented Tensor:
tensor([[59, 19, 25],
        [35, 50, 59],
        [66, 71, 90]])
Augmented Tensor:
tensor([[59, 19, 25],
        [35, 50, 59],
        [66, 71, 90]])
Augmented Tensor:
tensor([[59, 19, 25],
        [35, 50, 59],
        [66, 71, 90]])
Augmented Tensor:
tensor([[59, 19, 25],
        [35, 50, 59],
        [66, 71, 90]])
Augmented Tensor:
tensor([[59, 19, 25],
        [35, 50, 59],
        [66, 71, 90]])
Augmented Tensor:
tensor([[59, 19, 25],
        [35, 50, 59],
        [66, 71, 90]])
Augmented Tensor:
tensor([[59, 19, 25],
        [35, 50, 59],
        [66, 71, 90]])
Augmented Tensor:
tensor([[59, 19, 25],
        [35, 50, 59],
        [66, 71, 90]])
Augmented Tensor:
tensor([[59, 19, 25],
        [35, 50, 59],
        [66, 

KeyboardInterrupt: 

In [163]:
import torch

def remove_padding(output):
    """Crop a zero-padded grid to the bounding box of its non-zero cells.

    Accepts a 2-D ``(H, W)`` tensor or a 3-D ``(C, H, W)`` tensor. For 3-D
    input the bounding box is computed over all channels together and the
    crop is applied to the last two dimensions, so the channel dimension is
    kept. Zeros inside the bounding box are retained; only the all-zero
    border is removed.

    Args:
        output: 2-D or 3-D tensor in which 0 marks padding.

    Returns:
        The cropped view of ``output``, or an empty tensor
        (``torch.tensor([])``) when ``output`` contains no non-zero element.

    Raises:
        ValueError: If ``output`` is neither 2-D nor 3-D.
    """
    if output.dim() == 2:
        occupied = output != 0
    elif output.dim() == 3:
        # A cell counts as content if any channel is non-zero there.
        occupied = (output != 0).any(dim=0)
    else:
        raise ValueError("지원되지 않는 텐서 차원입니다. 2D 또는 3D 텐서만 지원됩니다.")

    non_zero_indices = torch.nonzero(occupied)
    if non_zero_indices.nelement() == 0:
        return torch.tensor([])

    # Smallest and largest occupied index per axis (rows, then columns)
    min_y, min_x = non_zero_indices[:, 0].min().item(), non_zero_indices[:, 1].min().item()
    max_y, max_x = non_zero_indices[:, 0].max().item(), non_zero_indices[:, 1].max().item()

    # Slice out the bounding box on the last two dimensions
    output_cleaned = output[..., min_y:max_y + 1, min_x:max_x + 1]

    return output_cleaned

# Usage example: take the first example grid of the first batch item.
# e_i is (batch, examples, 30, 30) per the shapes printed earlier, so this
# slice is a 2-D (30, 30) tensor.
asd = e_i[0, 0]
edd = remove_padding(asd)
# Show the grid's size once the zero border has been stripped
print(edd.shape)


torch.Size([6, 10])


|id|input|output|example_input|example_output|
|-|-|-|-|-|
|1|[[1]]|[[2]]|[[],[],[]]|[[1],[2],[3]]|
|1|[[1,2],[3,4],[5,6]]|[[2,3],[4,5],[6,7]]|[[],[],[]]|[[1],[2],[3]]|

In [70]:
import json
import pandas as pd
from torch.utils.data import Dataset, DataLoader

def load_json(file_path):
    """Read a JSON file and return the decoded Python object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's content (dict, list, ...).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not rely on the platform's default
    # text encoding (which is not UTF-8 on every OS/locale).
    with open(file_path, encoding="utf-8") as f:
        data = json.load(f)
    return data

challenges = load_json(train_challenge)
solution = load_json(train_solution)

data = []

for key, value in challenges.items():
    for i, test_case in enumerate(value['test']):
        # Test grid and its solution for this particular test case
        task_input = test_case['input']
        task_output = solution[key][i]
        # Demonstration (train) inputs and outputs of the same task
        example_input = [pair['input'] for pair in value['train']]
        example_output = [pair['output'] for pair in value['train']]
        # One record per test case; the list becomes the DataFrame below
        record = {
            'id': key,
            'input': task_input,
            'output': task_output,
            'ex_input': example_input,
            'ex_output': example_output
        }
        data.append(record)

# Everything below inspects the values left over from the LAST loop iteration.
print(type(task_input), len(task_input))
t_i = torch.tensor(task_input, dtype=torch.float32)
print(type(task_output), len(task_output))
print(type(example_input), len(example_input))
print(type(example_output), len(example_output))
# NOTE(review): this conversion presumably needs all example grids of the
# task to share one size; verify before running on other tasks.
e_i = torch.tensor(example_input, dtype=torch.float32)

print(t_i.shape)

# Turn the list of records into a DataFrame and show its last rows
df = pd.DataFrame(data)
df.tail()

<class 'list'> 24
<class 'list'> 5
<class 'list'> 3
<class 'list'> 3
torch.Size([24, 24])


Unnamed: 0,id,input,output,ex_input,ex_output
411,feca6190,"[[0, 6, 7, 8, 9]]","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...","[[[1, 0, 7, 0, 0]], [[0, 0, 2, 0, 0]], [[4, 0,...","[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0,..."
412,ff28f65a,"[[0, 0, 0, 2, 2, 0], [2, 2, 0, 2, 2, 0], [2, 2...","[[1, 0, 1], [0, 1, 0], [0, 0, 0]]","[[[2, 2, 0, 0, 0], [2, 2, 0, 0, 0], [0, 0, 0, ...","[[[1, 0, 0], [0, 0, 0], [0, 0, 0]], [[1, 0, 1]..."
413,ff28f65a,"[[0, 0, 0, 0, 0, 0, 0], [2, 2, 0, 2, 2, 0, 0],...","[[1, 0, 1], [0, 1, 0], [1, 0, 0]]","[[[2, 2, 0, 0, 0], [2, 2, 0, 0, 0], [0, 0, 0, ...","[[[1, 0, 0], [0, 0, 0], [0, 0, 0]], [[1, 0, 1]..."
414,ff28f65a,"[[2, 2, 0, 2, 2, 0, 0], [2, 2, 0, 2, 2, 0, 0],...","[[1, 0, 1], [0, 1, 0], [1, 0, 1]]","[[[2, 2, 0, 0, 0], [2, 2, 0, 0, 0], [0, 0, 0, ...","[[[1, 0, 0], [0, 0, 0], [0, 0, 0]], [[1, 0, 1]..."
415,ff805c23,"[[4, 4, 4, 0, 4, 0, 0, 3, 3, 3, 0, 0, 0, 0, 3,...","[[8, 8, 8, 8, 8], [0, 0, 8, 8, 0], [0, 8, 0, 0...","[[[0, 3, 3, 3, 3, 0, 0, 2, 2, 2, 0, 0, 0, 0, 2...","[[[0, 3, 3, 3, 3], [0, 3, 3, 3, 3], [3, 0, 0, ..."


In [3]:
# First demonstration input grid of the row at position 10
df['ex_input'].iloc[10][0]

[[2, 0, 0, 5, 0, 6, 2, 5, 0, 0, 4],
 [0, 4, 3, 5, 4, 0, 8, 5, 3, 0, 6],
 [6, 0, 0, 5, 3, 0, 0, 5, 8, 0, 2],
 [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
 [3, 8, 0, 5, 6, 2, 0, 5, 0, 4, 8],
 [0, 0, 4, 5, 0, 0, 4, 5, 6, 0, 0],
 [6, 2, 0, 5, 3, 8, 0, 5, 0, 3, 2],
 [5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
 [0, 3, 6, 5, 0, 2, 0, 5, 0, 6, 0],
 [2, 0, 0, 5, 4, 0, 8, 5, 0, 0, 8],
 [8, 0, 4, 5, 6, 3, 0, 5, 2, 3, 4]]