## Feature Extractor

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ConvEmbedding(nn.Module):
    def __init__(self, in_channels=1, embed_dim=128):
        super(ConvEmbedding, self).__init__()
        self.conv = nn.Conv2d(in_channels, embed_dim, kernel_size=3, padding=1)
        self.flatten = nn.Flatten(2)  # Flatten the spatial dimensions (H, W)
    
    def forward(self, x):
        x = self.conv(x)  # Apply convolution
        x = self.flatten(x)  # Flatten to (batch_size, embed_dim, H*W)
        x = x.transpose(1, 2)  # Transpose to (batch_size, H*W, embed_dim)
        return x

class FeatureExtractor(nn.Module):
    def __init__(self, embed_dim=128, num_heads=4, seq_len=30*30, add_cls_token=True):
        super(FeatureExtractor, self).__init__()
        self.embedding = ConvEmbedding(embed_dim=embed_dim)
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) if add_cls_token else None
        self.positional_encoding = nn.Parameter(torch.zeros(1, seq_len+1 if add_cls_token else seq_len, embed_dim))
        self.attention = nn.TransformerEncoderLayer(embed_dim, num_heads, dim_feedforward=embed_dim * 4)
        self.transformer_encoder = nn.TransformerEncoder(self.attention, num_layers=1)
    
    def forward(self, x):
        # x shape: (batch_size, in_channels, H, W)
        x = self.embedding(x)  # Convert to tokens
        
        if self.cls_token is not None:
            batch_size = x.size(0)
            cls_tokens = self.cls_token.expand(batch_size, -1, -1)  # (batch_size, 1, embed_dim)
            x = torch.cat((cls_tokens, x), dim=1)  # Prepend cls_token
        
        x = x + self.positional_encoding[:, :x.size(1), :]  # Add positional encoding
        x = self.transformer_encoder(x)  # Apply self-attention
        
        cls_feature = x[:, 0, :]  # Extract cls token feature
        token_features = x[:, 1:, :]  # Extract other token features
        
        return cls_feature, token_features

# Example usage:
input_tensor = torch.randn(1, 1, 30, 30)  # Batch size of 1, single channel, 30x30 input
model = FeatureExtractor()
cls_feature, token_features = model(input_tensor)

print(cls_feature.shape)  # Expected output: torch.Size([1, 128])
print(token_features.shape)  # Expected output: torch.Size([1, 900, 128])


torch.Size([1, 128])
torch.Size([1, 900, 128])




## Causal Inference - Cross Attention

In [2]:
import torch
import torch.nn as nn

class CrossAttention(nn.Module):
    def __init__(self, feature_dim=128, num_heads=4):
        super(CrossAttention, self).__init__()
        self.multihead_attn = nn.MultiheadAttention(feature_dim, num_heads)
        self.new_cls_token = nn.Parameter(torch.zeros(1, 1, feature_dim))  # New Cls token
    
    def forward(self, example_input_cls, example_output_cls):
        # example_input_cls, example_output_cls: Shape (batch_size, embed_dim)
        
        # Expand new_cls_token to match batch size
        batch_size = example_input_cls.size(0)
        query = self.new_cls_token.expand(batch_size, -1, -1)  # (batch_size, 1, embed_dim)
        
        # Reshape to (sequence_length, batch_size, embed_dim) as expected by MultiheadAttention
        query = query.permute(1, 0, 2)  # (1, batch_size, embed_dim)
        key = torch.stack([example_input_cls, example_output_cls], dim=0)  # (2, batch_size, embed_dim)
        value = torch.stack([example_input_cls, example_output_cls], dim=0)  # (2, batch_size, embed_dim)
        
        # Apply cross attention
        attn_output, _ = self.multihead_attn(query, key, value)
        
        # Return the output as the new Cls token, shape: (batch_size, embed_dim)
        return attn_output.squeeze(0)

# Example usage:
example_input_cls = torch.randn(1, 128)  # Example input Cls token
example_output_cls = torch.randn(1, 128)  # Example output Cls token

cross_attention_v1 = CrossAttention()
output_cls_v1 = cross_attention_v1(example_input_cls, example_output_cls)

print(output_cls_v1.shape)  # Expected output: torch.Size([1, 128])


torch.Size([1, 128])


## Causal Inference - Self Attention

In [3]:
import torch
import torch.nn as nn

class SelfAttentionWithThreeTokens(nn.Module):
    def __init__(self, feature_dim=128, num_heads=4):
        super(SelfAttentionWithThreeTokens, self).__init__()
        self.multihead_attn = nn.MultiheadAttention(feature_dim, num_heads)
        self.new_cls_token = nn.Parameter(torch.zeros(1, 1, feature_dim))  # New Cls token
    
    def forward(self, example_input_cls, example_output_cls):
        # example_input_cls, example_output_cls: Shape (batch_size, embed_dim)
        
        # Expand new_cls_token to match batch size
        batch_size = example_input_cls.size(0)
        new_cls_token_expanded = self.new_cls_token.expand(batch_size, -1, -1)  # (batch_size, 1, embed_dim)
        
        # Combine all Cls tokens: shape (3, batch_size, embed_dim)
        combined_cls_tokens = torch.cat([new_cls_token_expanded, 
                                         example_input_cls.unsqueeze(1), 
                                         example_output_cls.unsqueeze(1)], dim=1)
        
        # Apply self-attention
        combined_cls_tokens = combined_cls_tokens.permute(1, 0, 2)  # (seq_len, batch_size, embed_dim)
        attn_output, _ = self.multihead_attn(combined_cls_tokens, combined_cls_tokens, combined_cls_tokens)
        
        # Return the output as the new Cls token, shape: (batch_size, embed_dim)
        return attn_output[0]  # The first token corresponds to the new_cls_token

# Example usage:
example_input_cls = torch.randn(1, 128)  # Example input Cls token
example_output_cls = torch.randn(1, 128)  # Example output Cls token

self_attention_v2 = SelfAttentionWithThreeTokens()
output_cls_v2 = self_attention_v2(example_input_cls, example_output_cls)

print(output_cls_v2.shape)  # Expected output: torch.Size([1, 128])


torch.Size([1, 128])


In [4]:
import torch
import torch.nn as nn

class CombineModule(nn.Module):
    def __init__(self, feature_dim):
        super(CombineModule, self).__init__()
        
        # Self-Attention Layer for combining causals
        self.self_attention = nn.MultiheadAttention(embed_dim=feature_dim, num_heads=4)
        
        # Fully connected layer to produce the final causal representation
        self.fc = nn.Linear(feature_dim, feature_dim)
        
    def forward(self, causals):
        # causals: shape (num_causals, batch_size, feature_dim)
        
        # Apply self-attention to combine the causals
        # Permute causals to match expected input shape for MultiheadAttention
        causals = causals.permute(1, 0, 2)  # Shape: (batch_size, num_causals, feature_dim)
        
        attn_output, _ = self.self_attention(causals, causals, causals)
        
        # Mean pooling over the sequence dimension (num_causals)
        combined_causal = attn_output.mean(dim=1)  # Shape: (batch_size, feature_dim)
        
        # Pass through a fully connected layer to get the final causal representation
        final_causal = self.fc(combined_causal)  # Shape: (batch_size, feature_dim)
        
        return final_causal

# Example usage:
batch_size = 1
feature_dim = 128
num_causals = 5

# Assume causals is the output from the Causal Inference module
causals = torch.randn(num_causals, batch_size, feature_dim)

combine_module = CombineModule(feature_dim=feature_dim)
final_causal = combine_module(causals)

print(final_causal.shape)  # Expected output: torch.Size([batch_size, feature_dim])


torch.Size([1, 128])


In [5]:
import torch
import torch.nn as nn

class Head(nn.Module):
    def __init__(self, embed_dim=128, output_dim=1, seq_len=30*30):
        super(Head, self).__init__()
        # FC layers to transform features
        self.fc1 = nn.Linear(embed_dim, embed_dim)  # Project final_causal to match cls_token
        self.fc2 = nn.Linear(embed_dim * 2, embed_dim)  # Combine Cls and final_causal
        self.fc3 = nn.Linear(embed_dim + (seq_len * embed_dim), seq_len)  # Combine with flattened token features
        self.fc4 = nn.Linear(seq_len, output_dim * seq_len)  # Final output adjustment
        
        # Output reshape and upsample
        self.output_reshape = nn.Sequential(
            nn.Unflatten(1, (output_dim, int(seq_len ** 0.5), int(seq_len ** 0.5))),
            nn.Upsample(size=(30, 30), mode='bilinear', align_corners=True)
        )
    
    def forward(self, cls_token, token_features, final_causal):
        # cls_token: shape (batch_size, embed_dim)
        # token_features: shape (batch_size, seq_len, embed_dim)
        # final_causal: shape (batch_size, embed_dim)
        
        # Project final_causal to the same dimension as cls_token
        final_causal_proj = self.fc1(final_causal)  # (batch_size, embed_dim)
        
        # Combine cls_token and final_causal_proj
        cls_combined = torch.cat((cls_token, final_causal_proj), dim=-1)  # (batch_size, 2 * embed_dim)
        cls_combined = self.fc2(cls_combined)  # (batch_size, embed_dim)
        
        # Flatten token features to (batch_size, seq_len * embed_dim)
        token_features_flat = token_features.view(token_features.size(0), -1)  # (batch_size, seq_len * embed_dim)
        
        # Combine cls_combined with token_features_flat
        combined_features = torch.cat((cls_combined, token_features_flat), dim=-1)  # (batch_size, embed_dim + seq_len * embed_dim)
        x = self.fc3(combined_features)  # (batch_size, seq_len)
        x = self.fc4(x)  # (batch_size, output_dim * seq_len)
        
        # Reshape and upsample to get (batch_size, 1, 30, 30)
        output = self.output_reshape(x)  # Final output shape (batch_size, 1, 30, 30)
        
        return output

# Example usage:
cls_token = torch.randn(1, 128)  # Example Cls token
token_features = torch.randn(1, 30*30, 128)  # Example token features
final_causal = torch.randn(1, 128)  # Example Final Causal

head = Head()
output = head(cls_token, token_features, final_causal)

print(output.shape)  # Expected output: torch.Size([1, 1, 30, 30])


torch.Size([1, 1, 30, 30])


## Data

In [6]:
import json
import numpy as np
import pandas as pd

train_challenge = './kaggle/input/arc-prize-2024/arc-agi_training_challenges.json'
train_solution = "./kaggle/input/arc-prize-2024/arc-agi_training_solutions.json"

eval_challenge = './kaggle/input/arc-prize-2024/arc-agi_evaluation_challenges.json'
eval_solution = './kaggle/input/arc-prize-2024/arc-agi_evaluation_solutions.json'


def DataMaker(challenge_data, solution_data):
    
    # Loading the data that contains the "challenge"
    challenge = pd.read_json(challenge_data)
    
    # Loading the data that contains the "Solution"
    with open(solution_data) as json_data:
        solution = json.load(json_data) 
        
    # getting alll the id values present in the dataset
    all_ids = list(challenge.columns)
    
    # concatinating along the test the way it is done for the train part
    for i in all_ids:
        
        # Getting the value of each cell for challenge dataset
        substitute = challenge[f'{i}']['test'][0]
        
        # Creating a new "output" key value pair
        substitute['output'] = solution[f'{i}'][0] 
        
        # Changing the value to "input : []" and "output : []"
        # instead of "input : []"
        challenge[f'{i}']['test'] = substitute       
        
        
    return challenge

def InputOutputDataset(df):
    
    all_ids = list(df.columns)
    new_df = pd.DataFrame(columns= ['id','input','output','input_shape','output_shape'])
    for i in all_ids:
        size = len(df[i]['train'])
        for j in range(size) :
            ip = df[i]['train'][j]['input']
            op = df[i]['train'][j]['output']
            ip_shape = np.array(df[i]['train'][j]['input']).shape
            op_shape = np.array(df[i]['train'][j]['output']).shape
            temp_df = pd.DataFrame()
            temp_df['id'] = [f'{i}_train_{j}']
            temp_df['input'] = [ip]
            temp_df['output'] = [op]
            temp_df['input_shape'] = [ip_shape]
            temp_df['output_shape'] = [op_shape]

            new_df = new_df._append(temp_df,ignore_index = True)
    
        ip = df[i]['test']['input']
        op = df[i]['test']['output']
        ip_shape = np.array(df[i]['test']['input']).shape
        op_shape = np.array(df[i]['test']['output']).shape
        temp_df = pd.DataFrame()
        temp_df['id'] = [f'{i}_test']
        temp_df['input'] = [ip]
        temp_df['output'] = [op]
        temp_df['input_shape'] = [ip_shape]
        temp_df['output_shape'] = [op_shape]
        new_df = new_df._append(temp_df,ignore_index = True)

    return new_df

new_train_data = DataMaker(train_challenge,train_solution)
new_train_data = InputOutputDataset(new_train_data)

new_train_data.head()

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  challenge[f'{i}']['test'] = substitute


Unnamed: 0,id,input,output,input_shape,output_shape
0,007bbfb7_train_0,"[[0, 7, 7], [7, 7, 7], [0, 7, 7]]","[[0, 0, 0, 0, 7, 7, 0, 7, 7], [0, 0, 0, 7, 7, ...","(3, 3)","(9, 9)"
1,007bbfb7_train_1,"[[4, 0, 4], [0, 0, 0], [0, 4, 0]]","[[4, 0, 4, 0, 0, 0, 4, 0, 4], [0, 0, 0, 0, 0, ...","(3, 3)","(9, 9)"
2,007bbfb7_train_2,"[[0, 0, 0], [0, 0, 2], [2, 0, 2]]","[[0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, ...","(3, 3)","(9, 9)"
3,007bbfb7_train_3,"[[6, 6, 0], [6, 0, 0], [0, 6, 6]]","[[6, 6, 0, 6, 6, 0, 0, 0, 0], [6, 0, 0, 6, 0, ...","(3, 3)","(9, 9)"
4,007bbfb7_train_4,"[[2, 2, 2], [0, 0, 0], [0, 2, 2]]","[[2, 2, 2, 2, 2, 2, 2, 2, 2], [0, 0, 0, 0, 0, ...","(3, 3)","(9, 9)"


In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
#from bw_net import FeatureExtractor, CausalInference, CombineModule, Head

class BWNet(nn.Module):
    def __init__(self, feature_dim=128, num_examples=5):
        super(BWNet, self).__init__()
        self.feature_extractor = FeatureExtractor(embed_dim=feature_dim)
        self.causal_inference = SelfAttentionWithThreeTokens(feature_dim=feature_dim)
        self.combine_module = CombineModule(feature_dim=feature_dim)
        self.head = Head()

    def forward(self, input_tensor, example_input, example_output):
        # Pad inputs and example tensors to 30x30
        input_padded = F.pad(input_tensor, (0, 30 - input_tensor.size(3), 0, 30 - input_tensor.size(2)), mode='constant', value=0)
        example_input_padded = F.pad(example_input, (0, 30 - example_input.size(3), 0, 30 - example_input.size(2)), mode='constant', value=0)
        example_output_padded = F.pad(example_output, (0, 30 - example_output.size(3), 0, 30 - example_output.size(2)), mode='constant', value=0)
        
        # Feature extraction
        cls_feature, input_features = self.feature_extractor(input_padded)
        example_cls_feature, example_features = self.feature_extractor(example_input_padded)
        
        # Causal inference
        causals = self.causal_inference(example_features, example_output_padded)
        
        # Combine module
        final_causal = self.combine_module(causals)
        
        # Head
        output = self.head(cls_feature, input_features, final_causal)
        
        # Remove padding values for final output
        output = self.remove_padding(output)
        
        return output

    def remove_padding(self, output):
        # Assumes padding value is 0; modify if necessary
        mask = output != 0
        output_cleaned = output[mask].view(output.size(0), 1, -1)
        output_cleaned = F.interpolate(output_cleaned, size=(30, 30), mode='bilinear', align_corners=True)
        return output_cleaned

# Example usage:
input_tensor = torch.randn(1, 1, 20, 20)  # Example input tensor of size (1, 1, 20, 20)
example_input = torch.randn(1, 1, 20, 20)  # Example input tensor of size (1, 1, 20, 20)
example_output = torch.randn(1, 1, 20, 20)  # Example output tensor of size (1, 1, 20, 20)

model = BWNet(feature_dim=128)  # Set appropriate values for feature_dim and num_examples
output = model(input_tensor, example_input, example_output)

print(output.shape)  # Should output torch.Size([1, 1, 30, 30]) after processing




RuntimeError: The size of tensor a (1471) must match the size of tensor b (901) at non-singleton dimension 1

In [9]:
# Example usage:
input_tensor = torch.randn(1, 1, 20, 20)  # Example input tensor of size (1, 1, 20, 20)
example_input = torch.randn(1, 1, 20, 20)  # Example input tensor of size (1, 1, 20, 20)
example_output = torch.randn(1, 1, 20, 20)  # Example output tensor of size (1, 1, 20, 20)

input_padded = F.pad(input_tensor, (0, 30 - input_tensor.size(3), 0, 30 - input_tensor.size(2)), mode='constant', value=0)
example_input_padded = F.pad(example_input, (0, 30 - example_input.size(3), 0, 30 - example_input.size(2)), mode='constant', value=0)
example_output_padded = F.pad(example_output, (0, 30 - example_output.size(3), 0, 30 - example_output.size(2)), mode='constant', value=0)