In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
import random
import torch
import torch.nn as nn

# Define possible layer types
layer_types = ['Linear', 'Conv1d', 'Conv2d', 'LSTM', 'GRU']

def generate_layer(layer_type, last_output_shape=None):
    if layer_type == 'Linear':
        if last_output_shape is not None:
            in_features = last_output_shape[-1]
        else:
            in_features = random.randint(10, 100)
        out_features = random.randint(10, 100)
        return f"nn.Linear({in_features}, {out_features})", [out_features]

    if layer_type == 'Conv1d':
        if last_output_shape is not None and len(last_output_shape) == 2:
            in_channels = last_output_shape[0]
            length = last_output_shape[1]
        else:
            in_channels = random.randint(10, 100)
            length = random.randint(50, 100)
        out_channels = random.randint(1, 10)
        kernel_size = random.randint(1, 5)
        new_length = length - kernel_size + 1  # Simplified length calculation for example
        return f"nn.Conv1d({in_channels}, {out_channels}, kernel_size={kernel_size})", [out_channels, new_length]

    if layer_type == 'Conv2d':
        if last_output_shape is not None and len(last_output_shape) == 3:
            in_channels = last_output_shape[0]
            height = last_output_shape[1]
            width = last_output_shape[2]
        else:
            in_channels = random.randint(10, 100)
            height = width = random.randint(50, 100)
        out_channels = random.randint(1, 10)
        kernel_size = random.randint(1, 5)
        new_height = height - kernel_size + 1  # Simplified height calculation for example
        new_width = width - kernel_size + 1  # Simplified width calculation for example
        return f"nn.Conv2d({in_channels}, {out_channels}, kernel_size={kernel_size})", [out_channels, new_height, new_width]

    if layer_type in ['LSTM', 'GRU']:
        if last_output_shape is not None and len(last_output_shape) > 0:
            input_size = last_output_shape[-1]
            seq_len = last_output_shape[0]
        else:
            input_size = random.randint(10, 100)
            seq_len = random.randint(1, 10)
        hidden_size = random.randint(10, 100)
        return f"nn.{layer_type}({input_size}, {hidden_size})", [seq_len, hidden_size]

def generate_synthetic_data(num_samples):
    data = []
    for _ in range(num_samples):
        num_layers = random.randint(1, 5)
        architecture = []
        last_output_shape = None

        for _ in range(num_layers):
            layer_type = random.choice(layer_types)
            layer, output_shape = generate_layer(layer_type, last_output_shape)
            architecture.append(layer)
            last_output_shape = output_shape

        architecture_str = "\n".join(architecture)
        input_shape_str = f"[{', '.join(map(str, architecture[0].split('(')[1].split(')')[0].split(', ')))}]"
        output_shape_str = f"[{', '.join(map(str, last_output_shape))}]"
        description = f"Input shape: {input_shape_str}, Output shape: {output_shape_str}"
        data.append((architecture_str, description))

    return data

# Parameters
num_samples = 60000

# Generate synthetic data
synthetic_data = generate_synthetic_data(num_samples)

# Display generated synthetic data
for i in range(5):
    print(f"Network Architecture:\n{synthetic_data[i][0]}\nDescription: {synthetic_data[i][1]}\n")


Network Architecture:
nn.Linear(57, 16)
nn.Conv1d(19, 1, kernel_size=5)
nn.Linear(73, 67)
Description: Input shape: [57, 16], Output shape: [67]

Network Architecture:
nn.LSTM(47, 96)
nn.Conv2d(61, 9, kernel_size=1)
nn.LSTM(60, 100)
nn.LSTM(100, 82)
nn.LSTM(82, 43)
Description: Input shape: [47, 96], Output shape: [9, 43]

Network Architecture:
nn.LSTM(12, 49)
Description: Input shape: [12, 49], Output shape: [1, 49]

Network Architecture:
nn.GRU(42, 52)
nn.GRU(52, 49)
nn.GRU(49, 41)
nn.GRU(41, 69)
nn.GRU(69, 72)
Description: Input shape: [42, 52], Output shape: [6, 72]

Network Architecture:
nn.Conv2d(64, 1, kernel_size=1)
nn.Conv2d(1, 6, kernel_size=4)
nn.Conv2d(6, 8, kernel_size=3)
nn.Conv1d(78, 5, kernel_size=3)
Description: Input shape: [64, 1, kernel_size=1], Output shape: [5, 74]



In [2]:
import torch
import torch.nn as nn
from torchtext.vocab import build_vocab_from_iterator
from torchtext.data.utils import get_tokenizer
from torch.utils.data import DataLoader, Dataset

# Assuming synthetic_data and vocab are already defined from your previous steps

tokenizer = get_tokenizer('basic_english')

# Function to yield tokens from both architectures and descriptions
def yield_tokens(data):
    for architecture, description in data:
        yield tokenizer(architecture)
        yield tokenizer(description)

# Build vocabulary from both architectures and descriptions
vocab = build_vocab_from_iterator(yield_tokens(synthetic_data), specials=["<unk>", "<pad>", "<bos>", "<eos>"])
vocab.set_default_index(vocab["<unk>"])

# Function to process data into tokenized tensors
def process_data(data, vocab):
    processed_data = []
    for architecture, description in data:
        arch_tensor = torch.tensor([vocab["<bos>"]] + [vocab[token] for token in tokenizer(architecture)] + [vocab["<eos>"]], dtype=torch.long)
        desc_tensor = torch.tensor([vocab["<bos>"]] + [vocab[token] for token in tokenizer(description)] + [vocab["<eos>"]], dtype=torch.long)
        processed_data.append((arch_tensor, desc_tensor))
    return processed_data

# Split data into train, validation, and test sets
train_data = process_data(synthetic_data[:55000], vocab)
#valid_data = process_data(synthetic_data[8000:9000], vocab)
test_data = process_data(synthetic_data[56000:], vocab)

# # Creating DataLoaders
# train_loader = DataLoader(train_data, batch_size=1, shuffle=True)
# valid_loader = DataLoader(valid_data, batch_size=1, shuffle=False)
# test_loader = DataLoader(test_data, batch_size=1, shuffle=False)

# Reverse vocabulary for decoding
reverse_vocab = {index: token for token, index in vocab.get_stoi().items()}


In [3]:
for i in train_data[:3]:
    print(i[0].shape,i[1].shape)

torch.Size([28]) torch.Size([11])
torch.Size([44]) torch.Size([13])
torch.Size([10]) torch.Size([13])


In [4]:
len(vocab)

416

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

# Define the Encoder
class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hidden_dim):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.rnn = nn.RNN(emb_dim, hidden_dim, batch_first=True)
        
    def forward(self, src):
        embedded = self.embedding(src)
        outputs, hidden = self.rnn(embedded)
        return hidden

# Define the Decoder
class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hidden_dim):
        super().__init__()
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.rnn = nn.RNN(emb_dim, hidden_dim, batch_first=True)
        self.fc_out = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, input, hidden):
        input = input.unsqueeze(1)  # Add time dimension
        embedded = self.embedding(input)
        output, hidden = self.rnn(embedded, hidden)
        prediction = self.fc_out(output.squeeze(1))
        return prediction, hidden

# Define the Seq2Seq Model
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        
    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        batch_size = trg.shape[0]
        trg_len = trg.shape[1]
        trg_vocab_size = self.decoder.fc_out.out_features
        
        outputs = torch.zeros(batch_size, trg_len, trg_vocab_size).to(trg.device)
        hidden = self.encoder(src)
        
        input = trg[:, 0]
        
        for t in range(1, trg_len):
            output, hidden = self.decoder(input, hidden)
            outputs[:, t, :] = output
            teacher_force = torch.rand(1).item() < teacher_forcing_ratio
            top1 = output.argmax(1)
            input = trg[:, t] if teacher_force else top1
        
        return outputs

# Custom Dataset
class TranslationDataset(Dataset):
    def __init__(self, data):
        self.data = data
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        return self.data[idx]

# Dummy data
vocab_size = len(vocab)  # Example vocabulary size
input_dim = vocab_size
output_dim = vocab_size
emb_dim = 10
hidden_dim = 20

# Create encoder, decoder, and Seq2Seq model
encoder = Encoder(input_dim, emb_dim, hidden_dim)
decoder = Decoder(output_dim, emb_dim, hidden_dim)
model = Seq2Seq(encoder, decoder)

# Define optimizer and loss function
optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss(ignore_index=0)  # Assuming 0 is the padding index

# Sample train data
# train_data = [
#     (torch.randint(1, vocab_size, (28,)), torch.randint(1, vocab_size, (15,))),
#     (torch.randint(1, vocab_size, (20,)), torch.randint(1, vocab_size, (13,))),
#     (torch.randint(1, vocab_size, (12,)), torch.randint(1, vocab_size, (17,)))
# ]

# Create DataLoader
batch_size = 800
train_dataset = TranslationDataset(train_data)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=lambda x: zip(*x))

# Training loop
for epoch in range(19):  # Number of epochs
    for batch in train_loader:
        src_batch, trg_batch = batch
        src_batch = nn.utils.rnn.pad_sequence(src_batch, padding_value=0).T
        trg_batch = nn.utils.rnn.pad_sequence(trg_batch, padding_value=0).T

        # Ensure trg is LongTensor
        trg_batch = trg_batch.long()

        optimizer.zero_grad()
        
        output = model(src_batch, trg_batch)
        
        # Reshape for loss calculation
        output_dim = output.shape[-1]
        output = output[:, 1:].reshape(-1, output_dim)
        trg_batch = trg_batch[:, 1:].reshape(-1)

        loss = criterion(output, trg_batch)
        
        loss.backward()
        optimizer.step()
        
    print(f'Epoch {epoch + 1}, Loss: {loss.item()}')

# Verifying output shape for one sample batch
src_batch, trg_batch = next(iter(train_loader))
src_batch = nn.utils.rnn.pad_sequence(src_batch, padding_value=0).T
trg_batch = nn.utils.rnn.pad_sequence(trg_batch, padding_value=0).T
output = model(src_batch, trg_batch)
print("Output shape:", output.shape)

Epoch 1, Loss: 4.515774726867676
Epoch 2, Loss: 3.352123975753784
Epoch 3, Loss: 3.1667118072509766
Epoch 4, Loss: 2.9304709434509277
Epoch 5, Loss: 2.7308504581451416
Epoch 6, Loss: 2.5927176475524902
Epoch 7, Loss: 2.2517282962799072
Epoch 8, Loss: 2.152487277984619
Epoch 9, Loss: 2.0454771518707275
Epoch 10, Loss: 2.007946491241455
Epoch 11, Loss: 1.9130191802978516
Epoch 12, Loss: 1.976943016052246
Epoch 13, Loss: 1.8415275812149048
Epoch 14, Loss: 1.7984631061553955
Epoch 15, Loss: 1.7742600440979004
Epoch 16, Loss: 1.7413744926452637
Epoch 17, Loss: 1.7583849430084229
Epoch 18, Loss: 1.6891485452651978
Epoch 19, Loss: 1.643355369567871
Output shape: torch.Size([800, 17, 416])


In [6]:
model

Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(416, 10)
    (rnn): RNN(10, 20, batch_first=True)
  )
  (decoder): Decoder(
    (embedding): Embedding(416, 10)
    (rnn): RNN(10, 20, batch_first=True)
    (fc_out): Linear(in_features=20, out_features=416, bias=True)
  )
)

In [7]:
from sklearn.metrics import accuracy_score, f1_score
import numpy as np

# Sample test data, same format as train_data
# test_data = [
#     (torch.randint(1, vocab_size, (28,)), torch.randint(1, vocab_size, (15,))),
#     (torch.randint(1, vocab_size, (20,)), torch.randint(1, vocab_size, (13,))),
#     (torch.randint(1, vocab_size, (12,)), torch.randint(1, vocab_size, (17,)))
# ]

test_dataset = TranslationDataset(test_data)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=lambda x: zip(*x))



def evaluate(model, data_loader, criterion):
    model.eval()
    epoch_loss = 0
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for batch in data_loader:
            src_batch, trg_batch = batch
            src_batch = nn.utils.rnn.pad_sequence(src_batch, padding_value=0).T
            trg_batch = nn.utils.rnn.pad_sequence(trg_batch, padding_value=0).T

            trg_batch = trg_batch.long()

            output = model(src_batch, trg_batch, teacher_forcing_ratio=0)
            
            output_dim = output.shape[-1]
            output = output[:, 1:].reshape(-1, output_dim)
            trg_batch = trg_batch[:, 1:].reshape(-1)

            loss = criterion(output, trg_batch)
            epoch_loss += loss.item()

            preds = output.argmax(1).cpu().numpy()
            labels = trg_batch.cpu().numpy()

            non_pad_elements = (labels != 0)
            all_preds.extend(preds[non_pad_elements])
            all_labels.extend(labels[non_pad_elements])

    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='weighted')
    
    return epoch_loss / len(data_loader), accuracy, f1

test_loss, test_accuracy, test_f1 = evaluate(model, test_loader, criterion)

print(f'Test Loss: {test_loss:.3f}')
print(f'Test Accuracy: {test_accuracy:.3f}')
print(f'Test F1 Score: {test_f1:.3f}')

Test Loss: 1.910
Test Accuracy: 0.547
Test F1 Score: 0.501


In [9]:
test_loader = DataLoader(test_data, batch_size=1, shuffle=False)

# Function to decode tensors back to strings
def decode_tensor(tensor, reverse_vocab):
    tokens = [reverse_vocab[idx.item()] for idx in tensor if idx.item() in reverse_vocab]
    return ' '.join(tokens)

# Evaluation loop with decoding
model.eval()
with torch.no_grad():
    for i, (src, trg) in enumerate(test_loader):
        if i >= 10:  # Limit to the top 10 batches
            break
        src, trg = src.cpu(), trg.cpu()  # Move tensors to CPU
        
        # Forward pass
        output = model(src, trg, 0)  # Turn off teacher forcing
        
        # Get the top predicted token from each timestep
        top_pred = output.argmax(2)
        
        # Decode tensors to strings
        decoded_src = decode_tensor(src[0], reverse_vocab)
        decoded_trg = decode_tensor(trg[0], reverse_vocab)
        decoded_pred = decode_tensor(top_pred[0], reverse_vocab)
        
        print(f"Source: {decoded_src}")
        print(f"Target: {decoded_trg}")
        print(f"Prediction: {decoded_pred}")
        print("="*50)


Source: <bos> nn . gru ( 57 , 85 ) nn . gru ( 85 , 11 ) nn . gru ( 11 , 44 ) nn . conv2d ( 22 , 9 , kernel_size=1 ) <eos>
Target: <bos> input shape [57 , 85] , output shape [9 , 66 , 66] <eos>
Prediction: <unk> input shape [40 , 4 , output shape output , output <eos> output <eos>
Source: <bos> nn . conv1d ( 88 , 9 , kernel_size=3 ) nn . gru ( 71 , 84 ) nn . linear ( 84 , 55 ) nn . lstm ( 55 , 75 ) nn . conv2d ( 70 , 1 , kernel_size=4 ) <eos>
Target: <bos> input shape [88 , 9 , kernel_size=3] , output shape [1 , 75 , 75] <eos>
Prediction: <unk> input shape [40 , 4 , output shape output , output <eos> output <eos> output <eos>
Source: <bos> nn . gru ( 15 , 82 ) nn . lstm ( 82 , 95 ) nn . linear ( 95 , 67 ) nn . conv1d ( 49 , 3 , kernel_size=4 ) nn . conv1d ( 3 , 10 , kernel_size=4 ) <eos>
Target: <bos> input shape [15 , 82] , output shape [10 , 64] <eos>
Prediction: <unk> input shape [40 , 4 , output shape output , output <eos>
Source: <bos> nn . conv1d ( 24 , 2 , kernel_size=4 ) <eos>
T