In [36]:
import random
from typing import Dict, Any

In [37]:
# --- Network depth: inclusive (min, max) bounds --------------------------------
CONV_BLOCKS_RANGE = (1, 4)
CONV_LAYERS_PER_CONV_BLOCK_RANGE = (1, 3)
LINEAR_BLOCKS_RANGE = (1, 2)
# NOTE(review): not read by any code visible in this notebook excerpt.
LINEAR_LAYERS_PER_LINEAR_BLOCK_RANGE = (1, 2)

# --- Convolution layer choices -------------------------------------------------
CONV_OUTPUT_CHANNELS = [2 ** exponent for exponent in range(2, 10)]  # 4, 8, ..., 512
CONV_KERNEL_SIZES = list(range(1, 7))  # 1 through 6
# 1 is listed twice, so a uniform draw over this list returns it twice as often as 2.
STRIDES = [1, 1, 2]
PADDINGS = list(range(3))  # 0, 1, 2

# --- Pooling choices (None means "no pooling layer") ---------------------------
POOLINGS = ["max", "avg", None]
POOL_KERNEL_SIZES = list(range(1, 4))  # 1, 2, 3

# --- Choices shared by conv and linear layers (None means "no activation") -----
ACTIVATIONS = ["relu", "leaky_relu", "gelu", "tanh", "sigmoid", None]
DROPOUT_PROBS = [0.0, 0.05, 0.1, 0.15, 0.2, 0.25]

# --- Conv-to-linear transition and linear layer widths --------------------------
TRANSITIONS_OPTIONS = ["flatten", "global_avg_pool", "global_max_pool"]
LINEAR_UNITS = [2 ** exponent for exponent in range(5, 10)]  # 32, 64, ..., 512

def create_random_genome() -> Dict[str, Any]:
    """Sample a random network description ("genome") from the search space.

    Returns:
        A dictionary with three keys:

        * ``"conv_blocks"``: flat list of convolution layer configs. Each has
          ``filters``, ``kernel_size``, ``stride``, ``padding``, ``batch_norm``,
          ``activation``, ``dropout`` and ``pool``. ``pool`` is ``{}`` for
          "no pooling", otherwise a dict with ``type``, ``kernel_size`` and
          ``strides``.
        * ``"transition_block"``: one entry of ``TRANSITIONS_OPTIONS``.
        * ``"linear_blocks"``: list of linear layer configs with ``units``,
          ``norm``, ``activation`` and ``dropout``.
    """
    conv_layers = []
    # Both bounds of the *_RANGE tuples are inclusive, which is what randint expects.
    num_conv_blocks = random.randint(*CONV_BLOCKS_RANGE)
    for _ in range(num_conv_blocks):
        layers_in_block = random.randint(*CONV_LAYERS_PER_CONV_BLOCK_RANGE)
        for _ in range(layers_in_block):
            conv_layer = {
                "filters": random.choice(CONV_OUTPUT_CHANNELS),
                "kernel_size": random.choice(CONV_KERNEL_SIZES),
                "stride": random.choice(STRIDES),
                "padding": random.choice(PADDINGS),
                "batch_norm": random.choice([True, False]),
                "activation": random.choice(ACTIVATIONS),
                "dropout": random.choice(DROPOUT_PROBS),
            }
            pooling_type = random.choice(POOLINGS)
            if pooling_type is None:
                # An empty dict is the genome's encoding for "no pooling layer".
                conv_layer["pool"] = {}
            else:
                conv_layer["pool"] = {
                    "type": pooling_type,
                    "kernel_size": random.choice(POOL_KERNEL_SIZES),
                    "strides": random.choice(STRIDES),
                }
            conv_layers.append(conv_layer)

    # Drawn exactly once, outside the conv loop: the transition is a property of
    # the whole network, and it must be set even if no conv block is generated.
    transition = random.choice(TRANSITIONS_OPTIONS)

    linear_layers = []
    num_linear_blocks = random.randint(*LINEAR_BLOCKS_RANGE)
    for _ in range(num_linear_blocks):
        linear_layers.append({
            "units": random.choice(LINEAR_UNITS),
            "norm": random.choice([True, False]),
            "activation": random.choice(ACTIVATIONS),
            "dropout": random.choice(DROPOUT_PROBS),
        })

    return {
        "conv_blocks": conv_layers,
        "transition_block": transition,
        "linear_blocks": linear_layers,
    }

In [38]:
# Sample one genome to inspect the structure of the returned dictionary.
create_random_genome()

{'conv_blocks': [{'filters': 16,
   'kernel_size': 2,
   'stride': 1,
   'padding': 0,
   'batch_norm': False,
   'activation': 'sigmoid',
   'dropout': 0.15,
   'pool': {}}],
 'transition_block': 'global_avg_pool',
 'linear_blocks': [{'units': 512,
   'norm': False,
   'activation': 'sigmoid',
   'dropout': 0.1},
  {'units': 32, 'norm': True, 'activation': 'sigmoid', 'dropout': 0.15}]}

In [44]:
import torch
import torch.nn as nn
from typing import Dict, Optional, Union

# Factories for the activation and pooling layers named in a genome.
def get_activation(name: Optional[str]) -> nn.Module:
    """Return a new activation module for a genome activation name.

    Args:
        name: One of ``"relu"``, ``"leaky_relu"``, ``"gelu"``, ``"tanh"``,
            ``"sigmoid"``, or ``None`` for "no activation".

    Returns:
        A freshly constructed module; ``nn.Identity()`` when ``name`` is None.

    Raises:
        KeyError: If ``name`` is not one of the supported activations.
    """
    if name is None:
        # The signature advertises Optional[str]; map "no activation" to a
        # pass-through layer instead of failing on the dictionary lookup.
        return nn.Identity()

    # Map names to classes so only the requested module is instantiated.
    factories = {
        "relu": nn.ReLU,
        "leaky_relu": nn.LeakyReLU,
        "gelu": nn.GELU,
        "tanh": nn.Tanh,
        "sigmoid": nn.Sigmoid,
    }
    if name not in factories:
        raise KeyError(
            f"Unknown activation {name!r}; expected one of {sorted(factories)} or None"
        )
    return factories[name]()

def get_pooling(pool: Dict[str, Union[str, int]]) -> nn.Module:
    """Build the pooling layer described by a genome ``pool`` entry.

    Args:
        pool: Either an empty dict (the genome's encoding for "no pooling") or
            a dict with ``"type"`` (``"max"`` or ``"avg"``), ``"kernel_size"``
            and ``"strides"``.

    Returns:
        ``nn.MaxPool2d`` or ``nn.AvgPool2d`` configured from ``pool``, or
        ``nn.Identity()`` when ``pool`` is empty.

    Raises:
        KeyError: If a non-empty ``pool`` lacks one of the three keys.
        ValueError: If ``pool["type"]`` is neither ``"max"`` nor ``"avg"``.
    """
    if not pool:
        return nn.Identity()

    pool_type = pool["type"]
    kernel_size = pool["kernel_size"]
    strides = pool["strides"]
    if pool_type == "max":
        return nn.MaxPool2d(kernel_size=kernel_size, stride=strides)
    if pool_type == "avg":
        return nn.AvgPool2d(kernel_size=kernel_size, stride=strides)
    # Fail here rather than returning None, which would only surface later as
    # an unrelated error when the layer list is wrapped in nn.Sequential.
    raise ValueError(f"Unknown pooling type {pool_type!r}; expected 'max' or 'avg'")

class GenomeNet(nn.Module):
    """Convolutional network assembled from a genome dictionary.

    The genome is read through three keys: ``"conv_blocks"`` (list of
    convolution layer configs), ``"transition_block"`` (``"flatten"``,
    ``"global_avg_pool"`` or ``"global_max_pool"``) and ``"linear_blocks"``
    (list of linear layer configs). A final ``nn.Linear`` maps to
    ``num_classes`` outputs.

    Linear head construction:
        * With a global pooling transition the head's input width equals the
          channel count leaving the conv stack, so the head is built in
          ``__init__``.
        * With ``"flatten"`` the width depends on the spatial size of the
          input, so the head is built during the first ``forward`` call. In
          that case ``self.linear`` is ``None`` until then; create optimizers
          and load state dicts only after one forward pass.

    Args:
        genome: Architecture description with the keys listed above.
        input_channels: Number of channels of the input images.
        num_classes: Width of the final linear layer.

    Raises:
        ValueError: If ``genome["transition_block"]`` is not a known option.
    """

    def __init__(self, genome: Dict[str, Any], input_channels: int, num_classes: int):
        super().__init__()

        self.genome = genome
        self.input_channels = input_channels
        self.num_classes = num_classes
        self.conv = self._build_conv_layers()
        self.transition = self._build_transition()
        if isinstance(self.transition, nn.Flatten):
            # Flattened width depends on the input's spatial size: defer to forward().
            self.linear = None
        else:
            # Global pooling leaves one value per channel, so the width is known now.
            self.linear = self._build_linear_layers(self._conv_out_channels())

    def _conv_out_channels(self) -> int:
        """Return the channel count produced by the conv stack."""
        conv_cfgs = self.genome["conv_blocks"]
        return conv_cfgs[-1]["filters"] if conv_cfgs else self.input_channels

    def _build_conv_layers(self) -> nn.Sequential:
        """Build conv -> [batch norm] -> [activation] -> [pool] -> [dropout] per config."""
        layers = []
        in_ch = self.input_channels
        for cfg in self.genome["conv_blocks"]:
            layers.append(nn.Conv2d(
                in_channels=in_ch,
                out_channels=cfg["filters"],
                kernel_size=cfg["kernel_size"],
                stride=cfg["stride"],
                padding=cfg["padding"]
            ))
            if cfg["batch_norm"]:
                layers.append(nn.BatchNorm2d(cfg["filters"]))
            if cfg["activation"] is not None:
                layers.append(get_activation(cfg["activation"]))
            # An empty (or missing) pool entry means "no pooling layer".
            if cfg.get("pool"):
                layers.append(get_pooling(cfg["pool"]))
            if cfg.get("dropout", 0.0) > 0:
                layers.append(nn.Dropout2d(cfg["dropout"]))
            in_ch = cfg["filters"]
        return nn.Sequential(*layers)

    def _build_transition(self) -> nn.Module:
        """Build the module that bridges the conv stack and the linear head."""
        transition = self.genome["transition_block"]
        if transition == "flatten":
            return nn.Flatten()
        if transition == "global_avg_pool":
            return nn.AdaptiveAvgPool2d((1, 1))
        if transition == "global_max_pool":
            return nn.AdaptiveMaxPool2d((1, 1))
        # Returning None here would only fail later, inside forward().
        raise ValueError(
            f"Unknown transition_block {transition!r}; expected 'flatten', "
            "'global_avg_pool' or 'global_max_pool'"
        )

    def _build_linear_layers(self, input_dim: int) -> nn.Sequential:
        """Build linear -> [layer norm] -> [activation] -> [dropout] per config, then the classifier."""
        layers = []
        in_f = input_dim
        for cfg in self.genome["linear_blocks"]:
            layers.append(nn.Linear(in_f, cfg["units"]))
            if cfg["norm"]:
                layers.append(nn.LayerNorm(cfg["units"]))
            if cfg["activation"] is not None:
                layers.append(get_activation(cfg["activation"]))
            if cfg.get("dropout", 0.0) > 0:
                layers.append(nn.Dropout(cfg["dropout"]))
            in_f = cfg["units"]
        layers.append(nn.Linear(in_f, self.num_classes))
        return nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the conv stack, the transition and the linear head on ``x``."""
        x = self.conv(x)
        x = self.transition(x)
        # Global pooling leaves (N, C, 1, 1); after nn.Flatten the tensor is
        # already 2-D and this call is a no-op, so it is safe unconditionally.
        x = torch.flatten(x, 1)

        if self.linear is None:
            head = self._build_linear_layers(x.shape[1])
            # A module created now has missed every earlier .to()/.eval()/.train()
            # call on the model, so align it with the existing layers explicitly.
            reference = next(self.conv.parameters(), None)
            if reference is not None:
                head.to(device=reference.device, dtype=reference.dtype)
            else:
                head.to(device=x.device)
            head.train(self.training)
            self.linear = head
        return self.linear(x)


In [46]:
# Smoke test: build a network from a freshly sampled genome and run one forward pass.
# NOTE(review): no random seed is set in the visible cells, so the sampled
# architecture (and therefore the result of this cell) changes on every run.
x = torch.randn(1, 3, 64, 64) # batch of 1 sample with 3 channels and 64x64 spatial size
model = GenomeNet(create_random_genome(), input_channels=x.shape[1], num_classes=10)
model.eval()
out = model(x)

In [47]:
# Display the module tree of the network built above.
model

GenomeNet(
  (conv): Sequential(
    (0): Conv2d(3, 8, kernel_size=(5, 5), stride=(2, 2), padding=(1, 1))
    (1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): GELU(approximate='none')
    (3): AvgPool2d(kernel_size=2, stride=1, padding=0)
    (4): Dropout2d(p=0.05, inplace=False)
    (5): Conv2d(8, 32, kernel_size=(6, 6), stride=(1, 1), padding=(1, 1))
    (6): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): GELU(approximate='none')
    (8): AvgPool2d(kernel_size=2, stride=1, padding=0)
    (9): Dropout2d(p=0.25, inplace=False)
    (10): Conv2d(32, 4, kernel_size=(3, 3), stride=(1, 1), padding=(2, 2))
    (11): LeakyReLU(negative_slope=0.01)
    (12): Dropout2d(p=0.15, inplace=False)
  )
  (transition): AdaptiveMaxPool2d(output_size=(1, 1))
  (linear): Sequential(
    (0): Linear(in_features=4, out_features=256, bias=True)
    (1): Tanh()
    (2): Linear(in_features=256, out_features=64, bias=True)
   

In [17]:
def count_params(model: torch.nn.Module) -> int:
    """Count the scalar elements of all trainable parameters of ``model``.

    Only parameters with ``requires_grad`` set are included, and only those
    registered on the model at call time. GenomeNet may create its linear head
    during the first forward pass, so call this after a forward pass.
    """
    trainable = 0
    for tensor in model.parameters():
        if tensor.requires_grad:
            trainable += tensor.numel()
    return trainable

# Example usage: report the trainable-parameter count in thousands.
num_params = count_params(model) / 1_000
# The value was divided by 1,000 above, so print it with an explicit "K" unit;
# without it the number reads as the raw parameter count.
print(f"Trainable parameters: {num_params:.1f}K")

Trainable parameters: 650.306


In [19]:
def get_model_size_mb(model: torch.nn.Module) -> float:
    """Return the storage taken by the model's parameters and buffers.

    The total is the sum of ``numel() * element_size()`` over every parameter
    and every buffer, divided by 1024 ** 2.
    """
    def _total_bytes(tensors) -> int:
        # Bytes occupied by each tensor = element count * bytes per element.
        return sum(t.numel() * t.element_size() for t in tensors)

    size_all = _total_bytes(model.parameters()) + _total_bytes(model.buffers())
    return size_all / (1024 ** 2)

# Usage: report the size of the model's parameters and buffers.
# get_model_size_mb divides the byte total by 1024 ** 2, so the figure is in MiB.
model_size = get_model_size_mb(model)
print(f"Model size: {model_size:.2f} MB")

Model size: 2.48 MB
