In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below /kaggle/input.
for dirname, filename in (
    (root, leaf)
    for root, _, leaves in os.walk('/kaggle/input')
    for leaf in leaves
):
    print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
import os
import random
import wandb
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
from torchvision.datasets import ImageFolder
from torchvision import models

In [2]:
# Never hardcode the W&B API key in a notebook: it leaks through version
# history and shared outputs. The key that used to be committed in this cell
# should be treated as compromised and revoked in the W&B account settings.
# Provide the key through the WANDB_API_KEY environment variable instead
# (e.g. populated from the platform's secret store). When the variable is
# unset, `key=None` lets wandb fall back to its own credential lookup.
wandb.login(key=os.environ.get("WANDB_API_KEY"))

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mviinod9[0m ([33mviinod9-iitm[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [5]:
class FlexibleCNN(nn.Module):
    """Configurable CNN: a stack of conv blocks followed by a two-layer dense head.

    Each conv block is ``Conv2d -> [BatchNorm2d] -> MaxPool2d(2, 2)``. The
    activation is not part of the block; :meth:`forward` applies
    ``activation_fn`` directly after every convolution, i.e. before batch
    norm (when enabled) and before pooling.

    The first dense layer is sized from a dummy ``224 x 224`` input, so
    :meth:`forward` expects images of that spatial size.

    Args:
        input_channels: Number of channels of the input images.
        conv_filters: Output channels of each conv block, in order.
        kernel_sizes: Kernel size of each conv block. It is paired with
            ``conv_filters`` via ``zip``, so the shorter sequence decides the
            number of blocks. Padding is fixed at 1, which keeps the spatial
            size unchanged only for ``kernel_size=3``.
        activation_fn: Callable applied after every convolution.
        dense_neurons: Width of the hidden dense layer.
        dense_activation_fn: Callable applied after the hidden dense layer.
        dropout: Dropout probability applied to the flattened features before
            the hidden dense layer; no dropout layer is created when ``<= 0``.
        batch_norm: Whether to insert ``BatchNorm2d`` after each convolution.
        num_classes: Number of output logits.
    """

    def __init__(
        self,
        input_channels=3,
        conv_filters=[32, 64, 128, 256, 512],
        kernel_sizes=[3, 3, 3, 3, 3],
        activation_fn=F.relu,
        dense_neurons=256,
        dense_activation_fn=F.relu,
        dropout=0.0,
        batch_norm=False,
        num_classes=10
    ):
        # NOTE: the list defaults above are only read, never mutated.
        super().__init__()
        self.activation_fn = activation_fn
        self.dense_activation_fn = dense_activation_fn
        self.dropout = dropout
        self.batch_norm = batch_norm

        # Convolutional blocks (activation is applied in forward()).
        self.conv_layers = nn.ModuleList()
        in_channels = input_channels
        for out_channels, kernel_size in zip(conv_filters, kernel_sizes):
            layers = [nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=1)]
            if batch_norm:
                layers.append(nn.BatchNorm2d(out_channels))
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            self.conv_layers.append(nn.Sequential(*layers))
            in_channels = out_channels

        # Dummy input used only to infer the flattened feature size. It is a
        # plain attribute (not a parameter/buffer), so it is not part of the
        # state dict and does not move with ``.to(device)``.
        self._dummy_input = torch.zeros(1, input_channels, 224, 224)
        self.flattened_size = self._get_flattened_size()

        # Dense head: [Dropout] -> Linear -> dense_activation_fn -> Linear.
        fc1_layers = []
        if dropout > 0:
            fc1_layers.append(nn.Dropout(dropout))
        fc1_layers.append(nn.Linear(self.flattened_size, dense_neurons))
        self.fc1 = nn.Sequential(*fc1_layers)

        self.fc2 = nn.Linear(dense_neurons, num_classes)

    def _get_flattened_size(self):
        """Return the number of features produced by the conv stack for the dummy input.

        Only the shape matters here, so the pass runs without autograd and
        skips ``BatchNorm2d`` layers: they never change the tensor shape, and
        running them in training mode on the all-zero dummy batch would
        overwrite their freshly initialised running statistics.
        """
        x = self._dummy_input
        with torch.no_grad():
            for block in self.conv_layers:
                for layer in block:
                    if isinstance(layer, nn.BatchNorm2d):
                        continue  # shape-neutral; must not touch running stats
                    x = layer(x)
        return x.view(1, -1).size(1)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Image batch with ``input_channels`` channels and 224 x 224
                spatial size (the size the dense head was built for).

        Returns:
            Tensor of shape ``(batch, num_classes)`` with unnormalised logits.
        """
        for block in self.conv_layers:
            for layer in block:
                if isinstance(layer, nn.Conv2d):
                    x = layer(x)
                    x = self.activation_fn(x)  # activation right after the conv
                else:
                    x = layer(x)  # BatchNorm2d or MaxPool2d
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.dense_activation_fn(x)
        x = self.fc2(x)
        return x


In [6]:
# Example instantiation with custom filters, kernels, and activations
# Demo: a five-block, batch-normalised network with a 128-unit dense layer;
# printing the module shows the resulting layer structure.
model = FlexibleCNN(**{
    "input_channels": 3,
    "conv_filters": [16, 32, 64, 128, 256],
    "kernel_sizes": [3, 3, 3, 3, 3],
    "activation_fn": F.relu,
    "dense_neurons": 128,
    "dense_activation_fn": F.relu,
    "dropout": 0.3,
    "batch_norm": True,
    "num_classes": 10,
})
print(model)

FlexibleCNN(
  (conv_layers): ModuleList(
    (0): Sequential(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (1): Sequential(
      (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (2): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (3): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchN

In [4]:
# ------------- Activation map
# Activation modules that can be selected by name (e.g. from a sweep config).
activation_map = {
    "relu": nn.ReLU(),
    "gelu": nn.GELU(),
    "silu": nn.SiLU(),
    "mish": nn.Mish()
}

def get_activation_fn(name):
    """Return a plain callable that applies the activation registered as ``name``.

    The lookup is case-insensitive and happens immediately, so a misspelled
    name fails here instead of during the first forward pass.

    Args:
        name: Key of ``activation_map`` (any letter case).

    Returns:
        A function ``x -> activation(x)``.

    Raises:
        KeyError: If ``name`` is not a registered activation.
    """
    key = name.lower()
    if key not in activation_map:
        raise KeyError(
            f"Unknown activation '{name}'; expected one of {sorted(activation_map)}"
        )
    activation = activation_map[key]
    # Return a function rather than the nn.Module itself: assigning a Module
    # to an attribute of another Module would register it as a submodule.
    return lambda x: activation(x)

# ------------- Dataset Loading and Stratified Split

def get_dataloaders(data_dir, batch_size, val_split=0.2, augment=False, seed=42):
    """Build train/validation loaders from ``<data_dir>/train`` with a stratified split.

    Every class contributes ``int(n_class * val_split)`` images to the
    validation set and the rest to the training set. The split is drawn from
    a private ``random.Random(seed)``: calls with the same seed produce the
    same split (so different runs are scored on the same validation images)
    and the global ``random`` state is left untouched.

    Args:
        data_dir: Directory containing a ``train`` sub-folder in
            ``ImageFolder`` layout.
        batch_size: Batch size of both loaders.
        val_split: Fraction of each class assigned to validation.
        augment: If true, the training transform additionally applies a
            random horizontal flip and a random rotation of up to 10 degrees.
        seed: Seed of the split. Pass ``None`` to get a different random
            split on every call.

    Returns:
        Tuple ``(train_loader, val_loader, num_classes)``.
    """
    # Validation images are only resized and converted to tensors.
    transform_val = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor()
    ])

    if augment:
        transform_train = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(10),
            transforms.ToTensor()
        ])
    else:
        transform_train = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor()
        ])

    train_root = os.path.join(data_dir, 'train')
    full_dataset = ImageFolder(train_root, transform=transform_train)

    # Group sample indices by class label.
    label_to_indices = {}
    for idx, (_, label) in enumerate(full_dataset.samples):
        label_to_indices.setdefault(label, []).append(idx)

    # Stratified split: shuffle each class independently with the seeded RNG.
    split_rng = random.Random(seed)
    train_idx = []
    val_idx = []
    for indices in label_to_indices.values():
        split_rng.shuffle(indices)
        split = int(len(indices) * val_split)
        val_idx.extend(indices[:split])
        train_idx.extend(indices[split:])

    train_data = Subset(full_dataset, train_idx)
    # A second ImageFolder over the same directory gives the validation
    # subset its own transform; this assumes both scans enumerate the files
    # in the same order so that the indices refer to the same images.
    val_data = Subset(ImageFolder(train_root, transform=transform_val), val_idx)

    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False, num_workers=2)

    return train_loader, val_loader, len(full_dataset.classes)




# ------------- Training and Evaluation Functions

def train_one_epoch(model, optimizer, criterion, dataloader, device):
    """Run one optimisation pass over ``dataloader`` and report loss and accuracy.

    Args:
        model: Network to train; it is switched to training mode.
        optimizer: Optimizer updating ``model``'s parameters.
        criterion: Loss function called as ``criterion(outputs, labels)``. It
            is expected to return the mean loss of the batch (the default
            reduction of ``nn.CrossEntropyLoss``).
        dataloader: Iterable of ``(images, labels)`` batches.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(mean loss per sample, accuracy in percent)``. ``(0.0, 0.0)``
        is returned when the dataloader yields no samples.
    """
    model.train()
    loss_sum, correct, total = 0.0, 0, 0
    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # Weight the batch mean by the batch size so a short final batch does
        # not count as much as a full one.
        loss_sum += loss.item() * batch_size
        _, predicted = outputs.max(1)
        total += batch_size
        correct += predicted.eq(labels).sum().item()

    if total == 0:
        return 0.0, 0.0
    return loss_sum / total, 100. * correct / total

def evaluate(model, criterion, dataloader, device):
    """Compute loss and accuracy of ``model`` on ``dataloader`` without training.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        criterion: Loss function called as ``criterion(outputs, labels)``. It
            is expected to return the mean loss of the batch (the default
            reduction of ``nn.CrossEntropyLoss``).
        dataloader: Iterable of ``(images, labels)`` batches.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(mean loss per sample, accuracy in percent)``. ``(0.0, 0.0)``
        is returned when the dataloader yields no samples.
    """
    model.eval()
    loss_sum, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            batch_size = labels.size(0)
            # Weight the batch mean by the batch size so a short final batch
            # does not count as much as a full one.
            loss_sum += criterion(outputs, labels).item() * batch_size
            _, predicted = outputs.max(1)
            total += batch_size
            correct += predicted.eq(labels).sum().item()

    if total == 0:
        return 0.0, 0.0
    return loss_sum / total, 100. * correct / total

# ------------- Train Loop for wandb Sweep

def train(config=None):
    """Run one training job inside a wandb run (used as the sweep agent's function).

    Reads the hyperparameters from ``wandb.config``, names the run after
    them, builds the data loaders and a ``FlexibleCNN``, then trains with
    Adam and cross-entropy for ``config.epochs`` epochs, printing and logging
    train/validation loss and accuracy after every epoch.

    Args:
        config: Optional configuration forwarded to ``wandb.init``.
    """
    with wandb.init(config=config):
        # Hyperparameters of the active run.
        config = wandb.config

        # Name the run after its hyperparameters.
        wandb.run.name = (
            f"filt-{config.base_filter}_{config.filter_organization}_"
            f"act-{config.activation_fn}_bn-{config.batch_norm}_"
            f"do-{config.dropout}_dense-{config.dense_neurons}_"
            f"bs-{config.batch_size}_lr-{config.lr}_aug-{config.data_augmentation}"
        )

        has_gpu = torch.cuda.is_available()
        device = torch.device("cuda" if has_gpu else "cpu")

        # Data
        loaders = get_dataloaders(
            '/kaggle/input/inaturalist-dataset/inaturalist_12K',
            config.batch_size,
            val_split=0.2,
            augment=config.data_augmentation,
        )
        train_loader, val_loader, num_classes = loaders

        # Channel layout of the five conv blocks for each organisation scheme.
        depth = 5
        filter_layouts = {
            'same': [config.base_filter] * depth,
            'double': [config.base_filter * (2 ** level) for level in range(depth)],
            'half': [config.base_filter // (2 ** level) for level in range(depth)],
        }
        conv_filters = filter_layouts[config.filter_organization]

        # Model
        model = FlexibleCNN(
            conv_filters=conv_filters,
            kernel_sizes=[3] * depth,
            activation_fn=get_activation_fn(config.activation_fn),
            dense_neurons=config.dense_neurons,
            dense_activation_fn=F.relu,
            dropout=config.dropout,
            batch_norm=config.batch_norm,
            num_classes=num_classes,
        ).to(device)

        # Objective and optimiser
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=config.lr)

        for epoch in range(config.epochs):
            train_loss, train_acc = train_one_epoch(
                model, optimizer, criterion, train_loader, device
            )
            val_loss, val_acc = evaluate(model, criterion, val_loader, device)

            # Keras-style progress lines
            print(f"Epoch {epoch + 1}/{config.epochs}")
            print(f" - train_acc: {train_acc:.4f} - val_acc: {val_acc:.4f}")

            epoch_metrics = {
                "epoch": epoch + 1,
                "train_loss": train_loss,
                "train_acc": train_acc,
                "val_loss": val_loss,
                "val_acc": val_acc,
            }
            wandb.log(epoch_metrics)

# ------------- Sweep Config

# Random search over the listed hyperparameter choices, maximising the
# logged `val_acc` metric.
sweep_config = {
    'method': 'random',
    'metric': {'name': 'val_acc', 'goal': 'maximize'},
    'parameters': {
        hp_name: {'values': choices}
        for hp_name, choices in {
            'base_filter': [32, 64],
            'filter_organization': ['same', 'double', 'half'],
            'activation_fn': ['relu', 'gelu', 'silu', 'mish'],
            'data_augmentation': [True, False],
            'batch_norm': [True, False],
            'dropout': [0.2, 0.3],
            'dense_neurons': [128, 256],
            'batch_size': [32, 64, 128],
            'lr': [1e-3, 1e-4],
            'epochs': [5, 7, 10, 15, 17, 20],
        }.items()
    },
}

# Register the sweep, then let a local agent execute 10 runs of `train`.
sweep_id = wandb.sweep(sweep_config, project='iNaturalist-CNN')
wandb.agent(sweep_id, function=train, count=10)



# sweep_config = {
#     'method': 'random',
#     'metric': {'name': 'val_acc', 'goal': 'maximize'},
#     'parameters': {
#         'base_filter': {'values': [64]},
#         'filter_organization': {'values': ['double']},
#         'activation_fn': {'values': ['relu']},
#         'data_augmentation': {'values': [True]},
#         'batch_norm': {'values': [True]},
#         'dropout': {'values': [0.3]},
#         'dense_neurons': {'values': [128]},
#         'batch_size': {'values': [64]},
#         'lr': {'values': [1e-3]},
#         'epochs': {'values': [5]}
#     }
# }

# sweep_id = wandb.sweep(sweep_config, project='iNaturalist-CNN')
# wandb.agent(sweep_id, function=train, count = 1)


Create sweep with ID: rhf91n3b
Sweep URL: https://wandb.ai/viinod9-iitm/iNaturalist-CNN/sweeps/rhf91n3b


[34m[1mwandb[0m: Agent Starting Run: 3rvrrarz with config:
[34m[1mwandb[0m: 	activation_fn: gelu
[34m[1mwandb[0m: 	base_filter: 64
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	lr: 0.0001


Epoch 1/20
 - train_acc: 24.5125 - val_acc: 27.2136
Epoch 2/20
 - train_acc: 35.5125 - val_acc: 33.3667
Epoch 3/20
 - train_acc: 39.1875 - val_acc: 35.8679
Epoch 4/20
 - train_acc: 43.9750 - val_acc: 37.0685
Epoch 5/20
 - train_acc: 48.1125 - val_acc: 37.5188
Epoch 6/20
 - train_acc: 52.9750 - val_acc: 39.1696
Epoch 7/20
 - train_acc: 57.6125 - val_acc: 38.1691
Epoch 8/20
 - train_acc: 62.0625 - val_acc: 38.3692
Epoch 9/20
 - train_acc: 66.3875 - val_acc: 37.8689
Epoch 10/20
 - train_acc: 70.2875 - val_acc: 39.0195
Epoch 11/20
 - train_acc: 74.1375 - val_acc: 39.7199
Epoch 12/20
 - train_acc: 77.6250 - val_acc: 39.7199
Epoch 13/20
 - train_acc: 81.1875 - val_acc: 38.1191
Epoch 14/20
 - train_acc: 82.7125 - val_acc: 38.6193
Epoch 15/20
 - train_acc: 85.8500 - val_acc: 37.9690
Epoch 16/20
 - train_acc: 87.7000 - val_acc: 38.1691
Epoch 17/20
 - train_acc: 88.9125 - val_acc: 38.5693
Epoch 18/20
 - train_acc: 90.4750 - val_acc: 38.8194
Epoch 19/20
 - train_acc: 91.5500 - val_acc: 37.4187
Ep

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▂▃▃▃▄▄▅▅▆▆▆▇▇▇█████
train_loss,█▇▇▆▆▅▅▄▄▄▃▃▂▂▂▂▁▁▁▁
val_acc,▁▄▆▇▇█▇▇▇███▇▇▇▇▇▇▇█
val_loss,▅▃▂▂▁▁▁▂▂▂▂▃▄▅▆▆▆▇▇█

0,1
epoch,20.0
train_acc,92.2875
train_loss,0.29609
val_acc,39.21961
val_loss,2.24192


[34m[1mwandb[0m: Agent Starting Run: v29mxmos with config:
[34m[1mwandb[0m: 	activation_fn: relu
[34m[1mwandb[0m: 	base_filter: 32
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 17
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	lr: 0.0001


Epoch 1/17
 - train_acc: 20.6750 - val_acc: 23.4617
Epoch 2/17
 - train_acc: 28.3500 - val_acc: 30.5153
Epoch 3/17
 - train_acc: 31.1875 - val_acc: 32.1661
Epoch 4/17
 - train_acc: 33.7500 - val_acc: 34.1171
Epoch 5/17
 - train_acc: 35.6625 - val_acc: 34.8674
Epoch 6/17
 - train_acc: 36.7625 - val_acc: 34.7674
Epoch 7/17
 - train_acc: 37.6625 - val_acc: 35.4677
Epoch 8/17
 - train_acc: 39.2375 - val_acc: 35.7679
Epoch 9/17
 - train_acc: 39.4625 - val_acc: 36.7684
Epoch 10/17
 - train_acc: 41.3750 - val_acc: 36.4682
Epoch 11/17
 - train_acc: 42.0000 - val_acc: 36.3682
Epoch 12/17
 - train_acc: 43.0500 - val_acc: 38.0190
Epoch 13/17
 - train_acc: 43.3125 - val_acc: 37.6188
Epoch 14/17
 - train_acc: 44.4500 - val_acc: 38.1691
Epoch 15/17
 - train_acc: 45.4375 - val_acc: 36.9685
Epoch 16/17
 - train_acc: 46.1000 - val_acc: 38.9695
Epoch 17/17
 - train_acc: 46.3125 - val_acc: 38.0690


0,1
epoch,▁▁▂▂▃▃▄▄▅▅▅▆▆▇▇██
train_acc,▁▃▄▅▅▅▆▆▆▇▇▇▇▇███
train_loss,█▆▅▅▄▄▄▃▃▃▂▂▂▂▁▁▁
val_acc,▁▄▅▆▆▆▆▇▇▇▇█▇█▇██
val_loss,█▅▄▄▃▃▂▂▂▂▂▂▁▁▁▁▁

0,1
epoch,17.0
train_acc,46.3125
train_loss,1.53621
val_acc,38.06903
val_loss,1.76564


[34m[1mwandb[0m: Agent Starting Run: jhkdq6ks with config:
[34m[1mwandb[0m: 	activation_fn: silu
[34m[1mwandb[0m: 	base_filter: 32
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 7
[34m[1mwandb[0m: 	filter_organization: double
[34m[1mwandb[0m: 	lr: 0.001


Epoch 1/7
 - train_acc: 20.9375 - val_acc: 22.1111
Epoch 2/7
 - train_acc: 28.5625 - val_acc: 27.8139
Epoch 3/7
 - train_acc: 32.0125 - val_acc: 30.7654
Epoch 4/7
 - train_acc: 35.9000 - val_acc: 30.6653
Epoch 5/7
 - train_acc: 38.9000 - val_acc: 31.2656
Epoch 6/7
 - train_acc: 42.0375 - val_acc: 34.7174
Epoch 7/7
 - train_acc: 46.3500 - val_acc: 32.7664


0,1
epoch,▁▂▃▅▆▇█
train_acc,▁▃▄▅▆▇█
train_loss,█▄▄▃▂▂▁
val_acc,▁▄▆▆▆█▇
val_loss,█▄▂▃▃▁▄

0,1
epoch,7.0
train_acc,46.35
train_loss,1.5309
val_acc,32.76638
val_loss,2.06289


[34m[1mwandb[0m: Agent Starting Run: 6vdsv8y0 with config:
[34m[1mwandb[0m: 	activation_fn: gelu
[34m[1mwandb[0m: 	base_filter: 32
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: double
[34m[1mwandb[0m: 	lr: 0.001


Epoch 1/20
 - train_acc: 14.2875 - val_acc: 23.1616
Epoch 2/20
 - train_acc: 23.5875 - val_acc: 26.2631
Epoch 3/20
 - train_acc: 28.0375 - val_acc: 30.7654
Epoch 4/20
 - train_acc: 30.8500 - val_acc: 32.0160
Epoch 5/20
 - train_acc: 32.7500 - val_acc: 32.8164
Epoch 6/20
 - train_acc: 33.9375 - val_acc: 33.8169
Epoch 7/20
 - train_acc: 36.2750 - val_acc: 34.9675
Epoch 8/20
 - train_acc: 38.2875 - val_acc: 34.7674
Epoch 9/20
 - train_acc: 40.1375 - val_acc: 36.0180
Epoch 10/20
 - train_acc: 41.7750 - val_acc: 36.0180
Epoch 11/20
 - train_acc: 44.3875 - val_acc: 38.2691
Epoch 12/20
 - train_acc: 46.0375 - val_acc: 35.9180
Epoch 13/20
 - train_acc: 49.6250 - val_acc: 37.9690
Epoch 14/20
 - train_acc: 52.3375 - val_acc: 39.2696
Epoch 15/20
 - train_acc: 55.4125 - val_acc: 36.9685
Epoch 16/20
 - train_acc: 58.6875 - val_acc: 37.4687
Epoch 17/20
 - train_acc: 62.7625 - val_acc: 37.7689
Epoch 18/20
 - train_acc: 66.3875 - val_acc: 38.7694
Epoch 19/20
 - train_acc: 70.2000 - val_acc: 38.3692
Ep

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▂▃▃▃▃▄▄▄▄▅▅▅▆▆▆▇▇██
train_loss,█▇▇▇▆▆▆▆▅▅▅▅▄▄▃▃▂▂▁▁
val_acc,▁▂▄▅▅▆▆▆▇▇█▇▇█▇▇▇███
val_loss,▅▄▃▂▂▁▁▁▁▁▁▁▂▂▂▃▄▆▇█

0,1
epoch,20.0
train_acc,72.925
train_loss,0.79685
val_acc,38.91946
val_loss,2.36633


[34m[1mwandb[0m: Agent Starting Run: thtdd7hq with config:
[34m[1mwandb[0m: 	activation_fn: mish
[34m[1mwandb[0m: 	base_filter: 32
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	filter_organization: double
[34m[1mwandb[0m: 	lr: 0.001


Epoch 1/15
 - train_acc: 16.4125 - val_acc: 22.9115
Epoch 2/15
 - train_acc: 23.4000 - val_acc: 28.1141
Epoch 3/15
 - train_acc: 29.4125 - val_acc: 28.6143
Epoch 4/15
 - train_acc: 31.5875 - val_acc: 27.9140
Epoch 5/15
 - train_acc: 35.0500 - val_acc: 30.7654
Epoch 6/15
 - train_acc: 37.5375 - val_acc: 29.9150
Epoch 7/15
 - train_acc: 42.5750 - val_acc: 32.8164
Epoch 8/15
 - train_acc: 50.1375 - val_acc: 32.0660
Epoch 9/15
 - train_acc: 61.3125 - val_acc: 33.6668
Epoch 10/15
 - train_acc: 75.8625 - val_acc: 30.9655
Epoch 11/15
 - train_acc: 87.2750 - val_acc: 29.4647
Epoch 12/15
 - train_acc: 93.2625 - val_acc: 31.1656
Epoch 13/15
 - train_acc: 95.0375 - val_acc: 30.6153
Epoch 14/15
 - train_acc: 97.2375 - val_acc: 32.3162
Epoch 15/15
 - train_acc: 97.9875 - val_acc: 30.3152


0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train_acc,▁▂▂▂▃▃▃▄▅▆▇████
train_loss,██▇▇▇▇▆▅▄▃▂▁▁▁▁
val_acc,▁▄▅▄▆▆▇▇█▆▅▆▆▇▆
val_loss,▁▁▁▁▁▁▁▁▂▃▅▆▆▇█

0,1
epoch,15.0
train_acc,97.9875
train_loss,0.06509
val_acc,30.31516
val_loss,5.16532


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0eyfdgzj with config:
[34m[1mwandb[0m: 	activation_fn: gelu
[34m[1mwandb[0m: 	base_filter: 32
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	lr: 0.001


Epoch 1/20
 - train_acc: 12.7750 - val_acc: 15.7079
Epoch 2/20
 - train_acc: 20.1375 - val_acc: 22.1111
Epoch 3/20
 - train_acc: 23.0375 - val_acc: 25.0125
Epoch 4/20
 - train_acc: 25.6750 - val_acc: 26.5633
Epoch 5/20
 - train_acc: 28.5375 - val_acc: 28.3642
Epoch 6/20
 - train_acc: 30.6750 - val_acc: 30.9155
Epoch 7/20
 - train_acc: 33.5750 - val_acc: 32.8664
Epoch 8/20
 - train_acc: 35.0625 - val_acc: 33.9670
Epoch 9/20
 - train_acc: 36.2250 - val_acc: 34.5673
Epoch 10/20
 - train_acc: 38.3875 - val_acc: 34.7174
Epoch 11/20
 - train_acc: 38.6375 - val_acc: 37.1686
Epoch 12/20
 - train_acc: 40.3625 - val_acc: 35.3677
Epoch 13/20
 - train_acc: 42.2000 - val_acc: 38.9695
Epoch 14/20
 - train_acc: 43.6375 - val_acc: 39.3697
Epoch 15/20
 - train_acc: 44.9625 - val_acc: 40.1201
Epoch 16/20
 - train_acc: 44.6875 - val_acc: 38.5693
Epoch 17/20
 - train_acc: 46.6500 - val_acc: 39.4697
Epoch 18/20
 - train_acc: 48.4250 - val_acc: 40.4702
Epoch 19/20
 - train_acc: 49.7625 - val_acc: 39.4197
Ep

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▂▃▃▄▄▅▅▅▆▆▆▆▇▇▇▇▇██
train_loss,█▇▇▆▆▅▅▅▄▄▄▃▃▃▃▂▂▂▁▁
val_acc,▁▃▄▄▅▅▆▆▆▆▇▇███▇████
val_loss,█▇▆▆▅▄▃▃▂▂▂▂▁▁▁▁▁▁▂▂

0,1
epoch,20.0
train_acc,51.7375
train_loss,1.38903
val_acc,38.76938
val_loss,1.80903


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: loygrwzb with config:
[34m[1mwandb[0m: 	activation_fn: silu
[34m[1mwandb[0m: 	base_filter: 64
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	filter_organization: half
[34m[1mwandb[0m: 	lr: 0.0001


Epoch 1/20
 - train_acc: 13.4875 - val_acc: 16.7584
Epoch 2/20
 - train_acc: 20.3625 - val_acc: 23.9120
Epoch 3/20
 - train_acc: 24.1500 - val_acc: 25.8129
Epoch 4/20
 - train_acc: 25.2625 - val_acc: 27.2636
Epoch 5/20
 - train_acc: 27.4000 - val_acc: 29.3647
Epoch 6/20
 - train_acc: 27.8875 - val_acc: 29.8149
Epoch 7/20
 - train_acc: 29.0750 - val_acc: 30.5153
Epoch 8/20
 - train_acc: 31.2625 - val_acc: 31.3157
Epoch 9/20
 - train_acc: 31.5625 - val_acc: 31.1656
Epoch 10/20
 - train_acc: 32.1875 - val_acc: 31.6658
Epoch 11/20
 - train_acc: 32.7000 - val_acc: 31.7659
Epoch 12/20
 - train_acc: 33.3875 - val_acc: 32.8664
Epoch 13/20
 - train_acc: 34.5500 - val_acc: 33.3667
Epoch 14/20
 - train_acc: 34.3875 - val_acc: 32.5663
Epoch 15/20
 - train_acc: 35.5750 - val_acc: 33.1666
Epoch 16/20
 - train_acc: 35.2125 - val_acc: 34.9175
Epoch 17/20
 - train_acc: 36.2125 - val_acc: 32.2161
Epoch 18/20
 - train_acc: 37.0125 - val_acc: 33.9170
Epoch 19/20
 - train_acc: 37.6875 - val_acc: 34.6173
Ep

0,1
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
train_acc,▁▃▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇███
train_loss,█▇▆▅▅▄▄▄▃▃▃▃▂▂▂▂▁▁▁▁
val_acc,▁▄▄▅▆▆▆▇▇▇▇▇▇▇▇█▇███
val_loss,█▆▅▄▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁

0,1
epoch,20.0
train_acc,38.0125
train_loss,1.76952
val_acc,34.46723
val_loss,1.87033


[34m[1mwandb[0m: Agent Starting Run: qud7o5yt with config:
[34m[1mwandb[0m: 	activation_fn: mish
[34m[1mwandb[0m: 	base_filter: 32
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	filter_organization: half
[34m[1mwandb[0m: 	lr: 0.0001


Epoch 1/15
 - train_acc: 13.5750 - val_acc: 18.2091
Epoch 2/15
 - train_acc: 19.9375 - val_acc: 21.1606
Epoch 3/15
 - train_acc: 22.7000 - val_acc: 22.1111
Epoch 4/15
 - train_acc: 25.3125 - val_acc: 22.8614
Epoch 5/15
 - train_acc: 25.6750 - val_acc: 24.3122
Epoch 6/15
 - train_acc: 27.6125 - val_acc: 24.9625
Epoch 7/15
 - train_acc: 27.7875 - val_acc: 25.1626
Epoch 8/15
 - train_acc: 28.3125 - val_acc: 25.9630
Epoch 9/15
 - train_acc: 28.8750 - val_acc: 27.1136
Epoch 10/15
 - train_acc: 29.9125 - val_acc: 27.3637
Epoch 11/15
 - train_acc: 29.5000 - val_acc: 27.6638
Epoch 12/15
 - train_acc: 30.6000 - val_acc: 28.8144
Epoch 13/15
 - train_acc: 30.8500 - val_acc: 29.7649
Epoch 14/15
 - train_acc: 31.0625 - val_acc: 28.7644
Epoch 15/15
 - train_acc: 32.0000 - val_acc: 29.1646


0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train_acc,▁▃▄▅▆▆▆▇▇▇▇▇███
train_loss,█▆▅▄▄▃▃▃▂▂▂▂▁▁▁
val_acc,▁▃▃▄▅▅▅▆▆▇▇▇█▇█
val_loss,█▆▅▄▄▃▃▂▂▂▂▂▁▁▁

0,1
epoch,15.0
train_acc,32.0
train_loss,1.93201
val_acc,29.16458
val_loss,2.02178


[34m[1mwandb[0m: Agent Starting Run: t8rojjoe with config:
[34m[1mwandb[0m: 	activation_fn: silu
[34m[1mwandb[0m: 	base_filter: 64
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	data_augmentation: False
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	filter_organization: same
[34m[1mwandb[0m: 	lr: 0.0001


Epoch 1/5
 - train_acc: 26.6375 - val_acc: 28.3142
Epoch 2/5
 - train_acc: 36.3125 - val_acc: 32.8164
Epoch 3/5
 - train_acc: 40.9875 - val_acc: 34.9175
Epoch 4/5
 - train_acc: 45.3000 - val_acc: 35.9180
Epoch 5/5
 - train_acc: 49.4500 - val_acc: 36.7184


0,1
epoch,▁▃▅▆█
train_acc,▁▄▅▇█
train_loss,█▅▄▂▁
val_acc,▁▅▆▇█
val_loss,█▃▂▁▁

0,1
epoch,5.0
train_acc,49.45
train_loss,1.49929
val_acc,36.71836
val_loss,1.8236


[34m[1mwandb[0m: Agent Starting Run: g0p4uj7t with config:
[34m[1mwandb[0m: 	activation_fn: relu
[34m[1mwandb[0m: 	base_filter: 64
[34m[1mwandb[0m: 	batch_norm: True
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	data_augmentation: True
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 15
[34m[1mwandb[0m: 	filter_organization: double
[34m[1mwandb[0m: 	lr: 0.001


Epoch 1/15
 - train_acc: 11.6875 - val_acc: 12.8564
Epoch 2/15
 - train_acc: 12.7375 - val_acc: 10.8554
Epoch 3/15
 - train_acc: 13.2750 - val_acc: 15.4077
Epoch 4/15
 - train_acc: 13.6250 - val_acc: 14.7574
Epoch 5/15
 - train_acc: 14.2625 - val_acc: 13.9070
Epoch 6/15
 - train_acc: 15.2375 - val_acc: 14.6573
Epoch 7/15
 - train_acc: 15.5375 - val_acc: 15.0575
Epoch 8/15
 - train_acc: 15.3125 - val_acc: 15.7579
Epoch 9/15
 - train_acc: 15.7875 - val_acc: 15.3077
Epoch 10/15
 - train_acc: 15.7500 - val_acc: 16.3082
Epoch 11/15
 - train_acc: 16.3625 - val_acc: 15.1576
Epoch 12/15
 - train_acc: 16.7875 - val_acc: 15.1076
Epoch 13/15
 - train_acc: 16.8250 - val_acc: 15.9080
Epoch 14/15
 - train_acc: 17.1375 - val_acc: 17.8589
Epoch 15/15
 - train_acc: 17.2625 - val_acc: 16.2081


0,1
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█
train_acc,▁▂▃▃▄▅▆▆▆▆▇▇▇██
train_loss,█▂▂▂▂▂▂▂▂▂▁▁▁▁▁
val_acc,▃▁▆▅▄▅▅▆▅▆▅▅▆█▆
val_loss,▃█▂▂▃▂▂▃▂▁▂▂▁▁▁

0,1
epoch,15.0
train_acc,17.2625
train_loss,2.17865
val_acc,16.2081
val_loss,2.1991


In [None]:
# ------------- Activation map
# Activation modules that can be selected by name (e.g. from a sweep config).
activation_map = {
    "relu": nn.ReLU(),
    "gelu": nn.GELU(),
    "silu": nn.SiLU(),
    "mish": nn.Mish()
}

def get_activation_fn(name):
    """Return a plain callable that applies the activation registered as ``name``.

    The lookup is case-insensitive and happens immediately, so a misspelled
    name fails here instead of during the first forward pass.

    Args:
        name: Key of ``activation_map`` (any letter case).

    Returns:
        A function ``x -> activation(x)``.

    Raises:
        KeyError: If ``name`` is not a registered activation.
    """
    key = name.lower()
    if key not in activation_map:
        raise KeyError(
            f"Unknown activation '{name}'; expected one of {sorted(activation_map)}"
        )
    activation = activation_map[key]
    # Return a function rather than the nn.Module itself: assigning a Module
    # to an attribute of another Module would register it as a submodule.
    return lambda x: activation(x)

# ------------- Dataset Loading and Stratified Split

def get_dataloaders(data_dir, batch_size, val_split=0.2, augment=False, seed=42):
    """Build train/validation loaders from ``<data_dir>/train`` with a stratified split.

    Every class contributes ``int(n_class * val_split)`` images to the
    validation set and the rest to the training set. The split is drawn from
    a private ``random.Random(seed)``: calls with the same seed produce the
    same split (so different runs are scored on the same validation images)
    and the global ``random`` state is left untouched.

    Args:
        data_dir: Directory containing a ``train`` sub-folder in
            ``ImageFolder`` layout.
        batch_size: Batch size of both loaders.
        val_split: Fraction of each class assigned to validation.
        augment: If true, the training transform additionally applies a
            random horizontal flip and a random rotation of up to 10 degrees.
        seed: Seed of the split. Pass ``None`` to get a different random
            split on every call.

    Returns:
        Tuple ``(train_loader, val_loader, num_classes)``.
    """
    # Validation images are only resized and converted to tensors.
    transform_val = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor()
    ])

    if augment:
        transform_train = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(10),
            transforms.ToTensor()
        ])
    else:
        transform_train = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor()
        ])

    train_root = os.path.join(data_dir, 'train')
    full_dataset = ImageFolder(train_root, transform=transform_train)

    # Group sample indices by class label.
    label_to_indices = {}
    for idx, (_, label) in enumerate(full_dataset.samples):
        label_to_indices.setdefault(label, []).append(idx)

    # Stratified split: shuffle each class independently with the seeded RNG.
    split_rng = random.Random(seed)
    train_idx = []
    val_idx = []
    for indices in label_to_indices.values():
        split_rng.shuffle(indices)
        split = int(len(indices) * val_split)
        val_idx.extend(indices[:split])
        train_idx.extend(indices[split:])

    train_data = Subset(full_dataset, train_idx)
    # A second ImageFolder over the same directory gives the validation
    # subset its own transform; this assumes both scans enumerate the files
    # in the same order so that the indices refer to the same images.
    val_data = Subset(ImageFolder(train_root, transform=transform_val), val_idx)

    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False, num_workers=2)

    return train_loader, val_loader, len(full_dataset.classes)




# ------------- Training and Evaluation Functions

def train_one_epoch(model, optimizer, criterion, dataloader, device):
    """Run one optimisation pass over ``dataloader`` and report loss and accuracy.

    Args:
        model: Network to train; it is switched to training mode.
        optimizer: Optimizer updating ``model``'s parameters.
        criterion: Loss function called as ``criterion(outputs, labels)``. It
            is expected to return the mean loss of the batch (the default
            reduction of ``nn.CrossEntropyLoss``).
        dataloader: Iterable of ``(images, labels)`` batches.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(mean loss per sample, accuracy in percent)``. ``(0.0, 0.0)``
        is returned when the dataloader yields no samples.
    """
    model.train()
    loss_sum, correct, total = 0.0, 0, 0
    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        # Weight the batch mean by the batch size so a short final batch does
        # not count as much as a full one.
        loss_sum += loss.item() * batch_size
        _, predicted = outputs.max(1)
        total += batch_size
        correct += predicted.eq(labels).sum().item()

    if total == 0:
        return 0.0, 0.0
    return loss_sum / total, 100. * correct / total

def evaluate(model, criterion, dataloader, device):
    """Compute loss and accuracy of ``model`` on ``dataloader`` without training.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        criterion: Loss function called as ``criterion(outputs, labels)``. It
            is expected to return the mean loss of the batch (the default
            reduction of ``nn.CrossEntropyLoss``).
        dataloader: Iterable of ``(images, labels)`` batches.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(mean loss per sample, accuracy in percent)``. ``(0.0, 0.0)``
        is returned when the dataloader yields no samples.
    """
    model.eval()
    loss_sum, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            batch_size = labels.size(0)
            # Weight the batch mean by the batch size so a short final batch
            # does not count as much as a full one.
            loss_sum += criterion(outputs, labels).item() * batch_size
            _, predicted = outputs.max(1)
            total += batch_size
            correct += predicted.eq(labels).sum().item()

    if total == 0:
        return 0.0, 0.0
    return loss_sum / total, 100. * correct / total

# ------------- Train Loop for wandb Sweep

def train(config=None):
    """Run one training job inside a wandb run (used as the sweep agent's function).

    Reads the hyperparameters from ``wandb.config``, names the run after
    them, builds the data loaders and a ``FlexibleCNN``, then trains with
    Adam and cross-entropy for ``config.epochs`` epochs, printing and logging
    train/validation loss and accuracy after every epoch.

    Args:
        config: Optional configuration forwarded to ``wandb.init``.
    """
    with wandb.init(config=config):
        # Hyperparameters of the active run.
        config = wandb.config

        # Name the run after its hyperparameters.
        wandb.run.name = (
            f"filt-{config.base_filter}_{config.filter_organization}_"
            f"act-{config.activation_fn}_bn-{config.batch_norm}_"
            f"do-{config.dropout}_dense-{config.dense_neurons}_"
            f"bs-{config.batch_size}_lr-{config.lr}_aug-{config.data_augmentation}"
        )

        has_gpu = torch.cuda.is_available()
        device = torch.device("cuda" if has_gpu else "cpu")

        # Data
        loaders = get_dataloaders(
            '/kaggle/input/inaturalist-dataset/inaturalist_12K',
            config.batch_size,
            val_split=0.2,
            augment=config.data_augmentation,
        )
        train_loader, val_loader, num_classes = loaders

        # Channel layout of the five conv blocks for each organisation scheme.
        depth = 5
        filter_layouts = {
            'same': [config.base_filter] * depth,
            'double': [config.base_filter * (2 ** level) for level in range(depth)],
            'half': [config.base_filter // (2 ** level) for level in range(depth)],
        }
        conv_filters = filter_layouts[config.filter_organization]

        # Model
        model = FlexibleCNN(
            conv_filters=conv_filters,
            kernel_sizes=[3] * depth,
            activation_fn=get_activation_fn(config.activation_fn),
            dense_neurons=config.dense_neurons,
            dense_activation_fn=F.relu,
            dropout=config.dropout,
            batch_norm=config.batch_norm,
            num_classes=num_classes,
        ).to(device)

        # Objective and optimiser
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=config.lr)

        for epoch in range(config.epochs):
            train_loss, train_acc = train_one_epoch(
                model, optimizer, criterion, train_loader, device
            )
            val_loss, val_acc = evaluate(model, criterion, val_loader, device)

            # Per-epoch progress lines
            print(f"Epoch : {epoch + 1}/{config.epochs}")
            print(f" -- Train Accuracy: {train_acc:.4f} -- Validation Accuracy: {val_acc:.4f}")

            epoch_metrics = {
                "epoch": epoch + 1,
                "train_loss": train_loss,
                "train_acc": train_acc,
                "val_loss": val_loss,
                "val_acc": val_acc,
            }
            wandb.log(epoch_metrics)

# ------------- Sweep Config

# Random search over the listed hyperparameter choices, maximising the
# logged `val_acc` metric.
sweep_config = {
    'method': 'random',
    'metric': {'name': 'val_acc', 'goal': 'maximize'},
    'parameters': {
        hp_name: {'values': choices}
        for hp_name, choices in {
            'base_filter': [32, 64],
            'filter_organization': ['same', 'double', 'half'],
            'activation_fn': ['relu', 'gelu', 'silu', 'mish'],
            'data_augmentation': [True, False],
            'batch_norm': [True, False],
            'dropout': [0.2, 0.3],
            'dense_neurons': [128, 256],
            'batch_size': [32, 64, 128],
            'lr': [1e-3, 1e-4],
            'epochs': [5, 10, 15, 20],
        }.items()
    },
}

# Register the sweep, then let a local agent execute 50 runs of `train`.
sweep_id = wandb.sweep(sweep_config, project='iNaturalist-CNN-PartA-RandomSearch')
wandb.agent(sweep_id, function=train, count=50)



# sweep_config = {
#     'method': 'random',
#     'metric': {'name': 'val_acc', 'goal': 'maximize'},
#     'parameters': {
#         'base_filter': {'values': [64]},
#         'filter_organization': {'values': ['double']},
#         'activation_fn': {'values': ['relu']},
#         'data_augmentation': {'values': [True]},
#         'batch_norm': {'values': [True]},
#         'dropout': {'values': [0.3]},
#         'dense_neurons': {'values': [128]},
#         'batch_size': {'values': [64]},
#         'lr': {'values': [1e-3]},
#         'epochs': {'values': [5]}
#     }
# }

# sweep_id = wandb.sweep(sweep_config, project='iNaturalist-CNN')
# wandb.agent(sweep_id, function=train, count = 1)
