In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
for input_path in (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
):
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

def build_cnn(filter_list, kernel_dim, act_func, dense_units, class_count=10, input_depth=3, input_size=64):
    """Build the pieces of a small CNN classifier.

    Every entry of ``filter_list`` adds one Conv2d -> activation -> MaxPool2d(2, 2)
    stage. The first dense layer is sized from the feature map those stages
    actually produce, rather than assuming exactly five stages and a 3x3 kernel.

    Args:
        filter_list: Output channel count of each convolution stage, in order.
        kernel_dim: Convolution kernel size (an int, or a (height, width) pair).
        act_func: Activation name understood by ``select_activation``.
        dense_units: Width of the hidden dense layer.
        class_count: Number of output classes.
        input_depth: Number of channels of the input images.
        input_size: Side length of the square input images (64 by default).

    Returns:
        Tuple ``(conv_net, flatten_layer, dense_layer1, dense_layer2)``.

    Raises:
        ValueError: If the feature map shrinks below 1x1 for the requested stack.
    """
    conv_padding = 1
    try:
        kernel_h, kernel_w = kernel_dim
    except TypeError:
        # A scalar kernel size means a square kernel.
        kernel_h = kernel_w = kernel_dim

    network_layers = []
    prev_channels = input_depth
    height = width = input_size

    for num_filters in filter_list:
        network_layers.append(nn.Conv2d(prev_channels, num_filters, kernel_size=kernel_dim, padding=conv_padding))
        network_layers.append(select_activation(act_func))
        network_layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        prev_channels = num_filters

        # Stride-1 convolution: out = in + 2 * padding - kernel + 1.
        # The 2x2 / stride-2 pooling that follows floor-halves the result.
        height = (height + 2 * conv_padding - kernel_h + 1) // 2
        width = (width + 2 * conv_padding - kernel_w + 1) // 2
        if height < 1 or width < 1:
            raise ValueError(
                "input_size is too small for the requested number of conv/pool stages and kernel size"
            )

    conv_net = nn.Sequential(*network_layers)
    flatten_layer = nn.Flatten()
    # prev_channels is the channel count leaving the last stage (input_depth when there are no stages).
    dense_layer1 = nn.Linear(prev_channels * height * width, dense_units)
    dense_layer2 = nn.Linear(dense_units, class_count)

    return conv_net, flatten_layer, dense_layer1, dense_layer2

def select_activation(act_func):
    """Return a freshly constructed activation module for ``act_func``.

    Recognised names are 'relu', 'sigmoid' and 'tanh'.

    Raises:
        ValueError: If ``act_func`` is not one of the recognised names.
    """
    known_activations = (
        ('relu', nn.ReLU),
        ('sigmoid', nn.Sigmoid),
        ('tanh', nn.Tanh),
    )
    for label, factory in known_activations:
        if act_func == label:
            return factory()
    raise ValueError("Unsupported activation function")

def run_forward(data, conv_net, flatten_layer, dense_layer1, dense_layer2, act_func):
    """Run one forward pass through the separately held network pieces.

    The input goes through the convolutional stack, is flattened, passes the
    hidden dense layer followed by the activation named by ``act_func``, and
    finally the output dense layer, whose result is returned.
    """
    features = flatten_layer(conv_net(data))
    pre_activation = dense_layer1(features)
    # The activation module is looked up only after the hidden layer has run.
    hidden = select_activation(act_func)(pre_activation)
    return dense_layer2(hidden)

# Example usage: a five-stage CNN for 10-class classification of RGB images.
example_hparams = dict(
    filter_list=[32, 64, 128, 256, 512],  # filters per convolution stage
    kernel_dim=3,                         # convolution kernel size
    act_func='relu',                      # activation used throughout
    dense_units=256,                      # neurons in the hidden dense layer
    class_count=10,                       # number of target classes
    input_depth=3,                        # RGB input
)
conv_net, flatten_layer, dense_layer1, dense_layer2 = build_cnn(**example_hparams)

print(conv_net, dense_layer1, dense_layer2)

Sequential(
  (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU()
  (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (4): ReLU()
  (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU()
  (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (9): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (10): ReLU()
  (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (12): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU()
  (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
) Linear(in_features=2048, out_features=256, bias=True) Linear(in_features=256, out_features=10, bias=True)


In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import wandb
from torch.utils.data import DataLoader, random_split

import wandb
# Authenticate this session with Weights & Biases before any run or sweep is created below.
wandb.login()


def build_cnn(filter_list, kernel_dim, act_func, dense_units, dropout_rate, batch_norm, class_count=10, input_depth=3, input_size=64):
    """Build the pieces of a configurable CNN classifier.

    Every entry of ``filter_list`` adds one stage made of
    Conv2d -> [BatchNorm2d] -> activation -> MaxPool2d(2, 2) -> [Dropout].
    The first dense layer is sized from the feature map those stages actually
    produce, so any number of stages and any kernel size that fits the input
    yields matching layer shapes.

    Args:
        filter_list: Output channel count of each convolution stage, in order.
        kernel_dim: Convolution kernel size (an int, or a (height, width) pair).
        act_func: Activation name understood by ``select_activation``.
        dense_units: Width of the hidden dense layer.
        dropout_rate: Dropout probability after each stage; 0 disables dropout.
        batch_norm: Whether to insert BatchNorm2d after each convolution.
        class_count: Number of output classes.
        input_depth: Number of channels of the input images.
        input_size: Side length of the square input images (64 by default).

    Returns:
        Tuple ``(conv_net, flatten_layer, dense_layer1, dense_layer2)``.

    Raises:
        ValueError: If the feature map shrinks below 1x1 for the requested stack.
    """
    conv_padding = 1
    try:
        kernel_h, kernel_w = kernel_dim
    except TypeError:
        # A scalar kernel size means a square kernel.
        kernel_h = kernel_w = kernel_dim

    network_layers = []
    prev_channels = input_depth
    height = width = input_size

    for num_filters in filter_list:
        network_layers.append(nn.Conv2d(prev_channels, num_filters, kernel_size=kernel_dim, padding=conv_padding))
        if batch_norm:
            network_layers.append(nn.BatchNorm2d(num_filters))
        network_layers.append(select_activation(act_func))
        network_layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        if dropout_rate > 0:
            network_layers.append(nn.Dropout(dropout_rate))
        prev_channels = num_filters

        # Stride-1 convolution: out = in + 2 * padding - kernel + 1.
        # The 2x2 / stride-2 pooling floor-halves it; the other layers keep the size.
        height = (height + 2 * conv_padding - kernel_h + 1) // 2
        width = (width + 2 * conv_padding - kernel_w + 1) // 2
        if height < 1 or width < 1:
            raise ValueError(
                "input_size is too small for the requested number of conv/pool stages and kernel size"
            )

    conv_net = nn.Sequential(*network_layers)
    flatten_layer = nn.Flatten()
    # prev_channels is the channel count leaving the last stage (input_depth when there are no stages).
    dense_layer1 = nn.Linear(prev_channels * height * width, dense_units)
    dense_layer2 = nn.Linear(dense_units, class_count)

    return conv_net, flatten_layer, dense_layer1, dense_layer2

def select_activation(act_func):
    """Return a new activation module for ``act_func``.

    Recognised names are 'relu', 'gelu', 'silu' and 'mish'; any other value
    yields a ReLU module.
    """
    activation_classes = {
        'relu': nn.ReLU,
        'gelu': nn.GELU,
        'silu': nn.SiLU,
        'mish': nn.Mish,
    }
    # Resolve the class first, then build only the module that is returned.
    chosen_class = activation_classes.get(act_func, nn.ReLU)
    return chosen_class()

def run_forward(data, conv_net, flatten_layer, dense_layer1, dense_layer2, act_func):
    """Apply the network pieces to ``data`` in order and return the output of ``dense_layer2``.

    Order: convolutional stack, flatten, hidden dense layer, the activation
    named by ``act_func``, output dense layer.
    """
    out = data
    for stage in (conv_net, flatten_layer, dense_layer1):
        out = stage(out)
    # The activation module is created only after the hidden dense layer has run.
    out = select_activation(act_func)(out)
    return dense_layer2(out)

# Initialize wandb
# NOTE(review): this starts a run when the cell executes, before any sweep exists.
# Runs launched through wandb.agent normally call wandb.init() from inside the
# sweep function instead; confirm this standalone run is intended.
wandb.init(project='cnn-hyperparam-sweep')

def train():
    """Train and validate one CNN for a single hyperparameter configuration.

    Opens its own W&B run, reads the hyperparameters from that run's config,
    trains for 10 epochs on an 80/20 random split of ``iNaturalist/train`` and
    logs ``val_accuracy`` after every epoch.

    Returns:
        Validation accuracy in percent, measured after the final epoch.
    """
    # Create the run inside the function so the config read below belongs to
    # this run; the context manager also closes the run if training raises.
    with wandb.init(project='cnn-hyperparam-sweep') as run:
        config = run.config
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Load dataset. build_cnn sizes its dense layer for 64x64 inputs, so
        # every image is resized first. ToTensor is applied exactly once:
        # applying it to an already converted tensor raises a TypeError.
        transform_steps = [transforms.Resize((64, 64))]
        if config.data_aug:
            transform_steps.append(transforms.RandomHorizontalFlip())
        transform_steps.append(transforms.ToTensor())
        transform = transforms.Compose(transform_steps)

        # NOTE: both splits are views of this one dataset object, so the random
        # flip is also applied to validation images when data_aug is enabled.
        dataset = torchvision.datasets.ImageFolder(root='iNaturalist/train', transform=transform)
        train_size = int(0.8 * len(dataset))
        val_size = len(dataset) - train_size
        train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

        conv_net, flatten_layer, dense_layer1, dense_layer2 = build_cnn(
            filter_list=config.num_filters,
            kernel_dim=config.kernel_size,
            act_func=config.activation,
            dense_units=config.dense_neurons,
            dropout_rate=config.dropout,
            batch_norm=config.batch_norm
        )

        # The hidden dense layer uses the swept activation, like the conv stages.
        model = nn.Sequential(
            conv_net,
            flatten_layer,
            dense_layer1,
            select_activation(config.activation),
            dense_layer2,
        ).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
        criterion = nn.CrossEntropyLoss()

        for epoch in range(10):
            model.train()
            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

            # Validation
            model.eval()
            correct, total = 0, 0
            with torch.no_grad():
                for images, labels in val_loader:
                    images, labels = images.to(device), labels.to(device)
                    outputs = model(images)
                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()

            # An empty validation split reports 0 instead of dividing by zero.
            accuracy = 100 * correct / total if total else 0.0
            run.log({'val_accuracy': accuracy})

        return accuracy

# Hyperparameter sweep configuration: random search that maximizes val_accuracy.
search_space = {
    'num_filters': [[32, 64, 128], [64, 128, 256]],
    'kernel_size': [3, 5],
    'activation': ['relu', 'gelu', 'silu', 'mish'],
    'dense_neurons': [128, 256],
    'dropout': [0.2, 0.3],
    'batch_norm': [True, False],
    'data_aug': [True, False],
    'learning_rate': [0.001, 0.0001],
}
sweep_config = {
    'method': 'random',
    'metric': {'name': 'val_accuracy', 'goal': 'maximize'},
    # Every hyperparameter is a discrete choice, so each option list is wrapped as {'values': ...}.
    'parameters': {name: {'values': options} for name, options in search_space.items()},
}

# Register the sweep, then let one agent run train() for two sampled configurations.
sweep_id = wandb.sweep(sweep_config, project='cnn-hyperparam-sweep')
wandb.agent(sweep_id, train, count=2)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

KeyboardInterrupt: 

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import wandb
from torch.utils.data import DataLoader, random_split

def build_cnn(filter_list, kernel_dim, act_func, dense_units, dropout_rate, batch_norm, class_count=10, input_depth=3, input_size=64):
    """Build the pieces of a configurable CNN classifier.

    Every entry of ``filter_list`` adds one stage made of
    Conv2d -> [BatchNorm2d] -> activation -> MaxPool2d(2, 2) -> [Dropout].
    The first dense layer is sized from the feature map those stages actually
    produce, so any number of stages and any kernel size that fits the input
    yields matching layer shapes.

    Args:
        filter_list: Output channel count of each convolution stage, in order.
        kernel_dim: Convolution kernel size (an int, or a (height, width) pair).
        act_func: Activation name understood by ``select_activation``.
        dense_units: Width of the hidden dense layer.
        dropout_rate: Dropout probability after each stage; 0 disables dropout.
        batch_norm: Whether to insert BatchNorm2d after each convolution.
        class_count: Number of output classes.
        input_depth: Number of channels of the input images.
        input_size: Side length of the square input images (64 by default).

    Returns:
        Tuple ``(conv_net, flatten_layer, dense_layer1, dense_layer2)``.

    Raises:
        ValueError: If the feature map shrinks below 1x1 for the requested stack.
    """
    conv_padding = 1
    try:
        kernel_h, kernel_w = kernel_dim
    except TypeError:
        # A scalar kernel size means a square kernel.
        kernel_h = kernel_w = kernel_dim

    network_layers = []
    prev_channels = input_depth
    height = width = input_size

    for num_filters in filter_list:
        stage = [nn.Conv2d(prev_channels, num_filters, kernel_size=kernel_dim, padding=conv_padding)]
        if batch_norm:
            stage.append(nn.BatchNorm2d(num_filters))
        stage.append(select_activation(act_func))
        stage.append(nn.MaxPool2d(kernel_size=2, stride=2))
        if dropout_rate > 0:
            stage.append(nn.Dropout(dropout_rate))
        network_layers.extend(stage)
        prev_channels = num_filters

        # Stride-1 convolution: out = in + 2 * padding - kernel + 1.
        # The 2x2 / stride-2 pooling floor-halves it; the other layers keep the size.
        height = (height + 2 * conv_padding - kernel_h + 1) // 2
        width = (width + 2 * conv_padding - kernel_w + 1) // 2
        if height < 1 or width < 1:
            raise ValueError(
                "input_size is too small for the requested number of conv/pool stages and kernel size"
            )

    conv_net = nn.Sequential(*network_layers)
    flatten_layer = nn.Flatten()
    # prev_channels is the channel count leaving the last stage (input_depth when there are no stages).
    dense_layer1 = nn.Linear(prev_channels * height * width, dense_units)
    dense_layer2 = nn.Linear(dense_units, class_count)

    return conv_net, flatten_layer, dense_layer1, dense_layer2

def select_activation(act_func):
    """Map an activation name to a newly built module.

    'relu', 'gelu', 'silu' and 'mish' select the matching torch.nn module;
    every other value produces a ReLU.
    """
    registry = dict(relu=nn.ReLU, gelu=nn.GELU, silu=nn.SiLU, mish=nn.Mish)
    # Unknown names resolve to the ReLU class before anything is instantiated.
    factory = registry.get(act_func, nn.ReLU)
    return factory()

def train():
    """Train and validate one CNN for a single hyperparameter configuration.

    Opens a W&B run, reads the hyperparameters from that run's config, trains
    for 10 epochs on an 80/20 random split of ``iNaturalist/train`` and logs
    ``val_accuracy`` after every epoch.

    Returns:
        Validation accuracy in percent, measured after the final epoch.
    """
    # The context manager replaces a trailing wandb.finish(): the run is closed
    # on normal exit and also when training raises part-way through.
    with wandb.init(project='cnn-hyperparam-sweep') as run:
        config = run.config
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Load dataset. build_cnn sizes its dense layer for 64x64 inputs, so
        # every image is resized before the optional flip and tensor conversion.
        transform_list = [transforms.Resize((64, 64))]
        if config.data_aug:
            transform_list.append(transforms.RandomHorizontalFlip())
        transform_list.append(transforms.ToTensor())
        transform = transforms.Compose(transform_list)

        # NOTE: both splits are views of this one dataset object, so the random
        # flip is also applied to validation images when data_aug is enabled.
        dataset = torchvision.datasets.ImageFolder(root='iNaturalist/train', transform=transform)
        train_size = int(0.8 * len(dataset))
        val_size = len(dataset) - train_size
        train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

        conv_net, flatten_layer, dense_layer1, dense_layer2 = build_cnn(
            filter_list=config.num_filters,
            kernel_dim=config.kernel_size,
            act_func=config.activation,
            dense_units=config.dense_neurons,
            dropout_rate=config.dropout,
            batch_norm=config.batch_norm
        )

        model = nn.Sequential(
            conv_net,
            flatten_layer,
            dense_layer1,
            select_activation(config.activation),
            dense_layer2,
        ).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
        criterion = nn.CrossEntropyLoss()

        for epoch in range(10):
            model.train()
            for images, labels in train_loader:
                images, labels = images.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

            # Validation
            model.eval()
            correct, total = 0, 0
            with torch.no_grad():
                for images, labels in val_loader:
                    images, labels = images.to(device), labels.to(device)
                    outputs = model(images)
                    _, predicted = torch.max(outputs, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()

            # An empty validation split reports 0 instead of dividing by zero.
            accuracy = 100 * correct / total if total else 0.0
            run.log({'val_accuracy': accuracy})

        return accuracy

# Hyperparameter sweep configuration: random search that maximizes val_accuracy.
# Every hyperparameter is a discrete choice given as a list of candidate values.
sweep_config = dict(
    method='random',
    metric=dict(name='val_accuracy', goal='maximize'),
    parameters=dict(
        num_filters=dict(values=[[32, 64, 128], [64, 128, 256]]),
        kernel_size=dict(values=[3, 5]),
        activation=dict(values=['relu', 'gelu', 'silu', 'mish']),
        dense_neurons=dict(values=[128, 256]),
        dropout=dict(values=[0.2, 0.3]),
        batch_norm=dict(values=[True, False]),
        data_aug=dict(values=[True, False]),
        learning_rate=dict(values=[0.001, 0.0001]),
    ),
)

# Register the sweep, then let one agent run train() for twenty sampled configurations.
sweep_id = wandb.sweep(sweep_config, project='cnn-hyperparam-sweep')
wandb.agent(sweep_id, train, count=20)
