In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, SubsetRandomSampler
!pip install wandb
import wandb
# Start a W&B run at notebook level under the given entity and project.
# NOTE(review): trainNetwork() below opens its own run via wandb.init() for
# every sweep trial -- confirm this extra top-level run is actually wanted.
wandb.init(entity="vishnukundhan333", project="DL Assignment-2")
# Q1
# Define a CNN model class with 5 convolutional layers, activation, and max-pooling functions between layers
class CNN(nn.Module):
    """Five-block convolutional image classifier.

    Every block is ``Conv2d(3x3, padding=1) -> activation -> MaxPool2d(2, 2)``.
    When ``batchNorm`` is true, the first block also applies batch
    normalisation between its convolution and the activation. The pooled
    feature map is flattened, passed through one hidden dense layer followed
    by dropout, and mapped to ``outputShape`` raw class scores (logits).

    Args:
        filters: Output channel counts of the five convolutions (5 ints).
        denseLayer: Width of the hidden fully connected layer.
        activation: Name of an activation class in ``torch.nn`` (e.g. ``"ReLU"``).
        inputShape: ``(channels, height, width)`` of the input images. A
            channel-only shape falls back to the original 128x128 assumption.
        outputShape: Number of output classes.
        dropout: Dropout probability applied after the hidden dense layer.
        learningRate: Stored as ``self.learning_rate``; unused by the model.
        batchNorm: Whether to batch-normalise the first convolution's output.
        weightDecay: Stored as ``self.weight_decay``; unused by the model.

    Raises:
        ValueError: If the input height or width is below 32, i.e. too small
            to survive five 2x2 poolings.
    """

    def __init__(self, filters, denseLayer, activation, inputShape, outputShape, dropout, learningRate, batchNorm, weightDecay):
        super(CNN, self).__init__()
        # Define convolutional layers
        self.conv1 = nn.Conv2d(inputShape[0], filters[0], kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(filters[0], filters[1], kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(filters[1], filters[2], kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(filters[2], filters[3], kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(filters[3], filters[4], kernel_size=3, padding=1)
        # Max-pooling layer
        self.pool = nn.MaxPool2d(2, 2)
        # The padded 3x3 convolutions keep the spatial size, and each of the
        # five poolings halves it (floor division), so each side shrinks by 32.
        # Derive the flattened size from inputShape instead of hard-coding 4*4.
        if len(inputShape) >= 3:
            height, width = inputShape[1], inputShape[2]
        else:
            height = width = 128
        pooledHeight, pooledWidth = height // 32, width // 32
        if pooledHeight < 1 or pooledWidth < 1:
            raise ValueError(
                "inputShape height and width must be at least 32 for five 2x2 poolings"
            )
        self.flat_features = filters[4] * pooledHeight * pooledWidth
        # Fully connected layers
        self.fc1 = nn.Linear(self.flat_features, denseLayer)
        self.fc2 = nn.Linear(denseLayer, outputShape)
        # Activation function
        self.activation = getattr(nn, activation)()
        self.dropout = nn.Dropout(dropout)
        # Batch normalization (sized for the first convolution's output)
        self.batch_norm = nn.BatchNorm2d(filters[0]) if batchNorm else None
        self.weight_decay = weightDecay
        self.learning_rate = learningRate
        self.y = filters[4]

    # Forward function to propagate input data through the layers of the network
    def forward(self, x):
        """Return class logits for a batch of images shaped like ``inputShape``."""
        x = self.conv1(x)
        # Previously the batch-norm layer was built but never applied, so the
        # batchNorm hyperparameter had no effect on the network.
        if self.batch_norm is not None:
            x = self.batch_norm(x)
        x = self.pool(self.activation(x))
        for conv in (self.conv2, self.conv3, self.conv4, self.conv5):
            x = self.pool(self.activation(conv(x)))
        # Flatten per sample; keeping the batch dimension fixed makes a size
        # mismatch fail loudly in fc1 instead of being folded into the batch.
        x = x.view(x.size(0), -1)
        x = self.dropout(self.activation(self.fc1(x)))
        x = self.fc2(x)
        return x

# Data preprocessing: resizing and converting to tensors
# Preprocessing pipeline: scale every image to 128x128, then convert it to a tensor
transform = transforms.Compose(
    [
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
    ]
)

# Load the training images and hold out 20% of the indices for validation (seeded split)
trainDataset = datasets.ImageFolder(
    root="/kaggle/input/inaturalist12k/Data/inaturalist_12K/train/",
    transform=transform,
)
trainIndices, valIndices = train_test_split(
    list(range(len(trainDataset))),
    test_size=0.2,
    random_state=42,
)
# Each sampler draws only from its own subset of indices
trainSampler = SubsetRandomSampler(trainIndices)
valSampler = SubsetRandomSampler(valIndices)

# Function to train the CNN model
def trainNetwork():
    """Train one CNN with the hyperparameters of the active W&B run.

    Used as the trial function handed to ``wandb.agent``. Builds train and
    validation loaders over ``trainDataset`` with the module-level samplers,
    constructs the model from ``wandb.config``, optimises it with Adam and
    cross-entropy for ``config.epochs`` epochs, and logs the per-epoch train
    and validation loss/accuracy to W&B.
    """
    with wandb.init():
        cfg = wandb.config
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Both loaders read the same dataset; the samplers keep the splits apart
        trainLoader = DataLoader(trainDataset, batch_size=cfg.batchSize, sampler=trainSampler)
        valLoader = DataLoader(trainDataset, batch_size=cfg.batchSize, sampler=valSampler)

        # Build the network for 3x128x128 inputs and 10 classes
        model = CNN(
            filters=cfg.filters,
            denseLayer=cfg.denseLayer,
            activation=cfg.activation,
            inputShape=(3, 128, 128),
            outputShape=10,
            dropout=cfg.dropout,
            learningRate=cfg.learningRate,
            batchNorm=cfg.batchNorm,
            weightDecay=cfg.weightDecay,
        )
        model.to(device)

        # Loss and optimiser
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(
            model.parameters(),
            lr=cfg.learningRate,
            weight_decay=cfg.weightDecay,
        )

        for epoch in range(1, cfg.epochs + 1):
            # validateNetwork switches to eval mode, so re-enable train mode each epoch
            model.train()
            runningLoss, correct, seen = 0.0, 0, 0
            for batchInputs, batchLabels in trainLoader:
                batchInputs = batchInputs.to(device)
                batchLabels = batchLabels.to(device)

                optimizer.zero_grad()
                logits = model(batchInputs)
                batchLoss = criterion(logits, batchLabels)
                batchLoss.backward()
                optimizer.step()

                runningLoss += batchLoss.item()
                predictions = torch.max(logits, 1)[1]
                seen += batchLabels.size(0)
                correct += (predictions == batchLabels).sum().item()

            # Epoch summary: loss averaged over batches, accuracy in percent
            trainLoss = runningLoss / len(trainLoader)
            trainAccuracy = 100 * (correct / seen)
            valLoss, valAccuracy = validateNetwork(valLoader, model)

            wandb.log(
                {
                    "train loss": trainLoss,
                    "train accuracy": trainAccuracy,
                    "val loss": valLoss,
                    "val accuracy": valAccuracy,
                    "epoch": epoch,
                }
            )

# Validation function
def validateNetwork(valLoader, model):
    """Evaluate ``model`` on ``valLoader`` without computing gradients.

    The model is moved to CUDA when available (otherwise CPU) and is left in
    eval mode when the function returns.

    Args:
        valLoader: Sized iterable yielding ``(inputs, labels)`` batches.
        model: Network that returns per-class scores for a batch of inputs.

    Returns:
        Tuple ``(valLoss, valAccuracy)``: cross-entropy loss averaged over the
        batches, and the percentage of correctly classified samples.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    criterion = nn.CrossEntropyLoss()
    model.to(device)
    model.eval()

    lossSum = 0.0
    correct = 0
    seen = 0
    with torch.no_grad():
        for batchInputs, batchLabels in valLoader:
            batchInputs = batchInputs.to(device)
            batchLabels = batchLabels.to(device)
            logits = model(batchInputs)
            lossSum += criterion(logits, batchLabels).item()
            # Index of the largest score along the class dimension
            predictions = torch.max(logits, 1)[1]
            seen += batchLabels.size(0)
            correct += (predictions == batchLabels).sum().item()

    meanLoss = lossSum / len(valLoader)
    accuracy = 100 * (correct / seen)
    return meanLoss, accuracy

# Sweep configuration
# Bayesian hyperparameter search over the CNN and training settings.
# The metric name must match a key passed to wandb.log(); trainNetwork logs
# "val accuracy", so the optimiser is pointed at that key (the previous name,
# 'Validation Accuracy', was never logged).
# NOTE(review): 'dataAug' is swept but trainNetwork never reads it.
sweepConfig = {
    'method': 'bayes',
    'metric': {'name': 'val accuracy', 'goal': 'maximize'},
    'parameters': {
        'weightDecay': {'values': [0, 0.0001, 0.0005]},
        'dropout': {'values': [0, 0.1, 0.2]},
        'learningRate': {'values': [0.0001, 0.00001]},
        'activation': {'values': ['ReLU', 'ELU', 'SELU']},
        'batchNorm': {'values': [True, False]},
        'filters': {
            'values': [[32, 32, 32, 32, 32],
                       [128, 64, 64, 32, 32],
                       [32, 64, 128, 256, 512]]
        },
        'dataAug': {'values': [True, False]},
        'batchSize': {'values': [32, 64]},
        'denseLayer': {'values': [64, 128, 256, 512]},
        'epochs': {'values': [5, 10]}
    }
}

# Initialize sweep
sweep_id = wandb.sweep(
    sweepConfig,
    entity="vishnukundhan333",
    project="DL Assignment-2",
)

# Launch an agent that calls trainNetwork once per configuration it receives
wandb.agent(
    sweep_id,
    function=trainNetwork,
    entity="vishnukundhan333",
    project="DL Assignment-2",
)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, SubsetRandomSampler
!pip install wandb
import wandb
# Start a W&B run at notebook level under the given entity and project.
# NOTE(review): trainNetwork() below opens its own run via wandb.init() for
# every sweep trial -- confirm this extra top-level run is actually wanted.
wandb.init(entity="vishnukundhan333", project="DL Assignment-2")

# Define a CNN model class with 5 convolutional layers, activation, and max-pooling functions between layers
class CNN(nn.Module):
    """Five-block convolutional image classifier.

    Every block is ``Conv2d(3x3, padding=1) -> activation -> MaxPool2d(2, 2)``.
    When ``batchNorm`` is true, the first block also applies batch
    normalisation between its convolution and the activation. The pooled
    feature map is flattened, passed through one hidden dense layer followed
    by dropout, and mapped to ``outputShape`` raw class scores (logits).

    Args:
        filters: Output channel counts of the five convolutions (5 ints).
        denseLayer: Width of the hidden fully connected layer.
        activation: Name of an activation class in ``torch.nn`` (e.g. ``"ReLU"``).
        inputShape: ``(channels, height, width)`` of the input images. A
            channel-only shape falls back to the original 128x128 assumption.
        outputShape: Number of output classes.
        dropout: Dropout probability applied after the hidden dense layer.
        learningRate: Stored as ``self.learning_rate``; unused by the model.
        batchNorm: Whether to batch-normalise the first convolution's output.
        weightDecay: Stored as ``self.weight_decay``; unused by the model.

    Raises:
        ValueError: If the input height or width is below 32, i.e. too small
            to survive five 2x2 poolings.
    """

    def __init__(self, filters, denseLayer, activation, inputShape, outputShape, dropout, learningRate, batchNorm, weightDecay):
        super(CNN, self).__init__()
        # Define convolutional layers
        self.conv1 = nn.Conv2d(inputShape[0], filters[0], kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(filters[0], filters[1], kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(filters[1], filters[2], kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(filters[2], filters[3], kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(filters[3], filters[4], kernel_size=3, padding=1)
        # Max-pooling layer
        self.pool = nn.MaxPool2d(2, 2)
        # The padded 3x3 convolutions keep the spatial size, and each of the
        # five poolings halves it (floor division), so each side shrinks by 32.
        # Derive the flattened size from inputShape instead of hard-coding 4*4.
        if len(inputShape) >= 3:
            height, width = inputShape[1], inputShape[2]
        else:
            height = width = 128
        pooledHeight, pooledWidth = height // 32, width // 32
        if pooledHeight < 1 or pooledWidth < 1:
            raise ValueError(
                "inputShape height and width must be at least 32 for five 2x2 poolings"
            )
        self.flat_features = filters[4] * pooledHeight * pooledWidth
        # Fully connected layers
        self.fc1 = nn.Linear(self.flat_features, denseLayer)
        self.fc2 = nn.Linear(denseLayer, outputShape)
        # Activation function
        self.activation = getattr(nn, activation)()
        # Dropout layer
        self.dropout = nn.Dropout(dropout)
        # Batch normalization (sized for the first convolution's output)
        self.batch_norm = nn.BatchNorm2d(filters[0]) if batchNorm else None
        self.weight_decay = weightDecay
        self.learning_rate = learningRate
        self.y = filters[4]

    # Forward function to propagate input data through the layers of the network
    def forward(self, x):
        """Return class logits for a batch of images shaped like ``inputShape``."""
        x = self.conv1(x)
        # Previously the batch-norm layer was built but never applied, so the
        # batchNorm hyperparameter had no effect on the network.
        if self.batch_norm is not None:
            x = self.batch_norm(x)
        x = self.pool(self.activation(x))
        for conv in (self.conv2, self.conv3, self.conv4, self.conv5):
            x = self.pool(self.activation(conv(x)))
        # Flatten per sample; keeping the batch dimension fixed makes a size
        # mismatch fail loudly in fc1 instead of being folded into the batch.
        x = x.view(x.size(0), -1)
        x = self.dropout(self.activation(self.fc1(x)))
        x = self.fc2(x)
        return x

# Data preprocessing: resizing and converting to tensors
# Preprocessing pipeline: scale every image to 128x128, then convert it to a tensor
transform = transforms.Compose(
    [
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
    ]
)

# Held-out test images, read from the dataset's "val" folder in batches of 32
testDataset = datasets.ImageFolder(
    root="/kaggle/input/inaturalist12k/Data/inaturalist_12K/val/",
    transform=transform,
)
testLoader = DataLoader(dataset=testDataset, batch_size=32)

# Function to train the CNN model
def trainNetwork(trainDir="/kaggle/input/inaturalist12k/Data/inaturalist_12K/train/"):
    """Train a CNN on the training images and report its test-set performance.

    The previous version ran the optimisation loop (``model.train()``,
    ``backward()``, ``optimizer.step()``) directly on ``testLoader``. That
    fitted the model to the test data, so the logged "test accuracy" was
    really training accuracy. Now the model is fitted only on the images
    under ``trainDir``, and the test set is used purely for evaluation in
    eval mode without gradients.

    Hyperparameters are read from ``wandb.config``. After every epoch the
    train and test loss/accuracy are logged to W&B.

    Args:
        trainDir: Root folder of the training images, in the class-per-folder
            layout that ``datasets.ImageFolder`` expects.
    """
    with wandb.init() as run:
        config = wandb.config
        # Training data comes from the train folder, never from testLoader
        trainData = datasets.ImageFolder(trainDir, transform=transform)
        trainLoader = DataLoader(trainData, batch_size=config.batchSize, shuffle=True)
        # Initialize CNN model
        model = CNN(
            config.filters,
            config.denseLayer,
            config.activation,
            (3, 128, 128),
            10,
            config.dropout,
            config.learningRate,
            config.batchNorm,
            config.weightDecay
        )
        # Define loss criterion and optimizer
        lossCriteria = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=config.learningRate, weight_decay=config.weightDecay)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        # Training loop
        for i in range(config.epochs):
            # Evaluation below switches to eval mode, so re-enable train mode
            model.train()
            tempLoss = 0.0
            true = 0
            total = 0
            for inputs, labels in trainLoader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = lossCriteria(outputs, labels)
                loss.backward()
                optimizer.step()
                tempLoss += loss.item()
                _, predicted = outputs.max(1)
                total += labels.size(0)
                true += predicted.eq(labels).sum().item()

            trainLoss = tempLoss/len(trainLoader)
            trainAccuracy = 100*(true/total)

            # Test metrics are measured without touching the weights
            testLoss, testAccuracy = _evaluateOnLoader(model, testLoader, lossCriteria, device)

            wandb.log({"train loss": trainLoss, "train accuracy": trainAccuracy, "test loss": testLoss, "test accuracy": testAccuracy, "epoch": i+1})


def _evaluateOnLoader(model, loader, lossCriteria, device):
    """Return ``(mean batch loss, accuracy in percent)`` of ``model`` on ``loader`` without training."""
    model.eval()
    lossSum = 0.0
    true = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            lossSum += lossCriteria(outputs, labels).item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            true += predicted.eq(labels).sum().item()
    return lossSum/len(loader), 100*(true/total)

# Sweep configuration
# Single-point "sweep": every parameter has exactly one value, so this simply
# runs the chosen configuration through the sweep machinery.
# The metric name must match a key passed to wandb.log(); this cell logs
# "test accuracy" (the previous name, 'Validation Accuracy', was never logged).
# NOTE(review): 'dataAug' is listed but trainNetwork never reads it.
sweepConfig = {
    'method': 'bayes',
    'metric': {'name': 'test accuracy', 'goal': 'maximize'},
    'parameters': {
        'weightDecay': {'values': [0]},
        'dropout': {'values': [0.1]},
        'learningRate': {'values': [0.0001]},
        'activation': {'values': ['ELU']},
        'batchNorm': {'values': [True]},
        'filters': {'values': [[32, 64, 128, 256, 512]]},
        'dataAug': {'values': [True]},
        'batchSize': {'values': [32]},
        'denseLayer': {'values': [256]},
        'epochs': {'values': [10]}
    }
}

# Initialize sweep
sweep_id = wandb.sweep(sweepConfig, project="DL Assignment-2", entity="vishnukundhan333")

# Run the sweep
# The search space contains exactly one configuration, so one run is enough;
# without `count` the agent would keep launching the identical run.
wandb.agent(sweep_id, trainNetwork, project="DL Assignment-2", entity="vishnukundhan333", count=1)