In [18]:
import random
import datasets
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the CNN Model
class SimpleCNN(nn.Module):
    def __init__(self, num_channels, use_avg_pooling, kernel_size, padding):
        super(SimpleCNN, self).__init__()
        
        # Convolutional layers
        self.conv1 = nn.Conv2d(1, num_channels, kernel_size=kernel_size, padding=padding)
        self.conv2 = nn.Conv2d(num_channels, num_channels, kernel_size=kernel_size, padding=padding)
        self.conv3 = nn.Conv2d(num_channels, num_channels, kernel_size=kernel_size, padding=padding)
        self.conv4 = nn.Conv2d(num_channels, num_channels, kernel_size=kernel_size, padding=padding)
        self.conv5 = nn.Conv2d(num_channels, num_channels, kernel_size=kernel_size, padding=padding)

        self.use_avg_pooling = use_avg_pooling
        if self.use_avg_pooling:
            self.pool = nn.AvgPool2d(2, 2)
        else:
            self.pool = None

        # Dummy input for size calculation
        self._to_linear = None
        self.calculate_to_linear_size()

        self.fc = nn.Linear(self._to_linear, 10)

    def calculate_to_linear_size(self):
        dummy_input = torch.zeros(1, 1, 28, 28)
        x = self.conv1(dummy_input)
        if self.use_avg_pooling:
            x = self.pool(x)
        x = self.conv2(x)
        if self.use_avg_pooling:
            x = self.pool(x)
        x = self.conv3(x)
        if self.use_avg_pooling:
            x = self.pool(x)
        x = self.conv4(x)
        if self.use_avg_pooling:
            x = self.pool(x)
        x = self.conv5(x)
        if self.use_avg_pooling:
            x = self.pool(x)
        self._to_linear = x.view(x.size(0), -1).size(1)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        if self.use_avg_pooling:
            x = self.pool(x)
        x = F.relu(self.conv2(x))
        if self.use_avg_pooling:
            x = self.pool(x)
        x = F.relu(self.conv3(x))
        if self.use_avg_pooling:
            x = self.pool(x)
        x = F.relu(self.conv4(x))
        if self.use_avg_pooling:
            x = self.pool(x)
        x = F.relu(self.conv5(x))
        if self.use_avg_pooling:
            x = self.pool(x)
        x = x.view(-1, self._to_linear) # Flatten
        x = self.fc(x)
        return F.log_softmax(x, dim=1)


# Function for Training and Evaluating the Model
def train_and_evaluate(model, train_loader, test_loader, epochs=10, lr=0.001):
    optimizer = optim.Adam(model.parameters(), lr=lr)
    model.train()

    for epoch in range(epochs):
        total_loss = 0
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        average_loss = total_loss / len(train_loader)
        print(f'Epoch {epoch + 1}/{epochs}, Loss: {average_loss:.4f}')


    correct = 0
    total = 0
    model.eval()
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)  # Move data and target to the device
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    accuracy = correct / total
    return accuracy


# Random Search Hyperparameter Tuning
def random_search_hyperparameters(num_trials, train_loader, test_loader):
    trial_results = {}

    for trial in range(num_trials):
        print(f"currently in trial {trial}")
        num_channels = random.choice([10, 20, 30, 40, 50])
        #use_avg_pooling = random.choice([True, False])
        use_avg_pooling = random.choice([False])
        kernel_size, padding = random.choice([(3, 1), (5, 2), (7, 3)])

        model = SimpleCNN(num_channels, use_avg_pooling, kernel_size, padding)
        model.to(device)
        
        accuracy = train_and_evaluate(model, train_loader, test_loader, epochs=10, lr=0.001)

        trial_results[trial] = {
            'num_channels': num_channels,
            'use_avg_pooling': use_avg_pooling,
            'kernel_size': kernel_size,
            'padding': padding,
            'accuracy': accuracy
        }

    return trial_results

# Define the transformation for the dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load and transform the dataset
train_dataset = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Perform Random Search
trial_results = random_search_hyperparameters(20, train_loader, test_loader)
for trial, results in trial_results.items():
    print(f"Trial {trial}, Results: {results}")


currently in trial 0
Epoch 1/10, Loss: 0.4730
Epoch 2/10, Loss: 0.3110
Epoch 3/10, Loss: 0.2713
Epoch 4/10, Loss: 0.2470
Epoch 5/10, Loss: 0.2257
Epoch 6/10, Loss: 0.2086
Epoch 7/10, Loss: 0.1920
Epoch 8/10, Loss: 0.1775
Epoch 9/10, Loss: 0.1662
Epoch 10/10, Loss: 0.1555
currently in trial 1
Epoch 1/10, Loss: 0.3949
Epoch 2/10, Loss: 0.2394
Epoch 3/10, Loss: 0.1887
Epoch 4/10, Loss: 0.1493
Epoch 5/10, Loss: 0.1155
Epoch 6/10, Loss: 0.0880
Epoch 7/10, Loss: 0.0661
Epoch 8/10, Loss: 0.0532
Epoch 9/10, Loss: 0.0413
Epoch 10/10, Loss: 0.0337
currently in trial 2
Epoch 1/10, Loss: 0.4216
Epoch 2/10, Loss: 0.2718
Epoch 3/10, Loss: 0.2234
Epoch 4/10, Loss: 0.1919
Epoch 5/10, Loss: 0.1647
Epoch 6/10, Loss: 0.1405
Epoch 7/10, Loss: 0.1195
Epoch 8/10, Loss: 0.0992
Epoch 9/10, Loss: 0.0840
Epoch 10/10, Loss: 0.0723
currently in trial 3
Epoch 1/10, Loss: 0.4230
Epoch 2/10, Loss: 0.2531
Epoch 3/10, Loss: 0.2058
Epoch 4/10, Loss: 0.1701
Epoch 5/10, Loss: 0.1372
Epoch 6/10, Loss: 0.1130
Epoch 7/10, L

In [5]:
model = SimpleCNN(40, False, 3, 1)
model.to(device)
        
accuracy = train_and_evaluate(model, train_loader, test_loader, epochs=40, lr=0.001)

print(accuracy)

Epoch 1/40, Loss: 0.3882
Epoch 2/40, Loss: 0.2347
Epoch 3/40, Loss: 0.1858
Epoch 4/40, Loss: 0.1494
Epoch 5/40, Loss: 0.1183
Epoch 6/40, Loss: 0.0921
Epoch 7/40, Loss: 0.0707
Epoch 8/40, Loss: 0.0543
Epoch 9/40, Loss: 0.0425
Epoch 10/40, Loss: 0.0373
Epoch 11/40, Loss: 0.0324
Epoch 12/40, Loss: 0.0303
Epoch 13/40, Loss: 0.0284
Epoch 14/40, Loss: 0.0225
Epoch 15/40, Loss: 0.0199
Epoch 16/40, Loss: 0.0234
Epoch 17/40, Loss: 0.0231
Epoch 18/40, Loss: 0.0184
Epoch 19/40, Loss: 0.0183
Epoch 20/40, Loss: 0.0171
Epoch 21/40, Loss: 0.0153
Epoch 22/40, Loss: 0.0175
Epoch 23/40, Loss: 0.0150
Epoch 24/40, Loss: 0.0167
Epoch 25/40, Loss: 0.0136
Epoch 26/40, Loss: 0.0165
Epoch 27/40, Loss: 0.0120
Epoch 28/40, Loss: 0.0149
Epoch 29/40, Loss: 0.0138
Epoch 30/40, Loss: 0.0125
Epoch 31/40, Loss: 0.0147
Epoch 32/40, Loss: 0.0112
Epoch 33/40, Loss: 0.0145
Epoch 34/40, Loss: 0.0148
Epoch 35/40, Loss: 0.0132
Epoch 36/40, Loss: 0.0091
Epoch 37/40, Loss: 0.0152
Epoch 38/40, Loss: 0.0149
Epoch 39/40, Loss: 0.