In [48]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchvision import transforms
import mnist_loader  
from torch.utils.data import Dataset, DataLoader


# Load the three MNIST splits via mnist_loader.load_data_wrapper and
# materialise each one as a list, so the splits can be indexed and sized
# with len() by the dataset wrapper defined below.
training_data, validation_data, test_data = (
    list(split) for split in mnist_loader.load_data_wrapper()
)

class MNISTDataset(Dataset):
    """Map-style dataset over a sequence of ``(image, label)`` pairs.

    Pairs are converted to tensors on access: the image becomes a
    ``float32`` tensor and the label a ``torch.long`` tensor.
    """

    def __init__(self, data):
        # Keep a reference to the sequence; conversion happens in __getitem__.
        self.data = data

    def __len__(self):
        """Return the number of stored ``(image, label)`` pairs."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the pair at ``idx`` as ``(float32 tensor, long tensor)``."""
        raw_image, raw_label = self.data[idx]
        return (
            torch.tensor(raw_image, dtype=torch.float32),
            torch.tensor(raw_label, dtype=torch.long),
        )

# Wrap each split in a dataset and a loader. All loaders share one batch
# size; only the training loader shuffles its samples.
BATCH_SIZE = 64

train_dataset = MNISTDataset(training_data)
validation_dataset = MNISTDataset(validation_data)
test_dataset = MNISTDataset(test_data)

train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
validation_loader = DataLoader(
    dataset=validation_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)


# Define the neural network model
class NeuralNetwork(nn.Module):
    """Fully connected classifier: 784 inputs, two sigmoid hidden layers, 10 outputs.

    Args:
        hidden_sizes: Sequence whose first two entries are the widths of the
            first and second hidden layers.
        init_method: ``"xavier"`` or ``"he"`` re-initialises the weights of
            both hidden layers; any other value (including ``None``) keeps
            the default ``nn.Linear`` initialisation.
    """

    def __init__(self, hidden_sizes, init_method=None):
        super().__init__()
        first_width, second_width = hidden_sizes[0], hidden_sizes[1]
        self.hidden_layer_1 = nn.Linear(784, first_width)
        self.hidden_layer_2 = nn.Linear(first_width, second_width)
        self.output_layer = nn.Linear(second_width, 10)

        # Only the hidden-layer weights are re-initialised; the biases and
        # the output layer keep whatever nn.Linear gave them.
        hidden_weights = (self.hidden_layer_1.weight, self.hidden_layer_2.weight)
        if init_method == "xavier":
            for weight in hidden_weights:
                nn.init.xavier_uniform_(weight)
        elif init_method == "he":
            for weight in hidden_weights:
                nn.init.kaiming_uniform_(weight, nonlinearity='sigmoid')

    def forward(self, x):
        """Return log-probabilities (log-softmax over dim 1) for batch ``x``."""
        hidden = torch.sigmoid(self.hidden_layer_1(x))
        hidden = torch.sigmoid(self.hidden_layer_2(hidden))
        logits = self.output_layer(hidden)
        return torch.log_softmax(logits, dim=1)
    
# Select the compute device: the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print("CUDA is available. Using GPU.")
else:
    print("CUDA is not available. Using CPU.")


def _to_class_indices(labels):
    """Return a 1-D tensor of class indices for a batch of labels.

    Accepts either one value per sample (already a class index) or a
    one-hot/score vector per sample, in which case the arg-max is taken.
    The batch dimension is always preserved, so a batch containing a single
    sample is handled like any other (a bare ``squeeze()`` would drop it).
    """
    per_sample = labels.reshape(labels.size(0), -1)
    if per_sample.size(1) == 1:
        return per_sample[:, 0].long()
    return per_sample.argmax(dim=1)


def train_eval_model(hidden_sizes, init_method, lr, epochs=5):
    """Train a ``NeuralNetwork`` with SGD and report its test accuracy.

    Training batches come from the module-level ``train_loader`` and
    evaluation batches from ``test_loader``; the model runs on ``device``.

    Args:
        hidden_sizes: Widths of the two hidden layers, passed to the model.
        init_method: Weight-initialisation selector, passed to the model.
        lr: Learning rate for the SGD optimizer.
        epochs: Number of passes over the training loader (default 5).

    Returns:
        The fraction of evaluation samples classified correctly, as a float
        in ``[0, 1]`` (``0.0`` if the test loader yields no samples). The
        same figure is also printed as a percentage.
    """
    model = NeuralNetwork(hidden_sizes, init_method)
    model.to(device)
    # The model's forward pass already returns log-probabilities, so the
    # matching criterion is NLLLoss; CrossEntropyLoss would apply
    # log-softmax a second time.
    criterion = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr)

    # Train the neural network
    for _ in range(epochs):
        model.train()
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            targets = _to_class_indices(labels.to(device))

            optimizer.zero_grad()
            outputs = model(inputs.view(-1, 784))
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

    # Evaluate on the test split; no gradients are needed here.
    model.eval()
    correct = 0
    seen = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            targets = _to_class_indices(labels.to(device))
            predictions = model(inputs.view(-1, 784)).argmax(dim=1)
            correct += (predictions == targets).sum().item()
            seen += targets.size(0)

    accuracy = correct / seen if seen else 0.0
    print(f"Config: Hidden Sizes: {hidden_sizes}, Init: {init_method}, LR: {lr}, Accuracy: {accuracy * 100:.2f}%")
    return accuracy


# Hyper-parameter grid: every hidden-size pair is tried with every
# initialisation method and every learning rate, in that nesting order.
hidden_sizes_configs = [(512, 256), (1024, 512), (2048, 1024)]
init_methods = [None, "xavier", "he"]
learning_rates = [0.1, 0.01, 0.001]

search_grid = [
    (sizes, method, rate)
    for sizes in hidden_sizes_configs
    for method in init_methods
    for rate in learning_rates
]

for hidden_sizes, init_method, lr in search_grid:
    train_eval_model(hidden_sizes, init_method, lr)


CUDA is not available. Using CPU.
Config: Hidden Sizes: (512, 256), Init: None, LR: 0.1, Accuracy: 89.98%
Config: Hidden Sizes: (512, 256), Init: None, LR: 0.01, Accuracy: 26.03%
Config: Hidden Sizes: (512, 256), Init: None, LR: 0.001, Accuracy: 11.35%
Config: Hidden Sizes: (512, 256), Init: xavier, LR: 0.1, Accuracy: 90.90%
Config: Hidden Sizes: (512, 256), Init: xavier, LR: 0.01, Accuracy: 60.56%
Config: Hidden Sizes: (512, 256), Init: xavier, LR: 0.001, Accuracy: 12.02%
Config: Hidden Sizes: (512, 256), Init: he, LR: 0.1, Accuracy: 91.17%
Config: Hidden Sizes: (512, 256), Init: he, LR: 0.01, Accuracy: 64.58%
Config: Hidden Sizes: (512, 256), Init: he, LR: 0.001, Accuracy: 11.35%
Config: Hidden Sizes: (1024, 512), Init: None, LR: 0.1, Accuracy: 84.63%


KeyboardInterrupt: 