# In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchvision import transforms
import mnist_loader  

# Load MNIST data; the second element of the returned triple is not used here.
training_data, _, test_data = mnist_loader.load_data_wrapper()


def _stack_images(images):
    """Combine per-sample image arrays into a single float32 tensor.

    Each sample is converted on its own and the results are stacked along a
    new leading dimension, which yields the same shape and dtype as calling
    ``torch.tensor`` on the whole sequence.

    NOTE(review): samples are presumed to be NumPy arrays (mnist_loader is not
    visible from here). For such input, ``torch.tensor`` on a sequence of
    arrays is very slow and PyTorch warns about it; stacking avoids that.
    """
    return torch.stack(
        [torch.as_tensor(image, dtype=torch.float32) for image in images]
    )


def _label_to_index(label):
    """Return the integer class index for a one-hot vector or a scalar label.

    A scalar label (Python int, NumPy scalar, 0-d array/tensor) is already a
    class index; calling ``argmax()`` on it would always return 0 and silently
    relabel every sample as class 0. Anything with one or more dimensions is
    treated as a one-hot vector and reduced with ``argmax()``.
    """
    if getattr(label, "ndim", 0) == 0:
        return int(label)
    return int(label.argmax())


# Split each dataset into parallel sequences of images and labels
train_images, train_labels = zip(*training_data)
test_images, test_labels = zip(*test_data)

# Convert data to PyTorch tensors
train_images = _stack_images(train_images)
train_labels = torch.tensor(
    [_label_to_index(label) for label in train_labels], dtype=torch.long
)
test_images = _stack_images(test_images)
# NOTE(review): test labels may already be plain class indices rather than
# one-hot vectors depending on mnist_loader; _label_to_index handles both.
test_labels = torch.tensor(
    [_label_to_index(label) for label in test_labels], dtype=torch.long
)

# Create DataLoader for training and testing data
train_dataset = TensorDataset(train_images, train_labels)
# Only the training set is shuffled; evaluation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

test_dataset = TensorDataset(test_images, test_labels)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Define the neural network model
class NeuralNetwork(nn.Module):
    """Fully connected classifier: 784 inputs, two sigmoid hidden layers, 10 outputs.

    ``hidden_sizes`` supplies the widths of the two hidden layers (indices 0
    and 1). ``init_method`` selects how the hidden-layer weights are
    initialised: ``"xavier"`` applies ``xavier_uniform_``, ``"he"`` applies
    ``kaiming_uniform_`` with ``nonlinearity='sigmoid'``, and any other value
    keeps the ``nn.Linear`` defaults. The output layer always keeps its
    default initialisation.
    """

    def __init__(self, hidden_sizes, init_method=None):
        super().__init__()
        first_width, second_width = hidden_sizes[0], hidden_sizes[1]
        self.hidden_layer_1 = nn.Linear(784, first_width)
        self.hidden_layer_2 = nn.Linear(first_width, second_width)
        self.output_layer = nn.Linear(second_width, 10)

        # Custom initialisation only ever touches the two hidden layers.
        hidden_layers = (self.hidden_layer_1, self.hidden_layer_2)
        if init_method == "xavier":
            for layer in hidden_layers:
                nn.init.xavier_uniform_(layer.weight)
        elif init_method == "he":
            for layer in hidden_layers:
                nn.init.kaiming_uniform_(layer.weight, nonlinearity="sigmoid")

    def forward(self, x):
        """Return per-class log-probabilities for a 2-D batch ``x``."""
        hidden = self.hidden_layer_1(x).sigmoid()
        hidden = self.hidden_layer_2(hidden).sigmoid()
        logits = self.output_layer(hidden)
        # dim=1 normalises across the 10 class scores of each row.
        return logits.log_softmax(dim=1)
    
# Select the compute device: the GPU when CUDA is usable, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(
    "CUDA is available. Using GPU."
    if device.type == "cuda"
    else "CUDA is not available. Using CPU."
)



def train_eval_model(hidden_sizes, init_method, lr, epochs=10):
    """Train a fresh ``NeuralNetwork`` with SGD and report its test accuracy.

    Reads the module-level ``train_loader``, ``test_loader`` and ``device``.

    Args:
        hidden_sizes: Pair of hidden-layer widths passed to ``NeuralNetwork``.
        init_method: Weight-initialisation scheme passed to ``NeuralNetwork``
            (``None``, ``"xavier"`` or ``"he"``).
        lr: Learning rate for ``optim.SGD``.
        epochs: Number of full passes over ``train_loader``. Defaults to 10,
            the value that used to be hard-coded.

    Returns:
        Fraction of test samples classified correctly, in ``[0, 1]``
        (``0.0`` if ``test_loader`` yields no samples). The same figure is
        also printed as a percentage.
    """
    # Initialize the model, loss function, and optimizer
    model = NeuralNetwork(hidden_sizes, init_method).to(device)
    # NeuralNetwork.forward already returns log-probabilities (log_softmax),
    # so NLLLoss is the matching criterion; CrossEntropyLoss would apply
    # log_softmax a second time.
    criterion = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr)

    # Train the neural network
    for _ in range(epochs):
        model.train()
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            # Flatten each sample to the 784 features the first layer expects.
            outputs = model(inputs.view(-1, 784))
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    # Evaluate the model on the test set
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs.view(-1, 784))
            # Highest log-probability per row is the predicted class.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Guard against an empty test loader instead of dividing by zero.
    accuracy = correct / total if total else 0.0
    print(f"Config: Hidden Sizes: {hidden_sizes}, Init: {init_method}, LR: {lr}, Accuracy: {accuracy * 100:.2f}%")
    return accuracy


# Hyper-parameter grid: every combination below is trained and evaluated once
hidden_sizes_configs = [(512, 256), (1024, 512), (2048, 1024)]
init_methods = [None, "xavier", "he"]
learning_rates = [0.1, 0.01, 0.001]

# Walk the grid in the same order as three nested loops would:
# layer sizes outermost, then init scheme, then learning rate.
for hidden_sizes, init_method, lr in (
    (sizes, scheme, rate)
    for sizes in hidden_sizes_configs
    for scheme in init_methods
    for rate in learning_rates
):
    train_eval_model(hidden_sizes, init_method, lr)