In [1]:
import neura
import neura.nn as nn
import neura.optim as optim
import numpy as np
from sklearn.datasets import fetch_openml

In [2]:
class MNISTDataset(neura.data.Dataset):
    def __init__(self, train=True, transform=None):
        """
        Initializes the MNIST dataset.

        Args:
            train (bool): If True, loads the training data, otherwise loads test data.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.transform = transform

        # Fetch the data. It's a large download the first time.
        print("Fetching MNIST dataset...")
        # fetch_openml is a reliable way to get the original MNIST data
        mnist = fetch_openml('mnist_784', version=1, as_frame=False, parser='auto')
        print("Dataset fetched.")

        # The data is in a dictionary-like object
        # Images are 784-dimensional vectors (28*28)
        # Labels are strings '0', '1', ...
        images = mnist.data
        labels = mnist.target

        # Preprocessing Steps
        # 1. Normalize pixel values from [0, 255] to [0, 1.0]
        images = images / 255.0
        # 2. Convert labels from strings to integers
        labels = labels.astype(int)
        
        # 3. Cast data to a more memory-efficient type if desired
        images = images.astype(np.float32)

        # Split into training and testing sets (standard MNIST split is 60k/10k)
        if train:
            self.images = images[:60000]
            self.labels = labels[:60000]
        else:
            self.images = images[60000:]
            self.labels = labels[60000:]

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        """
        Returns a tuple of (image, label) for a given index.
        The image is now reshaped to (1, 28, 28).
        """
        # Get the flattened image vector (784,)
        image_flat = self.images[index]
        
        # Reshape the flat vector into a 3D tensor: (Channels, Height, Width)
        image_reshaped = image_flat.reshape(1, 28, 28)
    
        label = self.labels[index]
    
        if self.transform:
            # Note: transforms would now operate on a 3D tensor
            image_reshaped = self.transform(image_reshaped)
    
        return image_reshaped, label
    

trainset = MNISTDataset()
testset = MNISTDataset(train=False)

Fetching MNIST dataset...
Dataset fetched.
Fetching MNIST dataset...
Dataset fetched.


In [3]:
trainloader = neura.data.DataLoader(trainset, batch_size=32, shuffle=True)
testloader = neura.data.DataLoader(testset, batch_size=32, shuffle=True)

In [4]:
class LeNet(nn.Module):
    def __init__(self):
        super().__init__()
        
        # --- Feature Extractor ---
        # Block 1: Input (B, 1, 28, 28) -> Output (B, 6, 13, 13)
        self.conv_block1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=2, padding=1)
        self.bn1 = nn.BatchNorm2d(m=6)
        
        # Block 2: Input (B, 6, 13, 13) -> Output (B, 16, 5, 5)
        self.conv_block2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=2, padding=0)
        self.bn2 = nn.BatchNorm2d(m=16)

        # --- Classifier (using only Conv2d layers) ---
        # Input (B, 16, 5, 5)
        
        # Layer 1 (replaces fc1): Output (B, 120, 1, 1)
        # Using a 5x5 kernel to collapse the spatial dimensions
        self.classifier1 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5)
        self.bn_class1 = nn.BatchNorm2d(m=120)
        
        # Layer 2 (replaces fc2): Output (B, 84, 1, 1)
        # Input is now 1x1, so we use a 1x1 kernel
        self.classifier2 = nn.Conv2d(in_channels=120, out_channels=84, kernel_size=1)
        self.bn_class2 = nn.BatchNorm2d(m=84)
        
        # Layer 3 (replaces fc3 - output layer): Output (B, 10, 1, 1)
        self.output_layer = nn.Conv2d(in_channels=84, out_channels=10, kernel_size=1)

    def forward(self, x: neura.Tensor) -> neura.Tensor:
        x = x.view(x.shape[0], 1, 28, 28)
    
        # Now the rest of the forward pass will work correctly
        # because x is a 4D tensor.
        
        # Feature Extractor
        x = nn.ReLU(self.bn1(self.conv_block1(x)))()
        x = nn.ReLU(self.bn2(self.conv_block2(x)))()
        
        # Classifier
        x = nn.ReLU(self.bn_class1(self.classifier1(x)))()
        x = nn.ReLU(self.bn_class2(self.classifier2(x)))()
        x = self.output_layer(x)
        
        # Reshape the output from (B, C, 1, 1) to (B, C) for the loss function
        logits = x.view(x.shape[0], -1)
        
        return logits
model = LeNet()

In [5]:
EPOCHS = 5
lr = 1e-3
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.BCEWithLogitLoss("sum")

In [6]:
# A utility function for one-hot encoding
def to_one_hot(labels: neura.Tensor, num_classes=10):
    """Converts a batch of labels to one-hot encoding."""
    
    # Get the flattened numpy array from the tensor
    labels_flat = labels.data.flatten()
    
    # ===================================================================
    # THE FIX IS HERE:
    # Ensure the labels are integers before using them for indexing.
    # ===================================================================
    labels_int = labels_flat.astype(int)
    
    # Now, use the integer labels for indexing
    one_hot = np.zeros((labels_int.shape[0], num_classes))
    one_hot[np.arange(labels_int.shape[0]), labels_int] = 1
    
    # Return a new tensor. Note: The one-hot labels should be floats
    # to be compatible with the model's float outputs for the loss function.
    return neura.Tensor(one_hot) 

In [None]:
from neura import Tensor

# --- BEFORE THE LOOP ---
# Make sure to instantiate the criterion with reduction='sum'
criterion = nn.BCEWithLogitLoss(reduction="sum") 
optimizer = optim.Adam(model.parameters(), lr=lr)

print("--- Starting Training ---")
for epoch in range(EPOCHS):
    
    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0
    
    for batch_idx, (inputs, labels) in enumerate(trainloader):
        # a. Zero the parameter gradients
        optimizer.zero_grad()

        # b. Forward pass
        outputs = model(inputs) 

        # c. Prepare labels for BCE Loss
        one_hot_labels = to_one_hot(labels, num_classes=10)

        # d. Calculate the loss (criterion now returns the sum for the batch)
        loss = criterion(outputs, one_hot_labels)

        # e. Backward pass
        loss.backward()

        # f. Update weights
        optimizer.step()
        
        # --- Logging and Metrics ---
        # Add the sum of loss for the batch to the running total
        running_loss += loss.data.item()
        
        # --- Calculate Accuracy ---
        predicted_probs = 1 / (1 + np.exp(-outputs.data))
        predicted_labels = np.argmax(predicted_probs, axis=1)
        
        # FIX 1: Flatten the true labels to prevent broadcasting errors
        true_labels = labels.data.flatten()
        
        correct_predictions += (predicted_labels == true_labels).sum()
        total_samples += len(true_labels)
        
        # --- Real-time Progress Printing ---
        # FIX 2: Calculate average loss and accuracy based on total samples seen
        avg_loss = running_loss / total_samples
        avg_acc = correct_predictions / total_samples
        
        progress_string = (
            f"Epoch {epoch + 1}/{EPOCHS} | "
            f"Batch [{batch_idx + 1}/{len(trainloader)}] | "
            f"Loss: {avg_loss:.4f} | "
            f"Acc: {avg_acc:.2%}"
        )
        print(progress_string + "  ", end='\r')

    print() # Newline after the epoch

    # Final epoch summary is now consistent
    final_epoch_loss = running_loss / total_samples
    final_epoch_acc = correct_predictions / total_samples
    print(f"End of Epoch {epoch + 1} Summary | Average Loss: {final_epoch_loss:.4f} | Accuracy: {final_epoch_acc:.2%}")

print("\n--- Training Finished ---")

--- Starting Training ---
Epoch 1/5 | Batch [1875/1875] | Loss: 35.3308 | Acc: 12.68%  
End of Epoch 1 Summary | Average Loss: 35.3308 | Accuracy: 12.68%
Epoch 2/5 | Batch [1875/1875] | Loss: 33.9181 | Acc: 17.30%  
End of Epoch 2 Summary | Average Loss: 33.9181 | Accuracy: 17.30%
Epoch 3/5 | Batch [1019/1875] | Loss: 33.7817 | Acc: 18.38%  