1-Neural Network from Scratch:

In [9]:
import numpy as np

# Define sigmoid and its derivative
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    The input is clipped to the range [-500, 500] before it is
    exponentiated so that ``np.exp`` cannot overflow.
    """
    clipped = np.clip(x, -500, 500)
    denominator = 1 + np.exp(-clipped)
    return 1 / denominator

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``, the sigmoid gradient expressed via its output.

    ``x`` is expected to already be a sigmoid activation (the network
    below passes its layer outputs), not the pre-activation value.
    """
    complement = 1 - x
    return x * complement

# Xavier-style initialization: normal samples scaled by sqrt(1 / fan-in)
def initialize_weights(input_size, output_size):
    """Return an ``(input_size, output_size)`` matrix of random weights.

    Entries are drawn from the standard normal distribution via
    ``np.random.randn`` and multiplied by ``sqrt(1 / input_size)``.
    """
    scale = np.sqrt(1 / input_size)
    return scale * np.random.randn(input_size, output_size)

# Build a simple feed-forward neural network
class NeuralNetwork:
    """Feed-forward network with one hidden layer and sigmoid activations.

    Training uses full-batch gradient descent on the squared error between
    the targets and the network output.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create randomly initialised weights and zero biases for both layers."""
        self.weights_input_hidden = initialize_weights(input_size, hidden_size)
        self.bias_hidden = np.zeros(hidden_size)
        self.weights_hidden_output = initialize_weights(hidden_size, output_size)
        self.bias_output = np.zeros(output_size)

    def forward(self, X):
        """Run a forward pass on ``X`` and return the output activations.

        The intermediate pre-activations and activations are stored on the
        instance because ``backward`` reads them.
        """
        # Input to hidden
        self.hidden_input = np.dot(X, self.weights_input_hidden) + self.bias_hidden
        self.hidden_output = sigmoid(self.hidden_input)

        # Hidden to output
        self.output_input = np.dot(self.hidden_output, self.weights_hidden_output) + self.bias_output
        self.output = sigmoid(self.output_input)
        return self.output

    def _match_output_shape(self, y):
        """Return ``y`` as an array, reshaped to the output's shape when it is flat.

        A 1-D target vector of length n subtracted from an (n, 1) output
        would broadcast to an (n, n) matrix and silently corrupt both the
        gradients and the loss. Only 1-D targets whose element count equals
        the output's are reshaped; every other shape is passed through.
        """
        y = np.asarray(y)
        if y.ndim == 1 and y.size == self.output.size:
            y = y.reshape(self.output.shape)
        return y

    def backward(self, X, y, learning_rate):
        """Update weights and biases in place from the last forward pass.

        Must be called after ``forward`` on the same ``X``; it relies on
        ``self.output`` and ``self.hidden_output`` stored by that call.
        """
        y = self._match_output_shape(y)

        # Calculate output error
        output_error = y - self.output
        output_delta = output_error * sigmoid_derivative(self.output)

        # Calculate hidden layer error
        hidden_error = output_delta.dot(self.weights_hidden_output.T)
        hidden_delta = hidden_error * sigmoid_derivative(self.hidden_output)

        # Update weights and biases. The error is (target - output), so
        # adding the scaled deltas moves the parameters downhill.
        self.weights_hidden_output += self.hidden_output.T.dot(output_delta) * learning_rate
        self.bias_output += np.sum(output_delta, axis=0) * learning_rate
        self.weights_input_hidden += X.T.dot(hidden_delta) * learning_rate
        self.bias_hidden += np.sum(hidden_delta, axis=0) * learning_rate

    def train(self, X, y, epochs=1000, learning_rate=0.01):
        """Run ``epochs`` full-batch updates, printing the loss every 100 epochs."""
        for epoch in range(epochs):
            # Forward pass
            self.forward(X)
            targets = self._match_output_shape(y)
            # Backward pass and update weights
            self.backward(X, targets, learning_rate)

            # Compute loss (optional). self.output still holds the forward
            # pass made before this epoch's update.
            loss = np.mean(np.square(targets - self.output))
            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss}")




2-Build with PyTorch or TensorFlow (PyTorch implementation):

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

# Load and preprocess MNIST data
# as_frame=True makes the pandas return types explicit; the `.values`
# accesses below only work on a DataFrame/Series.
mnist = fetch_openml('mnist_784', version=1, as_frame=True)
X = mnist.data / 255.0  # Normalize pixel values to [0, 1]
y = mnist.target.astype(int).values

# One-hot encode labels
# `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and removed in
# 1.4; try the new keyword first and fall back for older releases.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
y_one_hot = encoder.fit_transform(y.reshape(-1, 1))

# Split dataset (80% train / 20% test, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y_one_hot, test_size=0.2, random_state=42)

# Convert to PyTorch tensors (float32)
X_train_tensor = torch.FloatTensor(X_train.values)
y_train_tensor = torch.FloatTensor(y_train)
X_test_tensor = torch.FloatTensor(X_test.values)
y_test_tensor = torch.FloatTensor(y_test)

# Define the neural network
class NeuralNetwork(nn.Module):
    """Two-layer fully connected classifier with a sigmoid hidden layer.

    ``forward`` returns raw scores (no softmax); ``self.loss_fn`` is an
    ``nn.CrossEntropyLoss`` stored on the model for the training loop.

    Args:
        input_size: Number of input features per sample (default 784).
        hidden_size: Number of hidden units (default 64).
        output_size: Number of output scores per sample (default 10).
    """

    def __init__(self, input_size=784, hidden_size=64, output_size=10):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, x):
        """Return the output-layer scores for the batch ``x``."""
        hidden = torch.sigmoid(self.fc1(x))
        return self.fc2(hidden)

# Instantiate the network and the Adam optimizer that updates its parameters
model = NeuralNetwork()
optimizer = optim.Adam(
    params=model.parameters(),
    lr=0.01,
)

# Training the network
def train(model, optimizer, epochs=10):
    """Train ``model`` with one full-batch update per epoch and report progress.

    Reads the module-level tensors ``X_train_tensor``, ``y_train_tensor``,
    ``X_test_tensor`` and ``y_test_tensor``. After every epoch the model is
    evaluated on the test tensors and the training loss and test accuracy
    are printed.

    Args:
        model: Module exposing a ``loss_fn`` attribute used as the criterion.
        optimizer: Optimizer bound to ``model``'s parameters.
        epochs: Number of full-batch updates to perform.
    """
    # The one-hot targets never change, so decode them to class indices once
    # instead of on every epoch.
    train_labels = torch.max(y_train_tensor, 1)[1]
    test_labels = torch.max(y_test_tensor, 1)[1]
    num_test_samples = y_test_tensor.size(0)

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train_tensor)
        loss = model.loss_fn(outputs, train_labels)
        loss.backward()
        optimizer.step()

        # Evaluate on test set
        model.eval()
        with torch.no_grad():
            test_outputs = model(X_test_tensor)
            _, predicted = torch.max(test_outputs, 1)
            accuracy = torch.sum(predicted == test_labels).item() / num_test_samples
            print(f'Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.4f}, Test Accuracy: {accuracy:.4f}')

# Kick off training with the default number of epochs
train(model=model, optimizer=optimizer)


  warn(


Epoch 1/10, Loss: 2.3566, Test Accuracy: 0.3766
Epoch 2/10, Loss: 2.1650, Test Accuracy: 0.4261
Epoch 3/10, Loss: 2.0311, Test Accuracy: 0.5309
Epoch 4/10, Loss: 1.8946, Test Accuracy: 0.6154
Epoch 5/10, Loss: 1.7600, Test Accuracy: 0.6609
Epoch 6/10, Loss: 1.6357, Test Accuracy: 0.6797
Epoch 7/10, Loss: 1.5220, Test Accuracy: 0.7046
Epoch 8/10, Loss: 1.4150, Test Accuracy: 0.7366
Epoch 9/10, Loss: 1.3123, Test Accuracy: 0.7649
Epoch 10/10, Loss: 1.2149, Test Accuracy: 0.7841
