<a href="https://colab.research.google.com/github/yashkapur0403/Neural-Networks-Practise/blob/main/NNDLchp3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import json
import random
import sys

class QuadraticCost:
    """Quadratic (squared-error) cost for a single training example."""

    @staticmethod
    def fn(a, y):
        """Return half the squared Euclidean norm of ``a - y``.

        ``a`` is the network output and ``y`` the desired output.
        """
        residual = a - y
        return 0.5 * np.linalg.norm(residual) ** 2

    @staticmethod
    def delta(z, a, y):
        """Return the output-layer error ``(a - y) * sigmoid_prime(z)``.

        ``z`` is the weighted input of the output layer, ``a`` its
        activation and ``y`` the desired output.
        """
        output_error = a - y
        return output_error * sigmoid_prime(z)

class CrossEntropyCost:
    """Cross-entropy cost for a single training example."""

    @staticmethod
    def fn(a, y):
        """Return the cross-entropy between output ``a`` and target ``y``.

        ``np.nan_to_num`` replaces the NaN produced by ``0 * log(0)`` (when
        an entry of ``a`` and the matching entry of ``y`` are both 0 or both
        1) with 0.0, so such entries contribute nothing to the sum.
        """
        elementwise = -y * np.log(a) - (1 - y) * np.log(1 - a)
        return np.sum(np.nan_to_num(elementwise))

    @staticmethod
    def delta(z, a, y):
        """Return the output-layer error ``a - y``.

        ``z`` is accepted only so the signature matches
        ``QuadraticCost.delta``; it is not used.
        """
        return a - y

class Network:
    """Feed-forward sigmoid network trained by mini-batch SGD with L2 decay."""

    def __init__(self, sizes, cost=CrossEntropyCost):
        """Create a network with one layer per entry of ``sizes``.

        ``sizes[0]`` is the number of inputs and ``sizes[-1]`` the number of
        outputs. ``cost`` must provide ``fn(a, y)`` and ``delta(z, a, y)``
        callables, as the cost classes in this file do.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.initialize_weights_and_biases()
        self.cost = cost

    def initialize_weights_and_biases(self):
        """Draw Gaussian biases and Gaussian weights scaled by 1/sqrt(fan-in).

        No biases are created for the input layer (``sizes[0]``).
        """
        self.biases = [np.random.randn(layer_size, 1) for layer_size in self.sizes[1:]]
        self.weights = [np.random.randn(y, x) / np.sqrt(x)
                        for x, y in zip(self.sizes[:-1], self.sizes[1:])]

    def large_weight_initializer(self):
        """Draw Gaussian biases and *unscaled* Gaussian weights.

        Same as ``initialize_weights_and_biases`` but without the
        1/sqrt(fan-in) factor on the weights.
        """
        self.biases = [np.random.randn(layer_size, 1) for layer_size in self.sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(self.sizes[:-1], self.sizes[1:])]

    def feedforward(self, activation):
        """Return the network output for the input column ``activation``."""
        for bias, weight in zip(self.biases, self.weights):
            activation = sigmoid(np.dot(weight, activation) + bias)
        return activation

    def SGD(self, training_data, epochs, mini_batch_size, learning_rate,
            lmbda=0.0,
            evaluation_data=None,
            monitor_evaluation_cost=False,
            monitor_evaluation_accuracy=False,
            monitor_training_cost=False,
            monitor_training_accuracy=False):
        """Train the network with mini-batch stochastic gradient descent.

        ``training_data`` is an iterable of ``(x, y)`` pairs where ``y`` is
        the desired output vector. ``evaluation_data`` is an iterable of
        ``(x, label)`` pairs where ``label`` is compared directly with the
        index of the largest output. ``lmbda`` is the L2 regularization
        strength. Each ``monitor_*`` flag enables per-epoch measurement and
        printing of the corresponding quantity.

        Returns a tuple of four lists with one entry per epoch for every
        enabled monitor: evaluation costs, evaluation accuracies, training
        costs and training accuracies.

        Raises ``ValueError`` when evaluation monitoring is requested but no
        evaluation data is supplied.
        """
        # Work on a private list: len() and random.shuffle() need a real
        # sequence (loaders may hand back zip objects), and shuffling the copy
        # leaves the caller's data in its original order.
        training_data = list(training_data)
        num_training_data = len(training_data)

        if evaluation_data is not None:
            evaluation_data = list(evaluation_data)
        # Fail before training starts instead of crashing after the first
        # epoch when the monitoring code tries to use the missing data.
        if (monitor_evaluation_cost or monitor_evaluation_accuracy) \
                and not evaluation_data:
            raise ValueError(
                "evaluation_data is required when monitor_evaluation_cost "
                "or monitor_evaluation_accuracy is enabled")
        num_evaluation_data = len(evaluation_data) if evaluation_data is not None else 0

        evaluation_cost_results, evaluation_accuracy_results = [], []
        training_cost_results, training_accuracy_results = [], []

        for epoch in range(epochs):
            random.shuffle(training_data)
            mini_batches = [training_data[k:k + mini_batch_size]
                            for k in range(0, num_training_data, mini_batch_size)]

            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, learning_rate, lmbda, num_training_data)

            print(f"Epoch {epoch} training complete")

            if monitor_training_cost:
                cost = self.total_cost(training_data, lmbda)
                training_cost_results.append(cost)
                print(f"Cost on training data: {cost}")

            if monitor_training_accuracy:
                # Training targets are vectors, so accuracy must argmax them.
                accuracy = self.accuracy(training_data, convert=True)
                training_accuracy_results.append(accuracy)
                print(f"Accuracy on training data: {accuracy} / {num_training_data}")

            if monitor_evaluation_cost:
                # Evaluation targets are labels, so the cost must vectorize them.
                cost = self.total_cost(evaluation_data, lmbda, convert=True)
                evaluation_cost_results.append(cost)
                print(f"Cost on evaluation data: {cost}")

            if monitor_evaluation_accuracy:
                accuracy = self.accuracy(evaluation_data)
                evaluation_accuracy_results.append(accuracy)
                print(f"Accuracy on evaluation data: {accuracy} / {num_evaluation_data}")

            print()

        return (evaluation_cost_results, evaluation_accuracy_results,
                training_cost_results, training_accuracy_results)

    def update_mini_batch(self, mini_batch, learning_rate, lmbda, n):
        """Apply one gradient step computed from ``mini_batch``.

        ``n`` is the size of the whole training set; it scales the L2 weight
        decay. Biases are not decayed.
        """
        nabla_b = [np.zeros(bias.shape) for bias in self.biases]
        nabla_w = [np.zeros(weight.shape) for weight in self.weights]

        # Accumulate the per-example gradients over the mini-batch.
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

        # Both factors are the same for every layer, so compute them once.
        step = learning_rate / len(mini_batch)
        decay = 1 - learning_rate * (lmbda / n)

        self.weights = [decay * weight - step * nw
                        for weight, nw in zip(self.weights, nabla_w)]
        self.biases = [bias - step * nb
                       for bias, nb in zip(self.biases, nabla_b)]

    def backprop(self, x, y):
        """Return ``(nabla_b, nabla_w)``, the cost gradient for one example.

        Both are lists of arrays with the same shapes as ``self.biases`` and
        ``self.weights``.
        """
        nabla_b = [np.zeros(bias.shape) for bias in self.biases]
        nabla_w = [np.zeros(weight.shape) for weight in self.weights]

        # Forward pass: remember every weighted input z and activation.
        activation = x
        activations = [x]
        zs = []

        for bias, weight in zip(self.biases, self.weights):
            z = np.dot(weight, activation) + bias
            zs.append(z)
            activation = sigmoid(z)
            activations.append(activation)

        # Backward pass: the output-layer error comes from the cost object.
        delta = (self.cost).delta(zs[-1], activations[-1], y)
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())

        # Negative indices walk from the last hidden layer (layer == 2)
        # back towards the first one.
        for layer in range(2, self.num_layers):
            z = zs[-layer]
            sp = sigmoid_prime(z)
            delta = np.dot(self.weights[-layer + 1].transpose(), delta) * sp
            nabla_b[-layer] = delta
            nabla_w[-layer] = np.dot(delta, activations[-layer - 1].transpose())

        return (nabla_b, nabla_w)

    def accuracy(self, data, convert=False):
        """Return how many examples in ``data`` are classified correctly.

        The prediction is the index of the largest output. With
        ``convert=True`` the target ``y`` is a vector and its argmax is
        used; otherwise ``y`` is compared with the prediction directly.
        """
        if convert:
            results = [(np.argmax(self.feedforward(x)), np.argmax(y)) for (x, y) in data]
        else:
            results = [(np.argmax(self.feedforward(x)), y) for (x, y) in data]
        return sum(int(x == y) for (x, y) in results)

    def total_cost(self, data, lmbda, convert=False):
        """Return the mean cost over ``data`` plus the L2 penalty.

        With ``convert=True`` each target ``y`` is a class index and is
        turned into a one-hot column sized to the output layer before the
        cost is evaluated.
        """
        cost = 0.0
        for x, y in data:
            activation = self.feedforward(x)
            if convert:
                y = self._one_hot(y)
            cost += self.cost.fn(activation, y) / len(data)
        cost += 0.5 * (lmbda / len(data)) * sum(
            np.linalg.norm(weight) ** 2 for weight in self.weights)
        return cost

    def _one_hot(self, j):
        """Return a ``(sizes[-1], 1)`` column that is 1.0 at row ``j``."""
        # Sized from the output layer rather than a fixed 10 so networks with
        # a different number of outputs can monitor their evaluation cost.
        e = np.zeros((self.sizes[-1], 1))
        e[j] = 1.0
        return e

    def save(self, filename):
        """Write sizes, weights, biases and the cost class name as JSON."""
        data = {"sizes": self.sizes,
                "weights": [w.tolist() for w in self.weights],
                "biases": [b.tolist() for b in self.biases],
                "cost": str(self.cost.__name__)}
        with open(filename, "w") as f:
            json.dump(data, f)

def load_network(filename):
    """Rebuild a ``Network`` from the JSON file written by ``Network.save``.

    The cost class is looked up by its stored name among this module's
    attributes; the freshly initialized weights and biases are then
    replaced by the stored ones.
    """
    with open(filename, "r") as handle:
        saved = json.load(handle)

    this_module = sys.modules[__name__]
    cost_class = getattr(this_module, saved["cost"])

    restored = Network(saved["sizes"], cost=cost_class)
    restored.biases = [np.array(b) for b in saved["biases"]]
    restored.weights = [np.array(w) for w in saved["weights"]]
    return restored

def vectorized_result(j, num_classes=10):
    """Return a ``(num_classes, 1)`` column vector that is 1.0 at row ``j``.

    All other entries are 0.0. ``num_classes`` defaults to 10, the size
    this function was previously fixed to, so existing one-argument calls
    behave exactly as before.
    """
    e = np.zeros((num_classes, 1))
    e[j] = 1.0
    return e

def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` element-wise.

    Accepts a scalar or an array and returns a result of the same shape.
    The value is computed without ever exponentiating a positive number,
    so very negative inputs do not trigger a floating-point overflow.
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in [0, 1], so it cannot overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z))        (the textbook form, unchanged)
    # z <  0: exp(z) / (1 + exp(z))    (algebraically the same value)
    numerator = np.where(z >= 0, 1.0, decay)
    result = numerator / (1.0 + decay)
    # Hand scalars back as NumPy scalars rather than 0-d arrays.
    return result[()] if np.ndim(result) == 0 else result

def sigmoid_prime(z):
    """Return the derivative of the sigmoid function evaluated at ``z``."""
    # Evaluate the sigmoid once and reuse it; this runs for every layer of
    # every training example during backpropagation.
    s = sigmoid(z)
    return s * (1 - s)




In [None]:
import os
import urllib.request

# Where the MNIST archive is stored and where it is fetched from. The local
# path is presumably the one mnist_loader_fixed reads -- verify against the
# loader if the location ever changes.
mnist_path = '../data/mnist.pkl.gz'
url = 'https://github.com/mnielsen/neural-networks-and-deep-learning/raw/master/data/mnist.pkl.gz'

# Make sure the target directory exists before anything is written into it.
os.makedirs(os.path.dirname(mnist_path), exist_ok=True)

# Fetch the archive only when no local copy is present yet.
if not os.path.exists(mnist_path):
    print("Downloading MNIST data...")
    urllib.request.urlretrieve(url, mnist_path)
    print("Download complete!")

# With the archive in place, load the three data splits.
import mnist_loader_fixed as mnist_loader
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

In [None]:
# Step 1: Upload mnist_loader_fixed.py through the Colab upload dialog.
from google.colab import files
files.upload()  # Select only mnist_loader_fixed.py in the dialog

# Step 2: Make the uploaded module importable, then load the data splits.
import sys
sys.path.append('/content')  # presumably the directory Colab saves uploads to -- confirm

import mnist_loader_fixed as mnist_loader  # Module name, without the .py suffix

training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

# Step 3: Create and train the network.
# Layer sizes: 784 inputs, one hidden layer of 30 units, 10 outputs.
net = Network([784, 30, 10], cost=CrossEntropyCost)

# Train for 10 epochs with mini-batches of 10 and L2 strength lmbda=5.0,
# printing cost and accuracy on both the training and the validation data
# after every epoch. NOTE(review): the trailing semicolon looks intended to
# stop the notebook from echoing the returned result lists.
net.SGD(training_data,
        epochs=10,
        mini_batch_size=10,
        learning_rate=0.5,
        lmbda=5.0,
        evaluation_data=validation_data,
        monitor_evaluation_accuracy=True,
        monitor_evaluation_cost=True,
        monitor_training_accuracy=True,
        monitor_training_cost=True);



Saving mnist_loader_fixed.py to mnist_loader_fixed (7).py
Epoch 0 training complete
Cost on training data: 0.5366133243359368
Accuracy on training data: 46452 / 50000
Cost on evaluation data: 0.8471310249122694
Accuracy on evaluation data: 9315 / 10000

Epoch 1 training complete
Cost on training data: 0.456281190893021
Accuracy on training data: 47472 / 50000
Cost on evaluation data: 0.8723586610709314
Accuracy on evaluation data: 9484 / 10000

Epoch 2 training complete
Cost on training data: 0.48242220904362254
Accuracy on training data: 47333 / 50000
Cost on evaluation data: 0.9437604683018006
Accuracy on evaluation data: 9433 / 10000

Epoch 3 training complete
Cost on training data: 0.3988924029429988
Accuracy on training data: 47994 / 50000
Cost on evaluation data: 0.8989545988695677
Accuracy on evaluation data: 9547 / 10000

Epoch 4 training complete
Cost on training data: 0.4018199941265468
