<a href="https://colab.research.google.com/github/udupa-varun/pyimagesearch_uni/blob/main/deep_learning/103/backpropagation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Download the companion code archive for the backpropagation tutorial,
# extract it quietly (-qq), and switch the working directory to the
# extracted folder for the cells that follow.
# NOTE(review): the NeuralNetwork class is defined inline further down and no
# later cell appears to import anything from the extracted folder -- confirm
# whether this download is still needed.
!wget https://pyimagesearch-code-downloads.s3-us-west-2.amazonaws.com/backpropagation-python/backpropagation-python.zip
!unzip -qq backpropagation-python.zip
%cd backpropagation-python

--2023-02-10 10:48:49--  https://pyimagesearch-code-downloads.s3-us-west-2.amazonaws.com/backpropagation-python/backpropagation-python.zip
Resolving pyimagesearch-code-downloads.s3-us-west-2.amazonaws.com (pyimagesearch-code-downloads.s3-us-west-2.amazonaws.com)... 52.218.225.129, 52.92.224.162, 52.92.248.226, ...
Connecting to pyimagesearch-code-downloads.s3-us-west-2.amazonaws.com (pyimagesearch-code-downloads.s3-us-west-2.amazonaws.com)|52.218.225.129|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 20243 (20K) [binary/octet-stream]
Saving to: ‘backpropagation-python.zip’


2023-02-10 10:48:50 (321 KB/s) - ‘backpropagation-python.zip’ saved [20243/20243]

/content/backpropagation-python


In [3]:
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn import datasets
import numpy as np

In [26]:
class NeuralNetwork:
    """Fully connected feedforward network trained with backpropagation.

    Every layer uses a sigmoid activation. Bias terms are folded into the
    weight matrices (the "bias trick"): a column of 1s is appended to the
    input features and each hidden layer is given one extra node.
    """

    def __init__(self, layers, alpha=0.1):
        """Randomly initialise one weight matrix per pair of adjacent layers.

        Args:
            layers: sequence of ints with the node count of each layer,
                input layer first (e.g. ``[2, 2, 1]``).
            alpha: learning rate applied to every weight update.
        """
        # list of weight matrices, plus the architecture and learning rate
        self.W = []
        self.layers = layers
        self.alpha = alpha

        # every connection except the one feeding the output layer
        for i in range(len(layers) - 2):
            # the "+ 1" on both sides adds the extra bias node
            w = np.random.randn(layers[i] + 1, layers[i + 1] + 1)
            # scale by the size of the layer feeding this matrix (layers[i]),
            # mirroring the layers[-2] scaling used for the output matrix below
            self.W.append(w / np.sqrt(layers[i]))

        # the last two layers are a special case:
        # the input side needs a bias term but the output side does not
        w = np.random.randn(layers[-2] + 1, layers[-1])
        self.W.append(w / np.sqrt(layers[-2]))

    def __repr__(self):
        """Return the architecture as a string such as ``NeuralNetwork: 2-2-1``."""
        return "NeuralNetwork: {}".format("-".join(str(l) for l in self.layers))

    def sigmoid(self, x):
        """Return the sigmoid activation ``1 / (1 + exp(-x))`` of ``x``."""
        return 1.0 / (1 + np.exp(-x))

    def sigmoid_deriv(self, x):
        """Return the sigmoid derivative, given a sigmoid OUTPUT.

        ``x`` must already have been passed through :meth:`sigmoid`; the
        derivative is then simply ``x * (1 - x)``.
        """
        return x * (1 - x)

    def fit(self, X, y, epochs=1000, display_update=100):
        """Train the network, updating weights after every data point.

        Args:
            X: feature matrix with one row per sample (without bias column).
            y: target values, one entry per row of ``X``.
            epochs: number of passes over the full data set.
            display_update: print the loss on the first epoch and then every
                ``display_update`` epochs.
        """
        # bias trick: append a column of 1s as the last feature
        X = np.c_[X, np.ones((X.shape[0]))]

        for epoch in np.arange(0, epochs):
            # one weight update per individual data point
            for (x, target) in zip(X, y):
                self.fit_partial(x, target)

            # check to see if we should display a training update
            if epoch == 0 or (epoch + 1) % display_update == 0:
                loss = self.calculate_loss(X, y)
                print(f"[INFO] epoch={epoch + 1}, loss={loss:.7f}")

    def fit_partial(self, x, target):
        """Run one forward/backward pass for a single sample and update weights.

        Args:
            x: one feature vector that already includes the trailing bias 1.
            target: the desired network output for ``x``.
        """
        # list of activations per layer as the data point flows through the
        # network; the first activation is the input vector itself
        A = [np.atleast_2d(x)]

        # FEEDFORWARD
        for layer in range(len(self.W)):
            # "net input": dot product of the activation and the weight matrix
            net = A[layer].dot(self.W[layer])

            # "net output": apply the activation function
            out = self.sigmoid(net)
            A.append(out)

        # BACKPROPAGATION
        # difference between the prediction (final activation) and the target
        error = A[-1] - target

        # chain rule: the first delta is the output error times the derivative
        # of the activation function at the output value
        D = [error * self.sigmoid_deriv(A[-1])]

        # walk the remaining layers in reverse order (the output layer is
        # already handled above)
        for layer in range(len(A) - 2, 0, -1):
            # delta of the following layer dotted with this layer's weights,
            # then multiplied by the activation derivative for this layer
            delta = D[-1].dot(self.W[layer].T)
            delta = delta * self.sigmoid_deriv(A[layer])
            D.append(delta)

        # deltas were collected back-to-front, so flip them
        D = D[::-1]

        # WEIGHT UPDATE
        for layer in range(len(self.W)):
            # gradient = layer activations (transposed) dotted with the deltas;
            # step against it, scaled by the learning rate
            self.W[layer] += -self.alpha * A[layer].T.dot(D[layer])

    def predict(self, X, add_bias=True):
        """Forward-propagate ``X`` and return the output-layer activations.

        Args:
            X: a single feature vector or a matrix with one row per sample.
            add_bias: append the bias column of 1s first; pass ``False`` when
                ``X`` already contains it (as ``calculate_loss`` does).

        Returns:
            2-D array of sigmoid outputs, one row per sample.
        """
        # the prediction starts out as the input features
        p = np.atleast_2d(X)

        if add_bias:
            # bias trick: append a column of 1s as the last feature
            p = np.c_[p, np.ones((p.shape[0]))]

        for layer in range(len(self.W)):
            # dot the current activation with this layer's weights, then
            # pass the result through the activation function
            p = self.sigmoid(np.dot(p, self.W[layer]))

        return p

    def calculate_loss(self, X, targets):
        """Return half the sum of squared errors over ``X``.

        ``X`` must already include the bias column, because the prediction
        is made with ``add_bias=False``.
        """
        targets = np.atleast_2d(targets)
        predictions = self.predict(X, add_bias=False)
        loss = 0.5 * np.sum((predictions - targets) ** 2)

        return loss


In [27]:
# XOR truth table: every combination of two binary inputs, with the label
# computed as the exclusive-or of the two input columns
X = np.array([[a, b] for a in (0, 1) for b in (0, 1)])
y = np.bitwise_xor(X[:, :1], X[:, 1:])

In [28]:
# build a network with 2 inputs, 2 hidden nodes and 1 output,
# then fit it to the XOR data
nn = NeuralNetwork(layers=[2, 2, 1], alpha=0.5)
nn.fit(X, y, epochs=20_000)

[INFO] epoch=1, loss=0.5033469
[INFO] epoch=100, loss=0.4988608
[INFO] epoch=200, loss=0.4946892
[INFO] epoch=300, loss=0.4818631
[INFO] epoch=400, loss=0.4500477
[INFO] epoch=500, loss=0.3997506
[INFO] epoch=600, loss=0.3463599
[INFO] epoch=700, loss=0.2954147
[INFO] epoch=800, loss=0.2482711
[INFO] epoch=900, loss=0.2125601
[INFO] epoch=1000, loss=0.1884188
[INFO] epoch=1100, loss=0.1724714
[INFO] epoch=1200, loss=0.1614985
[INFO] epoch=1300, loss=0.1531116
[INFO] epoch=1400, loss=0.1441845
[INFO] epoch=1500, loss=0.0912863
[INFO] epoch=1600, loss=0.0257524
[INFO] epoch=1700, loss=0.0172918
[INFO] epoch=1800, loss=0.0134560
[INFO] epoch=1900, loss=0.0110945
[INFO] epoch=2000, loss=0.0094531
[INFO] epoch=2100, loss=0.0082348
[INFO] epoch=2200, loss=0.0072913
[INFO] epoch=2300, loss=0.0065378
[INFO] epoch=2400, loss=0.0059220
[INFO] epoch=2500, loss=0.0054091
[INFO] epoch=2600, loss=0.0049754
[INFO] epoch=2700, loss=0.0046041
[INFO] epoch=2800, loss=0.0042825
[INFO] epoch=2900, loss=0.

In [29]:
# run every XOR sample through the trained network and report the result
for (sample, label) in zip(X, y):
    # raw sigmoid output, plus its thresholded (step) class at 0.5
    pred = nn.predict(sample)[0, 0]
    step = int(pred > 0.5)
    print(f"[INFO] data={sample}, ground-truth={label[0]}, pred={pred:.4f}, step={step}")

[INFO] data=[0 0], ground-truth=0, pred=0.0107, step=0
[INFO] data=[0 1], ground-truth=1, pred=0.9888, step=1
[INFO] data=[1 0], ground-truth=1, pred=0.9884, step=1
[INFO] data=[1 1], ground-truth=0, pred=0.0144, step=0


## MNIST Sample (scikit-learn 8x8 digits dataset)

In [30]:
# load the digits sample that ships with scikit-learn;
# each image is an 8x8 grid flattened to a 64-dim feature vector
print("[INFO] loading MNIST (sample) dataset...")
digits = datasets.load_digits()
pixels = digits.data.astype("float")

# min/max scaling so that pixel intensities fall in [0, 1]
lowest, highest = pixels.min(), pixels.max()
data = (pixels - lowest) / (highest - lowest)

n_samples, n_dims = data.shape
print(f"[INFO] samples: {n_samples}, dim: {n_dims}")

[INFO] loading MNIST (sample) dataset...
[INFO] samples: 1797, dim: 64


In [31]:
# construct training and testing splits (75% train / 25% test)
(train_x, test_x, train_y, test_y) = train_test_split(
    data, digits.target, test_size=0.25
)

# convert labels from integers to one-hot vectors.
# Fit the binarizer ONCE on the training labels and reuse it for the test
# labels, so both splits share the same class-to-column mapping even if a
# class happens to be absent from one of them.
label_binarizer = LabelBinarizer()
train_y = label_binarizer.fit_transform(train_y)
test_y = label_binarizer.transform(test_y)

In [32]:
# fit a network whose input width matches the feature dimension,
# followed by hidden layers of 32 and 16 nodes and 10 output nodes
print("[INFO] training network...")
n_features = train_x.shape[1]
nn = NeuralNetwork(layers=[n_features, 32, 16, 10])
print("[INFO] {}".format(nn))
nn.fit(train_x, train_y, epochs=1000)

[INFO] training network...
[INFO] NeuralNetwork: 64-32-16-10
[INFO] epoch=1, loss=606.2714559
[INFO] epoch=100, loss=5.2762401
[INFO] epoch=200, loss=1.5183176
[INFO] epoch=300, loss=1.0435741
[INFO] epoch=400, loss=0.8643565
[INFO] epoch=500, loss=0.7716486
[INFO] epoch=600, loss=0.7154595
[INFO] epoch=700, loss=0.6779529
[INFO] epoch=800, loss=0.6512295
[INFO] epoch=900, loss=0.6312708
[INFO] epoch=1000, loss=0.6158239


In [33]:
# score the trained network on the held-out split:
# the predicted class is the index of the largest output activation,
# and the true class is the index of the 1 in each one-hot label row
print("[INFO] evaluating network...")
predictions = nn.predict(test_x).argmax(axis=1)
true_labels = test_y.argmax(axis=1)
print(classification_report(true_labels, predictions))

[INFO] evaluating network...
              precision    recall  f1-score   support

           0       1.00      0.98      0.99        45
           1       1.00      0.98      0.99        48
           2       0.98      1.00      0.99        48
           3       1.00      1.00      1.00        43
           4       1.00      0.93      0.96        40
           5       0.95      0.98      0.97        43
           6       1.00      1.00      1.00        48
           7       1.00      1.00      1.00        40
           8       0.98      0.98      0.98        50
           9       0.90      0.96      0.92        45

    accuracy                           0.98       450
   macro avg       0.98      0.98      0.98       450
weighted avg       0.98      0.98      0.98       450

