## Import packages & dependencies

In [1]:
import numpy as np
import matplotlib.pyplot as plt

## Input image
Here we assume the input is a single grayscale image of size 8x8.
Input image size (h): 8x8x1

In [2]:
# Seed NumPy's global generator before the first random draw so that
# Restart Kernel -> Run All reproduces the same image (and every later
# np.random draw in the notebook).
np.random.seed(0)

# Single-channel 8x8 input image with standard-normal pixel values
X = np.random.randn(8, 8)

## Convolution layer

In [3]:
# Two 3x3 filters, stored as (num_filters, filter_size, filter_size)
num_filters, filter_size = 2, 3

filter_dims = (num_filters, filter_size, filter_size)
filters = np.random.randn(*filter_dims)

# One bias term per filter, starting at zero
bias = np.zeros((num_filters,))

# convolution operation
def convolve(X, filters, bias):
    """Valid (no padding, stride 1) 2-D cross-correlation of one image with a filter bank.

    Parameters
    ----------
    X : np.ndarray, shape (h, w)
        Single-channel input image. It does not have to be square.
    filters : np.ndarray, shape (num_filters, fh, fw)
        Filter bank. Filters do not have to be square.
    bias : np.ndarray, shape (num_filters,)
        One scalar bias per filter, added to every output position.

    Returns
    -------
    np.ndarray, shape (num_filters, h - fh + 1, w - fw + 1)
        One feature map per filter.

    Raises
    ------
    ValueError
        If a filter is larger than the image along either axis.
    """
    h, w = X.shape
    num_filters, fh, fw = filters.shape

    # Height and width are tracked separately so non-square images and
    # filters produce the correct output extent on each axis.
    out_h = h - fh + 1
    out_w = w - fw + 1
    if out_h <= 0 or out_w <= 0:
        raise ValueError(
            f"filter of shape {(fh, fw)} does not fit inside image of shape {(h, w)}"
        )

    out = np.zeros((num_filters, out_h, out_w))

    for k in range(num_filters):
        for i in range(out_h):
            for j in range(out_w):
                region = X[i:i+fh, j:j+fw]
                out[k, i, j] = np.sum(region * filters[k]) + bias[k]

    return out

# Feature maps: one convolution output per filter
conv_out = convolve(X, filters=filters, bias=bias)

## ReLU Activation

In [4]:
def relu(X):
    """Element-wise rectified linear unit: the larger of 0 and each entry of X."""
    floor = 0
    activated = np.maximum(floor, X)
    return activated

# Negative responses in the feature maps are clamped to zero
relu_out = relu(X=conv_out)


## Max pooling

In [5]:
def max_pool(X, size=2, stride=2):
    """Max-pool every channel of a stack of feature maps.

    Only windows that fit entirely inside the input are pooled, so trailing
    rows/columns that cannot fill a complete window are ignored.

    Parameters
    ----------
    X : np.ndarray, shape (num_filters, h, w)
        Stack of feature maps. h and w may differ and do not have to be
        multiples of ``stride``.
    size : int, default 2
        Side length of the square pooling window.
    stride : int, default 2
        Step between consecutive windows along both axes.

    Returns
    -------
    np.ndarray, shape (num_filters, out_h, out_w)
        Where ``out_h = (h - size) // stride + 1`` and likewise for ``out_w``
        (0 when the window does not fit at all).
    """
    num_filters, h, w = X.shape

    # Number of complete windows per axis. Computed per axis so that
    # non-square inputs and sizes not divisible by the stride never index
    # past the end of the output array.
    out_h = max(0, (h - size) // stride + 1)
    out_w = max(0, (w - size) // stride + 1)
    out = np.zeros((num_filters, out_h, out_w))

    for k in range(num_filters):
        for oi in range(out_h):
            i = oi * stride
            for oj in range(out_w):
                j = oj * stride
                region = X[k, i:i+size, j:j+size]
                out[k, oi, oj] = np.max(region)

    return out

# Downsample the activated feature maps with the default 2x2 / stride-2 window
pool_out = max_pool(X=relu_out)

## Flatten

In [6]:
# Collapse the pooled feature maps into a single 1-D feature vector
flattened = pool_out.reshape(pool_out.size)


## Fully Connected Layer

In [7]:
# Fully connected layer parameters
num_classes = 2

# Size the weight matrix from the flattened feature vector rather than a
# hard-coded 18, so changing the image size, filter count or pooling
# settings upstream does not break the matrix product below.
num_features = flattened.shape[0]
W_fc = np.random.randn(num_features, num_classes)
b_fc = np.zeros(num_classes)

# Forward pass: one logit per class
logits = flattened @ W_fc + b_fc

## Softmax probabilities

In [8]:
def softmax(z):
    """Turn a vector of logits into probabilities that sum to 1.

    The maximum logit is subtracted before exponentiating so that large
    logits do not overflow; this shift does not change the result.
    """
    shifted = z - np.max(z)
    weights = np.exp(shifted)
    total = np.sum(weights)
    return weights / total

# Class probabilities derived from the FC-layer logits
probs = softmax(z=logits)

## Prediction

In [9]:
# The predicted class is the index of the largest probability
prediction = probs.argmax()
print(prediction)

1


## Loss function (Cross-Entropy)

In [10]:
# Index of the ground-truth class for this example
y_true = 1

# Cross-entropy for a single example: negative log of the probability
# assigned to the true class
true_class_prob = probs[y_true]
loss = -np.log(true_class_prob)


## Backprop through Fully Connected Layer

In [11]:
# Gradient of the loss w.r.t. the logits: probabilities with 1 subtracted
# at the true class
dlogits = probs.copy()
dlogits[y_true] -= 1

# Gradient w.r.t. the FC weights: outer product of the layer input and dlogits
dW_fc = np.outer(flattened, dlogits)

# Gradient w.r.t. the FC bias is dlogits itself (same array object, not a copy)
db_fc = dlogits

# Gradient w.r.t. the FC input, to be passed back to the earlier layers
dflattened = W_fc.dot(dlogits)


## Backprop through Flatten

In [12]:
# Undo the flatten: give the gradient the same shape as the pooled maps
dpool = dflattened.reshape(*pool_out.shape)

## Backprop through Max Pooling

In [13]:
def max_pool_backward(dout, X, size=2, stride=2):
    """Back-propagate gradients through a max-pooling layer.

    Each upstream gradient value is routed to exactly one input position: the
    first maximum (in row-major order) of the window that produced it. Every
    other input position receives zero from that window.

    Parameters
    ----------
    dout : np.ndarray, shape (num_filters, out_h, out_w)
        Gradient of the loss w.r.t. the pooling output.
    X : np.ndarray, shape (num_filters, h, w)
        The input that was given to the forward pooling pass.
    size : int, default 2
        Side length of the square pooling window used in the forward pass.
    stride : int, default 2
        Stride used in the forward pass.

    Returns
    -------
    np.ndarray, same shape and dtype as X
        Gradient of the loss w.r.t. X.
    """
    num_filters = X.shape[0]
    dX = np.zeros_like(X)

    # Walk the output grid instead of the input grid, so trailing
    # rows/columns that never formed a pooled value are not indexed.
    _, out_h, out_w = dout.shape

    for k in range(num_filters):
        for oi in range(out_h):
            i = oi * stride
            for oj in range(out_w):
                j = oj * stride
                region = X[k, i:i+size, j:j+size]

                # np.argmax returns the first maximum, so ties (e.g. an
                # all-zero window after ReLU) get the gradient once instead
                # of once per tied position.
                m, n = divmod(int(np.argmax(region)), region.shape[1])

                # Accumulate so overlapping windows (stride < size) add up
                # rather than overwrite each other.
                dX[k, i+m, j+n] += dout[k, oi, oj]
    return dX

# Gradient w.r.t. the ReLU output (the input of the pooling layer)
drelu = max_pool_backward(dout=dpool, X=relu_out)

## Backprop through ReLU

In [14]:
# No gradient flows back through positions where the pre-activation was
# zero or negative
inactive = conv_out <= 0
dconv = drelu.copy()
dconv[inactive] = 0


## Backprop through Convolution (core part)

In [15]:
# Gradient accumulators for the filters, their biases and the input image
dfilters = np.zeros_like(filters)
dbias = np.zeros_like(bias)
dX = np.zeros_like(X)

_, grad_rows, grad_cols = dconv.shape

for f_idx in range(num_filters):
    # Each bias is added to every output position, so its gradient is the
    # sum over that filter's whole gradient map
    dbias[f_idx] = dconv[f_idx].sum()

    for row in range(grad_rows):
        for col in range(grad_cols):
            upstream = dconv[f_idx, row, col]
            row_end = row + filter_size
            col_end = col + filter_size

            # Filter gradient: the input patch seen at this position,
            # scaled by the upstream gradient
            patch = X[row:row_end, col:col_end]
            dfilters[f_idx] += upstream * patch

            # Input gradient: the filter itself, scaled by the upstream
            # gradient, added back onto the patch it was applied to
            dX[row:row_end, col:col_end] += upstream * filters[f_idx]


## Weight update (Gradient Descent)

In [16]:
# Learning rate for plain gradient descent
lr = 0.01

# Each parameter array is paired with its gradient and updated in place:
# convolution filters and biases first, then the FC weights and biases
param_grad_pairs = (
    (filters, dfilters),
    (bias, dbias),
    (W_fc, dW_fc),
    (b_fc, db_fc),
)
for param, grad in param_grad_pairs:
    param -= lr * grad


## Output

In [17]:
# Summary of the forward pass: probabilities, chosen class and loss
summary = (
    ("Probabilities:", probs),
    ("Predicted class:", prediction),
    ("Loss:", loss),
)
for label, value in summary:
    print(label, value)

Probabilities: [0.0072023 0.9927977]
Predicted class: 1
Loss: 0.007228358532233878
