# Week 8 Quiz

In [12]:
import os
import cv2
import math
import scipy
import skimage
import slgbuilder
import numpy as np
from skimage import io
import matplotlib.pyplot as plt

Consider a classification network

![](ANN_week8.png)

with ReLU activation in the hidden layer and softmax in the last layer. We pass the
input value $x_1 = 120$ through the network.

## 1)


What is the value of $y_1$, i.e. the predicted probability that $x_1 = 120$ belongs to
the class 1?

In [27]:
# Network input as a column vector [1, x1]^T; the leading 1 is the constant
# entry that multiplies the first (bias) column of the hidden-layer weights.
x = np.array([[1], [120]])
x

array([[  1],
       [120]])

In [28]:
# Hidden-layer weights: one row per hidden unit. Column 0 multiplies the
# constant 1 (bias term), column 1 multiplies the input x1.
W1 = np.array([[10.0, -0.01],
               [0.0, 0.1]])
W1

array([[10.  , -0.01],
       [ 0.  ,  0.1 ]])

In [29]:
# Output-layer weights: one row per class. Column 0 multiplies the constant 1
# (bias term); columns 1 and 2 multiply the hidden activations h1 and h2.
W2 = np.array([[4.0, 0.05, 0.0],
               [0.0, -0.01, 0.2]])
W2

array([[ 4.  ,  0.05,  0.  ],
       [ 0.  , -0.01,  0.2 ]])

In [35]:
# Hidden layer: ReLU of the pre-activations W1 @ x, with a constant 1 stacked
# on top so the output layer's bias column has something to multiply.
h = np.vstack([[1], np.maximum(W1 @ x, 0)])
h

array([[ 1. ],
       [ 8.8],
       [12. ]])

In [36]:
# Output-layer pre-activations (the values fed to softmax): W2 applied to the
# stacked vector [1, h1, h2]^T.
y_hat = W2@h
y_hat

array([[4.44 ],
       [2.312]])

In [37]:
def softmax(y_hat):
    """Return the softmax of ``y_hat``, normalised over all of its entries.

    Parameters
    ----------
    y_hat : array_like
        Pre-softmax network outputs (logits), e.g. a column vector of shape
        (n_classes, 1).

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``y_hat`` whose entries are non-negative
        and sum to 1. An empty input yields an empty array.
    """
    z = np.asarray(y_hat, dtype=float)
    if z.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return z
    # Shifting by the maximum does not change the result mathematically, but
    # keeps every exponent <= 0 so np.exp cannot overflow to inf (which would
    # turn the ratio into nan for large logits).
    exp_z = np.exp(z - np.max(z))
    return exp_z / np.sum(exp_z)

def softmax_math(y_hat):
    """Two-class softmax computed with the ``math`` module.

    Parameters
    ----------
    y_hat : array_like
        The two pre-softmax outputs, either as a flat pair ``[z1, z2]`` or as
        a column vector of shape (2, 1). Only the first two entries are used.

    Returns
    -------
    numpy.ndarray
        Column vector ``[[y1], [y2]]`` of class probabilities.
    """
    # Flatten to plain Python floats first: passing a size-1 array such as
    # y_hat[0] of a (2, 1) array straight to math.exp relies on an implicit
    # array-to-scalar conversion that NumPy has deprecated.
    logits = np.asarray(y_hat, dtype=float).ravel()
    z1, z2 = float(logits[0]), float(logits[1])

    # Subtract the larger logit so math.exp never sees a large positive
    # argument (math.exp raises OverflowError instead of returning inf).
    shift = max(z1, z2)
    e1 = math.exp(z1 - shift)
    e2 = math.exp(z2 - shift)
    total = e1 + e2

    return np.array([[e1 / total], [e2 / total]])

# Predicted class probabilities for the input: row 0 is y_1, row 1 is y_2.
y = softmax(y_hat)
y

array([[0.89359499],
       [0.10640501]])

Probability that $x_1 = 120$ belongs to class 1 is approx. 0.89 i.e. $89\%$

## 2)
We know that $x_1 = 120$ should belong to the class 2. What is the loss for this input value? We use cross entropy loss function (with the natural logarithm).

In [53]:
# One-hot target vector: the true class is class 2.
target = np.array([0,1])

# NOTE(review): target has shape (2,) while y has shape (2, 1), so the product
# broadcasts to a (2, 2) array and the row sums give -log(y_i) for each class i
# instead of a single scalar. The loss for this sample is therefore entry [1]
# (= -log(y_2)); entry [0] is not a loss value for this target.
cross_entropy_loss = -np.sum(target * np.log(y), axis=1)
cross_entropy_loss

array([0.11250264, 2.24050264])

In [55]:
# Entry [1] is -log(y_2), i.e. the cross-entropy loss when the true class is 2.
print(f'x1 = 120 loss for class 2 i.e. y2 is {cross_entropy_loss[1]}')

x1 = 120 loss for class 2 i.e. y2 is 2.2405026355372013


## 3)
We back-propagate the loss for the input $x_1 = 120$. What is the partial derivative
$$
\frac{\partial L}{\partial w_{22}^{(2)}} = \delta_2^{(2)}h_2^{(2-1)}
$$
where in the general form
$$
\frac{\partial L}{\partial w_{ij}^{(l)}} = \delta_i^{(l)}h_j^{(l-1)}
$$
which we will need to update $w^{(2)}_{22}$? The weight $w^{(2)}_{22}$ is drawn as the edge having the value 0.2 before update.

For the last layer $l^*$, the derivative of $L$ with respect to $z_i^{(l^*)}$, i.e. $\delta_i^{(l^*)}$, is
$$
\delta_i^{(l^*)} = -t_i + \frac{1}{\sum_k \exp z_k^{(l^*)}} \exp z_i^{(l^*)} = y_i - t_i
$$

In [56]:
# Output-layer error term for class 2: delta_2 = y_2 - t_2.
delta_2 = y[1] - target[1]
delta_2

array([-0.89359499])

In [57]:
# dL/dw22 = delta_2 * h_2. h[2] is the second hidden activation because h[0]
# holds the constant 1 that was stacked on top for the bias.
dL = delta_2 * h[2]
dL

array([-10.7231399])

## Solution

In [58]:
# Reference solution: the same forward pass, loss and gradient written out
# with plain scalars. `math` is already imported in the notebook's import cell.

# Scalar input. Named x1 (not x) so the column vector `x` built for the matrix
# version above is not overwritten, which would break re-running those cells.
x1 = 120

# hidden layer with ReLU
h1 = max(10 - 0.01*x1, 0)
h2 = max(0.1*x1, 0)
print(f'h1: {h1} and h2: {h2}')

# output layer (pre-softmax values)
y1_hat = 4 + 0.05*h1
y2_hat = -0.01*h1 + 0.2*h2
print(f'y1_hat: {y1_hat} and y2_hat: {y2_hat}')

# softmax
y1 = math.exp(y1_hat)/(math.exp(y1_hat)+math.exp(y2_hat))
y2 = math.exp(y2_hat)/(math.exp(y1_hat)+math.exp(y2_hat))

print(f'y1: {y1} and y2: {y2}')

# loss: with a one-hot target on class 2 the cross entropy reduces to -log(y2)
L = -math.log(y2)

print(f'Loss: {L}')

# partial derivative dL/dw22 = delta_2 * h2 with delta_2 = y2 - t2 and t2 = 1
dLdw22 = h2 * (y2-1)

print(f'dLw22: {dLdw22}')

h1: 8.8 and h2: 12.0
y1_hat: 4.44 and y2_hat: 2.3120000000000003
y1: 0.8935949920025679 and y2: 0.1064050079974321
Loss: 2.2405026355372013
dLw22: -10.723139904030814
