In [1]:
import sys, os
sys.path.append('../../')
from common.gradient import numerical_gradient
from common.functions import softmax
import numpy as np

# Gradient in a neural network: the gradient of the loss function with respect to the weight parameters

In [2]:
class simpleNet:
    """Single-layer network mapping 2 input features to 3 class scores.

    Attributes:
        W: weight matrix of shape (2, 3), drawn from a standard normal
           distribution when the instance is created.
    """

    def __init__(self):
        # Random initial weights; the values differ on every instantiation.
        self.W = np.random.randn(2, 3)

    def predict(self, x):
        """Return the raw class scores ``np.dot(x, W)`` (no softmax applied)."""
        return np.dot(x, self.W)

    def cross_entropy_error(self, y, t):
        """Return the mean cross-entropy between probabilities ``y`` and targets ``t``.

        Args:
            y: predicted probabilities, either one sample (1-D) or a batch (2-D).
            t: targets, either one-hot encoded (same size as ``y``) or given as
               class indices.

        Returns:
            The cross-entropy averaged over the batch, as a float.
        """
        # Promote a single sample to a batch of one so the indexing below is uniform.
        if y.ndim == 1:
            t = t.reshape(1, t.size)
            y = y.reshape(1, y.size)

        # If the targets are one-hot vectors, convert them to class indices.
        if t.size == y.size:
            t = t.argmax(axis=1)

        batch_size = y.shape[0]
        # Pick the probability assigned to the correct class of each sample;
        # the small constant keeps log() away from log(0).
        return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size

    def loss(self, x, t):
        """Return the softmax cross-entropy loss of the prediction for ``x`` against ``t``.

        Args:
            x: input sample(s) passed to ``predict``.
            t: targets, one-hot encoded or class indices.
        """
        z = self.predict(x)
        y = softmax(z)
        # Compare the probabilities with the targets ``t``. Passing the logits ``z``
        # here would make the loss ignore the targets and use argmax(z) as the label.
        return self.cross_entropy_error(y, t)

In [3]:
# Build the network; its weight matrix W is drawn at random in __init__.
net = simpleNet()

In [4]:
# Inspect the randomly initialized weights (the values change on every run).
print(net.W.shape)
net.W

(2, 3)


array([[-2.18388879, -0.31394738,  0.30983899],
       [-0.12552112, -0.84188114, -1.3445247 ]])

In [5]:
# Replace the random weights with fixed values.
net.W = np.array([[0.47355232, 0.9977393, 0.84668094], [0.855557411, 0.03563661, 0.69422093]])
net.W

array([[0.47355232, 0.9977393 , 0.84668094],
       [0.85555741, 0.03563661, 0.69422093]])

In [6]:
# A single input sample with two features (one per row of W).
x = np.array([0.6, 0.9])

In [7]:
# Raw class scores for x (predict applies no softmax).
p = net.predict(x)
p

array([1.05413306, 0.63071653, 1.1328074 ])

In [8]:
# Index of the largest score.
np.argmax(p)

2

In [9]:
# One-hot target vector with the 1 at index 2.
t = np.array([0,0,1])

In [10]:
# Evaluate the network's loss for input x and target t.
net.loss(x, t)

0.9280627939898086

# Compute the gradient of the loss function with respect to the weight parameters

In [11]:
def f(W):
    """Return the loss of ``net`` on the sample ``(x, t)``.

    ``W`` is accepted only so the function has an ``f(weights)`` signature; it
    is never read. The loss is always computed from the current ``net.W``.
    NOTE(review): using this with ``numerical_gradient(f, net.W)`` relies on
    that helper perturbing the array it is given in place; confirm against its
    implementation.
    """
    current_loss = net.loss(x, t)
    return current_loss

In [12]:
# Numerically estimate the gradient of f at net.W; dW has the same shape as net.W.
# With print_status=True the call prints each perturbed weight and the resulting loss.
dW = numerical_gradient(f, net.W, print_status=True)
dW

x.shape = (2, 3) x.ndim = 2 
x = [[0.47355232 0.9977393  0.84668094]
 [0.85555741 0.03563661 0.69422093]]
idx = (0, 0) x+h = 0.47365232 f(x+h) = 0.9280847189552238
idx = (0, 0) x-h = 0.47345232000000004 f(x-h) = 0.9280408698591798
idx = (0, 1) x+h = 0.9978393 f(x+h) = 0.928077150639272
idx = (0, 1) x-h = 0.9976393 f(x-h) = 0.9280484379956203
idx = (0, 2) x+h = 0.84678094 f(x+h) = 0.9280265135502431
idx = (0, 2) x-h = 0.8465809400000001 f(x-h) = 0.9280990752899237
idx = (1, 0) x+h = 0.855657411 f(x+h) = 0.9280956817509809
idx = (1, 0) x-h = 0.8554574110000001 f(x-h) = 0.9280299081069064
idx = (1, 1) x+h = 0.03573661 f(x+h) = 0.9280843292097382
idx = (1, 1) x-h = 0.035536609999999996 f(x-h) = 0.9280412602442478
idx = (1, 2) x+h = 0.69432093 f(x+h) = 0.9280083736531699
idx = (1, 2) x-h = 0.69412093 f(x-h) = 0.9281172162626843


array([[ 0.21924548,  0.14356322, -0.3628087 ],
       [ 0.32886822,  0.21534483, -0.54421305]])

# Same gradient computation, with f defined as a lambda

In [13]:
# Same computation with f written as a lambda. The argument w is unused:
# the loss is read from net's current weights.
f = lambda w: net.loss(x, t)
dW = numerical_gradient(f, net.W, print_status=True)
dW

x.shape = (2, 3) x.ndim = 2 
x = [[0.47355232 0.9977393  0.84668094]
 [0.85555741 0.03563661 0.69422093]]
idx = (0, 0) x+h = 0.47365232 f(x+h) = 0.9280847189552238
idx = (0, 0) x-h = 0.47345232000000004 f(x-h) = 0.9280408698591798
idx = (0, 1) x+h = 0.9978393 f(x+h) = 0.928077150639272
idx = (0, 1) x-h = 0.9976393 f(x-h) = 0.9280484379956203
idx = (0, 2) x+h = 0.84678094 f(x+h) = 0.9280265135502431
idx = (0, 2) x-h = 0.8465809400000001 f(x-h) = 0.9280990752899237
idx = (1, 0) x+h = 0.855657411 f(x+h) = 0.9280956817509809
idx = (1, 0) x-h = 0.8554574110000001 f(x-h) = 0.9280299081069064
idx = (1, 1) x+h = 0.03573661 f(x+h) = 0.9280843292097382
idx = (1, 1) x-h = 0.035536609999999996 f(x-h) = 0.9280412602442478
idx = (1, 2) x+h = 0.69432093 f(x+h) = 0.9280083736531699
idx = (1, 2) x-h = 0.69412093 f(x-h) = 0.9281172162626843


array([[ 0.21924548,  0.14356322, -0.3628087 ],
       [ 0.32886822,  0.21534483, -0.54421305]])