In [96]:
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [97]:
import numpy as np

### Gradient check for linear layer

In [177]:
# Numerical gradient check for a linear layer Y = X.dot(weights.T) + bias.
# X holds 50 objects (rows) with 20 inputs each; the layer has 20 outputs.
X = np.random.normal(loc=0.5, scale=0.5, size=(50, 20))
# Feeding the identity matrix through backward() yields the full Jacobian
# dY_i/dX_j of one object, which for a linear layer is `weights` itself.
input_grad = np.eye(20)
weights = np.random.normal(size=(20, 20))
# One bias per output unit.  The positional call np.random.normal(20) would
# instead draw a single scalar centred at 20.
bias = np.random.normal(size=20)

def forward(X):
    """Forward pass of the linear layer.

    Returns ``X.dot(weights.T) + bias`` using the module-level ``weights``
    and ``bias``; row ``l`` of the result is the output for row ``l`` of X.
    """
    Y = X.dot(weights.T) + bias
    return Y

def backward(input_grad):
    """Backward pass: propagate ``input_grad`` (gradient w.r.t. the layer
    output) to the layer input by multiplying with ``weights``."""
    return input_grad.dot(weights)

eps = 1e-4
# Compute Y here so the cell does not depend on a leftover kernel variable.
Y = forward(X)
# num_grad[i, j] approximates dY[l, i] / dX[l, j], averaged over objects l.
num_grad = np.zeros(shape=(Y.shape[1], X.shape[1]))
for l in range(X.shape[0]):
    num_grad_per_object = np.zeros(shape=(Y.shape[1], X.shape[1]))
    for j in range(X.shape[1]):
        # Remember the exact value: "+= eps; -= 2*eps; += eps" does not
        # restore it bit-for-bit in floating point.
        original_value = X[l, j]
        X[l, j] = original_value + eps
        Y_plus = forward(X)
        X[l, j] = original_value - eps
        Y_minus = forward(X)
        X[l, j] = original_value
        # One pair of forward passes gives the central difference for every
        # output i at once, so no inner loop over outputs is needed.
        num_grad_per_object[:, j] = (Y_plus[l, :] - Y_minus[l, :]) / (2. * eps)
    num_grad += (1. / X.shape[0]) * num_grad_per_object


# Single pre-formatted argument keeps the output identical on Python 2 and 3.
print("Frobenius norm is equal to: %s" % np.linalg.norm(num_grad - backward(input_grad)))

Frobenius norm is equal to: 1.94882138933e-11


### Gradient check for other "Hadamard product" layers (sigmoid, tanh, ReLU)

In [172]:
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), applied elementwise via NumPy."""
    denominator = 1. + np.exp(-x)
    return 1. / denominator

def forward(x):
    """Forward pass of the layer under test: the sigmoid of ``x``."""
    activation = sigmoid(x)
    return activation

def backward(x,input_grad):
    """Backward pass of the sigmoid layer.

    Scales ``input_grad`` (gradient w.r.t. the layer output) by the sigmoid
    derivative s * (1 - s), where s = sigmoid(x).
    """
    s = sigmoid(x)
    return input_grad * s * (1 - s)

In [173]:
# Numerical gradient check for the elementwise layer defined by the
# forward()/backward() pair above.
X = np.random.normal(loc=0.5, scale=0.5, size=(50, 30))
# All-ones upstream gradient, so backward() returns the raw derivative.
input_grad = np.ones(shape=X.shape)
eps = 1e-4

# num_grad[i, j] is the central difference of output entry (i, j) with
# respect to input entry (i, j).
num_grad = np.zeros(shape=X.shape)
for i in range(X.shape[0]):
    for j in range(X.shape[1]):
        # Save the exact value: "+= eps; -= 2*eps; += eps" does not restore
        # it bit-for-bit, which would slowly perturb X during the check.
        original_value = X[i, j]
        X[i, j] = original_value + eps
        Y_plus = forward(X)
        X[i, j] = original_value - eps
        Y_minus = forward(X)
        X[i, j] = original_value
        num_grad[i, j] = (Y_plus[i, j] - Y_minus[i, j]) / (2. * eps)

# Frobenius norm of the mismatch; displayed as the cell output.
np.linalg.norm(num_grad - backward(X, input_grad))

5.8727581533706499e-09

### Softmax and LogSoftmax

In [174]:
def softmax(x):
    """Row-wise softmax of a 2-D array.

    Each row of ``x`` is exponentiated and normalised so that it sums to 1
    along axis 1.  The row maximum is subtracted before exponentiating; this
    leaves the result mathematically unchanged but keeps ``np.exp`` from
    overflowing to ``inf`` (and the ratio from becoming ``nan``) when the
    inputs are large.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty reduction.
        return np.exp(x)
    shifted = x - np.max(x, axis=1, keepdims=True)
    exps = np.exp(shifted)  # computed once and reused for the normaliser
    return exps / np.sum(exps, axis=1, keepdims=True)

def forward(x):
    """Forward pass of the layer under test: the softmax of ``x``."""
    probabilities = softmax(x)
    return probabilities

def backward(x,input_grad):
    """Backward pass of the softmax layer.

    With s = softmax(x), returns
    ``input_grad * s - s * sum(input_grad * s, axis=1)``,
    where the sum keeps its axis so it broadcasts across each row.
    """
    s = softmax(x)
    weighted = input_grad * s
    row_totals = np.sum(weighted, axis=1, keepdims=True)
    return weighted - s * row_totals

In [175]:
# Numerical gradient check for the softmax forward()/backward() pair above.
X = np.random.normal(loc=0.5, scale=0.5, size=(50, 30))
eps = 1e-4

# Both arrays are indexed [i, l, j] and hold dY[l, i] / dX[l, j]:
# output column i of object l with respect to input column j of object l.
num_grad = np.zeros(shape=(X.shape[1], X.shape[0], X.shape[1]))
an_grad = np.zeros(shape=(X.shape[1], X.shape[0], X.shape[1]))

# Analytic part: a one-hot upstream gradient on column i makes backward()
# return the derivative of output column i with respect to every input.
for i in range(X.shape[1]):
    input_grad = np.zeros(shape=X.shape)
    input_grad[:, i] = 1.
    an_grad[i, :, :] = backward(X, input_grad)

# Numerical part: one pair of forward passes per perturbed input entry
# yields the central difference for all output columns of that row at once.
for l in range(X.shape[0]):
    for j in range(X.shape[1]):
        # Save the exact value: "+= eps; -= 2*eps; += eps" does not restore
        # it bit-for-bit in floating point.
        original_value = X[l, j]
        X[l, j] = original_value + eps
        Y_plus = forward(X)
        X[l, j] = original_value - eps
        Y_minus = forward(X)
        X[l, j] = original_value
        num_grad[:, l, j] = (Y_plus[l, :] - Y_minus[l, :]) / (2. * eps)

# Report the mismatch between the two Jacobians as the cell output.
np.linalg.norm(num_grad - an_grad)



### Gradient for MSE, CrossEntropy and NLL

In [193]:
def forward(X,T):
    """Cross-entropy loss: minus the sum over all entries of T * log(X)."""
    weighted_log = T * np.log(X)
    return -np.sum(weighted_log)
def backward(X,T):
    """Gradient of the cross-entropy loss with respect to X: -T / X, elementwise."""
    negated_targets = -T
    return negated_targets / X

In [194]:
# Numerical gradient check for the cross-entropy forward()/backward() pair.
# The lower bound is kept well above eps: with low=1e-8 an entry could fall
# below eps, X - eps would turn negative, log() would return nan, and
# 0 * nan would poison the summed loss even where T is zero.
X = np.random.uniform(low=1e-2, high=1.0, size=(50, 30))
# One-hot targets marking the largest entry of every row.
T = 1.*(X >= np.max(X, axis=1, keepdims=True))
eps = 1e-4

# num_grad[i, j] is the central difference of the scalar loss w.r.t. X[i, j].
num_grad = np.zeros(shape=(X.shape[0], X.shape[1]))

for i in range(X.shape[0]):
    for j in range(X.shape[1]):
        # Save the exact value: "+= eps; -= 2*eps; += eps" does not restore
        # it bit-for-bit in floating point.
        original_value = X[i, j]
        X[i, j] = original_value + eps
        Y_plus = forward(X, T)
        X[i, j] = original_value - eps
        Y_minus = forward(X, T)
        X[i, j] = original_value
        num_grad[i, j] = (Y_plus - Y_minus) / (2. * eps)

In [195]:
# Analytic gradient from backward(), compared against the numerical estimate;
# the Frobenius norm of the difference is the cell output.
an_grad = backward(X, T)
np.linalg.norm(an_grad - num_grad)

2.6163998429313549e-08

10.267221358828609