In [3]:
import numpy as np
import tensorflow as tf
# Show the installed TensorFlow version; the recorded output below is 1.8.0, and the
# later cells use the graph/session API (tf.Session, tf.gradients).
print(tf.__version__)

1.8.0


In [35]:
def sigmoid(X):
    """Apply the logistic function ``1 / (1 + exp(-X))`` element-wise.

    Args:
        X: A scalar or NumPy array of pre-activation values (logits).

    Returns:
        The sigmoid of ``X`` with the same shape as ``X``. For very negative
        inputs the result saturates to exactly 0, and for very positive
        inputs to exactly 1.
    """
    # For very negative X, exp(-X) overflows to inf and 1 / (1 + inf) is 0,
    # which is the correct limit. Silence only that overflow warning so the
    # arithmetic (and therefore every computed value) stays unchanged.
    with np.errstate(over="ignore"):
        return 1 / (1 + np.exp(-X))

## Calculate gradient for one sample manually

In [6]:
# A single input sample (1 x 3) and the first-layer weights (3 x 2), both float32.
x = np.array([[3, 4, 8]], dtype=np.float32)
W_A = np.array(
    [[0.2, 0.3],
     [0.1, 0.2],
     [0.4, 0.5]],
    dtype=np.float32,
)

In [7]:
# First (linear) layer: z = x . W_A
z = x @ W_A
z

array([[4.2, 5.7]], dtype=float32)

In [361]:
# Second-layer weights (2 x 2), then the second linear layer: logits = z . W_B
W_B = np.array(
    [[0.1, -0.3],
     [0.2, 0.1]],
    dtype=np.float32,
)
logits = z @ W_B
logits

array([[ 1.56, -0.69]], dtype=float32)

In [362]:
# Output activation: element-wise sigmoid of the logits (uses the notebook's sigmoid helper).
y = sigmoid(logits)
y

array([[0.8263534 , 0.33403307]], dtype=float32)

In [363]:
# Derivative of the sigmoid with respect to its input, expressed via its output: y * (1 - y).
grad = np.multiply(y, 1 - y)
grad

array([[0.14349347, 0.22245498]], dtype=float32)

In [364]:
# Propagate the gradient back through the second linear layer: d/dz = grad . W_B^T
back_grad = grad @ W_B.T
back_grad

array([[-0.05238715,  0.05094419]], dtype=float32)

In [367]:
# Gradient with respect to W_A: outer product of the (flattened) input and the
# (flattened) upstream gradient, written as an explicit column-times-row broadcast.
grad_WA = x.ravel()[:, np.newaxis] * back_grad.ravel()[np.newaxis, :]
grad_WA

array([[-0.15716144,  0.15283258],
       [-0.2095486 ,  0.20377678],
       [-0.4190972 ,  0.40755355]], dtype=float32)

## Calculate gradient for one sample automatically

In [22]:
# One input sample (1 x 3) and the initial first-layer weights (3 x 2), both float32.
x = np.array([[3, 4, 8]], dtype=np.float32)
W_A_init = np.array(
    [[0.2, 0.3],
     [0.1, 0.2],
     [0.4, 0.5]],
    dtype=np.float32,
)
# Fixed initial values are used instead of a random initialiser such as
# tf.random_normal([3, 2]), so the result is deterministic.
W_A = tf.Variable(W_A_init)

In [23]:
# First (linear) layer as a graph op: z = x . W_A (nothing is computed until a session runs it).
z = tf.matmul(x, W_A)

In [24]:
# Initial second-layer weights (2 x 2), float32.
W_B_init = np.array(
    [[0.1, -0.3],
     [0.2, 0.1]],
    dtype=np.float32,
)
# Fixed initial values are used instead of a random initialiser, so the result is deterministic.
W_B = tf.Variable(W_B_init)
# Second linear layer: logits = z . W_B
logits = tf.matmul(z, W_B)

In [25]:
# Sigmoid activation plus two scalar reductions of it (sum and mean over all elements).
# The following cells take the gradient of each of the three with respect to `logits`.
y = tf.sigmoid(logits)
y_sum = tf.reduce_sum(y)
y_mean = tf.reduce_mean(y)

In [26]:
# Each tf.gradients call returns a list holding one gradient tensor per tensor in its
# second argument.
# Gradient of the non-scalar `y`; in the recorded output it equals the gradient of `y_sum`.
g_logits = tf.gradients(y, logits)
g_logits_sum = tf.gradients(y_sum, logits)
# In the recorded output this is g_logits_sum divided by the number of elements of `y`.
g_logits_mean = tf.gradients(y_mean, logits)
# Gradient of `y` with respect to the first-layer weights W_A.
grad_WA = tf.gradients(y, W_A)

In [29]:
# Initialise the variables, then evaluate and print each forward value and gradient in turn.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for label, fetch in [
        ("logits:", logits),
        ("y:", y),
        ("y_sum:", y_sum),
        ("g_logits: \n", g_logits),
        ("g_logits_sum: \n", g_logits_sum),
        ("g_logits_mean: \n", g_logits_mean),
        ("grad_WA :\n", grad_WA),
    ]:
        print(label, sess.run(fetch))

logits: [[ 1.56 -0.69]]
y: [[0.8263534  0.33403307]]
y_sum: 1.1603864
g_logits: 
 [array([[0.14349347, 0.22245498]], dtype=float32)]
g_logits_sum: 
 [array([[0.14349347, 0.22245498]], dtype=float32)]
g_logits_mean: 
 [array([[0.07174674, 0.11122749]], dtype=float32)]
grad_WA :
 [array([[-0.15716144,  0.15283258],
       [-0.2095486 ,  0.20377678],
       [-0.4190972 ,  0.40755355]], dtype=float32)]


## Calculate gradient for two samples manually

In [30]:
# A mini-batch of two input samples (2 x 3), float32.
x = np.array(
    [[3, 4, 8],
     [1, 2, 3]],
    dtype=np.float32,
)

# First-layer weights (3 x 2), float32.
W_A = np.array(
    [[0.2, 0.3],
     [0.1, 0.2],
     [0.4, 0.5]],
    dtype=np.float32,
)

x

array([[3., 4., 8.],
       [1., 2., 3.]], dtype=float32)

In [31]:
# First (linear) layer applied to every row of the batch: z = x . W_A
z = x @ W_A
z

array([[4.2, 5.7],
       [1.6, 2.2]], dtype=float32)

In [32]:
# Second-layer weights (2 x 2), then the second linear layer: logits = z . W_B
W_B = np.array(
    [[0.1, -0.3],
     [0.2, 0.1]],
    dtype=np.float32,
)
logits = z @ W_B
logits

array([[ 1.56, -0.69],
       [ 0.6 , -0.26]], dtype=float32)

In [36]:
# Output activation: element-wise sigmoid of the logits (uses the notebook's sigmoid helper).
y = sigmoid(logits)
y

array([[0.8263534 , 0.33403307],
       [0.6456563 , 0.4353637 ]], dtype=float32)

In [38]:
# Per-sample sigmoid derivative y * (1 - y), and its average over the batch axis.
grad = np.multiply(y, 1 - y)
grad_mean = grad.mean(axis=0)
print("grad: \n", grad)
print("grad_mean: \n", grad_mean)

grad: 
 [[0.14349347 0.22245498]
 [0.22878425 0.24582215]]
grad_mean: 
 [0.18613887 0.23413856]


In [39]:
# Propagate both the per-sample and the batch-averaged gradients back through
# the second linear layer by multiplying with W_B^T.
W_B_T = W_B.T
back_grad = grad @ W_B_T
back_grad_mean = grad_mean @ W_B_T
print("back_grad: \n", back_grad)
print("back_grad_mean: \n", back_grad_mean)

back_grad: 
 [[-0.05238715  0.05094419]
 [-0.05086822  0.07033907]]
back_grad_mean: 
  [-0.05162768  0.06064163]


In [40]:
print(x.shape)
print(x[0].shape)
# Gradient with respect to W_A: add up one outer product (input row x upstream-gradient
# row) per sample, in sample order and starting from 0.
g_acc = sum(np.outer(sample, back_grad[i]) for i, sample in enumerate(x))

print(g_acc)

(2, 3)
(3,)
[[-0.20802966  0.22317165]
 [-0.31128502  0.3444549 ]
 [-0.5717019   0.61857074]]


## Calculate gradient for two samples automatically

In [16]:
# A mini-batch of two input samples (2 x 3) and the initial first-layer weights (3 x 2).
x = np.array(
    [[3, 4, 8],
     [1, 2, 3]],
    dtype=np.float32,
)
V_A = np.array(
    [[0.2, 0.3],
     [0.1, 0.2],
     [0.4, 0.5]],
    dtype=np.float32,
)
# Fixed initial values are used instead of a random initialiser such as
# tf.random_normal([3, 2]), so the result is deterministic.
W_A = tf.Variable(V_A)

In [17]:
# First (linear) layer as a graph op: z = x . W_A (nothing is computed until a session runs it).
z = tf.matmul(x, W_A)

In [18]:
# Initial second-layer weights (2 x 2), float32.
V_B = np.array(
    [[0.1, -0.3],
     [0.2, 0.1]],
    dtype=np.float32,
)
# Fixed initial values are used instead of a random initialiser, so the result is deterministic.
W_B = tf.Variable(V_B)
# Second linear layer: logits = z . W_B
logits = tf.matmul(z, W_B)
logits = tf.matmul(z, W_B)

In [19]:
# Sigmoid activation plus two scalar reductions of it (sum and mean over all elements).
# The following cells take the gradient of each of the three with respect to `logits`.
y = tf.sigmoid(logits)
y_sum = tf.reduce_sum(y)
y_mean = tf.reduce_mean(y)

In [20]:
# Each tf.gradients call returns a list holding one gradient tensor per tensor in its
# second argument.
# Gradient of the non-scalar `y`; in the recorded output it equals the gradient of `y_sum`.
g1 = tf.gradients(y, logits)
g_sum = tf.gradients(y_sum, logits)
# In the recorded output this is g_sum divided by the number of elements of `y`.
g_mean = tf.gradients(y_mean, logits)

In [22]:
# Gradient of `y` with respect to the first-layer weights W_A. In the recorded output it
# equals the sum of the per-sample outer products computed manually in the previous section.
grad_WA = tf.gradients(y, W_A)

In [24]:
# Initialise the variables, then evaluate and print each forward value and gradient in turn.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for label, fetch in [
        ("logits:", logits),
        ("y:", y),
        ("y_sum:", y_sum),
        ("g:    ", g1),
        ("g sum:", g_sum),
        ("g mean:", g_mean),
        ("g WA:", grad_WA),
    ]:
        print(label, sess.run(fetch))

logits: [[ 1.56 -0.69]
 [ 0.6  -0.26]]
y: [[0.8263534  0.33403307]
 [0.6456563  0.4353637 ]]
y_sum: 2.2414064
g:     [array([[0.14349347, 0.22245498],
       [0.22878425, 0.24582215]], dtype=float32)]
g sum: [array([[0.14349347, 0.22245498],
       [0.22878425, 0.24582215]], dtype=float32)]
g mean: [array([[0.03587337, 0.05561375],
       [0.05719606, 0.06145554]], dtype=float32)]
g WA: [array([[-0.20802966,  0.22317165],
       [-0.31128502,  0.3444549 ],
       [-0.5717019 ,  0.61857074]], dtype=float32)]


## Combine: pass the manually computed upstream gradient to `tf.gradients` via `grad_ys`

In [428]:
# One input sample (1 x 3) and the initial first-layer weights (3 x 2), both float32.
x = np.array([[3, 4, 8]], dtype=np.float32)
V_A = np.array(
    [[0.2, 0.3],
     [0.1, 0.2],
     [0.4, 0.5]],
    dtype=np.float32,
)
# Fixed initial values are used instead of a random initialiser such as
# tf.random_normal([3, 2]), so the result is deterministic.
W_A = tf.Variable(V_A)

In [429]:
# First (linear) layer as a graph op: z = x . W_A (nothing is computed until a session runs it).
z = tf.matmul(x, W_A)

In [430]:
# Second-layer weights kept as a plain NumPy array here (not a tf.Variable).
W_B = np.array(
    [[0.1, -0.3],
     [0.2, 0.1]],
    dtype=np.float32,
)
# Second linear layer: logits = z . W_B
logits = tf.matmul(z, W_B)
logits

<tf.Tensor 'MatMul_71:0' shape=(1, 2) dtype=float32>

In [431]:
# Upstream gradient with respect to z, hard-coded as a float32 array and wrapped
# as a tensor so it can be passed to tf.gradients.
back_grad = np.array([[-0.05238715, 0.05094419]], dtype=np.float32)
back_grad_tensor = tf.convert_to_tensor(back_grad)

In [432]:
# Gradient of z with respect to W_A, with `grad_ys` supplying the upstream gradient for z
# in place of the default.
g1 = tf.gradients(z, W_A, grad_ys=back_grad_tensor)
# Note: the tensor-wrapped value is passed above; an earlier variant passed the raw array `back_grad`.

In [433]:
# Initialise the variables, then print the logits followed by the gradient for W_A.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for fetch in (logits, g1):
        print(sess.run(fetch))

[[ 1.56 -0.69]]
[array([[-0.15716144,  0.15283257],
       [-0.2095486 ,  0.20377676],
       [-0.4190972 ,  0.40755352]], dtype=float32)]


In [74]:
# A mini-batch of two input samples (2 x 3) and the initial first-layer weights (3 x 2).
x = np.array(
    [[3, 4, 8],
     [1, 2, 3]],
    dtype=np.float32,
)
V_A = np.array(
    [[0.2, 0.3],
     [0.1, 0.2],
     [0.4, 0.5]],
    dtype=np.float32,
)
# Fixed initial values are used instead of a random initialiser such as
# tf.random_normal([3, 2]), so the result is deterministic.
W_A = tf.Variable(V_A)

In [75]:
# First (linear) layer as a graph op: z = x . W_A (nothing is computed until a session runs it).
z = tf.matmul(x, W_A)

In [76]:
# Second-layer weights kept as a plain NumPy array here (not a tf.Variable).
W_B = np.array(
    [[0.1, -0.3],
     [0.2, 0.1]],
    dtype=np.float32,
)
# Second linear layer: logits = z . W_B
logits = tf.matmul(z, W_B)
logits

<tf.Tensor 'MatMul_5:0' shape=(2, 2) dtype=float32>

In [77]:
# Per-sample upstream gradients with respect to z (one row per sample), hard-coded
# as a float32 array and wrapped as a tensor so they can be passed to tf.gradients.
back_grad = np.array(
    [[-0.05238715, 0.05094419],
     [-0.05086822, 0.07033907]],
    dtype=np.float32,
)
back_grad_tensor = tf.convert_to_tensor(back_grad)

In [78]:
# Gradient of z with respect to W_A, with `grad_ys` supplying the per-sample upstream
# gradient for z in place of the default.
g1 = tf.gradients(z, W_A, grad_ys=back_grad_tensor)

In [79]:
# Initialise the variables, then print the logits followed by the gradient for W_A.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for fetch in (logits, g1):
        print(sess.run(fetch))

[[ 1.56 -0.69]
 [ 0.6  -0.26]]
[array([[-0.20802966,  0.22317164],
       [-0.31128502,  0.34445488],
       [-0.5717019 ,  0.61857074]], dtype=float32)]
