### Initialization

In [1]:
# For Colab only!
# Requests the TensorFlow 2.x runtime through the `%tensorflow_version` line magic.

try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  # Best effort: any failure here is ignored and the notebook continues with
  # whatever TensorFlow version the next cell imports.
  pass

In [1]:
import tensorflow as tf

In [28]:
import torch
from torch.nn import functional as F

In [3]:
# Report the TensorFlow version and whether at least one GPU is visible.
print(tf.__version__)
# tf.test.is_gpu_available() is deprecated; TensorFlow itself recommends
# querying the physical device list instead. bool() keeps the True/False output.
print(bool(tf.config.list_physical_devices('GPU')))

2.1.0
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
True


In [4]:
# Report the PyTorch version and CUDA availability, one value per line.
print(torch.__version__, torch.cuda.is_available(), sep="\n")

1.4.0
True


### MSE gradient

In [54]:
def one_hot(label, depth):
    """Return a one-hot encoding of integer class labels.

    Args:
        label: tensor of N class indices (1-D, or anything whose first
            dimension is N and that flattens to N elements). Any integer
            dtype and any device are accepted.
        depth: number of classes, i.e. the width of the encoding.

    Returns:
        Tensor of shape (N, depth) in the default floating dtype, located on
        the same device as ``label``, with a 1 at column ``label[i]`` of row
        ``i`` and 0 elsewhere.
    """
    # .long() converts any integer dtype on any device; the legacy
    # torch.LongTensor(...) constructor only accepts CPU int64 tensors.
    idx = label.long().reshape(-1, 1)
    # Allocate on the label's device so scatter_ does not mix devices.
    out = torch.zeros(label.size(0), depth, device=label.device)
    out.scatter_(dim=1, index=idx, value=1)
    return out

In [62]:
# Example: a linear layer mapping [3,4] inputs to [3,2] logits.
#   logits = x @ w + b   with x:[3,4], w:[4,2], b:[2]
#   y:[3] holds integer class labels, one-hot encoded with depth = 2

x = tf.random.uniform([3, 4])
w = tf.Variable(tf.random.uniform([4, 2]))
b = tf.Variable(tf.zeros([2]))
y = tf.constant([0, 1, 1])

# w and b are tf.Variables, so no explicit tape.watch() call is made here.
with tf.GradientTape() as tape:
    logits = tf.matmul(x, w) + b
    probs = tf.nn.softmax(logits)

    y_true = tf.one_hot(y, depth=2)

    # MSE between the one-hot labels and the softmax output, reduced to a scalar.
    losses = tf.losses.MSE(y_true, probs)
    loss = tf.reduce_mean(losses)

# d(loss)/dw and d(loss)/db, in that order.
grads = tape.gradient(loss, [w, b])
grads_w, grads_b = grads

print(grads_w)
print(grads_b)

print(logits)
print(probs)


tf.Tensor(
[[ 0.0189726  -0.01897261]
 [-0.02627433  0.02627433]
 [-0.02474528  0.02474528]
 [-0.05913349  0.05913348]], shape=(4, 2), dtype=float32)
tf.Tensor([ 0.05598413 -0.05598414], shape=(2,), dtype=float32)
tf.Tensor(
[[1.5465539  1.8689929 ]
 [0.6153563  0.8887136 ]
 [0.72412205 0.8128369 ]], shape=(3, 2), dtype=float32)
tf.Tensor(
[[0.42008144 0.57991856]
 [0.4320831  0.5679169 ]
 [0.4778358  0.52216417]], shape=(3, 2), dtype=float32)


In [63]:
# Example: a linear layer mapping [3,4] inputs to [3,2] logits.
#   logits = x @ w + b   with x:[3,4], w:[4,2], b:[2]
#   y:[3] holds integer class labels, one-hot encoded with depth = 2

x = torch.rand(3, 4)
w = torch.rand([4, 2], requires_grad=True)
b = torch.zeros([2], requires_grad=True)
y = torch.LongTensor([0, 1, 1])

logits = x @ w + b
probs = F.softmax(logits, dim=1)

y_true = one_hot(y, depth=2)
# F.mse_loss is documented as (input, target): prediction first, label second.
# The value is the same either way, but this order matches the API contract.
loss = F.mse_loss(probs, y_true)

# backward() fills w.grad and b.grad; w and b are created fresh above, so
# re-running the cell does not accumulate gradients from earlier runs.
loss.backward()

print(loss)

print(w.grad)
print(b.grad)

print(logits)
print(probs)



tensor(0.3485, grad_fn=<MeanBackward0>)
tensor([[ 0.0356, -0.0356],
        [ 0.0069, -0.0069],
        [ 0.1585, -0.1585],
        [ 0.0573, -0.0573]])
tensor([ 0.1570, -0.1570])
tensor([[1.4062, 0.4471],
        [1.4924, 0.4679],
        [0.9863, 0.3508]], grad_fn=<AddBackward0>)
tensor([[0.7229, 0.2771],
        [0.7359, 0.2641],
        [0.6537, 0.3463]], grad_fn=<SoftmaxBackward>)
