In [None]:
#---------------------------
# 每一项导数是如何求的？
# 计算图的DAG遍历顺序？
# 非静态控制流中，计算图是怎么构建的？

# tf1->tf2: https://zhuanlan.zhihu.com/p/74858833
# 四种gradient：https://www.jianshu.com/p/4fe50a98d0c8, https://stackoverflow.com/questions/50098971/whats-the-difference-between-gradienttape-implicit-gradients-gradients-functi

In [None]:
#------- 卷积层准备

In [1]:
import tensorflow as tf

In [2]:
def corr2d(X, K):
    """Compute the 2-D cross-correlation of ``X`` with kernel ``K``.

    The kernel is placed at every position where it fits entirely inside
    ``X`` (no padding, stride 1). Each output entry is the sum of the
    element-wise product of the kernel and the input window beneath it.

    Args:
        X: 2-D input tensor.
        K: 2-D kernel tensor.

    Returns:
        A float32 ``tf.Variable`` of shape
        ``(X.shape[0] - K.shape[0] + 1, X.shape[1] - K.shape[1] + 1)``.
    """
    kernel_rows, kernel_cols = K.shape
    out_rows = X.shape[0] - kernel_rows + 1
    out_cols = X.shape[1] - kernel_cols + 1
    # The result is a Variable so individual entries can be written with
    # sliced `.assign(...)` calls inside the loop.
    Y = tf.Variable(tf.zeros((out_rows, out_cols)))
    for row in range(out_rows):
        for col in range(out_cols):
            window = X[row:row + kernel_rows, col:col + kernel_cols]
            response = tf.reduce_sum(window * K)
            # The product keeps the input dtype, so cast to match Y (float32).
            Y[row, col].assign(tf.cast(response, dtype=tf.float32))
    return Y

In [3]:
# 3x3 input holding 0..8 and a 2x2 kernel holding 0..3, laid out as matrices.
X = tf.constant([
    [0, 1, 2],
    [3, 4, 5],
    [6, 7, 8],
])
K = tf.constant([
    [0, 1],
    [2, 3],
])
# Top-left entry check: 0*0 + 1*1 + 3*2 + 4*3 = 19.
corr2d(X, K)

<tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[19., 25.],
       [37., 43.]], dtype=float32)>

In [None]:
#-------- 简单函数求导 in tf2

In [14]:
# tf.reshape returns a plain Tensor (not the underlying Variable), so the
# tape must be told explicitly to watch it.
x = tf.reshape(tf.Variable(range(4), dtype=tf.float32), (4, 1))
with tf.GradientTape() as t:
    t.watch(x)
    # y = x^T x, a (1, 1) tensor; its gradient with respect to x is 2x.
    y = tf.matmul(tf.transpose(x), x)
# Only the forward pass needs to be recorded, so the gradient is requested
# after the tape context has been closed.
dy_dx = t.gradient(y, [x])
dy_dx

[<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
 array([[0.],
        [2.],
        [4.],
        [6.]], dtype=float32)>]

In [12]:
# Both operands are plain Tensors (the result of tf.reshape), so each one has
# to be watched explicitly for the tape to track it.
x = tf.reshape(tf.Variable(range(4), dtype=tf.float32), (4, 1))
z = tf.reshape(tf.Variable(range(4), dtype=tf.float32), (4, 1))
with tf.GradientTape() as t:
    t.watch(x)
    t.watch(z)
    # y = z^T x, so dy/dx = z and dy/dz = x.
    y = tf.matmul(tf.transpose(z), x)
# Only the forward pass needs to be recorded, so the gradient is requested
# after the tape context has been closed. The result is a list ordered like
# the sources: [dy/dx, dy/dz].
dy_dxz = t.gradient(y, [x, z])

In [13]:
# Display the gradients of y with respect to x and z (in that order).
dy_dxz

[<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
 array([[0.],
        [1.],
        [2.],
        [3.]], dtype=float32)>,
 <tf.Tensor: shape=(4, 1), dtype=float32, numpy=
 array([[0.],
        [1.],
        [2.],
        [3.]], dtype=float32)>]

In [None]:
# https://trickygo.github.io/Dive-into-DL-TensorFlow2.0/#/chapter07_optimization/7.3_minibatch-sgd
# https://stackoverflow.com/questions/37921781/what-does-opt-apply-gradients-do-in-tensorflow

In [22]:
# Toy 1-D regression data: four inputs and their corresponding targets.
train_X = tf.Variable([2.3, 1.4, 5.9, 3.6])
train_Y = tf.Variable([4.8, 3.0, 12.0, 7.4])

# One value per line, inputs first.
print(train_X, train_Y, sep="\n")

<tf.Variable 'Variable:0' shape=(4,) dtype=float32, numpy=array([2.3, 1.4, 5.9, 3.6], dtype=float32)>
<tf.Variable 'Variable:0' shape=(4,) dtype=float32, numpy=array([ 4.8,  3. , 12. ,  7.4], dtype=float32)>


In [24]:
import tensorflow as tf
import numpy as np
# tf.enable_eager_execution()

# W = tf.Variable(np.random.randn())
# b = tf.Variable(np.random.randn())

# Model parameters (slope and intercept), both starting from zero.
W = tf.Variable(initial_value=0.0)
b = tf.Variable(initial_value=0.0)

def linear_regression(inputs):
    """Apply the linear model ``inputs * W + b``.

    ``W`` and ``b`` are the module-level variables, read at call time.

    Args:
        inputs: Value multiplied element-wise by ``W``.

    Returns:
        The model prediction ``inputs * W + b``.
    """
    scaled = inputs * W
    return scaled + b

#  net = tf.keras.Sequential()
#  net.add(tf.keras.layers.Dense(1))

def MSE(model_fn, inputs, labels):
    """Return half the mean squared error of ``model_fn`` on a batch.

    Computes ``sum((model_fn(inputs) - labels) ** 2) / (2 * n)`` where
    ``n = inputs.shape[0]``.

    Args:
        model_fn: Callable mapping ``inputs`` to predictions.
        inputs: Batch of inputs; its first dimension is the sample count.
        labels: Targets subtracted from the predictions.

    Returns:
        The summed squared residuals divided by ``2 * inputs.shape[0]``.
    """
    residuals = model_fn(inputs) - labels
    squared_error_sum = tf.reduce_sum(tf.pow(residuals, 2))
    num_samples = inputs.shape[0]
    return squared_error_sum / (2 * num_samples)

# loss = tf.losses.MeanSquaredError()

# optimizer = tf.optimizers.SGD(learning_rate = 0.001)
# Plain stochastic gradient descent with a fixed step size.
optimizer = tf.optimizers.SGD(learning_rate=0.1)

# Record only the forward pass (the loss evaluation) on the tape.
with tf.GradientTape() as tape:
    loss = MSE(linear_regression, train_X, train_Y)
# Request the gradients after the tape context has been closed; the result
# is a list ordered like the sources: [d(loss)/dW, d(loss)/db].
grads = tape.gradient(loss, [W, b])

In [25]:
# Display the gradients of the loss with respect to W and b (in that order).
grads

[<tf.Tensor: shape=(), dtype=float32, numpy=-28.17>,
 <tf.Tensor: shape=(), dtype=float32, numpy=-6.7999997>]

In [26]:
# Pair each gradient with the variable it belongs to, then take one SGD step.
grads_and_vars = zip(grads, [W, b])
optimizer.apply_gradients(grads_and_vars)

<tf.Variable 'UnreadVariable' shape=() dtype=int64, numpy=1>

In [27]:
# Show the parameters after the update step, one per line.
print(W, b, sep="\n")

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.8170002>
<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.68>


In [None]:
# 1) x 一步到位，只能保证方向性
# 2）x 学不到直接的函数关系？？