In [1]:
import tensorflow as tf

In [2]:
v = tf.Variable(0.0)

In [3]:
v

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.0>

In [4]:
v + 1

<tf.Tensor: id=13, shape=(), dtype=float32, numpy=1.0>

In [5]:
# change the value of the variable
v.assign(5)

<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=5.0>

In [6]:
v.assign_add(1)

<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=6.0>

In [7]:
# read the value of a variable
v.read_value()

<tf.Tensor: id=22, shape=(), dtype=float32, numpy=6.0>

In [8]:
w = tf.Variable([[1.0]])
# record the progress using GradientTape
with tf.GradientTape() as t:
    loss = w * w

In [9]:
grad = t.gradient(loss, w)

In [10]:
grad

<tf.Tensor: id=39, shape=(1, 1), dtype=float32, numpy=array([[2.]], dtype=float32)>

In [11]:
w = tf.constant(3.0)
with tf.GradientTape() as t:
    t.watch(w)
    loss = w * w
dw = t.gradient(loss, w)

In [12]:
dw

<tf.Tensor: id=46, shape=(), dtype=float32, numpy=6.0>

In [13]:
# set persistent to make it pesistent
w = tf.constant(3.0)
with tf.GradientTape(persistent=True) as t:
    t.watch(w)
    y = w * w
    z = y * y 
dy_dw = t.gradient(y, w)
dz_dw = t.gradient(z, w) 

In [14]:
dz_dw

<tf.Tensor: id=61, shape=(), dtype=float32, numpy=108.0>

In [15]:
# customized training

In [16]:
(train_img, train_label), _ = tf.keras.datasets.mnist.load_data()

In [17]:
train_label.shape

(60000,)

In [18]:
train_img = tf.expand_dims(train_img, -1)
train_img = tf.cast(train_img / 255, tf.float32)
train_label = tf.cast(train_label, tf.int64)

In [19]:
train_label.shape

TensorShape([60000])

In [20]:
dataset = tf.data.Dataset.from_tensor_slices((train_img, train_label))

In [21]:
dataset

<TensorSliceDataset shapes: ((28, 28, 1), ()), types: (tf.float32, tf.int64)>

In [22]:
dataset = dataset.shuffle(10000).batch(32)

In [23]:
dataset

<BatchDataset shapes: ((None, 28, 28, 1), (None,)), types: (tf.float32, tf.int64)>

In [24]:
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(16, [3, 3], input_shape=(None, None, 1), activation='relu'), 
    tf.keras.layers.Conv2D(32, [3, 3], activation='relu'),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(10)
])

In [25]:
optimizer = tf.keras.optimizers.Adam()
loss_func = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [26]:
feature, label = next(iter(dataset))

In [27]:
feature.shape

TensorShape([32, 28, 28, 1])

In [28]:
prediction = model(feature)

In [29]:
prediction.shape

TensorShape([32, 10])

In [30]:
# this is before training
tf.argmax(prediction, axis=1)

<tf.Tensor: id=203, shape=(32,), dtype=int64, numpy=
array([9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 3, 9,
       9, 9, 9, 9, 3, 9, 9, 9, 9, 9], dtype=int64)>

In [31]:
label

<tf.Tensor: id=185, shape=(32,), dtype=int64, numpy=
array([4, 5, 2, 4, 9, 4, 9, 9, 8, 9, 9, 8, 8, 2, 8, 9, 7, 9, 7, 8, 8, 1,
       7, 4, 1, 1, 0, 9, 7, 9, 1, 7], dtype=int64)>

In [32]:
def loss(model, x, y):
    y_ = model(x)
    return loss_func(y, y_)

In [33]:
# customize training
def train_step(model, images, labels):
    with tf.GradientTape() as t:
        loss = model(images, labels)
    grads = t.gradient(loss, model.trainable_variables) 
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

In [34]:
def train():
    for epoch in range(3):
        for batch, (images, labels) in enumerate(dataset):
            train_step(model, images, labels)
        print("epoch {} with batch size of {} is completed. ".format(epoch, batch))

In [39]:
train()

epoch 0 with batch size of 1874 is completed. 
epoch 1 with batch size of 1874 is completed. 
epoch 2 with batch size of 1874 is completed. 
