学习 [简单粗暴 TensorFlow 2](https://tf.wiki/zh_hans/) (github页 https://github.com/snowkylin/tensorflow-handbook)

In [34]:
import numpy as np
import tensorflow as tf
import datetime
import os 
import sys
import pandas as pd

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
pd.options.display.max_columns = None
pd.options.display.max_colwidth = 80
pd.options.display.precision = 4
pd.options.display.max_rows = 999
pd.options.display.float_format = '{:.4f}'.format  # 防止科学计数法，小数显示4位

# gpu_devices = tf.config.list_physical_devices('GPU') 
# if gpu_devices:
#     for gpu_device in gpu_devices:
#         tf.config.experimental.set_memory_growth(gpu_device, True)

os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [35]:
# 定义一个随机数（标量）
random_float = tf.random.uniform(shape=())

# 定义一个有2个元素的零向量
zero_vector = tf.zeros(shape=(2))

In [36]:
random_float

<tf.Tensor: shape=(), dtype=float32, numpy=0.9093815>

In [37]:
zero_vector

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([0., 0.], dtype=float32)>

In [38]:
# 定义两个2×2的常量矩阵
A = tf.constant([[1., 2.], [3., 4.]])
B = tf.constant([[5., 6.], [7., 8.]])

In [39]:
print(A.shape)
print(A.dtype)
print(A.numpy())

(2, 2)
<dtype: 'float32'>
[[1. 2.]
 [3. 4.]]


In [40]:
C = tf.add(A, B)
D = tf.matmul(A, B)

In [41]:
C
D

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[ 6.,  8.],
       [10., 12.]], dtype=float32)>

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[19., 22.],
       [43., 50.]], dtype=float32)>

In [42]:
x = tf.Variable(initial_value=3.)
with tf.GradientTape() as tape:     # 在 tf.GradientTape() 的上下文内，所有计算步骤都会被记录以用于求导
    y = tf.square(x)
y_grad = tape.gradient(y, x)        # 计算y关于x的导数
print(y, y_grad, sep='\n')

tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(6.0, shape=(), dtype=float32)


In [43]:
X = tf.constant([[1., 2.], [3., 4.]])
y = tf.constant([[1.], [2.]])
w = tf.Variable(initial_value=[[1.], [2.]])
b = tf.Variable(initial_value=1.)
with tf.GradientTape() as tape:
    L = tf.reduce_sum(tf.square(tf.matmul(X, w) + b - y))
w_grad, b_grad = tape.gradient(L, [w, b])        # 计算L(w, b)关于w, b的偏导数
print(L, w_grad, b_grad, sep = '\n')

tf.Tensor(125.0, shape=(), dtype=float32)
tf.Tensor(
[[ 70.]
 [100.]], shape=(2, 1), dtype=float32)
tf.Tensor(30.0, shape=(), dtype=float32)


In [44]:

X_raw = np.array([2013, 2014, 2015, 2016, 2017], dtype=np.float32)
y_raw = np.array([12000, 14000, 15000, 16500, 17500], dtype=np.float32)

X = (X_raw - X_raw.min()) / (X_raw.max() - X_raw.min())
y = (y_raw - y_raw.min()) / (y_raw.max() - y_raw.min())

In [45]:
X
y

array([0.  , 0.25, 0.5 , 0.75, 1.  ], dtype=float32)

array([0.        , 0.36363637, 0.54545456, 0.8181818 , 1.        ],
      dtype=float32)

### 使用Numpy线性回归

In [46]:
a, b = 0, 0

num_epoch = 10000
learning_rate = 5e-4
for e in range(num_epoch):
    # 手动计算损失函数关于自变量（模型参数）的梯度
    y_pred = a * X + b
    grad_a, grad_b = 2 * (y_pred - y).dot(X), 2 * (y_pred - y).sum()

    # 更新参数
    a, b = a - learning_rate * grad_a, b - learning_rate * grad_b

print(a, b)

0.9763702027872221 0.057564988311377796


### 尝试Tensorflow线性回归

In [47]:
X = tf.constant(X)
y = tf.constant(y)

a = tf.Variable(initial_value=0.)
b = tf.Variable(initial_value=0.)
variables = [a, b]

num_epoch = 10000
optimizer = tf.keras.optimizers.SGD(learning_rate=5e-4)
for e in range(num_epoch):
    # 使用tf.GradientTape()记录损失函数的梯度信息
    with tf.GradientTape() as tape:
        y_pred = a * X + b
        loss = tf.reduce_sum(tf.square(y_pred - y))
    # TensorFlow自动计算损失函数关于自变量（模型参数）的梯度
    grads = tape.gradient(loss, variables)
    # TensorFlow自动根据梯度更新参数
    _ = optimizer.apply_gradients(grads_and_vars=zip(grads, variables))

In [48]:
variables

[<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.97637>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.057565063>]

In [49]:
c = 1
d = 1 
alist = [c, d]

In [50]:
alist

[1, 1]

In [51]:
d = 2
alist

[1, 1]

### Model与Layer

In [52]:
import tensorflow as tf

X = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
y = tf.constant([[10.0], [20.0]])


class Linear(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.dense = tf.keras.layers.Dense(
            units=1,
            activation=None,
            kernel_initializer=tf.zeros_initializer(),
            bias_initializer=tf.zeros_initializer()
        )

    def call(self, input):
        output = self.dense(input)
        return output


# 以下代码结构与前节类似
model = Linear()
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
for i in range(10000):
    with tf.GradientTape() as tape:
        y_pred = model(X)      # 调用模型 y_pred = model(X) 而不是显式写出 y_pred = a * X + b
        loss = tf.reduce_mean(tf.square(y_pred - y))
    grads = tape.gradient(loss, model.variables)    # 使用 model.variables 这一属性直接获得模型中的所有变量
    _ = optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
    if i % 1000 == 0:
        print("""%s, %s""" % (i, loss))
        print(model.variables)
print(model.variables)

0, tf.Tensor(250.0, shape=(), dtype=float32)
[<tf.Variable 'linear_1/dense_7/kernel:0' shape=(3, 1) dtype=float32, numpy=
array([[0.9      ],
       [1.1999999],
       [1.5      ]], dtype=float32)>, <tf.Variable 'linear_1/dense_7/bias:0' shape=(1,) dtype=float32, numpy=array([0.29999998], dtype=float32)>]
1000, tf.Tensor(1.8959781e-08, shape=(), dtype=float32)
[<tf.Variable 'linear_1/dense_7/kernel:0' shape=(3, 1) dtype=float32, numpy=
array([[1.4300377e-04],
       [1.1111389e+00],
       [2.2221353e+00]], dtype=float32)>, <tf.Variable 'linear_1/dense_7/bias:0' shape=(1,) dtype=float32, numpy=array([1.1109966], dtype=float32)>]
2000, tf.Tensor(3.092282e-11, shape=(), dtype=float32)
[<tf.Variable 'linear_1/dense_7/kernel:0' shape=(3, 1) dtype=float32, numpy=
array([[4.8735565e-06],
       [1.1111153e+00],
       [2.2222164e+00]], dtype=float32)>, <tf.Variable 'linear_1/dense_7/bias:0' shape=(1,) dtype=float32, numpy=array([1.1111081], dtype=float32)>]
3000, tf.Tensor(3.092282e-11, sha

### 基础示例：多层感知机(MLP)

In [53]:
class MNISTLoader():
    def __init__(self):
        mnist = tf.keras.datasets.mnist
        (self.train_data, self.train_label), (self.test_data, self.test_label) = mnist.load_data()
        # MNIST中的图像默认为uint8（0-255的数字）。以下代码将其归一化到0-1之间的浮点数，并在最后增加一维作为颜色通道
        self.train_data = np.expand_dims(self.train_data.astype(np.float32) / 255.0, axis=-1)      # [60000, 28, 28, 1]
        self.test_data = np.expand_dims(self.test_data.astype(np.float32) / 255.0, axis=-1)        # [10000, 28, 28, 1]
        self.train_label = self.train_label.astype(np.int32)    # [60000]
        self.test_label = self.test_label.astype(np.int32)      # [10000]
        self.num_train_data, self.num_test_data = self.train_data.shape[0], self.test_data.shape[0]

    def get_batch(self, batch_size):
        # 从数据集中随机取出batch_size个元素并返回
        index = np.random.randint(0, self.num_train_data, batch_size)
        return self.train_data[index, :], self.train_label[index]

In [54]:
class MLP(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()    # Flatten层将除第一维（batch_size）以外的维度展平
        self.dense1 = tf.keras.layers.Dense(units=100, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs):         # [batch_size, 28, 28, 1]
        x = self.flatten(inputs)    # [batch_size, 784]
        x = self.dense1(x)          # [batch_size, 100]
        x = self.dense2(x)          # [batch_size, 10]
        output = tf.nn.softmax(x)
        return output

In [55]:
num_epochs = 5
batch_size = 5000
learning_rate = 0.001

In [56]:
model = MLP()
data_loader = MNISTLoader()
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

In [57]:
num_batches = int(data_loader.num_train_data // batch_size * num_epochs)
for batch_index in range(num_batches):
    X, y = data_loader.get_batch(batch_size)
    with tf.GradientTape() as tape:
        y_pred = model(X)
        loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
        loss = tf.reduce_mean(loss)
        print("batch %d: loss %f" % (batch_index, loss.numpy()))
    grads = tape.gradient(loss, model.variables)
    _ = optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))

batch 0: loss 2.329207
batch 1: loss 2.209703
batch 2: loss 2.084824
batch 3: loss 1.975706
batch 4: loss 1.868010
batch 5: loss 1.765895
batch 6: loss 1.672412
batch 7: loss 1.571534
batch 8: loss 1.477891
batch 9: loss 1.412329
batch 10: loss 1.317883
batch 11: loss 1.240054
batch 12: loss 1.160698
batch 13: loss 1.085559
batch 14: loss 1.037410
batch 15: loss 0.981626
batch 16: loss 0.933554
batch 17: loss 0.878644
batch 18: loss 0.837373
batch 19: loss 0.795795
batch 20: loss 0.743644
batch 21: loss 0.742164
batch 22: loss 0.691802
batch 23: loss 0.657179
batch 24: loss 0.650451
batch 25: loss 0.614053
batch 26: loss 0.605872
batch 27: loss 0.566973
batch 28: loss 0.592695
batch 29: loss 0.542336
batch 30: loss 0.536893
batch 31: loss 0.505649
batch 32: loss 0.519813
batch 33: loss 0.493039
batch 34: loss 0.483567
batch 35: loss 0.471591
batch 36: loss 0.451677
batch 37: loss 0.469772
batch 38: loss 0.460688
batch 39: loss 0.446727
batch 40: loss 0.445746
batch 41: loss 0.422504
ba

In [58]:
num_epochs = 5
batch_size = 5000
learning_rate = 0.001

In [59]:
model = MLP()
data_loader = MNISTLoader()
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

In [60]:
num_batches = int(data_loader.num_train_data // batch_size * num_epochs)
for batch_index in range(num_batches):
    X, y = data_loader.get_batch(batch_size)
    with tf.GradientTape() as tape:
        y_pred = model(X)
        loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
        loss = tf.reduce_mean(loss)
        print("batch %d: loss %f" % (batch_index, loss.numpy()))
    grads = tape.gradient(loss, model.variables)
    _ = optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))

batch 0: loss 2.355918
batch 1: loss 2.229244
batch 2: loss 2.113219
batch 3: loss 2.002385
batch 4: loss 1.904586
batch 5: loss 1.803965
batch 6: loss 1.710389
batch 7: loss 1.623107
batch 8: loss 1.532561
batch 9: loss 1.451877
batch 10: loss 1.361400
batch 11: loss 1.287200
batch 12: loss 1.202418
batch 13: loss 1.146919
batch 14: loss 1.081165
batch 15: loss 1.003534
batch 16: loss 0.963668
batch 17: loss 0.899036
batch 18: loss 0.875277
batch 19: loss 0.814417
batch 20: loss 0.791860
batch 21: loss 0.759517
batch 22: loss 0.715120
batch 23: loss 0.701559
batch 24: loss 0.683164
batch 25: loss 0.651570
batch 26: loss 0.619663
batch 27: loss 0.587273
batch 28: loss 0.593463
batch 29: loss 0.556539
batch 30: loss 0.553421
batch 31: loss 0.524289
batch 32: loss 0.523962
batch 33: loss 0.489198
batch 34: loss 0.509384
batch 35: loss 0.504030
batch 36: loss 0.487321
batch 37: loss 0.465456
batch 38: loss 0.481739
batch 39: loss 0.461579
batch 40: loss 0.455148
batch 41: loss 0.438359
ba

In [61]:
sparse_categorical_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
num_batches = int(data_loader.num_test_data // batch_size)
for batch_index in range(num_batches):
    start_index, end_index = batch_index * batch_size, (batch_index + 1) * batch_size
    y_pred = model.predict(data_loader.test_data[start_index: end_index])
    sparse_categorical_accuracy.update_state(y_true=data_loader.test_label[start_index: end_index], y_pred=y_pred)
print("test accuracy: %f" % sparse_categorical_accuracy.result())

<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=5000.0>

<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=10000.0>

test accuracy: 0.905800


### 卷积神经网络

#### 使用Keras实现卷积神经网络  

In [62]:
class CNN(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv2D(
            filters=32,             # 卷积层神经元（卷积核）数目
            kernel_size=[5, 5],     # 感受野大小
            padding='same',         # padding策略（vaild 或 same）
            activation=tf.nn.relu   # 激活函数
        )
        self.pool1 = tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2)
        self.conv2 = tf.keras.layers.Conv2D(
            filters=64,
            kernel_size=[5, 5],
            padding='same',
            activation=tf.nn.relu
        )
        self.pool2 = tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2)
        self.flatten = tf.keras.layers.Reshape(target_shape=(7 * 7 * 64,))
        self.dense1 = tf.keras.layers.Dense(units=1024, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs):
        x = self.conv1(inputs)                  # [batch_size, 28, 28, 32]
        x = self.pool1(x)                       # [batch_size, 14, 14, 32]
        x = self.conv2(x)                       # [batch_size, 14, 14, 64]
        x = self.pool2(x)                       # [batch_size, 7, 7, 64]
        x = self.flatten(x)                     # [batch_size, 7 * 7 * 64]
        x = self.dense1(x)                      # [batch_size, 1024]
        x = self.dense2(x)                      # [batch_size, 10]
        output = tf.nn.softmax(x)
        return output

In [63]:
num_epochs = 5
batch_size = 5000
learning_rate = 0.001

In [64]:
model = CNN()
data_loader = MNISTLoader()
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

In [65]:
num_batches = int(data_loader.num_train_data // batch_size * num_epochs)
for batch_index in range(num_batches):
    X, y = data_loader.get_batch(batch_size)
    with tf.GradientTape() as tape:
        y_pred = model(X)
        loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
        loss = tf.reduce_mean(loss)
        print("batch %d: loss %f" % (batch_index, loss.numpy()))
    grads = tape.gradient(loss, model.variables)
    _ = optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))

ResourceExhaustedError: OOM when allocating tensor with shape[5000,28,28,32] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:Conv2D]

In [None]:
sparse_categorical_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
num_batches = int(data_loader.num_test_data // batch_size)
for batch_index in range(num_batches):
    start_index, end_index = batch_index * batch_size, (batch_index + 1) * batch_size
    y_pred = model.predict(data_loader.test_data[start_index: end_index])
    sparse_categorical_accuracy.update_state(y_true=data_loader.test_label[start_index: end_index], y_pred=y_pred)
print("test accuracy: %f" % sparse_categorical_accuracy.result())

#### 使用Keras中预定义的经典卷积神经网络结构

In [None]:
model = tf.keras.applications.MobileNetV2()

In [None]:
tf.keras.backend.set_learning_phase(True)

In [None]:

import tensorflow_datasets as tfds

num_epoch = 5
batch_size = 50
learning_rate = 0.001

dataset = tfds.load("tf_flowers", split=tfds.Split.TRAIN, as_supervised=True)
dataset = dataset.map(lambda img, label: (tf.image.resize(img, (224, 224)) / 255.0, label)).shuffle(1024).batch(batch_size)
model = tf.keras.applications.MobileNetV2(weights=None, classes=5)
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
for e in range(num_epoch):
    for images, labels in dataset:
        with tf.GradientTape() as tape:
            labels_pred = model(images, training=True)
            loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=labels, y_pred=labels_pred)
            loss = tf.reduce_mean(loss)
            print("loss %f" % loss.numpy())
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(grads_and_vars=zip(grads, model.trainable_variables))
    print(labels_pred)