学习 [简单粗暴 TensorFlow 2](https://tf.wiki/zh_hans/) (github页 https://github.com/snowkylin/tensorflow-handbook)

In [1]:
import numpy as np
import tensorflow as tf
import datetime
import os 
import sys
import pandas as pd

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
pd.options.display.max_columns = None
pd.options.display.max_colwidth = 80
pd.options.display.precision = 4
pd.options.display.max_rows = 999
pd.options.display.float_format = '{:.4f}'.format  # 防止科学计数法，小数显示4位

# gpu_devices = tf.config.list_physical_devices('GPU') 
# if gpu_devices:
#     for gpu_device in gpu_devices:
#         tf.config.experimental.set_memory_growth(gpu_device, True)

os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [2]:
# 定义一个随机数（标量）
random_float = tf.random.uniform(shape=())

# 定义一个有2个元素的零向量
zero_vector = tf.zeros(shape=(2))

In [3]:
random_float

<tf.Tensor: shape=(), dtype=float32, numpy=0.0038645267>

In [4]:
zero_vector

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([0., 0.], dtype=float32)>

In [5]:
# 定义两个2×2的常量矩阵
A = tf.constant([[1., 2.], [3., 4.]])
B = tf.constant([[5., 6.], [7., 8.]])

In [6]:
print(A.shape)
print(A.dtype)
print(A.numpy())

(2, 2)
<dtype: 'float32'>
[[1. 2.]
 [3. 4.]]


In [7]:
C = tf.add(A, B)
D = tf.matmul(A, B)

In [8]:
C
D

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[ 6.,  8.],
       [10., 12.]], dtype=float32)>

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[19., 22.],
       [43., 50.]], dtype=float32)>

In [9]:
x = tf.Variable(initial_value=3.)
with tf.GradientTape() as tape:     # 在 tf.GradientTape() 的上下文内，所有计算步骤都会被记录以用于求导
    y = tf.square(x)
y_grad = tape.gradient(y, x)        # 计算y关于x的导数
print(y, y_grad, sep='\n')

tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(6.0, shape=(), dtype=float32)


In [10]:
X = tf.constant([[1., 2.], [3., 4.]])
y = tf.constant([[1.], [2.]])
w = tf.Variable(initial_value=[[1.], [2.]])
b = tf.Variable(initial_value=1.)
with tf.GradientTape() as tape:
    L = tf.reduce_sum(tf.square(tf.matmul(X, w) + b - y))
w_grad, b_grad = tape.gradient(L, [w, b])        # 计算L(w, b)关于w, b的偏导数
print(L, w_grad, b_grad, sep = '\n')

tf.Tensor(125.0, shape=(), dtype=float32)
tf.Tensor(
[[ 70.]
 [100.]], shape=(2, 1), dtype=float32)
tf.Tensor(30.0, shape=(), dtype=float32)


In [11]:

X_raw = np.array([2013, 2014, 2015, 2016, 2017], dtype=np.float32)
y_raw = np.array([12000, 14000, 15000, 16500, 17500], dtype=np.float32)

X = (X_raw - X_raw.min()) / (X_raw.max() - X_raw.min())
y = (y_raw - y_raw.min()) / (y_raw.max() - y_raw.min())

In [12]:
X
y

array([0.  , 0.25, 0.5 , 0.75, 1.  ], dtype=float32)

array([0.        , 0.36363637, 0.54545456, 0.8181818 , 1.        ],
      dtype=float32)

### 使用Numpy线性回归

In [13]:
a, b = 0, 0

num_epoch = 10000
learning_rate = 5e-4
for e in range(num_epoch):
    # 手动计算损失函数关于自变量（模型参数）的梯度
    y_pred = a * X + b
    grad_a, grad_b = 2 * (y_pred - y).dot(X), 2 * (y_pred - y).sum()

    # 更新参数
    a, b = a - learning_rate * grad_a, b - learning_rate * grad_b

print(a, b)

0.9763702027872221 0.057564988311377796


### 尝试Tensorflow线性回归

In [14]:
X = tf.constant(X)
y = tf.constant(y)

a = tf.Variable(initial_value=0.)
b = tf.Variable(initial_value=0.)
variables = [a, b]

num_epoch = 10000
optimizer = tf.keras.optimizers.SGD(learning_rate=5e-4)
for e in range(num_epoch):
    # 使用tf.GradientTape()记录损失函数的梯度信息
    with tf.GradientTape() as tape:
        y_pred = a * X + b
        loss = tf.reduce_sum(tf.square(y_pred - y))
    # TensorFlow自动计算损失函数关于自变量（模型参数）的梯度
    grads = tape.gradient(loss, variables)
    # TensorFlow自动根据梯度更新参数
    _ = optimizer.apply_gradients(grads_and_vars=zip(grads, variables))

In [15]:
variables

[<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.97637>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.057565063>]

In [16]:
c = 1
d = 1 
alist = [c, d]

In [17]:
alist

[1, 1]

In [18]:
d = 2
alist

[1, 1]

### Model与Layer

In [19]:
import tensorflow as tf

X = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
y = tf.constant([[10.0], [20.0]])


class Linear(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.dense = tf.keras.layers.Dense(
            units=1,
            activation=None,
            kernel_initializer=tf.zeros_initializer(),
            bias_initializer=tf.zeros_initializer()
        )

    def call(self, input):
        output = self.dense(input)
        return output


# 以下代码结构与前节类似
model = Linear()
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
for i in range(10000):
    with tf.GradientTape() as tape:
        y_pred = model(X)      # 调用模型 y_pred = model(X) 而不是显式写出 y_pred = a * X + b
        loss = tf.reduce_mean(tf.square(y_pred - y))
    grads = tape.gradient(loss, model.variables)    # 使用 model.variables 这一属性直接获得模型中的所有变量
    _ = optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
    if i % 1000 == 0:
        print("""%s, %s""" % (i, loss))
        print(model.variables)
print(model.variables)

0, tf.Tensor(250.0, shape=(), dtype=float32)
[<tf.Variable 'linear/dense/kernel:0' shape=(3, 1) dtype=float32, numpy=
array([[0.9      ],
       [1.1999999],
       [1.5      ]], dtype=float32)>, <tf.Variable 'linear/dense/bias:0' shape=(1,) dtype=float32, numpy=array([0.29999998], dtype=float32)>]
1000, tf.Tensor(1.8959781e-08, shape=(), dtype=float32)
[<tf.Variable 'linear/dense/kernel:0' shape=(3, 1) dtype=float32, numpy=
array([[1.4300377e-04],
       [1.1111389e+00],
       [2.2221353e+00]], dtype=float32)>, <tf.Variable 'linear/dense/bias:0' shape=(1,) dtype=float32, numpy=array([1.1109966], dtype=float32)>]
2000, tf.Tensor(3.092282e-11, shape=(), dtype=float32)
[<tf.Variable 'linear/dense/kernel:0' shape=(3, 1) dtype=float32, numpy=
array([[4.8735565e-06],
       [1.1111153e+00],
       [2.2222164e+00]], dtype=float32)>, <tf.Variable 'linear/dense/bias:0' shape=(1,) dtype=float32, numpy=array([1.1111081], dtype=float32)>]
3000, tf.Tensor(3.092282e-11, shape=(), dtype=float32)
[<

### 基础示例：多层感知机(MLP)

In [20]:
class MNISTLoader():
    def __init__(self):
        mnist = tf.keras.datasets.mnist
        (self.train_data, self.train_label), (self.test_data, self.test_label) = mnist.load_data()
        # MNIST中的图像默认为uint8（0-255的数字）。以下代码将其归一化到0-1之间的浮点数，并在最后增加一维作为颜色通道
        self.train_data = np.expand_dims(self.train_data.astype(np.float32) / 255.0, axis=-1)      # [60000, 28, 28, 1]
        self.test_data = np.expand_dims(self.test_data.astype(np.float32) / 255.0, axis=-1)        # [10000, 28, 28, 1]
        self.train_label = self.train_label.astype(np.int32)    # [60000]
        self.test_label = self.test_label.astype(np.int32)      # [10000]
        self.num_train_data, self.num_test_data = self.train_data.shape[0], self.test_data.shape[0]

    def get_batch(self, batch_size):
        # 从数据集中随机取出batch_size个元素并返回
        index = np.random.randint(0, self.num_train_data, batch_size)
        return self.train_data[index, :], self.train_label[index]

In [21]:
class MLP(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()    # Flatten层将除第一维（batch_size）以外的维度展平
        self.dense1 = tf.keras.layers.Dense(units=100, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs):         # [batch_size, 28, 28, 1]
        x = self.flatten(inputs)    # [batch_size, 784]
        x = self.dense1(x)          # [batch_size, 100]
        x = self.dense2(x)          # [batch_size, 10]
        output = tf.nn.softmax(x)
        return output

In [22]:
num_epochs = 5
batch_size = 5000
learning_rate = 0.001

In [23]:
model = MLP()
data_loader = MNISTLoader()
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

In [24]:
num_batches = int(data_loader.num_train_data // batch_size * num_epochs)
for batch_index in range(num_batches):
    X, y = data_loader.get_batch(batch_size)
    with tf.GradientTape() as tape:
        y_pred = model(X)
        loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
        loss = tf.reduce_mean(loss)
        print("batch %d: loss %f" % (batch_index, loss.numpy()))
    grads = tape.gradient(loss, model.variables)
    _ = optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))

batch 0: loss 2.371371
batch 1: loss 2.239149
batch 2: loss 2.126992
batch 3: loss 2.013624
batch 4: loss 1.921539
batch 5: loss 1.823450
batch 6: loss 1.731531
batch 7: loss 1.642982
batch 8: loss 1.546291
batch 9: loss 1.482331
batch 10: loss 1.386470
batch 11: loss 1.329714
batch 12: loss 1.246488
batch 13: loss 1.197361
batch 14: loss 1.122247
batch 15: loss 1.063440
batch 16: loss 0.999299
batch 17: loss 0.940590
batch 18: loss 0.892957
batch 19: loss 0.847923
batch 20: loss 0.786904
batch 21: loss 0.773646
batch 22: loss 0.748165
batch 23: loss 0.694927
batch 24: loss 0.673480
batch 25: loss 0.643737
batch 26: loss 0.618235
batch 27: loss 0.593064
batch 28: loss 0.578121
batch 29: loss 0.576151
batch 30: loss 0.565602
batch 31: loss 0.533897
batch 32: loss 0.528404
batch 33: loss 0.497020
batch 34: loss 0.515296
batch 35: loss 0.504322
batch 36: loss 0.471791
batch 37: loss 0.476796
batch 38: loss 0.453334
batch 39: loss 0.471725
batch 40: loss 0.440841
batch 41: loss 0.441390
ba

In [25]:
num_epochs = 5
batch_size = 5000
learning_rate = 0.001

In [26]:
model = MLP()
data_loader = MNISTLoader()
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

In [27]:
num_batches = int(data_loader.num_train_data // batch_size * num_epochs)
for batch_index in range(num_batches):
    X, y = data_loader.get_batch(batch_size)
    with tf.GradientTape() as tape:
        y_pred = model(X)
        loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
        loss = tf.reduce_mean(loss)
        print("batch %d: loss %f" % (batch_index, loss.numpy()))
    grads = tape.gradient(loss, model.variables)
    _ = optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))

batch 0: loss 2.308586
batch 1: loss 2.165733
batch 2: loss 2.054898
batch 3: loss 1.946550
batch 4: loss 1.846834
batch 5: loss 1.754010
batch 6: loss 1.665236
batch 7: loss 1.558420
batch 8: loss 1.479116
batch 9: loss 1.397051
batch 10: loss 1.320507
batch 11: loss 1.230818
batch 12: loss 1.167060
batch 13: loss 1.080948
batch 14: loss 1.031197
batch 15: loss 0.990011
batch 16: loss 0.944606
batch 17: loss 0.877118
batch 18: loss 0.818900
batch 19: loss 0.798874
batch 20: loss 0.739151
batch 21: loss 0.721938
batch 22: loss 0.703488
batch 23: loss 0.672889
batch 24: loss 0.649885
batch 25: loss 0.625202
batch 26: loss 0.601108
batch 27: loss 0.601640
batch 28: loss 0.569000
batch 29: loss 0.559953
batch 30: loss 0.557702
batch 31: loss 0.519960
batch 32: loss 0.505457
batch 33: loss 0.497832
batch 34: loss 0.478130
batch 35: loss 0.482033
batch 36: loss 0.480359
batch 37: loss 0.458188
batch 38: loss 0.440691
batch 39: loss 0.440099
batch 40: loss 0.425644
batch 41: loss 0.440526
ba

In [28]:
sparse_categorical_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
num_batches = int(data_loader.num_test_data // batch_size)
for batch_index in range(num_batches):
    start_index, end_index = batch_index * batch_size, (batch_index + 1) * batch_size
    y_pred = model.predict(data_loader.test_data[start_index: end_index])
    sparse_categorical_accuracy.update_state(y_true=data_loader.test_label[start_index: end_index], y_pred=y_pred)
print("test accuracy: %f" % sparse_categorical_accuracy.result())

<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=5000.0>

<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=10000.0>

test accuracy: 0.911100


### 卷积神经网络

#### 使用Keras实现卷积神经网络  

In [29]:
class CNN(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv2D(
            filters=32,             # 卷积层神经元（卷积核）数目
            kernel_size=[5, 5],     # 感受野大小
            padding='same',         # padding策略（vaild 或 same）
            activation=tf.nn.relu   # 激活函数
        )
        self.pool1 = tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2)
        self.conv2 = tf.keras.layers.Conv2D(
            filters=64,
            kernel_size=[5, 5],
            padding='same',
            activation=tf.nn.relu
        )
        self.pool2 = tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2)
        self.flatten = tf.keras.layers.Reshape(target_shape=(7 * 7 * 64,))
        self.dense1 = tf.keras.layers.Dense(units=1024, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs):
        x = self.conv1(inputs)                  # [batch_size, 28, 28, 32]
        x = self.pool1(x)                       # [batch_size, 14, 14, 32]
        x = self.conv2(x)                       # [batch_size, 14, 14, 64]
        x = self.pool2(x)                       # [batch_size, 7, 7, 64]
        x = self.flatten(x)                     # [batch_size, 7 * 7 * 64]
        x = self.dense1(x)                      # [batch_size, 1024]
        x = self.dense2(x)                      # [batch_size, 10]
        output = tf.nn.softmax(x)
        return output

In [30]:
num_epochs = 5
batch_size = 5000
learning_rate = 0.001

In [31]:
model = CNN()
data_loader = MNISTLoader()
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

In [32]:
num_batches = int(data_loader.num_train_data // batch_size * num_epochs)
for batch_index in range(num_batches):
    X, y = data_loader.get_batch(batch_size)
    with tf.GradientTape() as tape:
        y_pred = model(X)
        loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
        loss = tf.reduce_mean(loss)
        print("batch %d: loss %f" % (batch_index, loss.numpy()))
    grads = tape.gradient(loss, model.variables)
    _ = optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))

batch 0: loss 2.298971
batch 1: loss 2.056868
batch 2: loss 1.746237
batch 3: loss 1.402286
batch 4: loss 1.067504
batch 5: loss 0.837676
batch 6: loss 0.769545
batch 7: loss 0.755532
batch 8: loss 0.576172
batch 9: loss 0.634142
batch 10: loss 0.500004
batch 11: loss 0.560528
batch 12: loss 0.439404
batch 13: loss 0.402138
batch 14: loss 0.437225
batch 15: loss 0.390562
batch 16: loss 0.360635
batch 17: loss 0.352598
batch 18: loss 0.361211
batch 19: loss 0.289392
batch 20: loss 0.281713
batch 21: loss 0.272801
batch 22: loss 0.274740
batch 23: loss 0.268170
batch 24: loss 0.252186
batch 25: loss 0.246066
batch 26: loss 0.220906
batch 27: loss 0.226074
batch 28: loss 0.232279
batch 29: loss 0.203930
batch 30: loss 0.193038
batch 31: loss 0.181264
batch 32: loss 0.194855
batch 33: loss 0.194136
batch 34: loss 0.168026
batch 35: loss 0.158129
batch 36: loss 0.164509
batch 37: loss 0.165041
batch 38: loss 0.148151
batch 39: loss 0.153146
batch 40: loss 0.164807
batch 41: loss 0.157485
ba

In [33]:
sparse_categorical_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
num_batches = int(data_loader.num_test_data // batch_size)
for batch_index in range(num_batches):
    start_index, end_index = batch_index * batch_size, (batch_index + 1) * batch_size
    y_pred = model.predict(data_loader.test_data[start_index: end_index])
    sparse_categorical_accuracy.update_state(y_true=data_loader.test_label[start_index: end_index], y_pred=y_pred)
print("test accuracy: %f" % sparse_categorical_accuracy.result())

<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=5000.0>

<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=10000.0>

test accuracy: 0.974400
