学习 [简单粗暴 TensorFlow 2](https://tf.wiki/zh_hans/) (github页 https://github.com/snowkylin/tensorflow-handbook)

In [1]:
import numpy as np
import tensorflow as tf
import datetime
import os 
import sys
import pandas as pd

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
pd.options.display.max_columns = None
pd.options.display.max_colwidth = 80
pd.options.display.precision = 4
pd.options.display.max_rows = 999
pd.options.display.float_format = '{:.4f}'.format  # 防止科学计数法，小数显示4位

gpu_devices = tf.config.list_physical_devices('GPU') 
if gpu_devices:
    for gpu_device in gpu_devices:
        tf.config.experimental.set_memory_growth(gpu_device, True)

# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [2]:
# 定义一个随机数（标量）
random_float = tf.random.uniform(shape=())

# 定义一个有2个元素的零向量
zero_vector = tf.zeros(shape=(2))

In [3]:
random_float

<tf.Tensor: shape=(), dtype=float32, numpy=0.2507789>

In [4]:
zero_vector

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([0., 0.], dtype=float32)>

In [5]:
# 定义两个2×2的常量矩阵
A = tf.constant([[1., 2.], [3., 4.]])
B = tf.constant([[5., 6.], [7., 8.]])

In [6]:
print(A.shape)
print(A.dtype)
print(A.numpy())

(2, 2)
<dtype: 'float32'>
[[1. 2.]
 [3. 4.]]


In [7]:
C = tf.add(A, B)
D = tf.matmul(A, B)

In [8]:
C
D

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[ 6.,  8.],
       [10., 12.]], dtype=float32)>

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[19., 22.],
       [43., 50.]], dtype=float32)>

In [9]:
x = tf.Variable(initial_value=3.)
with tf.GradientTape() as tape:     # 在 tf.GradientTape() 的上下文内，所有计算步骤都会被记录以用于求导
    y = tf.square(x)
y_grad = tape.gradient(y, x)        # 计算y关于x的导数
print(y, y_grad, sep='\n')

tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(6.0, shape=(), dtype=float32)


In [10]:
X = tf.constant([[1., 2.], [3., 4.]])
y = tf.constant([[1.], [2.]])
w = tf.Variable(initial_value=[[1.], [2.]])
b = tf.Variable(initial_value=1.)
with tf.GradientTape() as tape:
    L = tf.reduce_sum(tf.square(tf.matmul(X, w) + b - y))
w_grad, b_grad = tape.gradient(L, [w, b])        # 计算L(w, b)关于w, b的偏导数
print(L, w_grad, b_grad, sep = '\n')

tf.Tensor(125.0, shape=(), dtype=float32)
tf.Tensor(
[[ 70.]
 [100.]], shape=(2, 1), dtype=float32)
tf.Tensor(30.0, shape=(), dtype=float32)


In [11]:

X_raw = np.array([2013, 2014, 2015, 2016, 2017], dtype=np.float32)
y_raw = np.array([12000, 14000, 15000, 16500, 17500], dtype=np.float32)

X = (X_raw - X_raw.min()) / (X_raw.max() - X_raw.min())
y = (y_raw - y_raw.min()) / (y_raw.max() - y_raw.min())

In [12]:
X
y

array([0.  , 0.25, 0.5 , 0.75, 1.  ], dtype=float32)

array([0.        , 0.36363637, 0.54545456, 0.8181818 , 1.        ],
      dtype=float32)

### 使用Numpy线性回归

In [13]:
a, b = 0, 0

num_epoch = 10000
learning_rate = 5e-4
for e in range(num_epoch):
    # 手动计算损失函数关于自变量（模型参数）的梯度
    y_pred = a * X + b
    grad_a, grad_b = 2 * (y_pred - y).dot(X), 2 * (y_pred - y).sum()

    # 更新参数
    a, b = a - learning_rate * grad_a, b - learning_rate * grad_b

print(a, b)

0.9763702027872221 0.057564988311377796


### 尝试Tensorflow线性回归

In [14]:
X = tf.constant(X)
y = tf.constant(y)

a = tf.Variable(initial_value=0.)
b = tf.Variable(initial_value=0.)
variables = [a, b]

num_epoch = 10000
optimizer = tf.keras.optimizers.SGD(learning_rate=5e-4)
for e in range(num_epoch):
    # 使用tf.GradientTape()记录损失函数的梯度信息
    with tf.GradientTape() as tape:
        y_pred = a * X + b
        loss = tf.reduce_sum(tf.square(y_pred - y))
    # TensorFlow自动计算损失函数关于自变量（模型参数）的梯度
    grads = tape.gradient(loss, variables)
    # TensorFlow自动根据梯度更新参数
    _ = optimizer.apply_gradients(grads_and_vars=zip(grads, variables))

In [15]:
variables

[<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.97637>,
 <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=0.057565063>]

In [16]:
c = 1
d = 1 
alist = [c, d]

In [17]:
alist

[1, 1]

In [18]:
d = 2
alist

[1, 1]

### Model与Layer

In [19]:
import tensorflow as tf

X = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
y = tf.constant([[10.0], [20.0]])


class Linear(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.dense = tf.keras.layers.Dense(
            units=1,
            activation=None,
            kernel_initializer=tf.zeros_initializer(),
            bias_initializer=tf.zeros_initializer()
        )

    def call(self, input):
        output = self.dense(input)
        return output


# 以下代码结构与前节类似
model = Linear()
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
for i in range(10000):
    with tf.GradientTape() as tape:
        y_pred = model(X)      # 调用模型 y_pred = model(X) 而不是显式写出 y_pred = a * X + b
        loss = tf.reduce_mean(tf.square(y_pred - y))
    grads = tape.gradient(loss, model.variables)    # 使用 model.variables 这一属性直接获得模型中的所有变量
    _ = optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))
    if i % 1000 == 0:
        print("""%s, %s""" % (i, loss))
        print(model.variables)
print(model.variables)

0, tf.Tensor(250.0, shape=(), dtype=float32)
[<tf.Variable 'linear/dense/kernel:0' shape=(3, 1) dtype=float32, numpy=
array([[0.9      ],
       [1.1999999],
       [1.5      ]], dtype=float32)>, <tf.Variable 'linear/dense/bias:0' shape=(1,) dtype=float32, numpy=array([0.29999998], dtype=float32)>]
1000, tf.Tensor(1.8959781e-08, shape=(), dtype=float32)
[<tf.Variable 'linear/dense/kernel:0' shape=(3, 1) dtype=float32, numpy=
array([[1.4300377e-04],
       [1.1111389e+00],
       [2.2221353e+00]], dtype=float32)>, <tf.Variable 'linear/dense/bias:0' shape=(1,) dtype=float32, numpy=array([1.1109966], dtype=float32)>]
2000, tf.Tensor(3.092282e-11, shape=(), dtype=float32)
[<tf.Variable 'linear/dense/kernel:0' shape=(3, 1) dtype=float32, numpy=
array([[4.8735565e-06],
       [1.1111153e+00],
       [2.2222164e+00]], dtype=float32)>, <tf.Variable 'linear/dense/bias:0' shape=(1,) dtype=float32, numpy=array([1.1111081], dtype=float32)>]
3000, tf.Tensor(3.092282e-11, shape=(), dtype=float32)
[<

### 基础示例：多层感知机(MLP)

In [20]:
class MNISTLoader():
    def __init__(self):
        mnist = tf.keras.datasets.mnist
        (self.train_data, self.train_label), (self.test_data, self.test_label) = mnist.load_data()
        # MNIST中的图像默认为uint8（0-255的数字）。以下代码将其归一化到0-1之间的浮点数，并在最后增加一维作为颜色通道
        self.train_data = np.expand_dims(self.train_data.astype(np.float32) / 255.0, axis=-1)      # [60000, 28, 28, 1]
        self.test_data = np.expand_dims(self.test_data.astype(np.float32) / 255.0, axis=-1)        # [10000, 28, 28, 1]
        self.train_label = self.train_label.astype(np.int32)    # [60000]
        self.test_label = self.test_label.astype(np.int32)      # [10000]
        self.num_train_data, self.num_test_data = self.train_data.shape[0], self.test_data.shape[0]

    def get_batch(self, batch_size):
        # 从数据集中随机取出batch_size个元素并返回
        index = np.random.randint(0, self.num_train_data, batch_size)
        return self.train_data[index, :], self.train_label[index]

In [21]:
class MLP(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()    # Flatten层将除第一维（batch_size）以外的维度展平
        self.dense1 = tf.keras.layers.Dense(units=100, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs):         # [batch_size, 28, 28, 1]
        x = self.flatten(inputs)    # [batch_size, 784]
        x = self.dense1(x)          # [batch_size, 100]
        x = self.dense2(x)          # [batch_size, 10]
        output = tf.nn.softmax(x)
        return output

In [22]:
num_epochs = 5
batch_size = 5000
learning_rate = 0.001

In [23]:
model = MLP()
data_loader = MNISTLoader()
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

In [24]:
num_batches = int(data_loader.num_train_data // batch_size * num_epochs)
for batch_index in range(num_batches):
    X, y = data_loader.get_batch(batch_size)
    with tf.GradientTape() as tape:
        y_pred = model(X)
        loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
        loss = tf.reduce_mean(loss)
        print("batch %d: loss %f" % (batch_index, loss.numpy()))
    grads = tape.gradient(loss, model.variables)
    _ = optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))

batch 0: loss 2.310311
batch 1: loss 2.182139
batch 2: loss 2.060784
batch 3: loss 1.959442
batch 4: loss 1.858464
batch 5: loss 1.757245
batch 6: loss 1.657469
batch 7: loss 1.582313
batch 8: loss 1.488093
batch 9: loss 1.385024
batch 10: loss 1.298926
batch 11: loss 1.236500
batch 12: loss 1.170656
batch 13: loss 1.107809
batch 14: loss 1.032902
batch 15: loss 1.008354
batch 16: loss 0.942107
batch 17: loss 0.878711
batch 18: loss 0.847123
batch 19: loss 0.815416
batch 20: loss 0.760611
batch 21: loss 0.748718
batch 22: loss 0.710711
batch 23: loss 0.700457
batch 24: loss 0.664940
batch 25: loss 0.646776
batch 26: loss 0.618817
batch 27: loss 0.608039
batch 28: loss 0.587438
batch 29: loss 0.586172
batch 30: loss 0.559860
batch 31: loss 0.537126
batch 32: loss 0.537327
batch 33: loss 0.528377
batch 34: loss 0.510048
batch 35: loss 0.480038
batch 36: loss 0.497400
batch 37: loss 0.465899
batch 38: loss 0.480509
batch 39: loss 0.445201
batch 40: loss 0.453532
batch 41: loss 0.452634
ba

In [25]:
num_epochs = 5
batch_size = 5000
learning_rate = 0.001

In [26]:
model = MLP()
data_loader = MNISTLoader()
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

In [27]:
num_batches = int(data_loader.num_train_data // batch_size * num_epochs)
for batch_index in range(num_batches):
    X, y = data_loader.get_batch(batch_size)
    with tf.GradientTape() as tape:
        y_pred = model(X)
        loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
        loss = tf.reduce_mean(loss)
        print("batch %d: loss %f" % (batch_index, loss.numpy()))
    grads = tape.gradient(loss, model.variables)
    _ = optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))

batch 0: loss 2.309474
batch 1: loss 2.184384
batch 2: loss 2.067222
batch 3: loss 1.961653
batch 4: loss 1.848184
batch 5: loss 1.751510
batch 6: loss 1.665823
batch 7: loss 1.567607
batch 8: loss 1.469888
batch 9: loss 1.385012
batch 10: loss 1.311684
batch 11: loss 1.236081
batch 12: loss 1.167836
batch 13: loss 1.098149
batch 14: loss 1.043209
batch 15: loss 0.991133
batch 16: loss 0.934995
batch 17: loss 0.893430
batch 18: loss 0.846296
batch 19: loss 0.821185
batch 20: loss 0.773384
batch 21: loss 0.760975
batch 22: loss 0.720968
batch 23: loss 0.695151
batch 24: loss 0.665964
batch 25: loss 0.646742
batch 26: loss 0.619782
batch 27: loss 0.610285
batch 28: loss 0.589692
batch 29: loss 0.573173
batch 30: loss 0.552676
batch 31: loss 0.553706
batch 32: loss 0.531903
batch 33: loss 0.520633
batch 34: loss 0.495754
batch 35: loss 0.488248
batch 36: loss 0.497202
batch 37: loss 0.478454
batch 38: loss 0.481091
batch 39: loss 0.469034
batch 40: loss 0.480349
batch 41: loss 0.444983
ba

In [28]:
sparse_categorical_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
num_batches = int(data_loader.num_test_data // batch_size)
for batch_index in range(num_batches):
    start_index, end_index = batch_index * batch_size, (batch_index + 1) * batch_size
    y_pred = model.predict(data_loader.test_data[start_index: end_index])
    sparse_categorical_accuracy.update_state(y_true=data_loader.test_label[start_index: end_index], y_pred=y_pred)
print("test accuracy: %f" % sparse_categorical_accuracy.result())

<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=5000.0>

<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=10000.0>

test accuracy: 0.905600


In [188]:
data_loader.test_label[start_index: end_index]

array([7, 2, 1, 0, 4, 1, 4, 9, 5, 9], dtype=int32)

### 卷积神经网络

#### 使用Keras实现卷积神经网络  

In [29]:
class CNN(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.conv1 = tf.keras.layers.Conv2D(
            filters=32,             # 卷积层神经元（卷积核）数目
            kernel_size=[5, 5],     # 感受野大小
            padding='same',         # padding策略（vaild 或 same）
            activation=tf.nn.relu   # 激活函数
        )
        self.pool1 = tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2)
        self.conv2 = tf.keras.layers.Conv2D(
            filters=64,
            kernel_size=[5, 5],
            padding='same',
            activation=tf.nn.relu
        )
        self.pool2 = tf.keras.layers.MaxPool2D(pool_size=[2, 2], strides=2)
        self.flatten = tf.keras.layers.Reshape(target_shape=(7 * 7 * 64,))
        self.dense1 = tf.keras.layers.Dense(units=1024, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs):
        x = self.conv1(inputs)                  # [batch_size, 28, 28, 32]
        x = self.pool1(x)                       # [batch_size, 14, 14, 32]
        x = self.conv2(x)                       # [batch_size, 14, 14, 64]
        x = self.pool2(x)                       # [batch_size, 7, 7, 64]
        x = self.flatten(x)                     # [batch_size, 7 * 7 * 64]
        x = self.dense1(x)                      # [batch_size, 1024]
        x = self.dense2(x)                      # [batch_size, 10]
        output = tf.nn.softmax(x)
        return output

In [30]:
num_epochs = 5
batch_size = 5000
learning_rate = 0.001

In [31]:
model = CNN()
data_loader = MNISTLoader()
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

In [32]:
num_batches = int(data_loader.num_train_data // batch_size * num_epochs)
for batch_index in range(num_batches):
    X, y = data_loader.get_batch(batch_size)
    with tf.GradientTape() as tape:
        y_pred = model(X)
        loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
        loss = tf.reduce_mean(loss)
        print("batch %d: loss %f" % (batch_index, loss.numpy()))
    grads = tape.gradient(loss, model.variables)
    _ = optimizer.apply_gradients(grads_and_vars=zip(grads, model.variables))

batch 0: loss 2.302025
batch 1: loss 2.058893
batch 2: loss 1.747453
batch 3: loss 1.366509
batch 4: loss 1.053738
batch 5: loss 0.909884
batch 6: loss 0.789708
batch 7: loss 0.798355
batch 8: loss 0.688380
batch 9: loss 0.577808
batch 10: loss 0.609948
batch 11: loss 0.455673
batch 12: loss 0.464754
batch 13: loss 0.437746
batch 14: loss 0.402453
batch 15: loss 0.407344
batch 16: loss 0.355393
batch 17: loss 0.328806
batch 18: loss 0.370193
batch 19: loss 0.334883
batch 20: loss 0.298504
batch 21: loss 0.285496
batch 22: loss 0.288759
batch 23: loss 0.289981
batch 24: loss 0.263217
batch 25: loss 0.269641
batch 26: loss 0.248456
batch 27: loss 0.251506
batch 28: loss 0.238241
batch 29: loss 0.203239
batch 30: loss 0.209931
batch 31: loss 0.203775
batch 32: loss 0.206832
batch 33: loss 0.188401
batch 34: loss 0.190474
batch 35: loss 0.184831
batch 36: loss 0.171019
batch 37: loss 0.177792
batch 38: loss 0.169812
batch 39: loss 0.160875
batch 40: loss 0.176795
batch 41: loss 0.160044
ba

In [33]:
sparse_categorical_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
num_batches = int(data_loader.num_test_data // batch_size)
for batch_index in range(num_batches):
    start_index, end_index = batch_index * batch_size, (batch_index + 1) * batch_size
    y_pred = model.predict(data_loader.test_data[start_index: end_index])
    sparse_categorical_accuracy.update_state(y_true=data_loader.test_label[start_index: end_index], y_pred=y_pred)
print("test accuracy: %f" % sparse_categorical_accuracy.result())

<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=5000.0>

<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=10000.0>

test accuracy: 0.973000


#### 使用Keras中预定义的经典卷积神经网络结构

In [34]:
model = tf.keras.applications.MobileNetV2()

In [35]:
tf.keras.backend.set_learning_phase(True)

Instructions for updating:
Simply pass a True/False value to the `training` argument of the `__call__` method of your layer or model.


In [36]:

import tensorflow_datasets as tfds

num_epoch = 5
batch_size = 10
learning_rate = 0.001

dataset = tfds.load("tf_flowers", split=tfds.Split.TRAIN, as_supervised=True)
dataset = dataset.map(lambda img, label: (tf.image.resize(img, (224, 224)) / 255.0, label)).shuffle(1024).batch(batch_size)
model = tf.keras.applications.MobileNetV2(weights=None, classes=5)
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
for e in range(num_epoch):
    for images, labels in dataset:
        with tf.GradientTape() as tape:
            labels_pred = model(images, training=True)
            loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=labels, y_pred=labels_pred)
            loss = tf.reduce_mean(loss)
            print("loss %f" % loss.numpy())
        grads = tape.gradient(loss, model.trainable_variables)
        _ = optimizer.apply_gradients(grads_and_vars=zip(grads, model.trainable_variables))
    print(labels_pred)

loss 1.671574
loss 1.491288
loss 1.802438
loss 1.968150
loss 1.493334
loss 1.730590
loss 2.254787
loss 2.130812
loss 1.731451
loss 1.261857
loss 2.667665
loss 2.557539
loss 1.690144
loss 1.653540
loss 1.529181
loss 2.247490
loss 1.650390
loss 1.810979
loss 1.742309
loss 1.821816
loss 1.339355
loss 1.857087
loss 2.090892
loss 1.110196
loss 2.735552
loss 2.317418
loss 1.800508
loss 2.048780
loss 1.487516
loss 1.959415
loss 1.665091
loss 0.866619
loss 1.321856
loss 1.799621
loss 1.199245
loss 0.935215
loss 2.830135
loss 2.218498
loss 1.512071
loss 1.620365
loss 1.374629
loss 1.502180
loss 1.181385
loss 1.073298
loss 1.214451
loss 2.018344
loss 1.715464
loss 2.043723
loss 1.562713
loss 1.236320
loss 1.474574
loss 1.614066
loss 1.369063
loss 1.242561
loss 1.390357
loss 1.621014
loss 1.571876
loss 1.546423
loss 1.347420
loss 1.480775
loss 1.940262
loss 1.135460
loss 1.444464
loss 1.099991
loss 1.502466
loss 1.773658
loss 1.558560
loss 1.424176
loss 1.781923
loss 1.133695
loss 1.434603
loss 2

In [97]:
images.shape

TensorShape([10, 224, 224, 3])

In [37]:
print(1)

1


In [38]:
dataset

<BatchDataset shapes: ((None, 224, 224, 3), (None,)), types: (tf.float32, tf.int64)>

In [39]:
tfds.Split.TRAIN

Split('train')

In [106]:
# TensorFlow 的图像表示为 [图像数目，长，宽，色彩通道数] 的四维张量
# 这里我们的输入图像 image 的张量形状为 [1, 7, 7, 1]
image = np.array([[
    [0, 0, 0, 0, 0, 0, 0],
    [0, 1, 0, 1, 2, 1, 0],
    [0, 0, 2, 2, 0, 1, 0],
    [0, 1, 1, 0, 2, 1, 0],
    [0, 0, 2, 1, 1, 0, 0],
    [0, 2, 1, 1, 2, 0, 0],
    [0, 0, 0, 0, 0, 0, 0]
]], dtype=np.float32)
image.shape

(1, 7, 7)

In [107]:
image = np.expand_dims(image, axis=-1)  
W = np.array([[
    [ 0, 0, -1], 
    [ 0, 1, 0 ], 
    [-2, 0, 2 ]
]], dtype=np.float32)
b = np.array([1], dtype=np.float32)

In [108]:
image.shape

(1, 7, 7, 1)

### 验证码数据

In [43]:
# 验证码数据，先用之前切好到字符的，之前是向量，27*27拉平了，这里先转成矩阵
with open('../data/train_x.txt') as xfile:
    m_x = [[float(num) for num in line.split(',')] for line in xfile]

with open('../data/train_y.txt') as yfile:
    label_y = [ line.split(',')[0] for line in yfile]

In [45]:
train_x_vc = np.matrix(m_x)

In [46]:
train_x_vc.shape

(5005, 729)

In [60]:
train_x_vc_reshape = np.array(train_x_vc).reshape(5005, 27, 27)

In [62]:
train_x_vc_reshape[0]

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0.,
        1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 0.,
        0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 1., 1., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],


In [121]:
import string

In [122]:
string.ascii_letters

'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

In [125]:
# 把label转成数字
LABEL_TEXT = list(string.digits) + list(string.ascii_letters)
# LABEL_TEXT
train_y_vc = [LABEL_TEXT.index(x) for x in label_y]

In [128]:
for label_str, label_num in zip(label_y[:10], train_y_vc[:10]):
    print(label_str + ':' + str(label_num))

8:8
Z:61
2:2
3:3
3:3
9:9
J:45
R:53
Q:52
H:43


In [129]:
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

X = tf.convert_to_tensor(train_x_vc_reshape)
Y = tf.convert_to_tensor(train_y_vc)
vc_dataset = tf.data.Dataset.from_tensor_slices((X, Y))

train_size = 4000
vc_train_dataset = vc_dataset.take(train_size)
vc_test_dataset = vc_dataset.skip(train_size)

In [130]:
len(vc_train_dataset)

4000

In [131]:
x1 = vc_train_dataset.batch(10)

stopsign = 0
for x_data, x_label in x1:
    print("data")
    print(x_data)
    print("label")
    print(x_label)
    stopsign += 1
    if stopsign > 1:
        break

data
tf.Tensor(
[[[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 ...

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 

In [114]:
len(x1)

400

In [168]:
class MLP1(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()    # Flatten层将除第一维（batch_size）以外的维度展平
        self.dense1 = tf.keras.layers.Dense(units=100, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=62)

    def call(self, inputs):         # [batch_size, 28, 28, 1]
        x = self.flatten(inputs)    # [batch_size, 784]
        x = self.dense1(x)          # [batch_size, 100]
        x = self.dense2(x)          # [batch_size, 10]
        output = tf.nn.softmax(x)
        return output

model_vc = MLP1()
batch_size = 10
num_epochs = 10
num_batches = int( len(vc_train_dataset) // batch_size * num_epochs)
vc_train_dataset_batches = vc_dataset.batch(batch_size)
# for batch_index in range(num_batches):
for X, y in vc_train_dataset_batches:
    with tf.GradientTape() as tape:
        y_pred = model_vc(X)
#         y_pred_single = y_pred.numpy().argmax(axis=1)
#         y_pred_single = tf.math.argmax(y_pred, axis=1)
        loss = tf.keras.losses.sparse_categorical_crossentropy(y_true=y, y_pred=y_pred)
#         loss = tf.keras.losses.categorical_crossentropy(y_true=y, y_pred=y_pred_single)
        loss = tf.reduce_mean(loss)
        print("batch %d: loss %f" % (batch_index, loss.numpy()))
    grads = tape.gradient(loss, model_vc.variables)
    _ = optimizer.apply_gradients(grads_and_vars=zip(grads, model_vc.variables))
    



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.





To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



batch 0: loss 4.220487
batch 0: loss 4.130552
batch 0: loss 3.883441
batch 0: loss 4.210034
batch 0: loss 3.996984
batch 0: loss 3.813447
batch 0: loss 3.816523
batch 0: loss 3.357016
batch 0: loss 3.896343
batch 0: loss 3.785726
batch 0: loss 3.491256
batch 0: loss 3.110800
batch 0: loss 3.633547
batch 0: loss 3.551370
batch 0: loss 3.351297
batch 0: loss 3.430377
batch 0: loss 3.114422
batch 0: loss 3.675418
batch 0: loss 3.436012
batch 0: loss 3.428229
batch 0: loss 2.605186
batch 0: loss 3.596010
batch 0: loss 3.465043
batch 0: loss 2.943807
batch 0: loss 3.026711
batch 0: loss 2.720569
batch 0: loss 2.790480
batch 0: loss 2.231145
batch 0: loss 2.708890
batch 0: loss 2.360967
batch 0: loss 3.247438
batch 0: loss 2.453648
batch 0: loss 2.398784
batch 0: loss 1.820028
batch 0: loss 2.091329
batch 0: loss 2.554716
batch 0: loss 3.564167
batch 0: loss 2.403932
batch 0: loss 1.420699
batch 0: loss 2.529496
batch 0: loss 3.157752
batch 0: loss 1.711055
batch 0: loss 2.389216
batch 0: lo

In [196]:
sparse_categorical_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
num_batches = int(len(vc_train_dataset) // batch_size)
# for batch_index in range(num_batches):
#     start_index, end_index = batch_index * batch_size, (batch_index + 1) * batch_size
vc_train_dataset_batches = vc_train_dataset.batch(batch_size)
for X, y in vc_train_dataset_batches:
    y_pred = model_vc.predict(X)
#     y_pred_single = tf.math.argmax(y_pred, axis=1)
    _ = sparse_categorical_accuracy.update_state(y_true=y, y_pred=y_pred)
print("train accuracy: %f" % sparse_categorical_accuracy.result())

train accuracy: 0.987250


In [197]:
sparse_categorical_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
num_batches = int(len(vc_test_dataset) // batch_size)

vc_test_dataset_batches = vc_test_dataset.batch(batch_size)
for X, y in vc_test_dataset_batches:
    y_pred = model_vc.predict(X)
#     y_pred_single = tf.math.argmax(y_pred, axis=1)
    _ = sparse_categorical_accuracy.update_state(y_true=y, y_pred=y_pred)
print("test accuracy: %f" % sparse_categorical_accuracy.result())

test accuracy: 0.994030
