## MNIST Tensorflow

#### 在运行之前，应满足如下库的需求，并在工作区下载data源文件

In [1]:
# Install the TensorFlow GPU build, pinned to version 2.0.0.
!pip3 install tensorflow-gpu==2.0.0 




In [1]:
# Check whether TensorFlow can use a GPU; the boolean result is shown as the cell output.
import tensorflow as tf
tf.test.is_gpu_available()

False

### 根据数据存放位置，修改配置信息。

In [0]:
# Configuration: adjust to where the raw MNIST IDX files are stored.
# Only DATA_DIR needs to change when the data folder is moved; the four file
# paths below are derived from it.
DATA_DIR = '/content/drive/My Drive/CNN-MNIST/data'
train_data_file = DATA_DIR + '/train/train-images-idx3-ubyte'
train_label_file = DATA_DIR + '/train/train-labels-idx1-ubyte'
test_data_file = DATA_DIR + '/test/t10k-images-idx3-ubyte'
test_label_file = DATA_DIR + '/test/t10k-labels-idx1-ubyte'


#### 可视化工具，包括进度条和图片显示工具

In [0]:
# 同pytorch中的注释
import time
class Progress_bar(object):
    """Single-line console progress indicator with elapsed and remaining time."""

    def __init__(self):
        # Reference point for every elapsed-time calculation.
        self.start = time.perf_counter()
        # Timestamp of the most recent redraw.
        self.last_update = time.perf_counter()

    def bar(self, index, length, script, batch=1):
        """Redraw the progress line in place.

        :param index: position of the current step; multiplied by ``batch``
                      to obtain the number of items handled so far
        :param length: total number of items
        :param script: label printed in front of the bar
        :param batch: number of items covered by one step
        """
        done = index * batch
        # Clamp the final (possibly partial) step onto the last item.
        if length - done < batch:
            done = length - 1
        completed = done + 1
        fraction = completed / length

        # Place the '>' marker, then swap everything before it for '='.
        # The replacement is one element longer than the slice it replaces,
        # so the rendered bar is always 27 characters wide.
        marker = completed * 25 // length
        cells = ['.'] * 26
        cells[marker] = '>'
        cells[:marker] = ['='] * (marker + 1)
        rendered = ''.join(cells)

        now = time.perf_counter()
        elapsed = now - self.start
        # Linear extrapolation of the time still needed.
        remaining = elapsed / fraction * (1 - fraction)
        print("\r{}: {}  time left:{:.2f}s {}/{}  {:.2f} {} time cost:{:.2f}s "
              .format(script, rendered, remaining, completed, length,
                      fraction * 100, "%", elapsed), end='')
        self.last_update = time.perf_counter()


In [0]:
# 同Pytorch中的注释
# %matplotlib inline
import matplotlib.pyplot as plt
def show_image(array, rows=1, cols=1):
    """Show one image, or a ``rows`` x ``cols`` grid of images, in grayscale.

    :param array: image data passed to ``plt.imshow`` when ``rows * cols == 1``;
                  otherwise an indexable collection from which images
                  ``0 .. rows * cols - 1`` are drawn
    :param rows: number of grid rows
    :param cols: number of grid columns
    """
    if rows * cols != 1:
        # Walk the grid row by row; subplot positions are 1-based.
        grid_indices = (r * cols + c for r in range(rows) for c in range(cols))
        for index in grid_indices:
            plt.subplot(rows, cols, index + 1)
            plt.imshow(array[index], cmap="gray", interpolation="nearest")
            plt.axis('off')
        plt.show()
        return

    # Single image: draw it in its own figure.
    plt.figure()
    plt.imshow(array, cmap='gray')
    plt.show()
 



#### 数据的预处理函数

In [0]:
# 同pytorch
import struct
import numpy as np

def raw_file_idx3_process(file_path):
    """Decode an IDX3 image file into a normalised float array.

    The file is read as four big-endian 32-bit integers (magic number, image
    count, row count, column count) followed by one unsigned byte per pixel.
    The image dimensions are taken from that header rather than assumed to be
    28 x 28, and all pixels are decoded in one vectorised step.

    :param file_path: path of the IDX3 file to read
    :return: float64 array of shape (num_images, 1, num_rows, num_cols) with
             pixel values scaled from 0..255 to 0..1
    :raises ValueError: if the file holds fewer pixel bytes than its header
                        announces
    """
    with open(file_path, 'rb') as f:
        binary_data = f.read()

    fmt_header = '>iiii'
    # The magic number is read to advance past it but is not validated.
    _magic_number, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, binary_data, 0)

    print("\nfile idx3 decoding:")
    # Interpret the pixel payload directly as unsigned bytes; this replaces a
    # Python-level loop that unpacked and reshaped every image separately.
    pixels = np.frombuffer(binary_data,
                           dtype=np.uint8,
                           count=num_images * num_rows * num_cols,
                           offset=struct.calcsize(fmt_header))
    # Cast before dividing so the result is float64 regardless of NumPy version.
    images = pixels.reshape(num_images, 1, num_rows, num_cols).astype(np.float64) / 255
    return images

In [0]:
# 同pytorch
def raw_file_idx1_process(file_path):
    """Decode an IDX1 label file into a one-dimensional array.

    The file is read as two big-endian 32-bit integers (magic number, label
    count) followed by one unsigned byte per label.

    :param file_path: path of the IDX1 file to read
    :return: float64 array of shape (num_labels,) holding the label values
             (float64 is kept because that is what this function has always
             returned)
    :raises ValueError: if the file holds fewer label bytes than its header
                        announces
    """
    with open(file_path, 'rb') as f:
        binary_data = f.read()

    fmt_header = '>ii'
    # The magic number is read to advance past it but is not validated.
    _magic_number, num_labels = struct.unpack_from(fmt_header, binary_data, 0)

    print("\nfile idx1 decoding:")
    # Decode every label at once. The previous per-byte loop relied on the
    # `np.int` alias, which no longer exists in current NumPy releases.
    raw_labels = np.frombuffer(binary_data,
                               dtype=np.uint8,
                               count=num_labels,
                               offset=struct.calcsize(fmt_header))
    labels = raw_labels.astype(np.float64)
    return labels

#### Tensorflow 数据处理 构建dataset

In [0]:
# 构建数据集
import tensorflow as tf
def reader_dataset(data_file, label_file, idx3=True, batch_size=128, shuffle_buffer_size=10000):
    '''
    Build a repeating, shuffled and batched tf.data.Dataset from a pair of IDX files.

    :param data_file: path of the IDX3 image file
    :param label_file: path of the IDX1 label file
    :param idx3: kept for backward compatibility only. It used to select a
                 hard-coded image count (60000 when True, 10000 when False);
                 the count is now taken from the decoded array, so the flag
                 no longer has any effect
    :param batch_size: number of samples per batch
    :param shuffle_buffer_size: size of the shuffle buffer
    :return: an endlessly repeating dataset of (images, labels) batches, with
             images laid out as (batch, rows, cols, channels)
    '''
    data_array = raw_file_idx3_process(data_file)
    label_array = raw_file_idx1_process(label_file)
    # Move the channel axis to the end: (N, channels, rows, cols) ->
    # (N, rows, cols, channels). The decoder always produces a single channel,
    # so a plain reshape is enough. The sizes come from the array itself, so
    # files of any image count or resolution work.
    num_images, channels, num_rows, num_cols = data_array.shape
    data_array = data_array.reshape(num_images, num_rows, num_cols, channels)
    dataset = tf.data.Dataset.from_tensor_slices((data_array, label_array))
    # Repeat indefinitely; callers bound each epoch with explicit step counts.
    dataset = dataset.repeat()
    # Shuffle, then group into batches.
    dataset = dataset.shuffle(shuffle_buffer_size).batch(batch_size)
    return dataset


#### Tensorflow 构建数据


In [9]:
# Build the training and test datasets.
# BATCH_SIZE is passed explicitly so the batches produced here always match
# the step counts that are derived from BATCH_SIZE for training and evaluation.
BATCH_SIZE = 128
train_set = reader_dataset(train_data_file, train_label_file, batch_size=BATCH_SIZE)
test_set = reader_dataset(test_data_file, test_label_file, idx3=False, batch_size=BATCH_SIZE)


file idx3 decoding:
file idx1 decoding:
file idx3 decoding:
file idx1 decoding:

### Tensorflow 构建keras模型

In [10]:
import tensorflow.keras as keras
# Layer stack, in order:
#   first convolution layer
#   pooling
#   second convolution layer
#   pooling
#   flatten
#   fully connected layer
#   fully connected layer
model = keras.Sequential()
# 28x28 single-channel input, 20 feature maps from 5x5 kernels without padding.
model.add(keras.layers.Conv2D(filters=20, kernel_size=(5, 5), padding="valid",
                              activation='relu', input_shape=(28, 28, 1)))
model.add(keras.layers.MaxPool2D(pool_size=(2, 2)))
# 50 feature maps from 5x5 kernels without padding.
model.add(keras.layers.Conv2D(filters=50, kernel_size=(5, 5), padding="valid",
                              activation='relu'))
model.add(keras.layers.MaxPool2D(pool_size=(2, 2)))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(500, activation='relu'))
# One softmax output per class.
model.add(keras.layers.Dense(10, activation="softmax"))

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 24, 24, 20)        520       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 12, 12, 20)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 8, 8, 50)          25050     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 4, 4, 50)          0         
_________________________________________________________________
flatten (Flatten)            (None, 800)               0         
_________________________________________________________________
dense (Dense)                (None, 500)               400500    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                5010      

#### 模型训练

In [11]:
# Training setup: plain SGD with sparse categorical cross-entropy, tracking
# sparse categorical accuracy.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer="sgd",
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
)

# Number of whole batches per pass; 60000 and 10000 are the assumed sizes of
# the training and test sets.
train_steps = 60000 // BATCH_SIZE
val_steps = 10000 // BATCH_SIZE

history = model.fit(
    train_set,
    epochs=5,
    steps_per_epoch=train_steps,
    validation_data=test_set,
    validation_steps=val_steps,
)


Train for 468 steps, validate for 78 steps
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


#### 模型评价

In [16]:
# Evaluate on the test set for the same number of steps used for validation
# during training; the cell output is the list [loss, accuracy].
model.evaluate(test_set,steps=10000//BATCH_SIZE, verbose=2)

78/78 - 1s - loss: 0.1042 - sparse_categorical_accuracy: 0.9670


[0.10416623271810703, 0.9670473]