## Convert MNIST data to TFRecord and read the TFRecord back

In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

# Load the MNIST data set. If no data is found in F:\dataSets\mnist,
# TensorFlow downloads it first.
# The raw string keeps the Windows backslashes literal instead of relying on
# unrecognised escape sequences such as "\d" and "\m".
mnist = input_data.read_data_sets(r'F:\dataSets\mnist', dtype=tf.uint8, one_hot=True)

def Int64_feature(value):
    """Wrap a single integer in a ``tf.train.Feature`` backed by an ``Int64List``."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)

def Bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature`` backed by a ``BytesList``."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)


def convert_to_tfrecord_mnist(tfrecord_fname, train_data=True):
    """Serialize one split of the module-level ``mnist`` data set to a TFRecord file.

    Every example is written with two features: ``image_raw`` (the raw bytes
    of the image array) and ``label`` (the argmax of the label row, i.e. the
    class index of the one-hot label).

    Args:
        tfrecord_fname: Path of the TFRecord file to write.
        train_data: If truthy, write ``mnist.train``; otherwise write
            ``mnist.test``.
    """
    split = mnist.train if train_data else mnist.test
    images = split.images
    labels = split.labels

    # The context manager closes the writer even when serialization raises,
    # so a partially written file is never left with an open handle.
    with tf.python_io.TFRecordWriter(tfrecord_fname) as writer:
        for i in range(split.num_examples):
            feature = {
                # tobytes() is the non-deprecated equivalent of tostring()
                'image_raw': Bytes_feature(images[i].tobytes()),
                # int() turns the NumPy integer into a plain Python int
                'label': Int64_feature(int(np.argmax(labels[i]))),
            }
            example = tf.train.Example(
                features=tf.train.Features(feature=feature))
            writer.write(example.SerializeToString())

    print('convert mnist data to tfrecord format done!')

def create_tfrecord_mnist():
    """Write the MNIST training and test splits to their TFRecord files."""
    # Raw strings keep every backslash literal; the previous literals only
    # worked because "\d", "\T" and "\m" happen not to be escape sequences.
    train_tfrecord_filename = r'F:\dataSets\TFRecod_data\mnist_TFRecord\mnist_train.tfrecord'
    val_tfrecord_filename = r'F:\dataSets\TFRecod_data\mnist_TFRecord\mnist_val.tfrecord'

    convert_to_tfrecord_mnist(train_tfrecord_filename)
    # train_data=False selects the test split, stored here as validation data
    convert_to_tfrecord_mnist(val_tfrecord_filename, train_data=False)

def read_tfrecord_minst():
    """Read the MNIST training TFRecord file and print the first ten labels.

    Uses the queue-based input pipeline: a filename queue feeds a
    ``TFRecordReader``, and each serialized example is parsed into its
    ``image_raw`` and ``label`` features.
    """
    # Raw string: keeps the Windows backslashes literal
    fname = r'F:\dataSets\TFRecod_data\mnist_TFRecord\mnist_train.tfrecord'

    # Create a TFRecordReader instance
    reader = tf.TFRecordReader()

    # Create a queue that maintains the list of input files
    filename_queue = tf.train.string_input_producer([fname])

    # Read one serialized example through the reader
    _, serialized_example = reader.read(filename_queue)

    # Parse the serialized example with parse_single_example()
    features = tf.parse_single_example(
        serialized_example,
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64)
        }
    )

    # Convert the string-encoded image back to an array with decode_raw()
    images = tf.decode_raw(features['image_raw'], tf.uint8)
    # The key must match the parsing spec above, which defines 'label'
    labels = tf.cast(features['label'], tf.int32)

    # Evaluate the tensors in a session
    with tf.Session() as sess:
        # Start the queue-runner threads that fill the input queues
        coordinator = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coordinator)

        try:
            for _ in range(10):
                # Two tensors are fetched, so exactly two values are returned
                _image, label = sess.run([images, labels])
                print(label)
        finally:
            # Always stop and join the threads, even if a read fails
            coordinator.request_stop()
            coordinator.join(threads)

## keras model with tfrecord iterator

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Activation
import tensorflow.keras as keras
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
from tensorflow import FixedLenFeature

# Parsing spec for one serialized MNIST example: the raw image bytes and the
# integer class label, each stored as a scalar feature.
mnist_feature = dict(
    image_raw=tf.FixedLenFeature([], tf.string),
    label=tf.FixedLenFeature([], tf.int64),
)

def parse_mnist_record(example_proto, clip=False):
    """Decode one serialized example into an ``(image, one-hot label)`` pair.

    The image bytes are decoded as uint8, reshaped to [28, 28, 1] and cast to
    float32; the label is cast to int32 and one-hot encoded with depth 10.
    ``clip`` is accepted but not used.
    """
    parsed = tf.parse_single_example(example_proto, mnist_feature)

    pixels = tf.decode_raw(parsed['image_raw'], tf.uint8)
    img = tf.cast(tf.reshape(pixels, [28, 28, 1]), tf.float32)

    class_index = tf.cast(parsed['label'], tf.int32)
    label = tf.one_hot(class_index, 10)

    return img, label

def creat_mnist_data(tfrecord_file):
    """Return a one-shot iterator over parsed, repeated, shuffled, batched records.

    Records from ``tfrecord_file`` are parsed with ``parse_mnist_record``
    (8 parallel calls), repeated indefinitely, shuffled with a buffer of 1000
    and grouped into batches of 32.
    """
    records = tf.data.TFRecordDataset(tfrecord_file)
    parsed = records.map(parse_mnist_record, num_parallel_calls=8)
    repeated = parsed.repeat()
    shuffled = repeated.shuffle(1000)
    batched = shuffled.batch(32)

    return batched.make_one_shot_iterator()


# Raw strings keep the Windows backslashes literal instead of relying on
# unrecognised escape sequences such as "\d", "\T" and "\m".
train_tfrecord = r'F:\dataSets\TFRecod_data\mnist_TFRecord\mnist_train.tfrecord'
val_tfrecord = r'F:\dataSets\TFRecod_data\mnist_TFRecord\mnist_val.tfrecord'

# Input iterators for training and validation
train_iter = creat_mnist_data(train_tfrecord)
val_iter = creat_mnist_data(val_tfrecord)

# Small CNN: conv -> relu -> max-pool -> conv -> flatten -> dense -> softmax
model_input = keras.layers.Input(shape=(28, 28, 1))

x = Conv2D(64, (3, 3), padding='same')(model_input)
x = Activation('relu')(x)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Conv2D(32, (3, 3))(x)
x = Flatten()(x)
x = Dense(10)(x)
model_output = Activation('softmax')(x)

model = Model(model_input, model_output)


model.compile(loss='categorical_crossentropy', optimizer='adadelta',
              metrics=['accuracy'])
# The datasets repeat forever, so the epoch length is set by the step counts
his = model.fit(train_iter, steps_per_epoch=1562, verbose=1, epochs=5,
                validation_data=val_iter, validation_steps=20)


# Training vs. validation loss per epoch
loss = his.history['loss']
val_loss = his.history['val_loss']
plt.plot(loss, linewidth=3, label='train')
plt.plot(val_loss, linewidth=3, label='valid')
plt.grid()
plt.legend()
plt.xlabel('epoch')
plt.ylabel('loss')
plt.show()


# Training vs. validation accuracy per epoch.
# Older Keras releases record the metric under 'acc', newer ones under
# 'accuracy'; use whichever key this run actually produced.
acc_key = 'acc' if 'acc' in his.history else 'accuracy'
acc = his.history[acc_key]
val_acc = his.history['val_' + acc_key]
plt.plot(acc, linewidth=3, label='train')
plt.plot(val_acc, linewidth=3, label='val')
plt.legend()
plt.xlabel('epoch')
plt.ylabel('acc')
plt.show()




## keras model with tfrecord image and label

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Activation
import tensorflow.keras as keras
from tensorflow.keras.models import Model


import matplotlib.pyplot as plt
from tensorflow import FixedLenFeature
# Parsing spec for one serialized MNIST example: the raw image bytes and the
# integer class label, each stored as a scalar feature.
mnist_feature = dict(
    image_raw=tf.FixedLenFeature([], tf.string),
    label=tf.FixedLenFeature([], tf.int64),
)

def parse_mnist_record(example_proto, clip=False):
    """Decode one serialized example into ``(uint8 image vector, int32 label)``.

    Reshaping and one-hot encoding are not done here. ``clip`` is accepted
    but not used.
    """
    parsed = tf.parse_single_example(example_proto, mnist_feature)

    return (
        tf.decode_raw(parsed['image_raw'], tf.uint8),
        tf.cast(parsed['label'], tf.int32),
    )

def creat_mnist_data(tfrecord_file):
    """Return ``(images, labels)`` batch tensors read from a TFRecord file.

    Records are parsed with ``parse_mnist_record`` (8 parallel calls),
    repeated indefinitely, shuffled with a buffer of 1000 and batched by 32.
    The image batch is reshaped to [-1, 28, 28, 1] and cast to float32; the
    label batch is one-hot encoded with depth 10.
    """
    pipeline = (
        tf.data.TFRecordDataset(tfrecord_file)
        .map(parse_mnist_record, num_parallel_calls=8)
        .repeat()
        .shuffle(1000)
        .batch(32)
    )
    raw_images, class_ids = pipeline.make_one_shot_iterator().get_next()

    images = tf.cast(tf.reshape(raw_images, [-1, 28, 28, 1]), tf.float32)
    labels = tf.one_hot(class_ids, 10)

    return images, labels


# Raw strings keep the Windows backslashes literal instead of relying on
# unrecognised escape sequences such as "\d", "\T" and "\m".
train_tfrecord = r'F:\dataSets\TFRecod_data\mnist_TFRecord\mnist_train.tfrecord'
val_tfrecord = r'F:\dataSets\TFRecod_data\mnist_TFRecord\mnist_val.tfrecord'

train_images, train_labels = creat_mnist_data(train_tfrecord)
# NOTE(review): the validation tensors are built but never passed to fit()
val_images, val_labels = creat_mnist_data(val_tfrecord)


# The model reads its input directly from the training image tensor
model_input = keras.layers.Input(tensor=train_images)

x = Conv2D(64, (3, 3), padding='same', input_shape=(28, 28, 1))(model_input)
x = Activation('relu')(x)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Conv2D(32, (3, 3))(x)
x = Flatten()(x)
x = Dense(10)(x)
model_output = Activation('softmax')(x)

model = Model(model_input, model_output)


# Targets come from the label tensor, so fit() is called without x/y data
model.compile(loss='categorical_crossentropy', optimizer='adadelta',
              metrics=['accuracy'], target_tensors=[train_labels],)
his = model.fit(steps_per_epoch=1562, verbose=1, epochs=2)


loss = his.history['loss']
# fit() above gets no validation data, so the history holds no 'val_loss'
# entry; .get() avoids the KeyError and the curve is drawn only if present.
val_loss = his.history.get('val_loss')
plt.plot(loss, linewidth=3, label='train')
if val_loss is not None:
    plt.plot(val_loss, linewidth=3, label='valid')
plt.grid()
plt.legend()
plt.xlabel('epoch')
plt.ylabel('loss')

plt.show()




## Serialize MNIST data to a TFRecord file

In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

# Load the MNIST data set. If no data is found in F:\dataSets\mnist,
# TensorFlow downloads it first.
# The raw string keeps the Windows backslashes literal instead of relying on
# unrecognised escape sequences such as "\d" and "\m".
mnist = input_data.read_data_sets(r'F:\dataSets\mnist', dtype=tf.uint8, one_hot=True)

def Int64_feature(value):
    """Wrap a single integer in a ``tf.train.Feature`` backed by an ``Int64List``."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)

def Bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature`` backed by a ``BytesList``."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)

# Read the MNIST training data
images = mnist.train.images
labels = mnist.train.labels
pixels = images.shape[1]
num_examples = mnist.train.num_examples

# Output path of the TFRecord file.
# NOTE: the last component, 'mnist', is the name of the file the writer
# creates, not a directory (so it must not already exist as a directory).
# The raw string keeps the Windows backslashes literal.
filename = r'F:\dataSets\TFRecod_data\mnist'

# Create a python_io.TFRecordWriter; the context manager closes it even if
# writing an example raises.
with tf.python_io.TFRecordWriter(filename) as writer:
    for i in range(num_examples):
        # Convert the image to its raw bytes (tobytes() is the non-deprecated
        # equivalent of tostring())
        image_to_string = images[i].tobytes()

        feature = {
            'pixels':       Int64_feature(pixels),
            'labels':       Int64_feature(int(np.argmax(labels[i]))),
            'image_raw':    Bytes_feature(image_to_string)
        }

        features = tf.train.Features(feature=feature)

        # Build an Example from the features
        example = tf.train.Example(features=features)

        # Write the serialized Example to the TFRecord file
        writer.write(example.SerializeToString())

# NOTE(review): `c` is not used anywhere in the visible code
c = 0

## Read TFRecord file

In [None]:
import tensorflow as tf

# Create a TFRecordReader instance
reader = tf.TFRecordReader()

# Create a queue that maintains the list of input files.
# The raw string keeps the Windows backslashes literal.
filename_queue = tf.train.string_input_producer([r'F:\dataSets\TFRecod_data\mnist'])

# Read one serialized example through the reader
_, serialized_example = reader.read(filename_queue)

# Parse the serialized example with parse_single_example()
features = tf.parse_single_example(
    serialized_example,
    features={
    'image_raw':    tf.FixedLenFeature([], tf.string),
    'pixels':       tf.FixedLenFeature([], tf.int64),
    'labels':        tf.FixedLenFeature([], tf.int64)
    }
)


# Convert the string-encoded image back to an array with decode_raw()
images = tf.decode_raw(features['image_raw'], tf.uint8)
labels = tf.cast(features['labels'], tf.int32)
pixels = tf.cast(features['pixels'], tf.int32)

# Evaluate the tensors in a session
with tf.Session() as sess:
    # Start the queue-runner threads that fill the input queues
    coordinator = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coordinator)

    try:
        for i in range(10):
            image, label, pixel = sess.run([images, labels, pixels])
            print(label)
    finally:
        # Stop and join the queue-runner threads before the session closes;
        # without this they are left running against a closed session.
        coordinator.request_stop()
        coordinator.join(threads)


## Write MNIST test data into multiple TFRecord files

In [None]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

# Load the MNIST data set from F:\dataSets\mnist. The raw string keeps the
# Windows backslashes literal instead of relying on unrecognised escape
# sequences such as "\d" and "\m".
mnist = input_data.read_data_sets(r'F:\dataSets\mnist', dtype=tf.uint8, one_hot=True)

def _int64_feature(value):
    """Wrap a single integer in a ``tf.train.Feature`` backed by an ``Int64List``."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)

def _bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature`` backed by a ``BytesList``."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)

images = mnist.test.images
labels = mnist.test.labels
pixels = images.shape[1]
num_examples = mnist.test.num_examples

# Number of TFRecord files (shards) the test data is split across
num_files = 2

for i in range(num_files):
    # Raw string keeps the Windows backslashes literal
    filename = (r'F:\dataSets\TFRecod_data\mnist_TFRecord\test_data\mnist_test_tfrecod-%.1d-of-%.1d' % (i, num_files))

    # Shard i receives its own contiguous slice of the examples. Previously
    # every file received the complete test set, so reading all "i-of-n"
    # files back yielded each example num_files times.
    start = i * num_examples // num_files
    stop = (i + 1) * num_examples // num_files

    # The context manager closes the writer even if writing an example raises
    with tf.python_io.TFRecordWriter(filename) as writer:
        for index in range(start, stop):
            # tobytes() is the non-deprecated equivalent of tostring()
            image_string = images[index].tobytes()

            example = tf.train.Example(features=tf.train.Features(
                feature={
                    'pixels':   _int64_feature(pixels),
                    'labels':   _int64_feature(int(np.argmax(labels[index]))),
                    'image_raw':_bytes_feature(image_string)
                }
            ))

            writer.write(example.SerializeToString())

## Read multiple TFRecord files

In [None]:
import tensorflow as tf

# Collect every file in the test_data directory. The raw string keeps the
# Windows backslashes literal.
files = tf.train.match_filenames_once(r"F:\dataSets\TFRecod_data\mnist_TFRecord\test_data\*")
# shuffle=False keeps the files in the order they were matched.
# Alternative: pass an explicit list of file paths to string_input_producer
# instead of the matched pattern.
filename_queue = tf.train.string_input_producer(files, shuffle=False)
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)

features = tf.parse_single_example(
    serialized_example,
    features={
        'image_raw':    tf.FixedLenFeature([], tf.string),
        'pixels':       tf.FixedLenFeature([], tf.int64),
        'labels':       tf.FixedLenFeature([], tf.int64)
    }
)

images = tf.decode_raw(features['image_raw'], tf.uint8)
labels = tf.cast(features['labels'], tf.int32)
pixels = tf.cast(features['pixels'], tf.int32)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    # Required: match_filenames_once keeps its result in a local variable
    tf.local_variables_initializer().run()

    print(sess.run(files))

    coordinator = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coordinator)

    try:
        for i in range(6):
            print(sess.run([images, labels]))
    finally:
        # Always stop and join the queue-runner threads, even if a read fails
        coordinator.request_stop()
        coordinator.join(threads)


## Read TFRecord files and return batched images

In [None]:
import tensorflow as tf

# Collect every file in the test_data directory. The raw string keeps the
# Windows backslashes literal.
files = tf.train.match_filenames_once(r"F:\dataSets\TFRecod_data\mnist_TFRecord\test_data\*")
filename_queue = tf.train.string_input_producer(files, shuffle=True)

reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)

features = tf.parse_single_example(
    serialized_example,
    features={
        'image_raw':    tf.FixedLenFeature([], tf.string),
        'pixels':       tf.FixedLenFeature([], tf.int64),
        'labels':       tf.FixedLenFeature([], tf.int64)
    }
)

images = tf.decode_raw(features['image_raw'], tf.uint8)
labels = tf.cast(features['labels'], tf.int32)
pixels = tf.cast(features['pixels'], tf.int32)

# Set batch size
batch_size = 10

# Capacity of the queue that buffers examples while batches are assembled
capacity = 5000 + 3 * batch_size

# decode_raw yields a tensor of unknown length; tf.train.batch needs a fully
# defined shape, so it must be set explicitly or batching reports an error.
images.set_shape([784])
image_batch, label_batch = tf.train.batch([images, labels], batch_size=batch_size, capacity=capacity)

with tf.Session() as sess:
    tf.global_variables_initializer().run()
    # Required: match_filenames_once keeps its result in a local variable
    tf.local_variables_initializer().run()

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        for i in range(3):
            xs, ys = sess.run([image_batch, label_batch])
            print(xs, ys)
    finally:
        # Always stop and join the queue-runner threads, even if a read fails
        coord.request_stop()
        coord.join(threads)

