In [1]:
import tensorflow as tf
import numpy as np
import sys
# Global flag registry; the definitions below attach their values to it.
FLAGS = tf.app.flags.FLAGS
# Batch size used when processing images.
tf.app.flags.DEFINE_integer(
    'batch_size', 256, "Number of images to process in a batch.")
# Directory holding the comet_dnn input data.
tf.app.flags.DEFINE_string(
    'data_dir', '/tmp/comet_dnn_data', "Path to the comet_dnn data directory.")
# Switch for half-precision (fp16) training.
tf.app.flags.DEFINE_boolean(
    'use_fp16', False, "Train the model using fp16.")

In [2]:
# Draw one random unsigned 64-bit integer from [0, max(uint64)); the upper bound is exclusive.
np.random.randint(0, high=np.iinfo(np.uint64).max, dtype=np.uint64)

17565430639775556802

In [7]:
# Scratch check: a one-element slice stays an array, and int() truncates toward zero.
arr = np.arange(10)
print(arr[:1], int(1.1), sep="\n")

[0]
1


## for flag, value in FLAGS.__flags.items():
##     print(flag, "=", value)

In [2]:
def load_images(n_images, height=18, width=300, depth=2, n_filled=80):
    """
    Generate a (n_images, height, width, depth) numpy array of random
    sparse images.

    For every image, n_filled pixel positions are drawn independently and
    uniformly (with replacement) over the full height x width grid, so an
    image ends up with at most n_filled filled pixels (fewer if two draws
    land on the same pixel).  For a filled pixel every channel receives
    its own random value in [0, 1); all other pixels stay zero in every
    channel.

    Parameters
    ----------
    n_images : int
        Number of images
    height : int
        Height of each image
    width : int
        Width of each image
    depth : int
        Number of channels of each image
    n_filled : int
        Number of pixel positions drawn per image

    Returns
    -------
    images : ndarray
        float32 array of shape (n_images, height, width, depth)
    """
    # Initialize the return value
    image = np.zeros((n_images, height, width, depth))
    # Draw n_filled (row, column) positions per image.  The upper bound of
    # randint is exclusive, so passing height / width covers the last row
    # and column as well.
    layers = np.random.randint(0, high=height, size=(n_images, n_filled))
    cells = np.random.randint(0, high=width, size=(n_images, n_filled))
    # Pair every drawn position with the image it was drawn for.  Indexing
    # the first axis with a plain slice would instead broadcast all drawn
    # positions onto all images and make every image identical.
    image_index = np.arange(n_images)[:, np.newaxis]
    # Fill all channels of the selected pixels with random numbers
    image[image_index, layers, cells, :] = np.random.random(
        size=(n_images, n_filled, depth))
    # Cast to 32 bits and return
    return image.astype(np.float32)

def write_array_to_tfrecord(array, labels, filename, options=None):
    """
    Serialize (label, image) pairs into a TFRecord file.

    Every pair becomes one tf.train.Example holding the float feature
    'train/label' and the bytes feature 'train/image'.  The image feature
    is the raw memory buffer of the array; its dtype and shape are not
    stored, so whoever reads the file has to know them.

    Parameters
    ----------
    array : iterable of ndarray
        Images to store, iterated along the first axis
    labels : iterable of float
        One label per image.  Pairs are formed with zip, so surplus
        entries of the longer input are silently dropped
    filename : str
        Path of the TFRecord file to write
    options : tf.python_io.TFRecordOptions, optional
        Writer options (e.g. compression), passed to the writer unchanged.
        With the default of None no options are given to the writer
    """
    # Open TFRecords file with the caller-supplied options
    writer = tf.python_io.TFRecordWriter(filename, options=options)
    try:
        # Write all the images to the file
        for lbl, img in zip(labels, array):
            # tobytes() is the non-deprecated spelling of tostring() and
            # yields the same raw buffer
            image_as_bytes = tf.train.BytesList(value=[img.tobytes()])
            # float() lets numpy scalar types through the protobuf type check
            label_as_float = tf.train.FloatList(value=[float(lbl)])
            feature = {'train/label':  tf.train.Feature(float_list=label_as_float),
                       'train/image':  tf.train.Feature(bytes_list=image_as_bytes)}
            # Create an example protocol buffer
            example = tf.train.Example(features=tf.train.Features(feature=feature))
            # Serialize to string and write to the file
            writer.write(example.SerializeToString())
    finally:
        # Close the writer (flushing its buffer) even if a record failed
        writer.close()
    sys.stdout.flush()
    
def read_tfrecord_to_array(filename, options=None, image_shape=(18, 300, 2)):
    """
    Build the graph ops that read (image, label) records from a TFRecord
    file one at a time, in shuffled order.

    The file is expected to contain tf.train.Example records with a bytes
    feature 'train/image' (raw float32 buffer) and a float feature
    'train/label'.  The file is read for a single epoch.

    Parameters
    ----------
    filename : str
        Path of the TFRecord file to read
    options : tf.python_io.TFRecordOptions, optional
        Reader options (e.g. compression); must match the options the
        file was written with
    image_shape : sequence of int, optional
        Shape (height, width, depth) each decoded image is reshaped to.
        Must match the shape of the images that were written

    Returns
    -------
    images : Tensor
        float32 batch of one image, shape (1,) + image_shape
    labels : Tensor
        float32 batch of one label, shape (1,)

    Notes
    -----
    Only graph ops are created here.  Before the returned tensors can be
    evaluated, the caller has to run the local variables initializer and
    start the queue runners in the session.
    """
    feature = {'train/image': tf.FixedLenFeature([], tf.string),
               'train/label': tf.FixedLenFeature([], tf.float32)}
    # Create a list of filenames and pass it to a queue
    filename_queue = tf.train.string_input_producer([filename], num_epochs=1)
    # Define a reader and read the next record
    reader = tf.TFRecordReader(options=options)
    _, serialized_example = reader.read(filename_queue)
    # Decode the record read by the reader
    features = tf.parse_single_example(serialized_example, features=feature)
    # Convert the image data from raw bytes back to float32 numbers
    image = tf.decode_raw(features['train/image'], tf.float32)
    # The label is already parsed as float32, so no cast is needed
    label = features['train/label']
    # Reshape the flat image data into its original shape
    image = tf.reshape(image, list(image_shape))

    # Any preprocessing here ...

    # Create batches of one record by randomly shuffling the tensors
    images, labels = tf.train.shuffle_batch([image, label],
                                            batch_size=1,
                                            capacity=3,
                                            num_threads=1,
                                            min_after_dequeue=2)
    return images, labels

In [3]:
# Round-trip check: write random images and labels to a gzip-compressed
# TFRecord file, read them back, and verify nothing changed.
# Set the number of samples
n_random_samples = 10
original_images = load_images(n_random_samples)
original_labels = np.random.random(n_random_samples)
compression = tf.python_io.TFRecordCompressionType.GZIP
tf_io_opts = tf.python_io.TFRecordOptions(compression)
# Write the file
write_array_to_tfrecord(original_images, original_labels, "train.tfrecords", tf_io_opts)
# Read the file back, one record per session run
new_images, new_labels = [], []
with tf.Session() as sess:
    # Get the images and labels
    tf_images, tf_labels = read_tfrecord_to_array("train.tfrecords", tf_io_opts)
    # Initialize all global and local variables
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    sess.run(init_op)
    # Create a coordinator and run all QueueRunner objects
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    try:
        for batch_index in range(n_random_samples):
            img, lbl = sess.run([tf_images, tf_labels])
            new_images.append(img)
            new_labels.append(lbl)
    finally:
        # Stop the threads and wait for them, even if a read failed;
        # leaving the `with` block then closes the session
        coord.request_stop()
        coord.join(threads)

# Each read returned a batch of size one; stack them along the batch axis
read_images = np.concatenate(new_images)
read_labels = np.concatenate(new_labels)
# The reader shuffles the records, so they do not come back in the order
# they were written.  Use the labels (stored as float32 in the file) as keys
# to bring both sides into the same order before comparing.
stored_labels = original_labels.astype(np.float32)
original_order = np.argsort(stored_labels)
read_order = np.argsort(read_labels)
# Compare the two sets of records
np.testing.assert_allclose(stored_labels[original_order], read_labels[read_order], rtol=1e-6)
np.testing.assert_allclose(original_images[original_order], read_images[read_order], rtol=1e-7)