In [1]:
import tensorflow as tf
import numpy as np
import os
import sys

# Set the TensorFlow C++ log level to suppress log messages.
# NOTE(review): tensorflow has already been imported above; this variable is
# usually recommended to be set *before* `import tensorflow` -- confirm it
# still takes effect here.
# NOTE(review): '3' is commonly documented as also filtering ERROR messages;
# '2' would keep errors visible, which is what "unless an error" suggests --
# confirm which one is intended.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Important version information
print("Python: {}".format(sys.version_info[:]))
print('TensorFlow: {}'.format(tf.__version__))

# Seed shared by every random number generator in this notebook
SEED = 42

# Check if using GPU.
# Query the device name once and reuse it, rather than asking TensorFlow for
# the same information twice (once for the test, once for the message).
gpu_device_name = tf.test.gpu_device_name()
if not gpu_device_name:
    print('No GPU')
else:
    print('Default GPU Device: {}'.format(gpu_device_name))

# helper to create dirs if they don't already exist
def maybe_create_dir(dir_path):
    """Create `dir_path` (with any missing parents) unless it already exists.

    Prints a one-line message saying whether the path was created or was
    already present. Returns None.
    """
    if os.path.exists(dir_path):
        print("{} already exists".format(dir_path))
        return

    os.makedirs(dir_path)
    print("{} created".format(dir_path))
    
# Helper to make the output consistent
def reset_graph(seed=SEED):
    """Reset the default TensorFlow graph and re-seed TensorFlow and NumPy.

    Args:
        seed: value passed to both `tf.set_random_seed` and `np.random.seed`;
            defaults to the notebook-wide SEED.
    """
    # NumPy's global generator does not depend on the TensorFlow graph.
    np.random.seed(seed)

    # Seed TensorFlow only after the reset, keeping the original call order.
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    
# Start from a fresh default graph with TensorFlow and NumPy seeded with SEED.
reset_graph()

Python: (3, 6, 7, 'final', 0)
TensorFlow: 1.12.0
Default GPU Device: /device:GPU:0


In [2]:
# `../data/multi` will (hopefully) contain our .tfrecords files
# by the end of this notebook
FINAL_DIR = "../data/multi"
maybe_create_dir(FINAL_DIR)

../data/muli created


In [3]:
ROOT_DIR = "./raw/numpy_final/"

# Show the regular files that sit directly in ROOT_DIR (no recursion), so the
# listing reflects exactly the directory the arrays are loaded from.
files = sorted(
    entry for entry in os.listdir(ROOT_DIR)
    if os.path.isfile(os.path.join(ROOT_DIR, entry))
)
for filename in files:
    print(filename)

# Load every split by its explicit file name rather than by its position in
# the sorted listing: a stray extra file or a nested directory in ROOT_DIR
# would otherwise shift the indices and silently swap images with masks or
# one split with another.
X_test = np.load(os.path.join(ROOT_DIR, "test.npy"))
y_test = np.load(os.path.join(ROOT_DIR, "test_masks.npy"))

X_train = np.load(os.path.join(ROOT_DIR, "train.npy"))
y_train = np.load(os.path.join(ROOT_DIR, "train_masks.npy"))

X_val = np.load(os.path.join(ROOT_DIR, "validation.npy"))
y_val = np.load(os.path.join(ROOT_DIR, "validation_masks.npy"))

test.npy
test_masks.npy
train.npy
train_masks.npy
validation.npy
validation_masks.npy


In [4]:
def _bytes_feature(value):
    """Wrap one bytes value in a `tf.train.Feature` holding a single-item `BytesList`."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)

In [5]:
def numpy_to_tfrecords(features, lables, setType):
    """Write paired arrays to `<FINAL_DIR>/<setType>.tfrecords`.

    Each (features[i], lables[i]) pair becomes one `tf.train.Example` with two
    bytes features, stored under the keys '/image' and '/label'. Only the raw
    array bytes are stored; shape and dtype are not recorded, so a reader has
    to know them in order to decode the records.

    Args:
        features: sequence of NumPy arrays (one per example).
        lables: sequence of NumPy arrays, same length as `features`.
        setType: name of the split; used as the output file stem and in the
            progress messages.

    Raises:
        AssertionError: if `features` and `lables` differ in length.
    """
    assert len(features) == len(lables), "features & labels are not equal in len"
    tfrecords_file_name = str(setType) + '.tfrecords'
    tfrecords_path = os.path.join(FINAL_DIR, tfrecords_file_name)

    # The context manager closes the writer even when serialization fails
    # part-way through, so the file handle is never leaked.
    with tf.python_io.TFRecordWriter(tfrecords_path) as writer:
        for i, (img, label) in enumerate(zip(features, lables)):
            # tobytes() is the non-deprecated spelling of tostring() and
            # already returns bytes, so no extra conversion is needed.
            feature = {'/image': _bytes_feature(img.tobytes()),
                       '/label': _bytes_feature(label.tobytes())}

            # create example protocol buffer
            example = tf.train.Example(features=tf.train.Features(feature=feature))
            writer.write(example.SerializeToString())

            # Report progress every fifth example.
            if i % 5 == 0:
                print("{} {} written".format(i, setType))

    sys.stdout.flush()
    print("done")

In [6]:
# Serialize the validation split to FINAL_DIR/validation.tfrecords
numpy_to_tfrecords(X_val, y_val, "validation")

0 validation written
5 validation written
10 validation written
done


In [7]:
# Serialize the training split to FINAL_DIR/train.tfrecords
numpy_to_tfrecords(X_train, y_train, "train")

0 train written
5 train written
10 train written
15 train written
20 train written
25 train written
30 train written
35 train written
40 train written
45 train written
done


In [8]:
# Serialize the test split to FINAL_DIR/test.tfrecords
numpy_to_tfrecords(X_test, y_test, "test")

0 test written
5 test written
10 test written
done
