In [1]:
# NOTE: this is a custom cell that contains the common imports I personally
# use; these may or may not be necessary for the following examples

# DL framework
import tensorflow as tf

from datetime import datetime

# common packages
import numpy as np
import os # handling file i/o
import sys
import math
import time # timing epochs

# for ordered dict when building layer components
import collections

# plotting pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import pyplot
from matplotlib import colors # making colors consistent
from mpl_toolkits.axes_grid1 import make_axes_locatable # colorbar helper

# read image
### from imageio import imread
# + data augmentation
from scipy import ndimage
from scipy import misc

# used for manually saving best params
import pickle

# for shuffling data batches
from sklearn.utils import shuffle

# const
SEED = 42

# Helper to make the output consistent
def reset_graph(seed=SEED):
    """Reset the default TensorFlow graph and re-seed TensorFlow and NumPy.

    Args:
        seed: value handed to both `tf.set_random_seed` and `np.random.seed`.
            Defaults to the module-level `SEED`.
    """
    # reset first, then seed (TensorFlow before NumPy)
    tf.reset_default_graph()
    for apply_seed in (tf.set_random_seed, np.random.seed):
        apply_seed(seed)

# helper to create dirs if they don't already exist
def maybe_create_dir(dir_path):
    """Create `dir_path` (including parents) unless it is already a directory.

    Prints "<dir_path> created" or "<dir_path> already exists" accordingly.

    Args:
        dir_path: path of the directory to ensure.

    Raises:
        FileExistsError: if `dir_path` exists but is not a directory (e.g. a
            regular file), instead of wrongly reporting that it already exists.
    """
    # isdir (not exists): a plain file at this path must not count as success
    if os.path.isdir(dir_path):
        print("{} already exists".format(dir_path))
        return

    # exist_ok=True tolerates another process creating the directory between
    # the check above and this call; it still raises if the path is a file
    os.makedirs(dir_path, exist_ok=True)
    print("{} created".format(dir_path))
    
# set tf log level to suppress messages, unless an error
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Important Version information
print("Python: {}".format(sys.version_info[:]))
print('TensorFlow: {}'.format(tf.__version__))

# Check if using GPU: query the device name once and reuse it, so the check
# and the printed value come from the same call
gpu_device_name = tf.test.gpu_device_name()
if gpu_device_name:
    print('Default GPU Device: {}'.format(gpu_device_name))
else:
    print('No GPU')

# start the notebook from a clean, seeded graph
reset_graph()

Python: (3, 6, 5, 'final', 0)
TensorFlow: 1.8.0
Default GPU Device: /device:GPU:0


In [2]:

# `/record_holder` will (hopefully) contain our tf_records files
# by the end of this notebook
FINAL_DIR = "./data/record_holder/150/"
maybe_create_dir(FINAL_DIR)

./data/record_holder/150/ created


In [3]:
ROOT_DIR = "./data/numpy/150/"

# Show the files that sit directly in ROOT_DIR (sorted for stable output)
for filename in sorted(os.listdir(ROOT_DIR)):
    if os.path.isfile(os.path.join(ROOT_DIR, filename)):
        print(filename)

# Load every split by its explicit file name instead of by position in a
# directory listing: a stray extra file (or a sub-directory) would otherwise
# shift the indices and silently bind the wrong array to a variable.
X_test = np.load(os.path.join(ROOT_DIR, "X_test.npy"))
X_train = np.load(os.path.join(ROOT_DIR, "X_train.npy"))
X_val = np.load(os.path.join(ROOT_DIR, "X_val.npy"))
y_test = np.load(os.path.join(ROOT_DIR, "y_test.npy"))
y_train = np.load(os.path.join(ROOT_DIR, "y_train.npy"))
y_val = np.load(os.path.join(ROOT_DIR, "y_val.npy"))

X_test.npy
X_train.npy
X_val.npy
y_test.npy
y_train.npy
y_val.npy


In [5]:
def _int64_feature(value):
    """Wrap a single `value` in a `tf.train.Feature` backed by an `Int64List`."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)

def _bytes_feature(value):
    """Wrap a single `value` in a `tf.train.Feature` backed by a `BytesList`."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)

In [6]:
def numpy_to_tfrecords(features, lables, setType):
    """Write paired examples to `<FINAL_DIR>/<setType>.tfrecords`.

    Each (feature, label) pair is stored as one `tf.train.Example` with two
    keys: `<setType>/label` (int64) and `<setType>/image` (the array's raw
    bytes). Progress is printed every 250 examples, then "done".

    Args:
        features: indexable sequence of NumPy arrays; each item is serialized
            with `.tobytes()`, which stores neither shape nor dtype, so the
            reader has to know them.
        lables: indexable sequence of integer labels, one per feature. (The
            misspelled parameter name is kept for backward compatibility.)
        setType: split name such as "train"; used for the output file name
            and as the prefix of the feature keys.

    Raises:
        ValueError: if `features` and `lables` have different lengths.
    """
    # Validate before touching the filesystem so a mismatch does not leave a
    # partially written (or empty) record file behind.
    num_examples = len(features)
    if num_examples != len(lables):
        raise ValueError(
            "features and lables must have the same length, got {} and {}".format(
                num_examples, len(lables)))

    tfrecords_file_name = str(setType) + '.tfrecords'
    labelName = str(setType) + '/label'
    featureName = str(setType) + '/image'

    writer = tf.python_io.TFRecordWriter(os.path.join(FINAL_DIR, tfrecords_file_name))
    try:
        for i in range(num_examples):
            label = lables[i]
            img = features[i]

            # create features; tobytes() replaces the deprecated tostring() alias
            feature = {labelName: _int64_feature(label),
                       featureName: _bytes_feature(tf.compat.as_bytes(img.tobytes()))}

            # create example protocol buffer
            example = tf.train.Example(features=tf.train.Features(feature=feature))

            writer.write(example.SerializeToString())

            if i % 250 == 0:
                print("{} {} written".format(i, setType))
    finally:
        # always release the file handle, even if serialization fails midway
        writer.close()

    sys.stdout.flush()
    print("done")

In [7]:
numpy_to_tfrecords(X_test, y_test, "test")

0 test written
250 test written
500 test written
750 test written
1000 test written
1250 test written
1500 test written
1750 test written
2000 test written
2250 test written
2500 test written
2750 test written
3000 test written
3250 test written
3500 test written
3750 test written
4000 test written
4250 test written
4500 test written
4750 test written
done


In [8]:
numpy_to_tfrecords(X_val, y_val, "val")

0 val written
250 val written
500 val written
750 val written
1000 val written
1250 val written
1500 val written
1750 val written
2000 val written
2250 val written
2500 val written
2750 val written
3000 val written
3250 val written
3500 val written
3750 val written
done


In [9]:
numpy_to_tfrecords(X_train, y_train, "train")

0 train written
250 train written
500 train written
750 train written
1000 train written
1250 train written
1500 train written
1750 train written
2000 train written
2250 train written
2500 train written
2750 train written
3000 train written
3250 train written
3500 train written
3750 train written
4000 train written
4250 train written
4500 train written
4750 train written
5000 train written
5250 train written
5500 train written
5750 train written
6000 train written
6250 train written
6500 train written
6750 train written
7000 train written
7250 train written
7500 train written
7750 train written
8000 train written
8250 train written
8500 train written
8750 train written
9000 train written
9250 train written
9500 train written
9750 train written
10000 train written
10250 train written
10500 train written
10750 train written
11000 train written
11250 train written
11500 train written
11750 train written
12000 train written
12250 train written
12500 train written
12750 train written
13000 