In [1]:
import numpy as np
import tensorflow as tf

from tensorflow.contrib.data import Dataset, Iterator

import glob
import sys


In [56]:


def save_predictions(filename, y):
    """np.random.permutation
    Dumps y into .npy file
    """
    np.save(filename, y)
    

In [83]:
#Auxiliary functions for loading data from a folder and obtaining the labels from the file names

def get_label_mapping(label_file):
    """
    Returns mappings of label to index and index to label
    The input file has list of labels, each on a separate line.
    """
    with open(label_file, 'r') as f:
        id2label = f.readlines()
        id2label = [l.strip() for l in id2label]
    label2id = {}
    count = 0
    
    for label in id2label:
        label2id[label] = count
        count += 1
        
    return id2label, label2id


def get_label(filepath, label2id):
    """
    Files are assumed to be labeled as: /path/to/file/999_frog.png
    Returns label for a filepath
    """
    tokens = filepath.split('/')
    label = tokens[-1].split('_')[1][:-4]
    
    if label in label2id:
        return label2id[label]
    else:
        sys.exit("Invalid label: " + label)
        
        
def get_labels(files, label2id):
    """
    Returns vector of labels extracted from filenames of all files in folder
    :param folder: path to data folder
    :param label2id: mapping of text labels to numeric ids. (Eg: automobile -> 0)
    """
    y = []
    
    for f in files:
        y.append(get_label(f, label2id))
        
    return np.array(y)


def input_parser(img_path, label):
    """
    Returns an image file converted into a 3D array and its one hot encoded label
    """
    one_hot = tf.one_hot(label, num_classes)
    img_file = tf.read_file(img_path)
    img_decoded = tf.image.decode_image(img_file, channels=3)
    
    return img_decoded, one_hot



In [89]:
def load_data(root_folder, data_folder, labels, shuffle=False):
    """
    Returns the data split into separate training and validation sets
    :param root_folder: name of root folder containing the dataset
    :param data_folder: name of the subfolder within the root folder that contains the training or test dataset
    :param labels: name of the text file that contains the mapping of class names to integers
    :param shuffle: whether or not to shuffle the dataset before performing the training/validation split
    """
    data_location = root_folder + data_folder
    label_map = get_label_mapping(root_folder + labels)[1]
    data = [file for file in glob.glob(data_location + '*/*')]
    labels = get_labels(data, label_map)

    tmp = np.vstack((np.asarray(data), np.asarray(labels)))
    tmp = tmp.T

    if shuffle == True:
        np.random.shuffle(tmp)

#Perform a 90/10 training/validation split on the dataset
        
    split_point = -int(0.1*tmp.shape[0])
    X_train = tmp[:split_point, 0]
    y_train = tmp[:split_point, 1]
    X_val = tmp[split_point:, 0]
    y_val = tmp[split_point:, 1]
    
    return X_train, y_train.astype(int), X_val, y_val.astype(int)


In [118]:
def load_to_model(X_train, y_train, X_val, y_val, num_classes, batch_size=100):
    num_classes = num_classes
    
    tf_train = tf.contrib.data.Dataset.from_tensor_slices((tf.constant(X_train),
                                                         tf.constant(y_train)))
    tf_train = tf_train.map(input_parser, num_threads=4)
    tf_val = tf.contrib.data.Dataset.from_tensor_slices((tf.constant(X_val),
                                                         tf.constant(y_val)))
    tf_val = tf_val.map(input_parser, num_threads=4)

    tf_train = tf_train.batch(batch_size)
    tf_val = tf_val.batch(batch_size)
    iterator = Iterator.from_structure(tf_train.output_types,
                                       tf_train.output_shapes)

    next_batch = iterator.get_next()
    training_init_op = iterator.make_initializer(tf_train)
    validation_init_op = iterator.make_initializer(tf_val)

    with tf.Session() as sess:
        sess.run(training_init_op)

        while True:
            try:
                elem = sess.run(next_batch)

            except tf.errors.OutOfRangeError:
                break
                

In [112]:
root_folder  = 'cifar10-hw1/'
train_folder = 'train'
labels = 'labels.txt'
num_classes = len(get_label_mapping(root_folder + labels)[0])

X_train, y_train, X_val, y_val = load_data('cifar10-hw1/', 'train', 'labels.txt', True)
print(X_train.shape, y_train.shape, X_val.shape, y_val.shape)


(45000,) (45000,) (5000,) (5000,)


In [119]:
load_to_model(X_train, y_train, X_val, y_val, num_classes)


(TensorShape(None), TensorShape([Dimension(None), Dimension(10)]))
(TensorShape(None), TensorShape([Dimension(None), Dimension(10)]))


In [36]:
def part1_model(features, labels, mode):
    input_layer = tf.reshape(features['x'], [-1, 32, 32, 3])
    conv1 = tf.layers.conv2d(inputs=input_layer,
                                 filters=32,
                                 kernel_size=[5, 5],
                                 padding='same',
                                 activation=tf.nn.relu)
    
    pool1 = tf.layers.max_pooling2d(inputs=conv1,
                                   pool_size=[2, 2],
                                   strides=2)
    
    conv2 = tf.layers.conv2d(inputs=pool1,
                            filters=64,
                            kernel_size=[5, 5],
                            padding='same',
                            activation=tf.nn.relu)
    
    pool2 = tf.layers.max_pooling2d(inputs=conv2,
                                   pool_size=[2, 2],
                                   strides=2)
    
    pool2_flat = tf.reshape(pool2, [-1, 8*8*64])
    
    dense = tf.layers.dense(input=pool2,
                            units=1024,
                            activation=tf.nn.relu)
    
    output_layer = tf.layers.dense(input=dense, units=10)
    
    predictions = {'classes': tf.argmax(input=output_layer, axis=1),
                   'prob': tf.nn.softmax(output_layer, name='softmax')}
    
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    
    OH_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
    loss = tf.losses.softmax_cross_entropy(onehot_labels=OH_labels,
                                           logits=output_layer)
    
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = GradientDescentOptimizer(learning_rate=.0001)
        train_op = optimizer.minimize(loss=loss,
                                      global_step=tf.train.get_global_step())
        
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op)
    
    eval_metric_ops = {'accuracy': tf.metrics.accuracy(labels=labels,
                                                       predictions=predictions['classes'])}
    
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=losss,
                                      eval_metric_ops=eval_metric_ops)


In [47]:
def main(unused_argv):
    cifar_classify = tf.estimator.Estimator(model_fn=part1_model,
                                            model_dir='/tmp/part1')

    tensors_log = {'probabilities': 'softmax'}
    hook = tf.train.LoggingTensorHook(tensors=tensors_log, every_n_iter=50)

    train_input_fn = tf.estimator.inputs.numpy_input_fn(x={'x': x_tr},
                                                        y=y_tr,
                                                        batch_size=100,
                                                        num_epochs=None,
                                                        shuffle=True)
    
    cifar_classify.train(input_fn=train_input_fn,
                         steps=20000,
                         hooks=[hook]
                        )
    

In [48]:
main(1)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_session_config': None, '_save_checkpoints_secs': 600, '_model_dir': '/tmp/part1', '_log_step_count_steps': 100, '_save_checkpoints_steps': None, '_save_summary_steps': 100, '_tf_random_seed': 1}


AttributeError: 'list' object has no attribute 'shape'