In [1]:
import cv2
import numpy as np
import os
from random import shuffle
from tqdm import tqdm

TPM_TENSORBOARD_PATH='/tmp/kaggle-dog-vs-cat'

TRAIN_DIR = '/home/vlad/projects/deep-learning-nano/03-kaggle-dogs-vs-cats/train'
TEST_DIR = '/home/vlad/projects/deep-learning-nano/03-kaggle-dogs-vs-cats/test'

# image hiperparams
IMG_SIZE = 50

PREPARED_TRAIN_DATA_PATH='/home/vlad/projects/deep-learning-nano/03-kaggle-dogs-vs-cats/train_data.{}.npy'.format(IMG_SIZE)
PREPARED_TEST_DATA_PATH='/home/vlad/projects/deep-learning-nano/03-kaggle-dogs-vs-cats/test_data.{}.npy'.format(IMG_SIZE)


NUM_CHANELS = 1 # how many values per pixel (R,G,B) = 3or 0-1 from black to white = 1.

MODEL_NAME = 'dog-clasifier-alpha.model'

In [2]:
# we check the name of the image to decide type of image (dog or cat)
# dog.01png. cat.01.png
def label_img(img):
    word_label = img.split('.')[0]
    if word_label == 'cat': return [1, 0]
    else: return [0, 1]


def read_grayscale_img(img_path):
    inputiamge = cv2.imread(img_path)
    gray_image = cv2.cvtColor(inputiamge, cv2.COLOR_BGR2GRAY)
    img = cv2.resize(
        gray_image,
        (IMG_SIZE, IMG_SIZE)
    )
    return img
    
# we are reading and preparing the data from the training directory
def read_and_prepare_input_images():
    training_data = []
    for img in tqdm(os.listdir(TRAIN_DIR)):
        label = label_img(img)
        path = os.path.join(TRAIN_DIR, img)
        
        img = read_grayscale_img(path)
        
        training_data.append([
            np.array(img),
            np.array(label)
        ])
        
    return training_data



def get_prepared_input_data(): 
    
    if os.path.isfile(PREPARED_TRAIN_DATA_PATH):
        training_data = np.load(PREPARED_TRAIN_DATA_PATH)
        return training_data
    
    training_data = read_and_prepare_input_images()
    shuffle(training_data)
    np.save(PREPARED_TRAIN_DATA_PATH, training_data)
    
    return training_data

In [3]:
# we are preparing the test
def read_and_prepare_test_images():
    testing_data = []
    for img in tqdm(os.listdir(TEST_DIR)):
        path = os.path.join(TEST_DIR, img)
        img_num = img.split('.')[0]
        img = read_grayscale_img(path)
        testing_data.append([np.array(img), img_num])
    
    return testing_data
    
    
def get_prepared_test_data():
    
    if os.path.isfile(PREPARED_TEST_DATA_PATH):
        testing_data = np.load(PREPARED_TEST_DATA_PATH)
        return testing_data
    
    testing_data = read_and_prepare_test_images()
    np.save(PREPARED_TEST_DATA_PATH, testing_data)
    return testing_data

In [4]:
# get data, either from source, or from parsed source if that is cached and available
training_data = get_prepared_input_data()
test_data = get_prepared_test_data()

In [5]:
import tensorflow as tf

DEFAULT_STRIDES = (1,1)
# inspited from http://blog.bitfusion.io/2017/03/30/intro-to-tensorboard
def configure_conv2_with_summary(net, name, filters, kernel_size, activation=tf.nn.relu, strides=DEFAULT_STRIDES):
    with tf.name_scope(name):
        
        net = tf.layers.conv2d(
            inputs=net,
            name=name,
            strides=strides,
            padding='same',
            filters=filters,
            kernel_size=kernel_size,
            activation=activation
        )
        
        layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, name)
        
        tf.summary.histogram('weights', layer_vars[0])
        tf.summary.histogram('bias', layer_vars[1])
        tf.summary.histogram('act', net)
        
        with tf.name_scope('{}/filters'.format(name)):            
            net_transposed = tf.reduce_max(net, 3, keep_dims=True)
            net_transposed = tf.slice(net_transposed, [0,0,0,0], [-1,-1,-1,-1])

            # this will display random 3 filters from the 64 in conv1
            tf.summary.image(
                name='{}/filters'.format(name), 
                tensor=net_transposed,
                max_outputs=3
            )
    
    return net

def configure_dense_layer(net, name, units, activation=tf.nn.relu):
    with tf.name_scope(name):
        net = tf.layers.dense(
            inputs=net,
            name=name,
            units=units,
            activation=tf.nn.relu
        )

        layer_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, name)

        tf.summary.histogram('weights', layer_vars[0])
        tf.summary.histogram('bias', layer_vars[1])
        tf.summary.histogram('act', net)

    return net

In [6]:


# helper for jupyter rerun of same code chunkss
tf.reset_default_graph()

# input images
with tf.name_scope('input'):

    # we input a set of images (None) as an array of IMG_SIZE*IMG_SIZE values
    image_input = tf.placeholder(tf.float32, shape=[None, IMG_SIZE * IMG_SIZE], name='input_images')

    # we will provide images in 2D format to the convolutional layer so they extract 
    # spatial patterns and learn about relation between neighbour pixels
    x_image = tf.reshape(image_input, [-1, IMG_SIZE, IMG_SIZE, NUM_CHANELS])

    #tf.summary.image('image_input', [-1, IMG_SIZE, IMG_SIZE, NUM_CHANELS])

with tf.name_scope('expected_output'):
    # dog or cat? this is the expected value
    y_expected = tf.placeholder(tf.float32, shape=[None, 2], name='y_expected_value')

In [7]:
# layers configuration (tf.layers) for the NN


net = x_image

# input is 28x28, we take patches of 3x3 and extract lines if possible
net = configure_conv2_with_summary(net, 'conv1', 32, 3, strides=(2,2))

net = tf.layers.max_pooling2d(inputs=net, pool_size=2, strides=1)
net = configure_conv2_with_summary(net, 'conv2', 64, 3)

net = tf.layers.max_pooling2d(inputs=net, pool_size=2, strides=1)
net = configure_conv2_with_summary(net, 'conv3', 64, 3)


# prepare output for fully connected layers
net = tf.contrib.layers.flatten(net)


net = configure_dense_layer(
    net,
    'dense1',
    512
)

net = tf.layers.dropout(
    inputs=net,
    rate=0.50
)

net = configure_dense_layer(
    net,
    'output',
    2,
    None
)

logits = net

with tf.name_scope('prediction'):
    # predictions output
    y_pred = tf.nn.softmax(logits=logits)

In [8]:
import math

#learning rate input
lr = tf.placeholder(tf.float32, name='lr')

tf.summary.scalar('learning_rate', lr)

max_learning_rate = 0.001
min_learning_rate = 0.0005
decay_speed = 200.0

# learning rate decay
def get_learning_rate(i):
    return 0.0005
    learning_rate = min_learning_rate + (max_learning_rate - min_learning_rate) * math.exp(-i/decay_speed)
    return learning_rate

In [9]:
# cost/loss function

with tf.name_scope('loss'):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        labels=y_expected,
        logits=logits
    )

    loss = tf.reduce_mean(cross_entropy)

    tf.summary.scalar('cross_entropy', loss)

with tf.name_scope('train'):
    optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss)

In [10]:
def prep_accuracy():
    with tf.name_scope('accuracy'):
        # prepare accuracy for logging
        is_correct = tf.equal(tf.argmax(y_pred,1), tf.argmax(y_expected,1))
        accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
        tf.summary.scalar('accuracy_value', accuracy)
        
    return accuracy

In [11]:
def train_network(epochs, learning_rate_fn, training_data, session_name):
    # we get 500 test images out of the data set to 
    # test our predictions. Note that the 'test' data 
    # outside this function is not labeled. That is like 
    # an 'exam' data. We don't know the good answers.
    #
    # we take 500 out of the labeld images so we can test
    # ourselves and our accuracy
    
    
    partial_training_data = training_data[:-500]
    
    # the input for the nn will be the image data, which was saved as an array 
    training_image_data = np.array([i[0] for i in partial_training_data]).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
    # the expected results are saved as the second column in the data file
    training_result_data = [i[1] for i in partial_training_data]
    
    
    
    
    test_data = training_data[-500:]
    
    # the input for the nn will be the image data, which was saved as an array 
    test_image_data = np.array([i[0] for i in test_data]).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
    # the expected results are saved as the second column in the data file
    test_result_data = [i[1] for i in test_data]
    
    
    training_image_count = len(training_image_data)
    
    
    accuracy = prep_accuracy()
    with tf.Session() as sess:
        
        
        tf.global_variables_initializer().run()
        
        summary_op = tf.summary.merge_all()
        writer = tf.summary.FileWriter('{}/{}'.format(TPM_TENSORBOARD_PATH, session_name))
        writer.add_graph(sess.graph)
        
        for epoch in range(epochs):
            learning_rate = learning_rate_fn(epoch)
            print('Starting epoch:{} with learning reate: {}'.format(epoch, learning_rate))
            print('Processing {} batches of 100 images'.format(int(training_image_count/100)))
            
            for i in range(int(training_image_count/100)):
                data_for_current_batch = training_image_data[i*100:i*100+100]
                expectation_for_current_batch = training_result_data[i*100:i*100+100]
                
                train_data = {
                    x_image: data_for_current_batch, 
                    y_expected: expectation_for_current_batch,
                    lr: learning_rate
                }
                
                summary, acc = sess.run([summary_op, optimizer], feed_dict=train_data)
                writer.add_summary(summary, epoch * training_image_count / 100 + i)
                
                if i % 25 == 0:
                    print('Total images processed {}/{}'.format(i*100, training_image_count))
                    print(
                        'Testing accuracy in epoch {} is {}'.format(
                            epoch,
                            accuracy.eval({
                                x_image: test_image_data, 
                                y_expected: test_result_data
                            })
                        )
                    )
                

            # get the accuracy after training
            print(
                'Testing accuracy after epoch {} is {}'.format(
                    epoch,
                    accuracy.eval({
                        x_image: test_image_data, 
                        y_expected: test_result_data
                    })
                ))

In [12]:
SESSION_NAME='dog05'
#print(training_data)
train_network(10, get_learning_rate, training_data, SESSION_NAME)

Starting epoch:0 with learning reate: 0.0005
Processing 245 batches of 100 images
Total images processed 0/24500
Testing accuracy in epoch 0 is 0.509999990463
Total images processed 2500/24500
Testing accuracy in epoch 0 is 0.490000009537
Total images processed 5000/24500
Testing accuracy in epoch 0 is 0.490000009537
Total images processed 7500/24500
Testing accuracy in epoch 0 is 0.490000009537
Total images processed 10000/24500
Testing accuracy in epoch 0 is 0.490000009537
Total images processed 12500/24500
Testing accuracy in epoch 0 is 0.490000009537
Total images processed 15000/24500
Testing accuracy in epoch 0 is 0.490000009537
Total images processed 17500/24500
Testing accuracy in epoch 0 is 0.490000009537
Total images processed 20000/24500
Testing accuracy in epoch 0 is 0.490000009537
Total images processed 22500/24500
Testing accuracy in epoch 0 is 0.490000009537
Testing accuracy after epoch 0 is 0.490000009537
Starting epoch:1 with learning reate: 0.0005
Processing 245 batche

KeyboardInterrupt: 