In [1]:
import cv2
import numpy as np
import os

In [2]:
IMG_SIZE = 120
NUM_CHANELS = 3
PREPARED_TRAIN_DATA_PATH='/home/vlad/projects/deep-learning-nano/03-kaggle-dogs-vs-cats/train_data.{}-beta.npy'.format(IMG_SIZE)

def get_prepared_input_data(): 
    training_data = []
    
    if os.path.isfile(PREPARED_TRAIN_DATA_PATH):
        training_data = np.load(PREPARED_TRAIN_DATA_PATH)
        return training_data
    else:
        print 'No data found!!'
    
    return training_data
    
training_data = get_prepared_input_data()


In [3]:
import tensorflow as tf

In [4]:
# name: for debugging and tensorgraph 
#
# previous_layer: the tensor from the previous line
#
# filter_size: how many pixels to group and extrac information as a whole
#
# stride: how big the step from each filter operation is in the input
#
# first layer it is the number of outputs from the previous layer)
#
# autodetected input_channels: how many channels the input has (for images, it's iether 1 or 3
def create_conv_layer( name, previous_layer, filter_size, stride, output_features ):
    input_channels = previous_layer.get_shape().as_list()[-1:][0]

    with tf.name_scope(name):
        
        W = tf.Variable(
            tf.truncated_normal([
                filter_size,
                filter_size, 
                input_channels, 
                output_features
            ], stddev=0.1),
            name='{}-w'.format(name)
        )  
        # 5x5 patch, 1 input channel, K output channels
        B = tf.Variable(
            tf.random_normal([output_features])/10,
            name='{}-b'.format(name)
        )

        conv_layer = tf.nn.relu(
            tf.nn.conv2d(
                previous_layer, 
                W, 
                strides=[1, stride, stride, 1], 
                padding='SAME'
            ) + B,
            name='{}-conv-op'.format(name)
        )

        tf.summary.histogram('weights', W)
        tf.summary.histogram('bias', B)
        tf.summary.histogram('k-act', conv_layer)
        
        
        conv_layer_representation = tf.reduce_mean(conv_layer, 3, keep_dims=True)
        conv_layer_representation = tf.slice(conv_layer_representation, [0,0,0,0], [-1,-1,-1,-1])
        tf.summary.image(
            name='{}/filters'.format(name), 
            tensor=conv_layer_representation,
            max_outputs=3
        )
        
        return conv_layer

In [5]:
# name 
# previous_layer
# count : neurons on this layer
# activation: pointer to function or False for output layer
def create_fully_connected_layer( name, previous_layer, count, activation=tf.nn.relu ):
    
    input_size = previous_layer.get_shape().as_list()[1]
    
    with tf.name_scope(name):
        W = tf.Variable(
            tf.truncated_normal([input_size, count], stddev=0.1),
            name='{}-w'.format(name)
        )
        B = tf.Variable(
            tf.random_normal([count])/10,
            name='{}-b'.format(name)
        )
        
        Y = tf.matmul(previous_layer, W) + B
        if ( activation ):
            Y = activation(Y)

        tf.summary.histogram('weights', W)
        tf.summary.histogram('bias', B)
        tf.summary.histogram('k-act', Y)
        
        return Y

In [6]:
## inputs
tf.reset_default_graph()


with tf.name_scope('images'):
    x = tf.placeholder(tf.float32, [None, IMG_SIZE, IMG_SIZE, NUM_CHANELS])
    reshaped_x = tf.reshape(x, [-1, IMG_SIZE, IMG_SIZE, NUM_CHANELS])
    
    
with tf.name_scope('expectation'):
    labels = tf.placeholder(tf.float32, [None, 2])

    
    
with tf.name_scope('dynamic_learning'):
    lr = tf.placeholder(tf.float32)
    tf.summary.scalar('learning_rate', lr)

    
## nn layer configuration
## 120x120x3 -> 60x60x64
conv1 = create_conv_layer('conv_01', reshaped_x, 3, 2, 96 )

conv1alandala = create_conv_layer('conv_01alandala', conv1, 3, 1, 64 )

## 120x120x3 -> 60x60x64
conv1p = create_conv_layer('conv_01p', conv1alandala, 2, 1, 64 )

## 60x60x64 -> 30x30x64
conv2 = create_conv_layer('conv_02', conv1p, 3, 2, 64 )

## 30x30x64 -> 30x30x32
conv3 = create_conv_layer('conv_03', conv2, 2, 1, 32 )

## 30x30x32 -> 15x15x64
conv4 = create_conv_layer('conv_04', conv3, 2, 2, 64 )

## 15x15x32 -> 7x7x32
conv6 = create_conv_layer('conv_06', conv4, 2, 2, 32 )

flatten_conv = tf.contrib.layers.flatten(conv6)

deep1 = create_fully_connected_layer('depp_01', flatten_conv, 1024)

dropout1 = tf.layers.dropout(
    inputs=deep1,
    rate=0.8
)


deep2 = create_fully_connected_layer('depp_01', flatten_conv, 128)

dropout2 = tf.layers.dropout(
    inputs=deep2,
    rate=0.8
)

logits = create_fully_connected_layer('output', dropout2, 2, False)

prediction = tf.nn.softmax(logits, name='prediction')


## training params
    
with tf.name_scope('loss'): 
    cost = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)
    cost = tf.reduce_mean(cost)*100
    tf.summary.scalar('cross_enthropy', cost)
    
    
    
with tf.name_scope('training'):
    optimizer = tf.train.AdamOptimizer(lr).minimize(cost)

    
    
with tf.name_scope('accuracy'):
    is_correct = tf.equal(tf.argmax(prediction,1), tf.argmax(labels,1))
    
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
    tf.summary.scalar('accuracy_value', accuracy)

In [7]:
import math

max_learning_rate = 0.001
min_learning_rate = 0.0005
decay_speed = 2000.0

def get_dynamic_learning_rate(epoch):
    learning_rate = min_learning_rate + (max_learning_rate - min_learning_rate) * math.exp(-epoch/decay_speed)
    return learning_rate

In [8]:
def get_working_training_data(training_data):
    partial_training_data = training_data[:-500]
    
    # the input for the nn will be the image data, which was saved as an array 
    training_image_data = np.array([i[0] for i in partial_training_data]).reshape(-1, IMG_SIZE, IMG_SIZE, NUM_CHANELS)
    # the expected results are saved as the second column in the data file
    training_result_data = [i[1] for i in partial_training_data]
    
    test_data = training_data[-500:]
    
    # the input for the nn will be the image data, which was saved as an array 
    test_image_data = np.array([i[0] for i in test_data]).reshape(-1, IMG_SIZE, IMG_SIZE, NUM_CHANELS)
    # the expected results are saved as the second column in the data file
    test_result_data = [i[1] for i in test_data]
    
    return [
        [training_image_data, training_result_data], 
        [test_image_data, test_result_data]
    ]


def get_batch_training_data(training_image_data, training_result_data, batch, size=100):
    section_start = size * batch
    section_end = section_start + size
    return [
        training_image_data[section_start:section_end],
        training_result_data[section_start:section_end]
    ]
# training_image_count = len(training_image_data)

In [9]:
TPM_TENSORBOARD_PATH='/tmp/kaggle-dog-vs-cat-beta'
MODEL_SAVE_PATH='/home/vlad/projects/deep-learning-nano/03-kaggle-dogs-vs-cats'
SESSION_NAME='conv601'

MODEL_NAME = 'alandala'

MDOEL_SAVE_PATH =  '{}/{}.ckpt'.format(MODEL_SAVE_PATH, MODEL_NAME)

import time
MINUTES = 60.0

def train_network(epochs, learning_rate_fn, training_data):
    
    training_input = get_working_training_data(training_data)
    training_image_data, training_result_data = training_input[0]
    test_image_data, test_result_data = training_input[1]
    
    training_image_count = len(training_image_data)
    
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        
        summary_op = tf.summary.merge_all()
        writer = tf.summary.FileWriter('{}/{}'.format(TPM_TENSORBOARD_PATH, SESSION_NAME))
        writer.add_graph(sess.graph)
        
        saver = tf.train.Saver()
        
        if os.path.isfile(MDOEL_SAVE_PATH):
            saver.restore(sess, MDOEL_SAVE_PATH)
        
        for epoch in range(epochs):
            learning_rate = learning_rate_fn(epoch)
            print('Starting epoch:{} with learning reate: {}'.format(epoch, learning_rate))
            print('Processing {} batches of 100 images'.format(int(training_image_count/100)))
            
            t0 = time.time()
            tinitial = t0
            print 'Started trainig at {}'.format(t0)
            
            total_batches = training_image_count/100
            mid_batch = total_batches/2
            
            for batch in range(int(total_batches)):
                
                data_for_current_batch, expectation_for_current_batch = get_batch_training_data(
                    training_image_data, training_result_data, batch, 100
                )
                
                train_data = {
                    reshaped_x: data_for_current_batch, 
                    labels: expectation_for_current_batch,
                    lr: learning_rate
                }
                
                summary, _ = sess.run([summary_op, optimizer], feed_dict=train_data)
                writer.add_summary(summary, epoch * total_batches + batch)
                
                if batch % 10 == 0:
                    print('Total images processed {}/{}'.format(
                        batch*100, 
                        training_image_count
                    ))
                    
                if batch == mid_batch:
                    print('Testing accuracy at 50% training size in epoch {} is {}'.format(
                        epoch,
                        accuracy.eval({
                            reshaped_x: test_image_data, 
                            labels: test_result_data
                        })
                    ))
                    
                    t1 = time.time()
                    delta = (t1 - t0)
                    print '{} estimated time (in minutes) remaining'.format(
                        (delta * 2 * (epochs - epoch) + delta)/MINUTES
                    )
                    t0 = time.time()
                    
            print('Testing accuracy in epoch {} is {}'.format(
                epoch,
                accuracy.eval({
                    reshaped_x: test_image_data, 
                    labels: test_result_data
                })
            ))
            
            save_path = saver.save(sess, MDOEL_SAVE_PATH)
        
        tfinal = time.time()
        print('Finalised training at {}'.format(tf))
        print('Training took {} minutes', (tfinal-tinitial)/MINUTES)
        print('Testing accuracy with training data subset: {}'.format(
            accuracy.eval({
                reshaped_x: test_image_data, 
                labels: test_result_data
            })
        ))

In [10]:
train_network(100, get_dynamic_learning_rate, training_data)

Starting epoch:0 with learning reate: 0.001
Processing 245 batches of 100 images
Started trainig at 1498130275.35
Total images processed 0/24500
Total images processed 1000/24500
Total images processed 2000/24500
Total images processed 3000/24500
Total images processed 4000/24500
Total images processed 5000/24500
Total images processed 6000/24500
Total images processed 7000/24500
Total images processed 8000/24500
Total images processed 9000/24500
Total images processed 10000/24500
Total images processed 11000/24500
Total images processed 12000/24500
Testing accuracy at 50% training size in epoch 0 is 0.541999995708
1419.1727381 estimated time (in minutes) remaining
Total images processed 13000/24500
Total images processed 14000/24500
Total images processed 15000/24500
Total images processed 16000/24500
Total images processed 17000/24500
Total images processed 18000/24500
Total images processed 19000/24500
Total images processed 20000/24500
Total images processed 21000/24500
Total image

Total images processed 1000/24500
Total images processed 2000/24500
Total images processed 3000/24500
Total images processed 4000/24500
Total images processed 5000/24500
Total images processed 6000/24500
Total images processed 7000/24500
Total images processed 8000/24500
Total images processed 9000/24500
Total images processed 10000/24500
Total images processed 11000/24500
Total images processed 12000/24500
Testing accuracy at 50% training size in epoch 7 is 0.67199999094
1282.06132545 estimated time (in minutes) remaining
Total images processed 13000/24500
Total images processed 14000/24500
Total images processed 15000/24500
Total images processed 16000/24500
Total images processed 17000/24500
Total images processed 18000/24500
Total images processed 19000/24500
Total images processed 20000/24500
Total images processed 21000/24500
Total images processed 22000/24500
Total images processed 23000/24500
Total images processed 24000/24500
Testing accuracy in epoch 7 is 0.663999974728
Start

KeyboardInterrupt: 