In [None]:
# data processing
import pandas as pd
import numpy as np 

# read path
import os 

# image 
import cv2

# time 
import datetime as dt

# urllib
import urllib

# Parallel processing
import multiprocessing
from multiprocessing import Pool

# clean garbage
import gc

# modeling sklearn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import confusion_matrix

# Tensorflow
import tensorflow as tf
from tensorflow.python.framework import ops
ops.reset_default_graph()

In [None]:
# Both image folders are looked up directly under the current working directory.
current_path = os.getcwd()
bad_images_path, good_images_path = (
    os.path.join(current_path, folder_name)
    for folder_name in ('bad_images', 'good_images')
)


In [None]:
# os.listdir returns entries in arbitrary, platform-dependent order. Sorting them
# keeps the sample order stable, so the seeded train/test split made later in the
# notebook gives the same partition on every run and machine.
bad_image_list = [
    os.path.join(bad_images_path, image_file)
    for image_file in sorted(os.listdir(bad_images_path))
]

good_image_list = [
    os.path.join(good_images_path, image_file)
    for image_file in sorted(os.listdir(good_images_path))
]

In [None]:
# Size (width, height) and channel count that every image is resized/reshaped to.
img_width, img_height, img_channel = 20, 30, 3

In [None]:
def get_img_array(img_path):
    """Load one image and return it with a leading batch axis of size 1.

    ``img_path`` may be a local file path or a URL. The image is decoded
    unchanged (no colour conversion), rotated by 90 degrees when it is wider
    than it is tall, and resized to ``img_width`` x ``img_height``.

    Returns:
        A NumPy array of shape ``(1, img_height, img_width, img_channel)``, or
        the integer ``0`` when the image cannot be read, decoded, or reshaped
        to ``img_channel`` channels. Callers test the result type to skip such
        files, so the ``0`` sentinel is kept.
    """
    try:
        if os.path.isfile(img_path):
            # Local files are read directly: on Python 3 urlopen rejects plain paths.
            with open(img_path, 'rb') as image_file:
                raw_bytes = image_file.read()
        else:
            try:
                from urllib.request import urlopen  # Python 3
            except ImportError:
                from urllib import urlopen  # Python 2
            response = urlopen(img_path)
            try:
                raw_bytes = response.read()
            finally:
                response.close()

        arr = np.asarray(bytearray(raw_bytes), dtype=np.uint8)
        img = cv2.imdecode(arr, -1)  # -1: load the image as it is
        if img is None:
            # imdecode reports undecodable data by returning None.
            return 0

        if img.shape[1] > img.shape[0]:  # rotate if landscape
            img = np.rot90(img)
        # cv2.resize takes the target size as (width, height).
        img = cv2.resize(img, (img_width, img_height))

        # Fails (and is reported as 0 below) for images that do not have
        # exactly img_channel channels, e.g. grayscale or RGBA files.
        return np.reshape(img, (-1, img_height, img_width, img_channel))
    except Exception:
        # Best-effort loader: any read/decode/shape problem marks the file as
        # unusable, while KeyboardInterrupt/SystemExit still propagate.
        return 0

In [None]:
# Decode every listed file. get_img_array signals failure with a non-array value,
# so only genuine ndarrays are collected.
bad_image_array = []
good_image_array = []

for img_path in bad_image_list:
    img_array = get_img_array(img_path)
    if type(img_array) is not np.ndarray:
        continue  # unreadable or unusable file
    bad_image_array.append(img_array)

for img_path in good_image_list:
    img_array = get_img_array(img_path)
    if type(img_array) is not np.ndarray:
        continue  # unreadable or unusable file
    good_image_array.append(img_array)

In [None]:
# The path lists are not needed once the images have been decoded.
del bad_image_list
del good_image_list

In [None]:
# Copy the per-image (1, H, W, C) arrays into one pre-allocated integer array per class.
bad_image_data = np.empty(
    (len(bad_image_array), img_height, img_width, img_channel), dtype=int)
for ix, img_batch in enumerate(bad_image_array):
    bad_image_data[ix, :, :, :] = img_batch[0]

good_image_data = np.empty(
    (len(good_image_array), img_height, img_width, img_channel), dtype=int)
for ix, img_batch in enumerate(good_image_array):
    good_image_data[ix, :, :, :] = img_batch[0]

In [None]:
# The per-image lists have been copied into the stacked arrays; release them.
del bad_image_array
del good_image_array

In [None]:
# Bad images first, then good images, joined along the sample axis.
image_data = np.concatenate((bad_image_data, good_image_data), axis=0)

In [None]:
# Class ids: 0 = bad image, 1 = good image (same order as image_data).
bad_image_label = [0] * len(bad_image_data)
good_image_label = [1] * len(good_image_data)

all_label = bad_image_label + good_image_label

# One-hot encode with a fixed width of two classes. Deriving the width from the
# labels actually present breaks when one folder yields no images: with only
# "good" images the matrix would be one column wide and index 1 out of range.
n_label_classes = 2
image_label = np.eye(n_label_classes)[np.asarray(all_label, dtype=int)]

In [None]:
# Sanity check: (number of samples, number of label columns) of the one-hot matrix.
image_label.shape

In [None]:
# Hold out 10% of the samples for testing, stratified on the labels and with a
# fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    image_data,
    image_label,
    test_size=0.1,
    stratify=image_label,
    random_state=210,
)

# CNN

In [None]:
# Model parameters

# Input geometry, taken from the preprocessing constants.
image_height, image_width, num_channels = img_height, img_width, img_channel  # 30, 20, 3

# Output size and training schedule.
num_targets = 2
generations = 2000
eval_every = 500
batch_size = 1024

# Widths of the two hidden fully connected layers.
layer_hidden_1, layer_hidden_2 = 384, 192

# Exponential learning-rate decay settings.
learning_rate = 0.1
lr_decay = 0.1
num_gens_to_wait = 25.

In [None]:
# Convolutional layers: feature maps, kernel size, stride, padding.
conv1_fmaps, conv1_ksize, conv1_stride, conv1_pad = 32, 3, 1, "SAME"
conv2_fmaps, conv2_ksize, conv2_stride, conv2_pad = 64, 3, 2, "SAME"

# The pooling layer keeps the channel count of the second convolution.
pool2_fmaps = conv2_fmaps

# Fully connected layer width and number of output units.
n_fc1 = 64
n_outputs = num_targets

In [None]:
# Sanity check: (samples, img_height, img_width, img_channel) of the stacked images.
image_data.shape

In [None]:
# NOTE: a bare `y` used to be evaluated here, but the `y` placeholder is only
# created in the following cell, so it raised NameError on a fresh kernel
# (Restart & Run All). The stray inspection has been removed.

In [None]:
# Graph inputs: a float32 image batch and int32 label rows of width num_targets.
# The leading None leaves the batch size open.
with tf.name_scope("inputs"):
    X = tf.placeholder(tf.float32, shape=[None, img_height,img_width, img_channel], name = "X")
    y = tf.placeholder(tf.int32, shape = [None, num_targets], name = "y")
    
# First convolution + ReLU, configured by the conv1_* constants.
with tf.name_scope("conv1"):
    conv1 = tf.layers.conv2d(X, filters=conv1_fmaps, kernel_size = conv1_ksize,
                         strides = conv1_stride, padding=conv1_pad,
                         activation = tf.nn.relu, name="conv1")

# Second convolution + ReLU on conv1's output, configured by the conv2_* constants.
with tf.name_scope("conv2"):
    conv2 = tf.layers.conv2d(conv1, filters=conv2_fmaps, kernel_size=conv2_ksize,
                         strides=conv2_stride, padding=conv2_pad,
                         activation=tf.nn.relu, name="conv2")
    


In [None]:
with tf.name_scope("pool2"):
    pool2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
    # Flatten using the statically known pooled shape. The previous hard-coded
    # pool2_fmaps*7*7 assumed a 7x7 feature map; for 30x20 inputs the pooled map
    # is 7x5, so that reshape either failed or mixed pixels of different samples.
    pool2_flat_dim = int(np.prod(pool2.get_shape().as_list()[1:]))
    pool2_flat = tf.reshape(pool2, shape=[-1, pool2_flat_dim])

with tf.name_scope("fc1"):
    fc1 = tf.layers.dense(pool2_flat, n_fc1, activation = tf.nn.relu, name = "fc1")

with tf.name_scope("output"):
    logits = tf.layers.dense(fc1, n_outputs, name = "output")
    Y_proba = tf.nn.softmax(logits, name="Y_proba")

with tf.name_scope("train"):
    # The op expects labels with the same dtype as the logits, so the int32
    # one-hot rows are cast to float32 explicitly.
    xentropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=tf.cast(y, tf.float32))
    loss = tf.reduce_mean(xentropy)
    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)

with tf.name_scope("eval"):
    # y holds one-hot rows of shape [batch, num_targets], so both sides are
    # reduced to class indices before comparing. Comparing the argmax of the
    # logits against the raw one-hot rows does not measure accuracy.
    y_class = tf.cast(tf.argmax(y, 1), tf.int32)
    prediction = tf.cast(tf.argmax(logits, 1), tf.int32)

    correct = tf.equal(prediction, y_class)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

with tf.name_scope("init_and_save"):
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

In [None]:
# `y_batch` does not exist until the training cell below has run, so evaluating it
# here raised NameError on a fresh kernel. Inspect the training label matrix that
# the batches are sliced from instead.
y_train.shape

In [None]:
n_epochs = 2
batch_size = 500

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        # Walk through the training set in consecutive mini-batches. Iterating
        # over start offsets (instead of len(X_train) // batch_size full batches)
        # also feeds the final, smaller batch and still trains when there are
        # fewer than batch_size samples.
        for batch_start in range(0, len(X_train), batch_size):
            batch_end = batch_start + batch_size
            X_batch = X_train[batch_start:batch_end]
            y_batch = y_train[batch_start:batch_end]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

        # Checkpoint after every epoch; each save reuses the same path.
        save_path = saver.save(sess, "./my_CNN_model")

In [None]:
# Show the shapes of the four splits rather than dumping the full arrays.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
# `X_train.shape[]` was a SyntaxError (empty subscript); show the full shape tuple.
X_train.shape

In [None]:
# Model parameters

# Input geometry, taken from the preprocessing constants.
image_height, image_width, num_channels = img_height, img_width, img_channel  # 30, 20, 3

# Output size and training schedule.
num_targets = 2
generations = 2000
eval_every = 500
batch_size = 1024

# Widths of the two hidden fully connected layers.
layer_hidden_1, layer_hidden_2 = 384, 192

# Exponential learning-rate decay settings.
learning_rate = 0.1
lr_decay = 0.1
num_gens_to_wait = 25.


# Define the model architecture, this will return logits from images
def cnn_model(input_images, batch_size, train_logical=True):
    """Build a two-convolution, three-dense-layer classifier and return its logits.

    Variables are created with ``tf.get_variable``, so a second call must run
    inside a variable scope with reuse enabled to share the weights.

    Args:
        input_images: image batch of shape [batch, height, width, num_channels];
            it is cast to float32 before the first convolution.
        batch_size: number of images in ``input_images``; used as the leading
            dimension when flattening the feature maps.
        train_logical: unused; kept so existing calls keep working.

    Returns:
        Tensor of shape [batch_size, num_targets] with unnormalised class scores.
    """
    def truncated_normal_var(name, shape, dtype):
        return(tf.get_variable(name=name, shape=shape, dtype=dtype, initializer=tf.truncated_normal_initializer(stddev=0.05)))
    def zero_var(name, shape, dtype):
        return(tf.get_variable(name=name, shape=shape, dtype=dtype, initializer=tf.constant_initializer(0.0)))

    # conv2d only accepts floating-point input; integer pixel arrays are cast here.
    input_images = tf.cast(input_images, tf.float32)

    # First Convolutional Layer
    with tf.variable_scope('conv1') as scope:
        # 5x5 kernel over all input channels, producing 64 features
        conv1_kernel = truncated_normal_var(name='conv_kernel1', shape=[5, 5, num_channels, 64], dtype=tf.float32)
        # We convolve across the image with a stride size of 1
        conv1 = tf.nn.conv2d(input_images, conv1_kernel, [1, 1, 1, 1], padding='SAME')
        # Initialize and add the bias term
        conv1_bias = zero_var(name='conv_bias1', shape=[64], dtype=tf.float32)
        conv1_add_bias = tf.nn.bias_add(conv1, conv1_bias)
        # ReLU element wise
        relu_conv1 = tf.nn.relu(conv1_add_bias)

    # Max Pooling
    pool1 = tf.nn.max_pool(relu_conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],padding='SAME', name='pool_layer1')

    # Local Response Normalization (parameters from paper)
    # paper: http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks
    norm1 = tf.nn.lrn(pool1, depth_radius=5, bias=2.0, alpha=1e-3, beta=0.75, name='norm1')

    # Second Convolutional Layer
    with tf.variable_scope('conv2') as scope:
        # Conv kernel is 5x5, across all prior 64 features and we create 64 more features
        conv2_kernel = truncated_normal_var(name='conv_kernel2', shape=[5, 5, 64, 64], dtype=tf.float32)
        # Convolve filter across prior output with stride size of 1
        conv2 = tf.nn.conv2d(norm1, conv2_kernel, [1, 1, 1, 1], padding='SAME')
        # Initialize and add the bias
        conv2_bias = zero_var(name='conv_bias2', shape=[64], dtype=tf.float32)
        conv2_add_bias = tf.nn.bias_add(conv2, conv2_bias)
        # ReLU element wise
        relu_conv2 = tf.nn.relu(conv2_add_bias)

    # Max Pooling
    pool2 = tf.nn.max_pool(relu_conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool_layer2')

    # Local Response Normalization (parameters from paper)
    norm2 = tf.nn.lrn(pool2, depth_radius=5, bias=2.0, alpha=1e-3, beta=0.75, name='norm2')

    # Reshape output into a single matrix for multiplication for the fully connected layers
    reshaped_output = tf.reshape(norm2, [batch_size, -1])
    reshaped_dim = reshaped_output.get_shape()[1].value

    # First Fully Connected Layer
    with tf.variable_scope('full1') as scope:
        # layer_hidden_1 outputs; the bias is sized from the same constant so the
        # two shapes cannot drift apart when the layer width is changed.
        full_weight1 = truncated_normal_var(name='full_mult1', shape=[reshaped_dim, layer_hidden_1], dtype=tf.float32)
        full_bias1 = zero_var(name='full_bias1', shape=[layer_hidden_1], dtype=tf.float32)
        full_layer1 = tf.nn.relu(tf.add(tf.matmul(reshaped_output, full_weight1), full_bias1))

    # Second Fully Connected Layer
    with tf.variable_scope('full2') as scope:
        # layer_hidden_2 outputs, bias sized to match.
        full_weight2 = truncated_normal_var(name='full_mult2', shape=[layer_hidden_1, layer_hidden_2], dtype=tf.float32)
        full_bias2 = zero_var(name='full_bias2', shape=[layer_hidden_2], dtype=tf.float32)
        full_layer2 = tf.nn.relu(tf.add(tf.matmul(full_layer1, full_weight2), full_bias2))

    # Final Fully Connected Layer -> one output per class (num_targets)
    with tf.variable_scope('full3') as scope:
        # Linear layer: no activation, the result is the logits.
        full_weight3 = truncated_normal_var(name='full_mult3', shape=[layer_hidden_2, num_targets], dtype=tf.float32)
        full_bias3 =  zero_var(name='full_bias3', shape=[num_targets], dtype=tf.float32)
        final_output = tf.add(tf.matmul(full_layer2, full_weight3), full_bias3)

    return(final_output)

In [None]:
# Loss function
def model_loss(logits, targets):
    """Return the mean softmax cross-entropy between ``logits`` and ``targets``.

    Args:
        logits: unnormalised class scores of shape [batch, num_classes].
        targets: per-class label rows (e.g. one-hot) with the same shape as
            ``logits``.

    Returns:
        Scalar tensor: the cross-entropy averaged over the batch.
    """
    # The op requires labels shaped and typed like the logits. The earlier
    # int32 cast + squeeze suited integer class ids, which this op does not take.
    targets = tf.cast(targets, logits.dtype)
    # TF 1.x only accepts keyword arguments here; the positional call raised ValueError.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=targets, logits=logits)
    # Take the average loss across batch size
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    return(cross_entropy_mean)

In [None]:
# Train step
def train_step(loss_value, generation_num):
    """Build a gradient-descent training op with a staircase-decayed learning rate.

    The rate starts at the module-level ``learning_rate`` and is scaled by
    ``lr_decay`` once per ``num_gens_to_wait`` steps of ``generation_num``.

    NOTE(review): ``minimize`` is called without ``global_step``, so nothing in
    this function advances ``generation_num``; the caller has to do that for
    the decay to take effect.

    Args:
        loss_value: scalar loss tensor to minimise.
        generation_num: step counter that drives the decay schedule.

    Returns:
        The op that applies one optimisation step.
    """
    decayed_lr = tf.train.exponential_decay(
        learning_rate, generation_num, num_gens_to_wait, lr_decay, staircase=True)
    sgd = tf.train.GradientDescentOptimizer(decayed_lr)
    return sgd.minimize(loss_value)

In [None]:
# Accuracy function
def accuracy_of_batch(logits, targets):
    """Return the fraction of rows whose arg-max logit equals the target value.

    ``targets`` is cast to int32 and squeezed, then compared element-wise with
    the index of the largest logit in each row.
    """
    target_ids = tf.squeeze(tf.cast(targets, tf.int32))
    predicted_ids = tf.cast(tf.argmax(logits, 1), tf.int32)
    # Booleans become 1.0 / 0.0, so their mean is the hit rate over the batch.
    hits = tf.cast(tf.equal(predicted_ids, target_ids), tf.float32)
    return tf.reduce_mean(hits)


In [None]:
# Declare Model
print('Creating Model.')
with tf.variable_scope('model_definition') as scope:
    # conv2d needs floating-point input, while the image arrays are stored as integers.
    train_images = tf.constant(X_train.astype(np.float32))
    test_images = tf.constant(X_test.astype(np.float32))

    # Declare the training network model. Each split is pushed through in one
    # piece, so the leading dimension used by the flattening reshape inside
    # cnn_model is the number of samples in that split, not the global batch_size.
    model_output = cnn_model(train_images, len(X_train))
    # This is very important!!!  We must set the scope to REUSE the variables,
    #  otherwise, when we set the test network model, it will create new random
    #  variables.  Otherwise we get random evaluations on the test batches.
    scope.reuse_variables()
    # Previously called the undefined `cifar_cnn_model` with the undefined `test_images`.
    test_output = cnn_model(test_images, len(X_test))

## Neural Network

In [None]:
# Define Variable Functions (weights and bias)
def init_weight(shape, st_dev):
    """Create a weight variable of ``shape`` initialised from a normal distribution
    with standard deviation ``st_dev``."""
    initial_value = tf.random_normal(shape, stddev=st_dev)
    return tf.Variable(initial_value)
    

def init_bias(shape, st_dev):
    """Create a bias variable of ``shape`` initialised from a normal distribution
    with standard deviation ``st_dev``."""
    initial_value = tf.random_normal(shape, stddev=st_dev)
    return tf.Variable(initial_value)


# Create a fully connected layer:
def fully_connected(input_layer, weights, biases):
    """Return ``relu(input_layer @ weights + biases)``."""
    return tf.nn.relu(tf.add(tf.matmul(input_layer, weights), biases))

def fully_connected_layer(input_layer, layer_input, layer_output, st_dev=10.0):
    """Create fresh weights and biases and apply a ReLU dense layer to ``input_layer``.

    ``layer_input`` and ``layer_output`` are the input and output widths;
    ``st_dev`` is the standard deviation used for both initialisers.
    """
    layer_weights = init_weight(shape=[layer_input, layer_output], st_dev=st_dev)
    layer_biases = init_bias(shape=[layer_output], st_dev=st_dev)
    return fully_connected(input_layer, layer_weights, layer_biases)

def last_main_layer(input_layer, layer_input, layer_output, st_dev=10.0):
    """Create fresh weights and biases and apply a linear (no activation) dense layer.

    ``layer_input`` and ``layer_output`` are the input and output widths;
    ``st_dev`` is the standard deviation used for both initialisers.
    """
    layer_weights = init_weight(shape=[layer_input, layer_output], st_dev=st_dev)
    layer_biases = init_bias(shape=[layer_output], st_dev=st_dev)
    return tf.add(tf.matmul(input_layer, layer_weights), layer_biases)


In [None]:
# Create graph session
# Kept open at module level (no `with` block) so the following cells can reuse it.
sess = tf.Session()

In [None]:
# Network parameters
n_class = 2

# Each sample is one flattened image, so the input width follows from the image
# geometry (30 * 20 * 3 = 1800). The previous hard-coded 90 does not match the
# image data this notebook trains on.
n_features = img_height * img_width * img_channel

# Widths of the four hidden layers.
n_hidden1 = 250
n_hidden2 = 1000
n_hidden3 = 400
n_hidden4 = 350

dropout_rate = 0.25

In [None]:
# Graph inputs: one row of n_features values and one row of n_class targets per sample.
x_data = tf.placeholder(tf.float32, shape=[None, n_features])
y_target = tf.placeholder(tf.float32, shape=[None, n_class])

In [None]:
# Four ReLU hidden layers, each followed by a dropout op, then a linear output layer.
# NOTE(review): tf.layers.dropout defaults to training=False, in which case it
# returns its input unchanged, so as written these calls do not drop anything.
hidden_1 = fully_connected_layer(x_data, n_features, n_hidden1)
layer_1 = tf.layers.dropout(hidden_1, rate=dropout_rate)

hidden_2 = fully_connected_layer(layer_1, n_hidden1, n_hidden2)
layer_2 = tf.layers.dropout(hidden_2, rate=dropout_rate)

hidden_3 = fully_connected_layer(layer_2, n_hidden2, n_hidden3)
layer_3 = tf.layers.dropout(hidden_3, rate=dropout_rate)

hidden_4 = fully_connected_layer(layer_3, n_hidden3, n_hidden4)
layer_4 = tf.layers.dropout(hidden_4, rate=dropout_rate)

# Logits: no activation on the output layer.
layer_last = last_main_layer(layer_4, n_hidden4, n_class)


In [None]:
# Softmax cross-entropy per sample, averaged over the batch.
per_sample_xent = tf.nn.softmax_cross_entropy_with_logits(logits=layer_last, labels=y_target)
loss = tf.reduce_mean(per_sample_xent)

In [None]:
# Declare optimizer
learning_rate = 0.05
adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
# Despite its name, `optimizer` is the training op returned by minimize().
optimizer = adam.minimize(loss)

In [None]:
# Initialize Variables
# Build the op that initialises all global variables and run it in `sess`.
init = tf.global_variables_initializer()
sess.run(init)

In [None]:
# `rand_index` is only created inside the training loop below, so indexing with it
# here raised NameError on a fresh kernel. Inspect the full training label matrix
# that the random batches are drawn from instead.
y_train.shape

In [None]:
# Training loop
# x_data is a 2-D placeholder, so every sample is flattened to a single row
# (a no-op when the data is already two-dimensional).
X_train_flat = X_train.reshape(len(X_train), int(np.prod(X_train.shape[1:])))
X_test_flat = X_test.reshape(len(X_test), int(np.prod(X_test.shape[1:])))

# Every step draws len(X_train) indices with replacement (np.random.choice default).
batch_size = len(X_train_flat)

# Build the accuracy ops once. Creating them inside the loop added new nodes to
# the graph at every report, so the graph kept growing during training.
# Test model
correct_prediction = tf.equal(tf.argmax(layer_last, 1), tf.argmax(y_target, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

train_feed = {x_data: X_train_flat, y_target: y_train}
test_feed = {x_data: X_test_flat, y_target: y_test}

loss_vec = []
test_loss = []
for i in range(3000):
    rand_index = np.random.choice(len(X_train_flat), size=batch_size)
    rand_x = X_train_flat[rand_index]
    rand_y = y_train[rand_index]
    batch_feed = {x_data: rand_x, y_target: rand_y}
    sess.run(optimizer, feed_dict=batch_feed)

    # Loss on the same batch, evaluated after the update step.
    temp_loss = sess.run(loss, feed_dict=batch_feed)
    loss_vec.append(temp_loss)

    test_temp_loss = sess.run(loss, feed_dict=test_feed)
    test_loss.append(test_temp_loss)
    if (i+1)%50==0:
        print('Generation: ' + str(i+1) + '. Loss = ' + str(temp_loss))

        print("Accuracy_train:", sess.run(accuracy, feed_dict=train_feed))
        print("Accuracy_test:", sess.run(accuracy, feed_dict=test_feed))