# Read me

Unsupervised autoencoder learning by using all the images (train + test)

This autoencoder is constructed based on residual network

Input images: 128 x 128

Output = Input images

embedding length: 128


In [1]:
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image
import matplotlib.gridspec as gridspec
import numpy as np
import os
import cv2
import pandas as pd
from math import floor, ceil, pi

%matplotlib inline

# Load paths of all the images

In [2]:
def get_image_paths(folder):
#     folder = './data/whale/train_full'
    files = os.listdir(folder)
    files.sort()
    files = ['{}/{}'.format(folder, file) for file in files]
    return files

X_img_paths_train = get_image_paths('./data/whale/train_full')
X_img_paths_test = get_image_paths('./data/whale/test')
X_img_paths = X_img_paths_train + X_img_paths_test
print "total image number: %d" % len(X_img_paths)
print(X_img_paths[:20])

total image number: 25460
['./data/whale/train_full/00022e1a.jpg', './data/whale/train_full/000466c4.jpg', './data/whale/train_full/00087b01.jpg', './data/whale/train_full/001296d5.jpg', './data/whale/train_full/0014cfdf.jpg', './data/whale/train_full/0025e8c2.jpg', './data/whale/train_full/0026a8ab.jpg', './data/whale/train_full/0031c258.jpg', './data/whale/train_full/0035632e.jpg', './data/whale/train_full/0037e7d3.jpg', './data/whale/train_full/00389cd7.jpg', './data/whale/train_full/0042dcc4.jpg', './data/whale/train_full/0042ea34.jpg', './data/whale/train_full/00467ae9.jpg', './data/whale/train_full/004a97f3.jpg', './data/whale/train_full/004c5fb9.jpg', './data/whale/train_full/005c57e7.jpg', './data/whale/train_full/006d0aaf.jpg', './data/whale/train_full/0078af23.jpg', './data/whale/train_full/007c3603.jpg']


# Data augmentation: to do

In [3]:
def data_augmentation(original_image):
    #to do
    
    return original_image

# convert to gray scale

In [4]:
def rgb2gray(rgb):
    return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])

# image resize and augmentation

In [5]:
IMAGE_SIZE = 128

def tf_resize_augment_images(X_img_file_paths):
    X_data = []
    tf.reset_default_graph()
    X = tf.placeholder(tf.float32, (None, None, 1))
    tf_img = tf.image.resize_images(X, (IMAGE_SIZE, IMAGE_SIZE), tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        
        # Each image is resized individually as different image may be of different size.
        for index, file_path in enumerate(X_img_file_paths):
            img = mpimg.imread(file_path)
            if len(img.shape) > 2:# convert to grayscale
                img = rgb2gray(img)
            img = img.reshape(img.shape[0], img.shape[1], 1)
            resized_img = sess.run(tf_img, feed_dict = {X: img})
            X_data.append(data_augmentation(resized_img))

    X_data = np.array(X_data, dtype = np.float32) # Convert to numpy
    return X_data

X_imgs = tf_resize_augment_images(X_img_paths)
print(X_imgs.shape)

(25460, 128, 128, 1)


# save processed images and labels

In [6]:
np.save('./data/whale/save/resize_all.npy', X_imgs)

# load data

In [7]:
X_imgs = np.load('./data/whale/save/resize_all.npy')

# keras import

In [10]:
from keras.layers import Input, Conv2D, Lambda, subtract, merge, Dense, Flatten
from keras.layers import MaxPooling2D, BatchNormalization, LeakyReLU, Activation, add, Conv2DTranspose
from keras.models import Model, Sequential
from keras.regularizers import l2
from keras import backend as K
from keras.optimizers import SGD,Adam
from keras.losses import binary_crossentropy
from keras.utils.training_utils import multi_gpu_model

K.clear_session()
tf.reset_default_graph()

# split data

In [12]:
test_ratio = 0.2
image_train = X_imgs[:int((1 - test_ratio) * X_imgs.shape[0])]
image_test = X_imgs[int((1 - test_ratio) * X_imgs.shape[0]):]


# Data normalization (subtract mean)

In [13]:
def image_normalize(image_train, image_test):
    mean = image_train.mean(axis=0, keepdims=1)
    image_train -= mean
    image_test -= mean
    return image_train, image_test

image_train, image_test = image_normalize(image_train, image_test)

# define residual network block for encoding

In [19]:
def resNet_block_encoder(image, channel, strides):
    net = Conv2D(channel, kernel_size=(3, 3), strides=(strides, strides), kernel_initializer="he_normal", padding='same')(image)
    net = BatchNormalization()(net)
    net = Activation('relu')(net)
    net = Conv2D(channel, kernel_size=(3, 3), strides=(1, 1), kernel_initializer="he_normal", padding='same')(net)
    
    if strides > 1:
        image = Conv2D(channel, kernel_size=(3, 3), strides=(strides, strides), kernel_initializer="he_normal", padding='same')(image)
        
    net = add([image, net])
    net = BatchNormalization()(net)
    net = Activation('relu')(net)
    
    return net


# define residual network block for decoding

In [20]:
def resNet_block_decoder(image, channel, strides, up=1):
    net = Conv2DTranspose(channel, kernel_size=(3, 3), strides=(strides, strides), kernel_initializer="he_normal", padding='same')(image)
    net = BatchNormalization()(net)
    net = Activation('relu')(net)
    net = Conv2DTranspose(channel, kernel_size=(3, 3), strides=(1, 1), kernel_initializer="he_normal", padding='same')(net)
    
    if strides > 1 or up > 1:
        image = Conv2DTranspose(channel, kernel_size=(3, 3), strides=(strides, strides), kernel_initializer="he_normal", padding='same')(image)
        
    net = add([image, net])
    net = BatchNormalization()(net)
    net = Activation('relu')(net)
    
    return net

# define encoder

In [21]:
#input shape: 128 x 128 x 1   output shape: 4 x 4 x 32
def encoding(image):
    net = Conv2D(128, kernel_size=(7, 7), strides=(2, 2), kernel_initializer="he_normal", padding='same')(image)
    net = BatchNormalization()(net)
    net = Activation('relu')(net)  
    net = MaxPooling2D()(net)
    
    net = resNet_block_encoder(net, 128, 1)
    net = resNet_block_encoder(net, 128, 1)
    net = resNet_block_encoder(net, 128, 1)
    
    net = resNet_block_encoder(net, 64, 2)
    net = resNet_block_encoder(net, 64, 1)
    net = resNet_block_encoder(net, 64, 1)
    
    net = resNet_block_encoder(net, 32, 2)
    net = resNet_block_encoder(net, 32, 1)
    net = resNet_block_encoder(net, 32, 1)
    
    embedding = MaxPooling2D()(net)
    
    return embedding

# define decoder

In [22]:
def decoding(embedding):    
    net = resNet_block_decoder(embedding, 32, 1)
    net = resNet_block_decoder(net, 32, 1)
    net = resNet_block_decoder(net, 32, 2)
    
    net = resNet_block_decoder(net, 64, 1, up=64/32)
    net = resNet_block_decoder(net, 64, 1)
    net = resNet_block_decoder(net, 64, 2)
    
    net = resNet_block_decoder(net, 128, 1, up=128/64)
    net = resNet_block_decoder(net, 128, 1)
    net = resNet_block_decoder(net, 128, 1)
    net = UpSampling2D()(net)
    
    net = Conv2DTranspose(128, kernel_size=(7, 7), strides=(2, 2), kernel_initializer="he_normal", padding='same')(net)
    net = BatchNormalization()(net)
    net = Activation('relu')(net)  
    net = UpSampling2D()(net)
    
    net = Conv2DTranspose(1, kernel_size=(5, 5), strides=(1, 1), kernel_initializer="he_normal", padding='same')(net)
    
    return net

# define autoencoder

In [23]:
learning_rate = 0.00001
input_shape = (IMAGE_SIZE, IMAGE_SIZE, 1)
inputs = Input(input_shape)
G = 1 #the number of GPU

def autoencoder(image):
    encoded = encoding(image)
    decoded = decoding(encoded)
    if G <= 1:
        print("[INFO] training with 1 GPU...")
        autoencoder_net = Model(input=image, output=decoded)
    # otherwise, we are compiling using multiple GPUs
    else:
        print("[INFO] training with {} GPUs...".format(G))

        # we'll store a copy of the model on *every* GPU and then combine
        # the results from the gradient updates on the CPU
        with tf.device("/cpu:0"):
            # initialize the model
            autoencoder_net = Model(input=image, output=decoded)

        # make the model parallel
        autoencoder_net = multi_gpu_model(autoencoder_net, gpus=G)    
    
    optimizer = Adam(learning_rate)
    autoencoder_net.compile(loss="mean_squared_error", optimizer=optimizer)
    return autoencoder_net

autoencoder_net = autoencoder(inputs)

[INFO] training with 1 GPU...


  # This is added back by InteractiveShellApp.init_path()


# start training

In [24]:
n_epochs = 500
batch_size = 40
n_batches = image_train.shape[0] // batch_size

for epoch in range(n_epochs):    
    idx = np.random.permutation(image_train.shape[0])
    X_batches = np.array_split(image_train[idx], n_batches)

    for X_batch, y_batch in zip(X_batches, X_batches):
        loss_train = autoencoder_net.train_on_batch(X_batch,y_batch)

    loss_test = autoencoder_net.evaluate(x=image_test, y=image_test, batch_size=batch_size, verbose=0)
    
    print "epoch:{}\tloss_train:{:.7f}\tloss_test:{:.7f}".format(epoch, loss_train, loss_test)

epoch:0	loss_train:2405.7246094	loss_test:2695.7152473
epoch:1	loss_train:2388.4062500	loss_test:2270.2618942
epoch:2	loss_train:1748.1499023	loss_test:2042.1111452
epoch:3	loss_train:1784.1038818	loss_test:1894.9394279
epoch:4	loss_train:1926.6647949	loss_test:1816.1314510
epoch:5	loss_train:1814.8316650	loss_test:1721.0979462
epoch:6	loss_train:1565.5454102	loss_test:1660.2603937
epoch:7	loss_train:1376.7740479	loss_test:1610.7684163
epoch:8	loss_train:1609.4069824	loss_test:1560.9160361
epoch:9	loss_train:1453.6972656	loss_test:1519.3290659
epoch:10	loss_train:1492.0303955	loss_test:1487.7703094
epoch:11	loss_train:1304.2666016	loss_test:1448.1948216
epoch:12	loss_train:1583.6809082	loss_test:1425.2619197
epoch:13	loss_train:1349.9971924	loss_test:1395.2845746
epoch:14	loss_train:1686.7138672	loss_test:1373.3602706
epoch:15	loss_train:1550.3935547	loss_test:1348.0502297
epoch:16	loss_train:1575.4797363	loss_test:1328.6479921
epoch:17	loss_train:1375.7734375	loss_test:1311.5178743
ep

epoch:149	loss_train:701.4777222	loss_test:870.8556516
epoch:150	loss_train:793.2331543	loss_test:878.9563057
epoch:151	loss_train:766.1208496	loss_test:867.1112868
epoch:152	loss_train:827.8177490	loss_test:865.5299544
epoch:153	loss_train:881.1845093	loss_test:864.8745275
epoch:154	loss_train:675.4345703	loss_test:868.6087112
epoch:155	loss_train:740.6918945	loss_test:863.8675621
epoch:156	loss_train:774.9656372	loss_test:865.4727499
epoch:157	loss_train:819.3218384	loss_test:867.3249879
epoch:158	loss_train:733.2232666	loss_test:862.8026135
epoch:159	loss_train:661.9528809	loss_test:858.5976666
epoch:160	loss_train:868.3150024	loss_test:862.1864475
epoch:161	loss_train:841.5212402	loss_test:863.7725058
epoch:162	loss_train:781.5083618	loss_test:856.5975025
epoch:163	loss_train:825.5590820	loss_test:860.7132881
epoch:164	loss_train:668.8458252	loss_test:857.1812612
epoch:165	loss_train:773.5233765	loss_test:854.6895583
epoch:166	loss_train:796.5145264	loss_test:857.5172094
epoch:167	

epoch:298	loss_train:651.0891113	loss_test:808.8147186
epoch:299	loss_train:745.0322876	loss_test:807.3714649
epoch:300	loss_train:707.4176636	loss_test:811.6813346
epoch:301	loss_train:683.4341431	loss_test:804.5750343
epoch:302	loss_train:664.7726440	loss_test:804.6431766
epoch:303	loss_train:724.0640869	loss_test:804.5756947
epoch:304	loss_train:711.9898071	loss_test:805.9627055
epoch:305	loss_train:729.6705322	loss_test:803.1865328
epoch:306	loss_train:730.4527588	loss_test:805.6608259
epoch:307	loss_train:796.9467773	loss_test:801.7468322
epoch:308	loss_train:700.6132202	loss_test:802.7708411
epoch:309	loss_train:665.0058594	loss_test:801.4538350
epoch:310	loss_train:648.5581665	loss_test:805.2754225
epoch:311	loss_train:616.7827148	loss_test:803.2809330
epoch:312	loss_train:796.3297119	loss_test:812.6516915
epoch:313	loss_train:708.6414795	loss_test:802.3506401
epoch:314	loss_train:753.5400391	loss_test:806.6609855
epoch:315	loss_train:644.8314819	loss_test:801.1803428
epoch:316	

epoch:447	loss_train:563.1577759	loss_test:784.9112033
epoch:448	loss_train:620.8226318	loss_test:786.4559504
epoch:449	loss_train:649.9707642	loss_test:784.6791923
epoch:450	loss_train:765.6481934	loss_test:784.3764525
epoch:451	loss_train:593.4718018	loss_test:780.9390576
epoch:452	loss_train:759.4017334	loss_test:787.5501464
epoch:453	loss_train:589.9196777	loss_test:780.2774796
epoch:454	loss_train:760.7128296	loss_test:781.3045664
epoch:455	loss_train:803.3821411	loss_test:789.3434040
epoch:456	loss_train:587.8258057	loss_test:779.7917665
epoch:457	loss_train:560.0026855	loss_test:787.0312308
epoch:458	loss_train:635.1875610	loss_test:780.8481863
epoch:459	loss_train:658.9741821	loss_test:780.8592845
epoch:460	loss_train:638.4605713	loss_test:782.7229016
epoch:461	loss_train:634.9147339	loss_test:781.0644456
epoch:462	loss_train:685.2268677	loss_test:779.5339850
epoch:463	loss_train:755.0346680	loss_test:779.3602121
epoch:464	loss_train:788.3948364	loss_test:782.9997714
epoch:465	

# Save model

In [25]:
autoencoder_net.save('./data/whale/autoencoder_model.h5')  # creates a HDF5 file 'my_model.h5'
# del autoencoder_net  # deletes the existing model

# Reload model

In [26]:
from keras.models import load_model
autoencoder_net = load_model('./data/whale/autoencoder_model.h5')