In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '2'
import time
import pickle
import numpy as np
import pandas as pd
import skimage.io as io
import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf
tf.keras.backend.set_learning_phase(True)
import keras
from keras import backend as K
K.set_learning_phase(True)

from imgaug import augmenters as iaa
from datetime import datetime

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
FLAG_savedir = '/home/put_data/moth/code/cmchang/5_fold/'
FLAG_sfold = 5
FLAG_learning_rate = 2e-5
idx_fold = 3

In [3]:
with open(os.path.join(FLAG_savedir, 'Y_multilabel_dict.pickle'), 'rb') as handle:
    Y_dict = pickle.load(handle)
    
with open(os.path.join(FLAG_savedir, 'Y_num_mapping.pickle'), 'rb') as handle:
    Y_num_mapping = pickle.load(handle)

In [4]:
FLAG_model_save = '/home/put_data/moth/code/cmchang/TF_center_resnet_fold_{}_{}'.format(datetime.now().strftime('%Y%m%d'), idx_fold)
if not os.path.exists(FLAG_model_save):
    os.makedirs(FLAG_model_save)
    print('make a directory: {}'.format(FLAG_model_save))

In [5]:
Xtrain = pd.read_csv(os.path.join(FLAG_savedir, 'train_fold_{0}.csv'.format(idx_fold)))
Ytrain = np.vstack(Xtrain['Species'].apply(lambda x: Y_dict[x]))
Mtrain = np.vstack(Xtrain['Species'].apply(lambda x: Y_num_mapping[x]))

Xtest = pd.read_csv(os.path.join(FLAG_savedir, 'test_fold_{0}.csv'.format(idx_fold)))
Ytest = np.vstack(Xtest['Species'].apply(lambda x: Y_dict[x]))
Mtest = np.vstack(Xtest['Species'].apply(lambda x: Y_num_mapping[x]))

print('Xtrain.shape: {0}, Ytrain.shape: {1}'.format(Xtrain.shape, Ytrain.shape))
print('Xtest.shape: {0}, Ytest.shape: {1}'.format(Xtest.shape, Ytest.shape))

Xtrain.shape: (19888, 9), Ytrain.shape: (19888, 5)
Xtest.shape: (5387, 9), Ytest.shape: (5387, 5)


In [6]:
class DataGenerator(keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, list_IDs, labels, center_IDs=None, batch_size=32, dim=(256,256,3), n_classes=10, shuffle=True, img_preprocess=None, img_aug = None):
        'Initialization'
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.center_IDs = center_IDs
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()
        self.indexes = list(range(0, len(self.list_IDs)))
        self.img_aug = img_aug
        self.img_preprocess = img_preprocess

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        # Initialization
        X = np.empty((self.batch_size, *self.dim))
        Y = np.empty((self.batch_size, self.n_classes), dtype=int)
        M = np.empty((self.batch_size), dtype=int)

        # Generate data
        for i, ID in enumerate(indexes):

            # Store sample
            X[i,] = io.imread(self.list_IDs[ID]).astype(float)
            
            # Store class
            Y[i,] = self.labels[ID]
        
        X = self.__data_preprocess(X)
        
        if self.img_aug is not None:
            X = self.img_aug.augment_images(X)
        
        if self.center_IDs is None:
            return X, Y
        else:
            for i, ID in enumerate(indexes):
                M[i] = self.center_IDs[ID]
            return X,Y,M

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)
            
    def __data_preprocess(self, img):
        if self.img_preprocess is None:
            processed_img = img/255.0
        else:
            processed_img = self.img_preprocess(img)
        return processed_img        

In [7]:
sometimes = lambda aug: iaa.Sometimes(0.8, aug)
augseq = iaa.Sequential([
    iaa.Fliplr(0.5),  # horizontally flip 50% of the images
    sometimes(iaa.Affine(
            scale={"x": (0.9, 1.1), "y": (0.9, 1.1)}, # scale images to 80-120% of their size, individually per axis
            translate_percent={"x": (-0.1, 0.1), "y": (-0.1, 0.1)}, # translate by -20 to +20 percent (per axis)
            rotate=(-30, 30), # rotate by -45 to +45 degrees
            cval=255 # if mode is constant, use a cval between 0 and 255
        ))
])

In [8]:
# Parameters
input_shape = (256, 256, 3)
n_classes = Ytest.shape[1]
batch_size = 32
n_hidden = 500

In [9]:
import tempfile
class myResNet(object):
    def __init__(self, input_shape=(256,256,3), n_classes=5, n_hidden=1000, scope_name="model"):
        self.input_shape = (None, *input_shape)
        self.n_classes = n_classes
        self.n_hidden = n_hidden
        self.scope_name = scope_name
    
    def build(self, centers=None, lambda_c = 0.0, keep_prob=1.0):
        with tf.Session() as sess:
            with tf.variable_scope(self.scope_name):
                self.x = tf.placeholder(shape=self.input_shape, dtype=tf.float32)
                self.y = tf.placeholder(shape=(None, self.n_classes), dtype=tf.float32) # one-hot encoding
                self.is_train = tf.placeholder(tf.bool)

                with tf.variable_scope("ResNet"):
                    extractor = tf.contrib.keras.applications.ResNet50(input_tensor=self.x, include_top=False, weights='imagenet', pooling='avg')
                self.feature = extractor.output
                feature_shape = self.feature.shape.as_list()

                self.pretrained_weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.scope_name+'/ResNet')
                with tempfile.NamedTemporaryFile() as f:
                    self.tf_checkpoint_path = tf.train.Saver(self.pretrained_weights).save(sess, f.name)
                
                conv_output = self.dropout_layer(self.feature, keep_prob=keep_prob)
                
                fc1_W = tf.get_variable(shape=(feature_shape[1], self.n_classes), initializer=tf.truncated_normal_initializer(mean=0, stddev=0.1), name="fc1_W", dtype=tf.float32)
                fc1_b = tf.get_variable(shape=(self.n_classes,), initializer=tf.truncated_normal_initializer(mean=0, stddev=0.1), name="fc1_b", dtype=tf.float32)
                logits = tf.nn.bias_add(tf.matmul(conv_output, fc1_W), fc1_b, name='logits')
                # fc1_output = tf.nn.bias_add(tf.matmul(conv_output, fc1_W), fc1_b, name='fc1_output')
                # fc1_output = tf.nn.relu(fc1_output)
                

#                 fc2_W = tf.get_variable(shape=(self.n_hidden, self.n_classes), initializer=tf.truncated_normal_initializer(mean=0, stddev=0.1), name="fc2_W", dtype=tf.float32)
#                 fc2_b = tf.get_variable(shape=(self.n_classes,), initializer=tf.truncated_normal_initializer(mean=0, stddev=0.1), name="fc2_b", dtype=tf.float32)
#                 logits = tf.nn.bias_add(tf.matmul(fc1_output, fc2_W), fc2_b, name='logits')

                self.probs = tf.nn.sigmoid(logits, name='probs')
                self.ce_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.y))

                self.para_dict = { l.name: l.output for l in extractor.layers }
#                 self.para_dict.update({'fc_1': [fc1_W, fc1_b], 'fc_2': [fc2_W, fc2_b]})
                self.para_dict.update({'fc_1': [fc1_W, fc1_b]})

                if centers is not None:
                    print("incorporate center_loss")
                    self.index = tf.placeholder(shape=(None,), dtype=tf.int32)

                    if isinstance(centers, tuple):
                        self.centers = tf.get_variable(shape=centers, initializer=tf.zeros_initializer(), name="centers", dtype=tf.float32, trainable=False)
                    elif isinstance(centers, np.ndarray):
                        self.centers = tf.get_variable(initializer=centers, name="centers", dtype=tf.float32, trainable=False)
                    else:
                        raise ValueError("please provide either centers' shape or pre-defined centers matrix in numpy.ndarray")

                    batch_centers = tf.gather(self.centers, self.index, axis=0) # batch,
                    self.center_loss = tf.nn.l2_loss(self.feature - batch_centers) # tf.reduce_sum(tf.reduce_mean(tf.square(tf.subtract(x=self.features, y=batch_centers)), axis=1))                

                    # update centers using this batch samples
                    diff = batch_centers - self.feature
                    unique_label, unique_idx, unique_count = tf.unique_with_counts(self.index)
                    appear_times = tf.gather(unique_count, unique_idx)
                    appear_times = tf.reshape(appear_times, [-1, 1])
                    diff = diff / tf.cast((1 + appear_times), tf.float32)
                    diff = 0.5 * diff
                    self.centers_update_op = tf.scatter_sub(self.centers, self.index, diff)
                    self.loss = self.ce_loss + lambda_c*self.ce_loss

                else:
                    self.loss = self.ce_loss
                    print("Not incorporate center_loss")

                self.accuracy = tf.reduce_mean(tf.cast(tf.equal(x=tf.to_int32(self.probs > 0.5), y=tf.to_int32(self.y)), tf.float32))
            # end of variable_scope(self.scope_name)
        # end of tf.Session()
        self.model_weights_tensors = set(self.pretrained_weights)
        
    def load_weights(self):
        sess = tf.get_default_session()
        tf.train.Saver(self.pretrained_weights).restore(sess, self.tf_checkpoint_path)
    
    def __getitem__(self, key):
        return self.para_dict[key]
    
    def dropout_layer(self, bottom, keep_prob):
        if self.is_train == True:
            return tf.nn.dropout(bottom, keep_prob=keep_prob)
        else:
            return bottom

In [11]:
scope_name = "model"

In [12]:
model = myResNet(scope_name=scope_name, input_shape=input_shape, n_classes=Ytest.shape[1], n_hidden=n_hidden)

In [13]:
model.build(centers=(len(Y_dict), 2048),lambda_c=0.02, keep_prob=0.5)

incorporate center_loss


In [14]:
def initialize_uninitialized(sess, scope_name=None):
    if scope_name is not None:
        var_list = [var for var in tf.global_variables() if scope_name in var.name]
    else:
        var_list = tf.global_variables()
    is_not_initialized = sess.run([tf.is_variable_initialized(var) for var in var_list])
    not_initialized_vars = [v for (v,f) in zip(var_list, is_not_initialized) if not f]
    if len(not_initialized_vars): 
            sess.run(tf.variables_initializer(not_initialized_vars))

In [None]:
params = {'dim': input_shape,
          'batch_size': batch_size,
          'n_classes': n_classes,
          'shuffle': True,
          'img_aug': augseq,
          'img_preprocess':  tf.contrib.keras.applications.resnet50.preprocess_input}

# Generators
training_generator   = DataGenerator(list_IDs = list(Xtrain['img_rmbg_path']), labels = Ytrain, center_IDs = Mtrain, **params)
validation_generator = DataGenerator(list_IDs = list(Xtest['img_rmbg_path']), labels = Ytest, center_IDs = Mtest, **params)

In [None]:
with tf.Session() as sess:
    model.load_weights()
    
    # trainable variables
    train_vars = list()
    for var in tf.trainable_variables():
        if model.scope_name in var.name:
            train_vars.append(var)
    
     # hyper parameters
    batch_size = 32
    epoch = 100
    early_stop_patience = 10
    min_delta = 0.0001
    
    # recorder
    epoch_counter = 0
    history = list()

    # Passing global_step to minimize() will increment it at each step.
    learning_rate = FLAG_learning_rate
    opt = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.5)
    
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) #使用內建的 batch normalization layer, 必須執行
    with tf.control_dependencies(update_ops):               #tf.GraphKeys.UPDATE_OPS 才會更新到 BN 層的 mean, variance
        train_op = opt.minimize(model.loss, var_list= train_vars)

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)
    
    checkpoint_path = os.path.join(FLAG_model_save, 'model.ckpt')
    
    # reset due to adding a new task
    patience_counter = 0
    current_best_val_accu = 0
    
    n_train_batch = int(Xtrain.shape[0]/batch_size)
    n_valid_batch = int(Xtest.shape[0]/batch_size)
    
    initialize_uninitialized(sess)
    
    while(patience_counter < early_stop_patience and epoch_counter < epoch):
        stime = time.time()
        
        train_loss, train_accu = 0.0, 0.0
        for i in range(n_train_batch):
            print('training: {0}/{1}'.format(i+1, n_train_batch), end='\r')
            xbatch, ybatch, mbatch = training_generator[i]
            
            loss, accu, _, _ = sess.run([model.loss, model.accuracy, model.centers_update_op, train_op],
                                       feed_dict={model.x: xbatch,
                                                 model.y: ybatch,
                                                 model.index: mbatch,
                                                 model.is_train: True})
            train_loss += loss
            train_accu += accu
        
        train_loss = train_loss/n_train_batch
        train_accu = train_accu/n_train_batch
        
        valid_loss, valid_accu = 0.0, 0.0
        for i in range(n_valid_batch):
            print('validating: {0}/{1}'.format(i+1, n_valid_batch), end='\r')
            xbatch, ybatch, mbatch = validation_generator[i]
            
            loss, accu, = sess.run([model.loss, model.accuracy],
                                       feed_dict={model.x: xbatch,
                                                 model.y: ybatch,
                                                 model.index: mbatch,
                                                 model.is_train: False})
            valid_loss += loss
            valid_accu += accu
        
        valid_loss = valid_loss/n_valid_batch
        valid_accu = valid_accu/n_valid_batch
        
         # early stopping check
        if (valid_accu - current_best_val_accu) > min_delta:
            current_best_val_accu = valid_accu
            patience_counter = 0

#             para_dict = sess.run(model.para_dict)
#             np.save(os.path.join(FLAG_save_dir, "para_dict.npy"), para_dict)
#             print("save in %s" % os.path.join(FLAG_model_save, "para_dict.npy"))
            saver.save(sess, checkpoint_path, global_step=epoch_counter)
        else:
            patience_counter += 1
        
        training_generator.on_epoch_end()
        epoch_counter += 1
        
        print("Epoch %s (%s), %s sec >> train loss: %.4f, train accu: %.4f, val loss: %.4f, val accu: %.4f"% (epoch_counter, patience_counter, round(time.time()-stime,2), train_loss, train_accu, valid_loss, valid_accu))
        history.append([train_loss, train_accu, valid_loss, valid_accu])
        
        if epoch_counter % 10 == 0:
            df = pd.DataFrame(history)
            df.columns = ['train_loss', 'train_accu', 'val_loss', 'val_accu']
            df[['train_loss', 'val_loss']].plot()
            plt.savefig(os.path.join(FLAG_model_save, 'loss.png'))
            plt.close()
            df[['train_accu', 'val_accu']].plot()
            plt.savefig(os.path.join(FLAG_model_save, 'accu.png'))
            plt.close()
            
            df.to_csv(os.path.join(FLAG_model_save, "history.csv"))
    saver.save(sess, checkpoint_path, global_step=epoch_counter)
    
    centers = sess.run(model.centers)
    np.save(arr=centers,file=os.path.join(FLAG_model_save,"centers.npy"))

INFO:tensorflow:Restoring parameters from /tmp/tmp0xtnbqtn
Epoch 1 (0), 634.48 sec >> train loss: 0.5290, train accu: 0.7590, val loss: 0.4891, val accu: 0.7846
Epoch 2 (1), 630.8 sec >> train loss: 0.4294, train accu: 0.8101, val loss: 0.4895, val accu: 0.7802
Epoch 3 (2), 626.15 sec >> train loss: 0.3675, train accu: 0.8428, val loss: 0.5071, val accu: 0.7768
Epoch 4 (0), 628.86 sec >> train loss: 0.3229, train accu: 0.8653, val loss: 0.5290, val accu: 0.7907
Epoch 5 (1), 630.72 sec >> train loss: 0.2858, train accu: 0.8826, val loss: 0.5647, val accu: 0.7757
Epoch 6 (2), 629.81 sec >> train loss: 0.2532, train accu: 0.8959, val loss: 0.5744, val accu: 0.7761
Epoch 7 (3), 629.77 sec >> train loss: 0.2285, train accu: 0.9075, val loss: 0.5950, val accu: 0.7816
Epoch 8 (4), 628.24 sec >> train loss: 0.2040, train accu: 0.9172, val loss: 0.6593, val accu: 0.7735
Epoch 9 (5), 628.83 sec >> train loss: 0.1851, train accu: 0.9256, val loss: 0.6792, val accu: 0.7716
Epoch 10 (6), 627.13 sec

In [None]:
#     # Generators
#     training_generator   = DataGenerator(list_IDs = list(Xtrain['img_rmbg_path']), labels = Ytrain, center_IDs = Mtrain, **params)
#     validation_generator = DataGenerator(list_IDs = list(Xtest['img_rmbg_path']), labels = Ytest, center_IDs = Mtest, **params)


In [None]:
# img_input = Input(shape=input_shape)
# extractor = ResNet50(input_tensor=img_input, include_top=False, weights='imagenet', pooling='avg')
# dropout = Dropout(rate=0.5)(extractor.output)
# dense1 = Dense(1000, activation='relu', name='dense1')(dropout)
# dense1 = Dropout(rate=0.5)(dense1)
# output = Dense(n_classes, activation='sigmoid', name='output_layer')(dense1)

In [None]:
# params = {'dim': input_shape,
#           'batch_size': batch_size,
#           'n_classes': n_classes,
#           'shuffle': True,
#           'img_aug': augseq,
#           'img_preprocess': preprocess_input}
    
# if isCenterloss:
#     lambda_c = 0.02
#     input_target = Input(shape=(1,)) # single value ground truth labels as inputs
#     centers = Embedding(input_dim=1, input_length=len(Y_dict), output_dim=int(extractor.output.get_shape()[1]))(input_target)
#     l2_loss = Lambda(lambda x: K.sum(K.square(x[0]-x[1][:,0]),1,keepdims=True),name='l2_loss')([extractor.output, centers])
#     model = Model(inputs=[img_input,input_target],outputs=[output, l2_loss])        
#     model.compile(optimizer=Adam(lr=1e-5, beta_1=0.5), 
#                              loss=["binary_crossentropy", lambda y_true,y_pred: y_pred],
#                              loss_weights=[1,lambda_c],
#                              metrics=['accuracy'])
    
#     # Generators
#     training_generator   = DataGenerator(list_IDs = list(Xtrain['img_rmbg_path']), labels = Ytrain, center_IDs = Mtrain, **params)
#     validation_generator = DataGenerator(list_IDs = list(Xtest['img_rmbg_path']), labels = Ytest, center_IDs = Mtest, **params)

# else:
#     model = Model(inputs=img_input, outputs=output)
#     model.compile(optimizer=Adam(lr=1e-5, beta_1=0.5), 
#                   loss="binary_crossentropy",
#                   metrics=["accuracy"])
    
#         # Generators
#     training_generator   = DataGenerator(list_IDs = list(Xtrain['img_rmbg_path']), labels = Ytrain, center_IDs = None, **params)
#     validation_generator = DataGenerator(list_IDs = list(Xtest['img_rmbg_path']), labels = Ytest, center_IDs = None, **params)

In [None]:
# model.summary()

In [None]:
# csv_logger = keras.callbacks.CSVLogger(os.path.join(FLAG_model_save, 'training.log'))
# checkpoint = keras.callbacks.ModelCheckpoint(os.path.join(FLAG_model_save, 'model.h5'), 
#                                              monitor='val_loss', 
#                                              verbose=1, 
#                                              save_best_only=True,
#                                              save_weights_only=False,
#                                              mode='min',
#                                              period=1)

In [None]:
# # Train model on dataset
# model.fit_generator(generator=training_generator,
#                    validation_data=validation_generator,
#                    use_multiprocessing=True,
#                    steps_per_epoch=Xtrain.shape[0]/batch_size, 
#                    validation_steps=Xtest.shape[0]/batch_size,
#                    workers=6,
#                    epochs=50,
#                    callbacks=[csv_logger, checkpoint])