https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html

https://github.com/keras-team/keras/issues/1538

https://gogul09.github.io/software/flower-recognition-deep-learning

In [1]:
# import os
# os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [2]:
# Configure the TensorFlow session that Keras will use.
import tensorflow as tf
config = tf.ConfigProto()
# Allocate GPU memory on demand rather than reserving it all at start-up.
config.gpu_options.allow_growth = True
# config.gpu_options.per_process_gpu_memory_fraction = 0.3

from keras.backend.tensorflow_backend import set_session
# Hand the configured session to the Keras TensorFlow backend.
set_session(tf.Session(config=config))

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

In [4]:
# Sample counts: used to size the prediction steps in
# save_bottlebeck_features and the label arrays in train_top_model.
nb_train_samples = 20000
nb_validation_samples = 2500
# Upper bound on training epochs and the mini-batch size.
epochs = 50
batch_size = 16


# Input image size fed to the network.
img_width=299
img_height=299


### pre-trained model

In [5]:
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications
from keras.layers import GlobalAveragePooling2D
from keras.models import Model


from keras.callbacks import TensorBoard
import math





# dimensions of our images.
img_width, img_height = 299, 299

# Where train_top_model() writes the weights of the dense classifier.
top_model_weights_path = 'model/inceptionV3_model.h5'
# Image folders read by flow_from_directory (one sub-folder per class).
train_data_dir = 'train/train'
validation_data_dir = 'train/validation'


def _dump_bottleneck_features(model, datagen, data_dir, sample_count, out_path):
    """Run the images under ``data_dir`` through ``model`` and save the result.

    Args:
        model: feature extractor whose ``predict_generator`` output is saved.
        datagen: ``ImageDataGenerator`` used to read and rescale the images.
        data_dir: directory passed to ``flow_from_directory``.
        sample_count: number of images to cover; converted to a step count
            with ``batch_size``.
        out_path: ``.npy`` file the feature array is written to.
    """
    generator = datagen.flow_from_directory(
        data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='binary',
        # No shuffling: train_top_model() builds its labels positionally
        # (first half 0, second half 1), so row order must be stable.
        shuffle=False)

    # ceil() so a final partial batch is still predicted.
    features = model.predict_generator(
        generator, math.ceil(sample_count / batch_size),
        verbose=1)

    np.save(out_path, features)


def save_bottlebeck_features():
    """Cache pooled InceptionV3 features for the train and validation images.

    Builds an ImageNet-pretrained InceptionV3 without its classifier head,
    appends global average pooling, and writes the resulting feature arrays to
    ``model/inceptionV3_train_pool.npy`` and ``model/inceptionV3_valid_pool.npy``.
    Reads the module-level ``train_data_dir``, ``validation_data_dir``,
    ``nb_train_samples``, ``nb_validation_samples``, ``batch_size``,
    ``img_width`` and ``img_height``.
    """
    # Pixel values are divided by 255 before being fed to the network.
    datagen = ImageDataGenerator(rescale=1. / 255)

    # Build the InceptionV3 convolutional base (no top classifier).
    body = applications.InceptionV3(include_top=False, weights='imagenet', input_shape=(img_width, img_height, 3))

    # Collapse each feature map to one value so every image yields one vector.
    head = GlobalAveragePooling2D()(body.output)
    model = Model(body.input, head)

    _dump_bottleneck_features(
        model, datagen, train_data_dir, nb_train_samples,
        'model/inceptionV3_train_pool.npy')

    _dump_bottleneck_features(
        model, datagen, validation_data_dir, nb_validation_samples,
        'model/inceptionV3_valid_pool.npy')




#save_bottlebeck_features()
#train_top_model()

In [6]:
# Extract and cache the InceptionV3 features (writes the two .npy files under model/).
save_bottlebeck_features()

Found 20000 images belonging to 2 classes.
Found 2500 images belonging to 2 classes.


In [9]:
# Sanity check: reload the cached training features and inspect their shape.
ep = np.load('model/inceptionV3_train_pool.npy')
ep.shape

(20000, 2048)

## from here

In [13]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications
import math
from keras.optimizers import RMSprop
from time import time

from keras.callbacks import TensorBoard

In [11]:
# Same values as the earlier configuration cell; presumably repeated so the
# training section can be run on its own from the cached features — TODO confirm.
nb_train_samples = 20000
nb_validation_samples = 2500
epochs = 50
batch_size = 16


In [12]:
# learning_rate = 0.2
# decay_rate = learning_rate / epochs
# momentum = 0.8
# rmsprop = RMSprop(lr=learning_rate, decay=decay_rate)

In [18]:
def train_top_model():
    """Fit a small dense classifier on the cached InceptionV3 features.

    Loads the feature arrays saved under ``model/``, labels the first half of
    each array 0 and the second half 1, and trains a Dense(256) -> Dropout ->
    Dense(1, sigmoid) network with Adam and binary cross-entropy. The best
    epochs (by ``val_acc``) are checkpointed under ``model/``, training stops
    after 10 epochs without improvement, and the weights at the end of
    training are written to ``top_model_weights_path``.
    """
    def half_and_half(total):
        # total // 2 zeros followed by total // 2 ones.
        per_class = total // 2
        return np.array([0] * per_class + [1] * per_class)

    train_data = np.load('model/inceptionV3_train_pool.npy')
    train_labels = half_and_half(nb_train_samples)

    validation_data = np.load('model/inceptionV3_valid_pool.npy')
    validation_labels = half_and_half(nb_validation_samples)

    # The features are already flat vectors, so no Flatten layer is needed.
    model = Sequential([
        Dense(256, activation='relu', input_shape=train_data.shape[1:]),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    filepath = "f_inceptionV3_model-{epoch:02d}-{val_acc:.4f}.hdf5"

    # Checkpoint on improvement, stop when val_acc stalls, log for TensorBoard.
    best_saver = ModelCheckpoint('model/'+filepath, monitor='val_acc', verbose=1, save_best_only=True)
    stopper = EarlyStopping(monitor='val_acc', patience=10, verbose=0)
    board = TensorBoard(log_dir="logs/{}".format(time()))

    model.fit(
        train_data,
        train_labels,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(validation_data, validation_labels),
        callbacks=[best_saver, stopper, board],
    )
    model.save_weights(top_model_weights_path)

In [19]:
# Feature extraction is skipped here; the cached .npy files are reused.
# save_bottlebeck_features()
train_top_model()

Train on 20000 samples, validate on 2500 samples
Epoch 1/50

Epoch 00001: val_acc improved from -inf to 0.99520, saving model to model/f_inceptionV3_model-01-0.9952.hdf5
Epoch 2/50

Epoch 00002: val_acc did not improve
Epoch 3/50

Epoch 00003: val_acc improved from 0.99520 to 0.99560, saving model to model/f_inceptionV3_model-03-0.9956.hdf5
Epoch 4/50

Epoch 00004: val_acc did not improve
Epoch 5/50

Epoch 00005: val_acc did not improve
Epoch 6/50

Epoch 00006: val_acc did not improve
Epoch 7/50

Epoch 00007: val_acc improved from 0.99560 to 0.99640, saving model to model/f_inceptionV3_model-07-0.9964.hdf5
Epoch 8/50

Epoch 00008: val_acc did not improve
Epoch 9/50

Epoch 00009: val_acc did not improve
Epoch 10/50

Epoch 00010: val_acc did not improve
Epoch 11/50

Epoch 00011: val_acc did not improve
Epoch 12/50

Epoch 00012: val_acc did not improve
Epoch 13/50

Epoch 00013: val_acc did not improve
Epoch 14/50

Epoch 00014: val_acc did not improve
Epoch 15/50

Epoch 00015: val_acc did 

### predict

In [21]:
import keras

In [30]:
# Rebuild the same feature extractor used when caching the training features:
# InceptionV3 without its top classifier, followed by global average pooling.
body = applications.InceptionV3(include_top=False, weights='imagenet', input_shape=(img_width, img_height, 3))

head = body.output
head = GlobalAveragePooling2D()(head)
# `model` maps an image batch to one pooled feature vector per image.
model = Model(body.input, head)    

In [23]:
%%time
# Load the checkpoint that train_top_model() saved at epoch 7 (val_acc 0.9964).
final = keras.models.load_model('./model/f_inceptionV3_model-07-0.9964.hdf5')

Wall time: 3.02 s


### predict helper func

In [31]:
def helper(start, end):
    """Score test images ``start``..``end`` (inclusive) with the two-stage model.

    Each ``./test/test/<p>.jpg`` is loaded at 299x299, divided by 255.0 (the
    same rescaling used when the training features were cached), passed
    through the global feature extractor ``model`` and then through the global
    classifier ``final``.

    Args:
        start: first image number to score.
        end: last image number to score (included).

    Returns:
        A list of ``[p, score]`` pairs, where ``score`` is element ``[0][0]``
        of the classifier output for image ``p``.
    """
    result = []
    for p in range(start, end + 1):
        # load_img documents target_size as (img_height, img_width); a third
        # "channels" entry is not part of that contract, so pass two values.
        image = load_img('./test/test/{}.jpg'.format(p), target_size=(299, 299))

        # Add the leading batch axis and rescale the pixel values.
        batch = img_to_array(image).reshape(-1, 299, 299, 3) / 255.0

        features = model.predict(batch)
        result.append([p, final.predict_proba(features)[0][0]])
    return result

In [32]:
import pandas as pd

In [33]:
%%time
# Score test images 1..12500; one [image number, score] row per image.
df = pd.DataFrame(helper(1,12500))
print (df.shape)

(12500, 2)
Wall time: 5min 58s


In [34]:
# Name the two columns produced by helper(): image number and classifier score.
df.columns=['id','label']

In [35]:
# Write the scores without the index column.
# NOTE(review): the file name says "mobelnet" but the visible cells use InceptionV3 — confirm the intended name.
df.to_csv('pool_transfer_mobelnet.csv',index=False)

In [46]:
def helper2(start, end):
    """Score images ``start``..``end`` (inclusive) from ``./test1/test``.

    Each ``./test1/test/<p>.jpg`` is loaded at 224x224, divided by 255.0,
    passed through the global feature extractor ``pre_model`` and then through
    the global classifier ``final``.

    NOTE(review): ``pre_model`` is not defined in any cell visible in this
    notebook, and ``final`` as loaded above was trained on InceptionV3
    features — confirm which models this cell was actually run with.

    Args:
        start: first image number to score.
        end: last image number to score (included).

    Returns:
        A list with one score per image, taken from element ``[0][0]`` of the
        classifier output.
    """
    result = []
    for p in range(start, end + 1):
        # load_img documents target_size as (img_height, img_width); a third
        # "channels" entry is not part of that contract, so pass two values.
        image = load_img('./test1/test/{}.jpg'.format(p), target_size=(224, 224))

        # Add the leading batch axis and rescale the pixel values.
        batch = img_to_array(image).reshape(-1, 224, 224, 3) / 255.0

        features = pre_model.predict(batch)
        result.append(final.predict_proba(features)[0][0])
    return result

In [350]:
%%time
# Score ./test1/test/0.jpg .. 999.jpg; one score per row.
df2 = pd.DataFrame(helper2(0,999))
print (df2.shape)

(1000, 1)
Wall time: 12.1 s


In [351]:
# Row i holds the score for image i, so the file name is "<index>.jpg".
df2['file'] = [str(row_id) + '.jpg' for row_id in df2.index]


In [352]:
# Name the score column 'label', then put 'file' first.
df2.columns=['label','file']
df2 = df2[['file','label']]

In [353]:
# Scores below 0.5 become 'cat', everything else 'dog'.
# This overwrites the numeric scores, so the cell is not meant to be re-run.
df2['label'] = ['cat' if score < 0.5 else 'dog' for score in df2['label']]

In [355]:
# Write the file/label table without the index column.
df2.to_csv('transfersmall2.csv', index=False)

In [354]:
# Display the file/label table.
df2

Unnamed: 0,file,label
0,0.jpg,cat
1,1.jpg,cat
2,2.jpg,cat
3,3.jpg,cat
4,4.jpg,dog
5,5.jpg,dog
6,6.jpg,dog
7,7.jpg,dog
8,8.jpg,cat
9,9.jpg,cat
