https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html

https://github.com/keras-team/keras/issues/1538

https://gogul09.github.io/software/flower-recognition-deep-learning

In [53]:
# import os
# os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

In [54]:
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

# Let TensorFlow grow its GPU memory allocation as needed instead of
# reserving it up front (see the TF1 `GPUOptions.allow_growth` option).
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# config.gpu_options.per_process_gpu_memory_fraction = 0.3

# Make Keras run on a session that uses the options above.
set_session(tf.Session(config=config))

In [55]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

In [56]:
# Augmenting image generator: pixel values are multiplied by 1/255 and every
# image is randomly rotated, shifted, sheared, zoomed and possibly flipped.
gen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

https://keras.io/preprocessing/image/

https://keras.io/applications/#mobilenet

In [57]:
batch_size=16

In [58]:
# Augmented training batches read from train/train; class_mode='binary'
# yields one 0/1 label per image.
train_generator = gen.flow_from_directory(
    directory='train/train',
    class_mode='binary',
    batch_size=batch_size,
    target_size=(224, 224),
    # save_to_dir='preview'
)

Found 20000 images belonging to 2 classes.


In [59]:
# Validation images must not be augmented: `gen` applies random rotations,
# shifts, shear, zoom and flips, which would distort the validation metrics.
# Use a generator that only rescales the pixel values.
validation_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(
        directory = 'train/validation',
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='binary')

Found 2500 images belonging to 2 classes.


In [60]:
# Test images must not be augmented: `gen` applies random rotations, shifts,
# shear, zoom and flips, which would distort the test metrics.
# Use a generator that only rescales the pixel values.
test_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(
        directory = 'train/test',
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='binary')

Found 2500 images belonging to 2 classes.


In [61]:
# Sample counts; they match the "Found N images" lines printed by the
# train/train and train/validation generators above.
nb_train_samples = 20000
nb_validation_samples = 2500
# Maximum number of epochs passed to model.fit in train_top_model.
epochs = 50
# Batch size used for feature extraction and for fitting the top model.
batch_size = 16


### pre-trained model

In [62]:
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications
import math




# Resolution every image is resized to before it is fed to the network.
img_width = 224
img_height = 224

# Where train_top_model stores the trained weights of the dense head.
top_model_weights_path = 'model/bottleneck_fc_model.h5'

# Image folders the bottleneck features are computed from.
train_data_dir = 'train/train'
validation_data_dir = 'train/validation'


def _extract_bottleneck_features(model, datagen, data_dir, nb_samples, out_path):
    """Run the images under ``data_dir`` through ``model`` and save the result.

    Args:
        model: network whose output is stored as the bottleneck features.
        datagen: ImageDataGenerator used to read and preprocess the images.
        data_dir: directory with one sub-directory per class.
        nb_samples: number of images expected under ``data_dir``.
        out_path: ``.npy`` file the feature array is written to.
    """
    import os

    # shuffle=False keeps the rows in directory order; train_top_model relies
    # on that order when it rebuilds the labels.
    generator = datagen.flow_from_directory(
        data_dir,
        target_size=(img_height, img_width),  # Keras expects (height, width)
        batch_size=batch_size,
        class_mode='binary',
        shuffle=False)

    # Round up so the final, partially filled batch is predicted as well.
    steps = math.ceil(nb_samples / batch_size)
    features = model.predict_generator(generator, steps, verbose=1)

    # np.save does not create missing directories.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    np.save(out_path, features)


def save_bottlebeck_features():
    """Compute and save MobileNet bottleneck features for train and validation.

    Writes ``model/bottleneck_features_train.npy`` and
    ``model/bottleneck_features_validation.npy``. Images are only rescaled by
    1/255 (no augmentation) before being fed to the network.
    """
    datagen = ImageDataGenerator(rescale=1. / 255)

    # build the MobileNet network without its classification head
    model = applications.MobileNet(include_top=False, weights='imagenet', input_shape=(img_height, img_width, 3))

    _extract_bottleneck_features(
        model, datagen, train_data_dir, nb_train_samples,
        'model/bottleneck_features_train.npy')

    _extract_bottleneck_features(
        model, datagen, validation_data_dir, nb_validation_samples,
        'model/bottleneck_features_validation.npy')




#save_bottlebeck_features()
#train_top_model()

## from here

In [63]:
from keras.callbacks import ModelCheckpoint, EarlyStopping
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications
import math
from keras.optimizers import RMSprop

In [64]:
# Same values as the constants cell earlier in the notebook.
# Sample counts the labels in train_top_model are generated for.
nb_train_samples = 20000
nb_validation_samples = 2500
# Maximum number of epochs passed to model.fit in train_top_model.
epochs = 50
# Mini-batch size passed to model.fit in train_top_model.
batch_size = 16


In [None]:
# learning_rate = 0.2
# decay_rate = learning_rate / epochs
# momentum = 0.8
# rmsprop = RMSprop(lr=learning_rate, decay=decay_rate)

In [76]:
def train_top_model():
    """Train a small dense classifier on the saved bottleneck features.

    Loads the feature arrays from ``model/bottleneck_features_*.npy``, fits a
    Flatten -> Dense(256, relu) -> Dropout(0.4) -> Dense(1, sigmoid) head,
    saves the best model (by ``val_acc``) under ``model/deeper1-*.hdf5`` and
    finally writes the last weights to ``top_model_weights_path``.

    Raises:
        ValueError: if a feature file does not contain as many samples as
            labels are generated for (e.g. a stale file that is missing the
            last partial batch).
    """
    # Labels are rebuilt rather than loaded: the features were extracted with
    # shuffle=False, so this assumes the first half of the rows is class 0 and
    # the second half class 1, i.e. perfectly balanced classes.
    train_data = np.load('model/bottleneck_features_train.npy')
    train_labels = np.array(
        [0] * (nb_train_samples // 2) + [1] * (nb_train_samples // 2))

    validation_data = np.load('model/bottleneck_features_validation.npy')
    validation_labels = np.array(
        [0] * (nb_validation_samples // 2) + [1] * (nb_validation_samples // 2))

    # Fail early with a clear message instead of deep inside model.fit.
    for split, features, labels in (
            ('train', train_data, train_labels),
            ('validation', validation_data, validation_labels)):
        if len(features) != len(labels):
            raise ValueError(
                '{} features hold {} samples but {} labels were generated; '
                're-run save_bottlebeck_features() or fix the sample counts'
                .format(split, len(features), len(labels)))

    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='adam',
                  loss='binary_crossentropy', metrics=['accuracy'])

    filepath="deeper1-{epoch:02d}-{val_acc:.4f}.hdf5"

    callbacks_list = [
        # Keep only checkpoints that improve validation accuracy.
        ModelCheckpoint('model/'+filepath, monitor='val_acc', verbose=1, save_best_only=True),
        # Stop once validation accuracy has not improved for 10 epochs.
        EarlyStopping(monitor='val_acc', patience=10, verbose=0),
    ]

    model.fit(train_data, train_labels,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(validation_data, validation_labels),
              callbacks=callbacks_list)
    model.save_weights(top_model_weights_path)

In [77]:
# save_bottlebeck_features()
train_top_model()

Train on 20000 samples, validate on 2500 samples
Epoch 1/50

Epoch 00001: val_acc improved from -inf to 0.97680, saving model to model/deeper1-01-0.9768.hdf5
Epoch 2/50
 1792/20000 [=>............................] - ETA: 21s - loss: 0.8844 - acc: 0.9425

KeyboardInterrupt: 

In [None]:
# train_top_model()

In [None]:
# datagen = ImageDataGenerator(rescale=1. / 255)

# # build the VGG16 network
# model = applications.MobileNet(include_top=False, weights='imagenet', input_shape=(img_width, img_height, 3))

# generator = datagen.flow_from_directory(
#     train_data_dir,
#     target_size=(img_width, img_height),
#     batch_size=batch_size,
#     class_mode='binary',
#     shuffle=False)

# bottleneck_features_train = model.predict_generator(
#     generator, nb_train_samples // batch_size,
#     verbose=1)


In [23]:
type(bottleneck_features_train)

numpy.ndarray

In [24]:
bottleneck_features_train.shape

(20000, 7, 7, 1024)

In [27]:
%%time
np.save('model/bottleneck_features_train.npy',bottleneck_features_train)

Wall time: 11.1 s


In [32]:
bottleneck_features_train.shape[1:]

(7, 7, 1024)

In [36]:
nb_train_samples

20000

In [39]:
np.array([0] * (nb_train_samples // 2) + [1] * (nb_train_samples // 2))

array([0, 0, 0, ..., 1, 1, 1])

In [42]:
%time
vd = np.load('model/bottleneck_features_validation.npy')

Wall time: 0 ns


In [44]:
vd.shape

(2496, 7, 7, 1024)

In [47]:
generator = datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False)

# Round the step count up: floor division gives 156 steps = 2496 images and
# silently drops the last 4 of the 2500 validation images.
bottleneck_features_validation = model.predict_generator(
    generator, math.ceil(nb_validation_samples / batch_size),
    verbose=1)

# np.save('model/bottleneck_features_validation.npy',
#         bottleneck_features_validation)

Found 2500 images belonging to 2 classes.


In [57]:
math.ceil(2500/16)

157

### predict

In [229]:
import keras

In [258]:
img_width, img_height

(224, 224)

In [240]:
datagen.flow_from_directory?

In [241]:
# Rescale-only generator: no augmentation is applied to these images.
datagen = ImageDataGenerator(rescale=1. / 255)

# class_mode=None yields image batches without labels; shuffle=False keeps
# the images in a fixed order; batch_size=1 serves one image per step.
generator = datagen.flow_from_directory(
    directory='./test0',
    target_size=(img_width, img_height),
    shuffle=False,
    class_mode=None,
    batch_size=1)



Found 1000 images belonging to 1 classes.


In [242]:
# MobileNet convolutional base (no classification head) with ImageNet weights.
pre_model = keras.applications.MobileNet(include_top=False, weights='imagenet', input_shape=(img_width, img_height, 3))

# The cells below read `test_array`, which no remaining cell defines, so a
# fresh kernel would fail with NameError. Compute the bottleneck features of
# every image served by `generator` (one step per batch, rounded up).
test_array = pre_model.predict_generator(
    generator,
    math.ceil(generator.samples / generator.batch_size),
    verbose=1)

In [248]:
test_array.shape

(1000, 7, 7, 1024)

In [249]:
%%time
final = keras.models.load_model('./model/bottleneck_fc_model-31-0.9872.hdf5')

Wall time: 5.34 s


In [250]:
test_array[0].shape

(7, 7, 1024)

In [251]:
test_array[0].reshape(-1,7,7,1024).shape

(1, 7, 7, 1024)

In [252]:
final.predict_on_batch(test_array[:10])

array([[0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.]], dtype=float32)

In [253]:
final.predict_proba(test_array[1].reshape(-1,7,7,1024))

array([[0.]], dtype=float32)

In [254]:
final.predict_proba(test_array[0:10])

array([[0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.]], dtype=float32)

### predict helper func

In [357]:
def helper(start, end, chunk_size=32):
    """Score the test images numbered ``start`` .. ``end`` (both inclusive).

    Each ``./test/test/<p>.jpg`` is resized to 224x224, divided by 255, passed
    through ``pre_model`` and the result through ``final``.

    Args:
        start: first image number.
        end: last image number (inclusive).
        chunk_size: how many images are pushed through the networks per call;
            bounds memory use while avoiding one predict call per image.

    Returns:
        List of ``[p, score]`` pairs in ascending ``p`` order, where ``score``
        is the single output value of ``final`` for image ``p``. Empty when
        ``end < start``.
    """
    result = []
    ids = list(range(start, end + 1))
    for offset in range(0, len(ids), chunk_size):
        chunk = ids[offset:offset + chunk_size]
        # target_size is (height, width); img_to_array adds the channel axis.
        images = np.stack([
            img_to_array(load_img('./test/test/{}.jpg'.format(p),
                                  target_size=(224, 224)))
            for p in chunk
        ]) / 255.0
        # predict() instead of predict_proba(): the latter exists only on
        # Sequential models and was removed from newer Keras releases.
        scores = final.predict(pre_model.predict(images))
        result.extend([p, score[0]] for p, score in zip(chunk, scores))
    return result

In [358]:
# def helper_batch(start, end):
#     result = []
#     array=[]
#     for p in range(start,end+1):
#         array.append(
#         img_to_array(load_img('./test/test/{}.jpg'.format(p), 
#                                       target_size=(224,224,3)
#                                      ))/255.0)
            


#         result.append(final.predict_proba(
#             pre_model.predict(
#                 )            
            
#     return result

In [359]:
import pandas as pd

In [364]:
%%time
df = pd.DataFrame(helper(1,12500))
print (df.shape)

(12500, 2)
Wall time: 2min 29s


In [365]:
df.columns=['id','label']

In [372]:
# NOTE(review): both operands are the same attribute, so this comparison is
# trivially True and checks nothing; presumably `final.predict` was meant on
# one side, or the outputs of the two methods were to be compared — confirm.
final.predict_proba == final.predict_proba

True

In [373]:
# df['label']

In [376]:
df.to_csv('transfer_mobelnet3.csv',index=False)

In [337]:
def helper2(start, end, chunk_size=32):
    """Score the images ``./test1/test/<p>.jpg`` for ``p`` in ``start`` .. ``end``.

    Each image is resized to 224x224, divided by 255, passed through
    ``pre_model`` and the result through ``final``.

    Args:
        start: first image number.
        end: last image number (inclusive).
        chunk_size: how many images are pushed through the networks per call;
            bounds memory use while avoiding one predict call per image.

    Returns:
        List with the single output value of ``final`` for every image, in
        ascending ``p`` order. Empty when ``end < start``.
    """
    result = []
    ids = list(range(start, end + 1))
    for offset in range(0, len(ids), chunk_size):
        chunk = ids[offset:offset + chunk_size]
        # target_size is (height, width); img_to_array adds the channel axis.
        images = np.stack([
            img_to_array(load_img('./test1/test/{}.jpg'.format(p),
                                  target_size=(224, 224)))
            for p in chunk
        ]) / 255.0
        # predict() instead of predict_proba(): the latter exists only on
        # Sequential models and was removed from newer Keras releases.
        scores = final.predict(pre_model.predict(images))
        result.extend(score[0] for score in scores)
    return result

In [350]:
%%time
df2 = pd.DataFrame(helper2(0,999))
print (df2.shape)

(1000, 1)
Wall time: 12.1 s


In [351]:
# Derive each file name from the row index: row k becomes "k.jpg".
df2['file'] = [str(row_id) + '.jpg' for row_id in df2.index]


In [352]:
# Name the score column 'label', then put the file name first.
df2.columns = ['label', 'file']
df2 = df2.loc[:, ['file', 'label']]

In [353]:
# Turn the score into a class name: below 0.5 -> 'cat', otherwise 'dog'.
# Not idempotent: the column holds strings afterwards, so run this cell once.
df2['label'] = df2['label'].lt(0.5).map({True: 'cat', False: 'dog'})

In [355]:
df2.to_csv('transfersmall2.csv', index=False)

In [354]:
df2

Unnamed: 0,file,label
0,0.jpg,cat
1,1.jpg,cat
2,2.jpg,cat
3,3.jpg,cat
4,4.jpg,dog
5,5.jpg,dog
6,6.jpg,dog
7,7.jpg,dog
8,8.jpg,cat
9,9.jpg,cat
