In [1]:
import tensorflow.keras as keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential

from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

import os
from os import listdir
from os.path import isfile, join
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd

# Generate Train DFs

In [2]:
MY_PATH = 'ml100-03-final/image_data/'
TRAIN_PATH = 'train/'
TEST_PATH = 'test/'

In [3]:
daisy = os.listdir(MY_PATH+TRAIN_PATH+'/daisy')
dandelion = os.listdir(MY_PATH+TRAIN_PATH+'/dandelion')
rose = os.listdir(MY_PATH+TRAIN_PATH+'/rose')
sunflower = os.listdir(MY_PATH+TRAIN_PATH+'/sunflower')
tulip = os.listdir(MY_PATH+TRAIN_PATH+'/tulip')

In [4]:
# raw function def.
def raw(data,label):
    df=pd.DataFrame(data,columns=['dirname'])
    df['label']=[label]*len(data)
    return df

# dfs for 5 types of flowers
daisy_df=raw(daisy,[1,0,0,0,0])
dandelion_df=raw(dandelion,[0,1,0,0,0])
rose_df=raw(rose,[0,0,1,0,0])
sunflower_df=raw(sunflower,[0,0,0,1,0])
tulip_df=raw(tulip,[0,0,0,0,1])

daisy_df['folder']='daisy'
dandelion_df['folder']='dandelion'
rose_df['folder']='rose'
sunflower_df['folder']='sunflower'
tulip_df['folder']='tulip'

In [5]:
#每個資料集取 390 做train 剩下做各取98 做test

In [6]:
def reduce_sample(df):
    df=df[:488]
    return df

daisy_df=reduce_sample(daisy_df)
dandelion_df=reduce_sample(dandelion_df)
rose_df=reduce_sample(rose_df)
sunflower_df=reduce_sample(sunflower_df)
tulip_df=reduce_sample(tulip_df)

In [7]:
train_data = []
test_data = []

# funciotn that make train data
def load_data(IMAGE_SIZE=256):
    print("Loading images...")
    for data_df in [daisy_df,dandelion_df,rose_df,sunflower_df,tulip_df]:
        X_train, X_test, y_train, y_test = train_test_split(range(0,488),range(0,488),
                                                     test_size=0.2, random_state=1, shuffle = True)
        for i in X_train:
            #print("Loading {0}".format(dirname))
            dirnames=data_df.iloc[i]["dirname"]
            folder_path = data_df.iloc[i]['folder']+'/'
            label = data_df.iloc[i]['label']
            img = Image.open(MY_PATH + TRAIN_PATH + folder_path + dirnames)
            img = img.resize((IMAGE_SIZE, IMAGE_SIZE), Image.ANTIALIAS)
            train_data.append([np.array(img),label])
            
        for i in X_test:
            #print("Loading {0}".format(dirname))
            dirnames=data_df.iloc[i]["dirname"]
            folder_path = data_df.iloc[i]['folder']+'/'
            label = data_df.iloc[i]['label']
            img = Image.open(MY_PATH + TRAIN_PATH + folder_path + dirnames)
            img = img.resize((IMAGE_SIZE, IMAGE_SIZE), Image.ANTIALIAS)
            test_data.append([np.array(img),label])
    
    
    print("done")
    return train_data ,test_data

train_data, test_data = load_data()


Loading images...
done


In [8]:
len(train_data)

1950

In [9]:
len(test_data)

490

In [10]:
def create_dnn_model(IMAGE_SIZE=256):
    model = Sequential()
    #卷積網
    model.add(Conv2D(32, kernel_size = (3, 3), activation='relu', input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(BatchNormalization())
    model.add(Conv2D(64, kernel_size=(3,3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(BatchNormalization())
    model.add(Conv2D(128, kernel_size=(3,3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(BatchNormalization())
    model.add(Conv2D(64, kernel_size=(3,3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(BatchNormalization())
    model.add(Flatten())
    
    #神經網
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(5, activation = 'softmax'))
    print('creating model')
    return model


In [11]:
model = create_dnn_model()
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

creating model


In [12]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 254, 254, 32)      896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 127, 127, 32)      0         
_________________________________________________________________
batch_normalization (BatchNo (None, 127, 127, 32)      128       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 125, 125, 64)      18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 62, 62, 64)        0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 62, 62, 64)        256       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 60, 60, 128)       7

In [13]:
training_images = np.array([i[0] for i in train_data])
training_labels = np.array([i[1] for i in train_data])
testing_images = np.array([i[0] for i in test_data])
testing_labels = np.array([i[1] for i in test_data])

In [14]:
# 影像 generator
def generator(batch_size=64):
    while True:
        for indexs in range(0, len(training_images), batch_size):
            images = training_images[indexs: indexs+batch_size]
            labels = training_labels [indexs: indexs+batch_size]
            yield images, labels
            
img_gen = generator()

In [15]:
checkpoint_dir = "training_1"
# 若目錄不存在，則新建
if not os.path.exists(checkpoint_dir):
    os.makedirs(checkpoint_dir)

checkpoint_path = "training_1/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

cp_callback = keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                              save_weights_only=True,  # save_best_only=True
                                              verbose=1,
                                             monitor='val_loss')

In [16]:
model.fit_generator(img_gen, 
                    steps_per_epoch=(len(training_images) / 64),
                    epochs=150,
                    verbose=1,
                    validation_data=(testing_images, testing_labels),
                    callbacks=cp_callback)



Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/150
Epoch 00001: saving model to training_1/cp.ckpt
Epoch 2/150
Epoch 00002: saving model to training_1/cp.ckpt
Epoch 3/150
Epoch 00003: saving model to training_1/cp.ckpt
Epoch 4/150
Epoch 00004: saving model to training_1/cp.ckpt
Epoch 5/150
Epoch 00005: saving model to training_1/cp.ckpt
Epoch 6/150
Epoch 00006: saving model to training_1/cp.ckpt
Epoch 7/150
Epoch 00007: saving model to training_1/cp.ckpt
Epoch 8/150
Epoch 00008: saving model to training_1/cp.ckpt
Epoch 9/150
Epoch 00009: saving model to training_1/cp.ckpt
Epoch 10/150
Epoch 00010: saving model to training_1/cp.ckpt
Epoch 11/150
Epoch 00011: saving model to training_1/cp.ckpt
Epoch 12/150
Epoch 00012: saving model to training_1/cp.ckpt
Epoch 13/150
Epoch 00013: saving model to training_1/cp.ckpt
Epoch 14/150
Epoch 00014: saving model to training_1/cp.ckpt
Epoch 15/150
Epoch 00015: saving model to training_1/cp.ckpt
Epoch 16/150
Epoch

Epoch 30/150
Epoch 00030: saving model to training_1/cp.ckpt
Epoch 31/150
Epoch 00031: saving model to training_1/cp.ckpt
Epoch 32/150
Epoch 00032: saving model to training_1/cp.ckpt
Epoch 33/150
Epoch 00033: saving model to training_1/cp.ckpt
Epoch 34/150
Epoch 00034: saving model to training_1/cp.ckpt
Epoch 35/150
Epoch 00035: saving model to training_1/cp.ckpt
Epoch 36/150
Epoch 00036: saving model to training_1/cp.ckpt
Epoch 37/150
Epoch 00037: saving model to training_1/cp.ckpt
Epoch 38/150
Epoch 00038: saving model to training_1/cp.ckpt
Epoch 39/150
Epoch 00039: saving model to training_1/cp.ckpt
Epoch 40/150
Epoch 00040: saving model to training_1/cp.ckpt
Epoch 41/150
Epoch 00041: saving model to training_1/cp.ckpt
Epoch 42/150
Epoch 00042: saving model to training_1/cp.ckpt
Epoch 43/150
Epoch 00043: saving model to training_1/cp.ckpt
Epoch 44/150
Epoch 00044: saving model to training_1/cp.ckpt
Epoch 45/150
Epoch 00045: saving model to training_1/cp.ckpt
Epoch 46/150
Epoch 00046

Epoch 60/150
Epoch 00060: saving model to training_1/cp.ckpt
Epoch 61/150
Epoch 00061: saving model to training_1/cp.ckpt
Epoch 62/150
Epoch 00062: saving model to training_1/cp.ckpt
Epoch 63/150
Epoch 00063: saving model to training_1/cp.ckpt
Epoch 64/150
Epoch 00064: saving model to training_1/cp.ckpt
Epoch 65/150
Epoch 00065: saving model to training_1/cp.ckpt
Epoch 66/150
Epoch 00066: saving model to training_1/cp.ckpt
Epoch 67/150
Epoch 00067: saving model to training_1/cp.ckpt
Epoch 68/150
Epoch 00068: saving model to training_1/cp.ckpt
Epoch 69/150
Epoch 00069: saving model to training_1/cp.ckpt
Epoch 70/150
Epoch 00070: saving model to training_1/cp.ckpt
Epoch 71/150
Epoch 00071: saving model to training_1/cp.ckpt
Epoch 72/150
Epoch 00072: saving model to training_1/cp.ckpt
Epoch 73/150
Epoch 00073: saving model to training_1/cp.ckpt
Epoch 74/150
Epoch 00074: saving model to training_1/cp.ckpt
Epoch 75/150
Epoch 00075: saving model to training_1/cp.ckpt
Epoch 76/150
Epoch 00076

Epoch 90/150
Epoch 00090: saving model to training_1/cp.ckpt
Epoch 91/150
Epoch 00091: saving model to training_1/cp.ckpt
Epoch 92/150
Epoch 00092: saving model to training_1/cp.ckpt
Epoch 93/150
Epoch 00093: saving model to training_1/cp.ckpt
Epoch 94/150
Epoch 00094: saving model to training_1/cp.ckpt
Epoch 95/150
Epoch 00095: saving model to training_1/cp.ckpt
Epoch 96/150
Epoch 00096: saving model to training_1/cp.ckpt
Epoch 97/150
Epoch 00097: saving model to training_1/cp.ckpt
Epoch 98/150
Epoch 00098: saving model to training_1/cp.ckpt
Epoch 99/150
Epoch 00099: saving model to training_1/cp.ckpt
Epoch 100/150
Epoch 00100: saving model to training_1/cp.ckpt
Epoch 101/150
Epoch 00101: saving model to training_1/cp.ckpt
Epoch 102/150
Epoch 00102: saving model to training_1/cp.ckpt
Epoch 103/150
Epoch 00103: saving model to training_1/cp.ckpt
Epoch 104/150
Epoch 00104: saving model to training_1/cp.ckpt
Epoch 105/150
Epoch 00105: saving model to training_1/cp.ckpt
Epoch 106/150
Epoc

Epoch 120/150
Epoch 00120: saving model to training_1/cp.ckpt
Epoch 121/150
Epoch 00121: saving model to training_1/cp.ckpt
Epoch 122/150
Epoch 00122: saving model to training_1/cp.ckpt
Epoch 123/150
Epoch 00123: saving model to training_1/cp.ckpt
Epoch 124/150
Epoch 00124: saving model to training_1/cp.ckpt
Epoch 125/150
Epoch 00125: saving model to training_1/cp.ckpt
Epoch 126/150
Epoch 00126: saving model to training_1/cp.ckpt
Epoch 127/150
Epoch 00127: saving model to training_1/cp.ckpt
Epoch 128/150
Epoch 00128: saving model to training_1/cp.ckpt
Epoch 129/150
Epoch 00129: saving model to training_1/cp.ckpt
Epoch 130/150
Epoch 00130: saving model to training_1/cp.ckpt
Epoch 131/150
Epoch 00131: saving model to training_1/cp.ckpt
Epoch 132/150
Epoch 00132: saving model to training_1/cp.ckpt
Epoch 133/150
Epoch 00133: saving model to training_1/cp.ckpt
Epoch 134/150
Epoch 00134: saving model to training_1/cp.ckpt
Epoch 135/150
Epoch 00135: saving model to training_1/cp.ckpt
Epoch 13

Epoch 150/150
Epoch 00150: saving model to training_1/cp.ckpt


<tensorflow.python.keras.callbacks.History at 0x7f9190cc7710>

In [17]:
test = os.listdir(MY_PATH+TEST_PATH)

In [18]:
test_df=raw(test,None)

In [19]:
test_df

Unnamed: 0,dirname,label
0,e86789078f3731bdc3d1e740825f29b3.jpg,
1,6b0e58f46b51157485d532c8eeec6179.jpg,
2,fd9cac41cda00325613b362025eb9cc1.jpg,
3,10ede359e864dab7c381d9f8bb35de15.jpg,
4,e73078a312effc5db81b849b083d1365.jpg,
...,...,...
1995,5ba0a1dd4895ff62cd322dccbeab4d33.jpg,
1996,39706bf6a1eb9fc681952d73cfdaa978.jpg,
1997,09700570b778d58ab42a1aa8b62f4f83.jpg,
1998,4cf03c70d04f9bcbf9918b896d21514f.jpg,


In [20]:
# test datat function
def test_load_data(IMAGE_SIZE=256):
    print("Loading images...")
    test_data = []
    for i in range(len(test_df)):
        #print("Loading {0}".format(dirname))
        dirnames=test_df.iloc[i]["dirname"]
        img = Image.open(MY_PATH+TEST_PATH+dirnames)
        img = img.resize((IMAGE_SIZE, IMAGE_SIZE), Image.ANTIALIAS)
        #test_data.append([np.array(img),label])
        test_data.append(np.array(img))
    print("done")
    return test_data

test_data = test_load_data()

Loading images...
done


In [21]:
#load best weights
model.load_weights(checkpoint_path)

# pred output
test_images = np.array(test_data)
pred = model.predict(test_images)
pred_digits = np.argmax(pred,axis=1)
pred_digits

array([0, 3, 4, ..., 4, 1, 1])

In [22]:
test_df['label'] = pred_digits
test_df.rename(columns = {'dirname':'id','label':'flower_class'},inplace=True)

In [23]:
test_df['id'] = test_df['id'].apply(lambda x: x.replace('.jpg',''))
test_df.head()

Unnamed: 0,id,flower_class
0,e86789078f3731bdc3d1e740825f29b3,0
1,6b0e58f46b51157485d532c8eeec6179,3
2,fd9cac41cda00325613b362025eb9cc1,4
3,10ede359e864dab7c381d9f8bb35de15,1
4,e73078a312effc5db81b849b083d1365,4


In [24]:
test_df.to_csv('final.csv',index=False)

In [25]:
_, train_acc = model.evaluate(training_images, training_labels, verbose=0)
_, test_acc = model.evaluate(testing_images, testing_labels, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))

Train: 0.954, Test: 0.406


# Reference

In [None]:
# mlp overfit on the moons dataset with patient early stopping and model checkpointing
from sklearn.datasets import make_moons
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from matplotlib import pyplot
from keras.models import load_model
# generate 2d classification dataset
X, y = make_moons(n_samples=100, noise=0.2, random_state=1)
# split into train and test
n_train = 30
trainX, testX = X[:n_train, :], X[n_train:, :]
trainy, testy = y[:n_train], y[n_train:]
# define model
model = Sequential()
model.add(Dense(500, input_dim=2, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# simple early stopping
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=200)
mc = ModelCheckpoint('best_model.h5', monitor='val_accuracy', mode='max', verbose=1, save_best_only=True)
# fit model
history = model.fit(trainX, trainy, validation_data=(testX, testy), epochs=4000, verbose=0, callbacks=[es, mc])
# load the saved model
saved_model = load_model('best_model.h5')
# evaluate the model
_, train_acc = saved_model.evaluate(trainX, trainy, verbose=0)
_, test_acc = saved_model.evaluate(testX, testy, verbose=0)
print('Train: %.3f, Test: %.3f' % (train_acc, test_acc))a