In [1]:
import numpy as np
import pandas as pd
import os

PATH = "/kaggle/input/applications-of-deep-learning-wustl-fall-2020/final-kaggle-data/"
PATH_TRAIN = os.path.join(PATH, "train.csv")
PATH_TEST = os.path.join(PATH, "test.csv")

In [2]:
df_train = pd.read_csv(PATH_TRAIN)
df_test = pd.read_csv(PATH_TEST)

df_train = df_train[df_train.id != 1300]

df_train['filename'] = df_train["id"].astype(str)+".png"
df_train['stable'] = df_train['stable'].astype(str)

df_test['filename'] = df_test["id"].astype(str)+".png"

In [3]:
TEST_PCT = 0.2
TEST_CUT = int(len(df_train) * TEST_PCT)

df_train_cut = df_train[TEST_CUT:]
df_validate_cut = df_train[0:TEST_CUT]

print(f"Training size: {len(df_train_cut)}")
print(f"Validate size: {len(df_validate_cut)}")

Training size: 32785
Validate size: 8196


In [4]:
import tensorflow as tf
import keras_preprocessing
from keras_preprocessing import image
from keras_preprocessing.image import ImageDataGenerator

WIDTH = 640
HEIGHT = 400
training_datagen = ImageDataGenerator(
  rescale = 1./255,
  horizontal_flip=True,
#   featurewise_center = True,
#   zca_epsilon = 0.001,
#   zca_whitening = True,
#   zoom_range = [0.7,0.8],
  brightness_range = [1.2,1.6],
  #Original set it to True, I will set False
  vertical_flip = False,
  fill_mode='nearest')

train_generator = training_datagen.flow_from_dataframe(
        dataframe=df_train_cut,
        directory=PATH,
        x_col="filename",
        y_col="stable",
        target_size=(HEIGHT, WIDTH),
        batch_size=8,
        class_mode='binary')

validation_datagen = ImageDataGenerator(rescale = 1./255,
#                                           featurewise_center = True,
#                                           zca_epsilon = 0.001,
#                                           zca_whitening = True,
#                                           zoom_range = [0.7,0.8],
                                          brightness_range = [1.2,1.5])

val_generator = validation_datagen.flow_from_dataframe(
        dataframe=df_validate_cut,
        directory=PATH,
        x_col="filename",
        y_col="stable",
        target_size=(HEIGHT, WIDTH),
        class_mode='binary')

Found 32785 validated image filenames belonging to 2 classes.
Found 8196 validated image filenames belonging to 2 classes.


In [5]:
## Learning Rate Schedule
def lr_schedule(epoch):
    lr = 0.0001
    if epoch > 20:
        lr = 0.000001
    elif epoch > 15:
        lr = 0.00001
    elif epoch > 10:
        lr = 0.00006
    elif epoch > 5:
        lr = 0.0001
    print('Learning rate: ', lr)
    return lr
lr_callback = tf.keras.callbacks.LearningRateScheduler(lr_schedule, verbose=True)

lr_callback = tf.keras.callbacks.LearningRateScheduler(lr_schedule, verbose=True)

# #Learning Rate Annealer
# from keras.callbacks import ReduceLROnPlateau
# lrr= ReduceLROnPlateau(monitor='val_loss',   factor=.01,   patience=1,  min_lr=1e-6)

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten,BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.optimizers import RMSprop
from keras.applications import *
from keras.models import Model
import keras

# base_model = tf.keras.applications.Xception(input_shape=(HEIGHT,WIDTH, 3), include_top=False)
# base_model.trainable = True

# base_model = ResNet50(include_top=False, input_shape=(HEIGHT,WIDTH, 3))
# base_model.trainable = True


monitor = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=8, verbose=1, mode='auto',
restore_best_weights=True)
model = Sequential()
base_model = ResNet101(include_top=False, input_shape=(HEIGHT,WIDTH, 3))
base_model.trainable = True
model.add(base_model)
model.add(GlobalAveragePooling2D())
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
callback_list = [monitor, lr_callback]
model.compile(loss = 'binary_crossentropy', optimizer='adam')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet101_weights_tf_dim_ordering_tf_kernels_notop.h5


In [7]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet101 (Functional)       (None, 13, 20, 2048)      42658176  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2048)              0         
_________________________________________________________________
flatten (Flatten)            (None, 2048)              0         
_________________________________________________________________
dense (Dense)                (None, 1)                 2049      
Total params: 42,660,225
Trainable params: 42,554,881
Non-trainable params: 105,344
_________________________________________________________________


In [11]:
history = model.fit(train_generator,  
  verbose = 1, 
  validation_data=val_generator, 
  steps_per_epoch=500, 
  validation_steps=200,
  callbacks=callback_list, 
  epochs=25)

Learning rate:  0.0001

Epoch 00001: LearningRateScheduler reducing learning rate to 0.0001.
Epoch 1/25
Learning rate:  0.0001

Epoch 00002: LearningRateScheduler reducing learning rate to 0.0001.
Epoch 2/25
Learning rate:  0.0001

Epoch 00003: LearningRateScheduler reducing learning rate to 0.0001.
Epoch 3/25
Learning rate:  0.0001

Epoch 00004: LearningRateScheduler reducing learning rate to 0.0001.
Epoch 4/25
Learning rate:  0.0001

Epoch 00005: LearningRateScheduler reducing learning rate to 0.0001.
Epoch 5/25
Learning rate:  0.0001

Epoch 00006: LearningRateScheduler reducing learning rate to 0.0001.
Epoch 6/25
Learning rate:  0.0001

Epoch 00007: LearningRateScheduler reducing learning rate to 0.0001.
Epoch 7/25
Learning rate:  0.0001

Epoch 00008: LearningRateScheduler reducing learning rate to 0.0001.
Epoch 8/25
Learning rate:  0.0001

Epoch 00009: LearningRateScheduler reducing learning rate to 0.0001.
Epoch 9/25
Learning rate:  0.0001

Epoch 00010: LearningRateScheduler reduc

In [12]:
model.save("./EfficientNet_Nov8.h5")

In [13]:
from IPython.display import FileLink
FileLink(r'EfficientNet_Nov8.h5')

# Build Submission

Now that the neural network is trained; we need to generate a submit CSV file to send to Kaggle.  We will use nearly the same technique to build the submit file.  However, these essential points that we must address:

* We do not want the data generator to create an infinite date like we did when training.  We have a fixed number of cases to score for the Kaggle submit; we only want to process them.
* We do not want the data generator to randomize the samples' order like it did when training. Therefore we set shuffle to false.
* We want to always start at the beginning of the data, so we reset the generator.

These ensure that the predictions align with the id's.

In [14]:
submit_datagen = ImageDataGenerator(rescale = 1./255)

submit_generator = submit_datagen.flow_from_dataframe(
        dataframe=df_test,
        directory=PATH,
        x_col="filename",
        batch_size = 1,
        shuffle = False,
        target_size=(HEIGHT, WIDTH),
        class_mode=None)

submit_generator.reset()
pred = model.predict(submit_generator,steps=len(df_test))

Found 10294 validated image filenames.


In [15]:
df_submit = pd.DataFrame({"id":df_test['id'],'stable':pred.flatten()})
df_submit.to_csv("./submit.csv",index = False)

In [16]:
from IPython.display import FileLink
FileLink(r'submit.csv')

### Further Fine Tune with Brighter Images

In [17]:
training_datagen = ImageDataGenerator(
  rescale = 1./255,
  horizontal_flip=True,
#   featurewise_center = True,
#   zca_epsilon = 0.001,
#   zca_whitening = True,
#   zoom_range = [0.7,0.8],
  brightness_range = [0.9,1.6],
  #Original set it to True, I will set False
  vertical_flip = False,
  fill_mode='nearest')

train_generator = training_datagen.flow_from_dataframe(
        dataframe=df_train_cut,
        directory=PATH,
        x_col="filename",
        y_col="stable",
        target_size=(HEIGHT, WIDTH),
        batch_size=16,
        class_mode='binary')

validation_datagen = ImageDataGenerator(rescale = 1./255,
#                                           featurewise_center = True,
#                                           zca_epsilon = 0.001,
#                                           zca_whitening = True,
#                                           zoom_range = [0.7,0.8],
                                          brightness_range = [1.2,1.5])

val_generator = validation_datagen.flow_from_dataframe(
        dataframe=df_validate_cut,
        directory=PATH,
        x_col="filename",
        y_col="stable",
        target_size=(HEIGHT, WIDTH),
        class_mode='binary')

from keras.models import load_model
model_tune_2 = load_model("Xception_Tune_Oct_7.h5")

def lr_schedule2(epoch):
    lr = 0.00005
    if epoch > 5:
        lr = 0.00001
    return lr

lr_callback2 = tf.keras.callbacks.LearningRateScheduler(lr_schedule2, verbose=True)
monitor2 = EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=10, verbose=1, mode='auto',
        restore_best_weights=True)
callback_list2 = [monitor2, lr_callback2]


history = model_tune.fit(train_generator,  
  verbose = 1, 
  validation_data=val_generator, 
  steps_per_epoch=500, 
  validation_steps=100,
  epochs=10,callbacks=callback_list2)

Found 32785 validated image filenames belonging to 2 classes.
Found 8196 validated image filenames belonging to 2 classes.


OSError: SavedModel file does not exist at: Xception_Tune_Oct_7.h5/{saved_model.pbtxt|saved_model.pb}

In [None]:
model_tune.save("./ResNet101_Nov8.h5")
from IPython.display import FileLink
FileLink(r'ResNet101_Nov8.h5')