In [None]:
import numpy as np 
import pandas as pd 
import os
import matplotlib.pyplot as plt
import matplotlib.image as mplimg

In [None]:
# Folder (relative to the notebook) from which train.csv is read.
datadir = "data/"

In [None]:
# Sanity check: show what is actually present in the data folder.
os.listdir(datadir)

In [None]:
# Build the path with os.path.join so it stays valid whether or not
# `datadir` ends with a path separator.
train_df = pd.read_csv(os.path.join(datadir, "train.csv"))
train_df.head()

In [None]:
# Report the size of the training table (number of rows, number of columns).
rows, cols = train_df.shape
print("Entries: {}, columns: {}".format(*train_df.shape))

In [None]:
# Number of distinct labels in the "Id" column. The column is addressed by
# name (the same column that is label-encoded for training) instead of by
# position, so the count matches the width of the one-hot targets.
num_classes = train_df["Id"].nunique()
num_classes

In [None]:
# Number of rows per Id, ordered from the rarest Id to the most frequent one.
rows_per_id = train_df.groupby("Id").size()
sortd = rows_per_id.sort_values()
sortd.tail()

In [None]:
import seaborn as sns

# Bar chart of the last five entries of the ascending sort, i.e. the most
# frequent Ids.
most_common = sortd.tail()
sns.barplot(x=most_common.index, y=most_common.values)

In [None]:
# Class weighting: median class size divided by each class size, so rare
# classes get weights above 1 and frequent ones below 1.
# NOTE: the array is positionally aligned with `sortd` (ascending row count),
# not with any encoded class index.
class_sizes = sortd.values
cw = np.median(class_sizes) / class_sizes
cw

In [None]:
# Weighted row counts for the five most frequent Ids (weight * count).
top_counts = sortd.tail()
sns.barplot(x=top_counts.index, y=cw[-5:] * top_counts.values)

In [None]:
from keras.applications.xception import Xception

# ImageNet-pretrained Xception base without its classification head.
# `classes` only has an effect when include_top=True, so it is not passed
# here; the output layer sized by num_classes is attached separately.
premodel = Xception(include_top=False, weights='imagenet', input_shape=(299, 299, 3))
premodel.summary()

In [None]:
# for layer in premodel.layers[:5]:
#     layer.trainable = False

In [None]:
# Import both layers from the public `keras.layers` namespace rather than the
# internal `keras.layers.pooling` module path.
from keras.layers import Dense, GlobalAveragePooling2D

# Classification head: global average pooling over the base network's output
# followed by a softmax layer with one unit per class.
x = premodel.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(num_classes, activation="softmax")(x)

In [None]:
from keras.models import Model

# Keras 2 names these arguments `inputs` / `outputs`; the singular
# `input` / `output` spellings are legacy Keras 1 keywords.
model = Model(inputs=premodel.input, outputs=predictions)

# Drop the extra reference to the base network; its layers remain part of `model`.
del premodel

model.summary()

In [None]:
from keras import optimizers
from metrics import dice_loss, dice

# Training configuration: Adam optimizer, categorical cross-entropy loss,
# accuracy as the reported metric.
learning_rate = 1e-3
loss = 'categorical_crossentropy'
metrics = ['accuracy']
optimizer = optimizers.Adam(lr=learning_rate)
# Disabled alternative: Dice metric / loss from the local `metrics` module.
# metrics = [dice]
# loss = [dice_loss]

model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

In [None]:
# Import the module under an alias: the list built at the end of this cell is
# bound to the name `callbacks`, and importing the module under that same name
# would make the cell fail when it is executed a second time.
from keras import callbacks as keras_callbacks

loggername = "a"

# Make sure the output folders for the checkpoint and the CSV log exist.
os.makedirs('weights', exist_ok=True)
os.makedirs('logs', exist_ok=True)

# All callbacks monitor the training loss ('loss').
model_checkpoint = keras_callbacks.ModelCheckpoint('weights/{}.hdf5'.format(loggername), monitor='loss', verbose=1, save_best_only=True, save_weights_only=True)
reduce_lr = keras_callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience = 3, verbose = 1, min_lr=1e-7)
csv_logger = keras_callbacks.CSVLogger('logs/{}.log'.format(loggername))
early_stopper = keras_callbacks.EarlyStopping(monitor='loss', min_delta = 0.01, patience = 5, verbose = 1)

# Callback list handed to the training call.
callbacks = [model_checkpoint, reduce_lr, csv_logger, early_stopper]

In [None]:
from keras.applications.xception import preprocess_input
from keras.preprocessing import image

def get_np_image(df, target_size=(299,299,3), image_dir="data/train/"):
    """Load one image from disk and preprocess it for the Xception model.

    Parameters
    ----------
    df : str
        File name of the image inside `image_dir`. Despite the name this is
        a single file name, not a DataFrame.
    target_size : tuple
        Size the image is resized to. Only the first two entries
        (height, width) are forwarded to `load_img`; a trailing channel entry
        such as the 3 in (299, 299, 3) is accepted and ignored.
    image_dir : str
        Folder containing the image files. Defaults to the previously
        hard-coded "data/train/".

    Returns
    -------
    numpy.ndarray
        The image as an array after Xception's `preprocess_input`.
    """
    img_path = os.path.join(image_dir, df)
    # load_img expects (height, width); drop any channel entry.
    img = image.load_img(img_path, target_size=tuple(target_size[:2]))
    x = image.img_to_array(img)
    return preprocess_input(x)

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

def encode(y):
    """One-hot encode a sequence of class labels.

    Parameters
    ----------
    y : array-like
        Class labels, one per sample.

    Returns
    -------
    onehot_encoded : numpy.ndarray
        int32 array of shape (n_samples, n_classes); column k is 1 for samples
        whose label is `label_encoder.classes_[k]`.
    label_encoder : LabelEncoder
        The fitted encoder, usable to map column indices back to labels.
    """
    values = np.array(y)
    label_encoder = LabelEncoder()
    integer_encoded = label_encoder.fit_transform(values)

    # Build the one-hot matrix directly in int32. This gives the same result
    # as one-hot encoding the integer codes with scikit-learn, but does not
    # depend on the version-specific `sparse` keyword of OneHotEncoder and
    # avoids the intermediate float64 matrix.
    n_samples = len(integer_encoded)
    n_classes = len(label_encoder.classes_)
    onehot_encoded = np.zeros((n_samples, n_classes), dtype=np.int32)
    onehot_encoded[np.arange(n_samples), integer_encoded] = 1

    return onehot_encoded, label_encoder

In [None]:
# One-hot targets for every row of train_df, plus the fitted label encoder.
y_encoded, label_encoder = encode(train_df["Id"])
y_encoded.shape

In [None]:
# Smoke test: load the image listed under index label 0 and inspect its shape.
# Note: this rebinds `x`, which earlier held the model's output tensor.
x = get_np_image(df = train_df["Image"][0], target_size=(299,299,3))
x.shape

In [None]:
# Smoke test: one-hot target of the first row and its shape.
y = y_encoded[0]
y.shape

In [None]:
# def generate_data(directory, batch_size):
#     """Replaces Keras' native ImageDataGenerator."""
#     i = 0
#     file_list = os.listdir(directory)
#     while True:
#         image_batch = []
#         for b in range(batch_size):
#             if i == len(file_list):
#                 i = 0
#                 random.shuffle(file_list)
#             sample = file_list[i]
#             i += 1
#             image = cv2.resize(cv2.imread(sample[0]), INPUT_SHAPE)
#             image_batch.append((image.astype(float) - 128) / 128)

#         yield np.array(image_batch)

def custom_generator(df, y_encoded, target_size = (299,299,3), batch_size = 1, validation = False):
    """Yield (images, labels) batches forever, cycling through `df` in order.

    Parameters
    ----------
    df : mapping with an "Image" entry (e.g. a pandas DataFrame)
        `df["Image"]` holds the image file names, one per sample.
    y_encoded : array-like
        Targets; `y_encoded[k]` belongs to the k-th file name (by position).
    target_size : tuple
        Forwarded to `get_np_image`.
    batch_size : int
        Number of samples per yielded batch (must be >= 1).
    validation : bool
        Currently unused; kept for interface compatibility.

    Yields
    ------
    tuple of numpy.ndarray
        `(x_batch, y_batch)`, each with `batch_size` entries. When the end of
        the data is reached the generator wraps around to the first sample,
        so a batch may mix the last and the first samples.

    Raises
    ------
    ValueError
        If `batch_size` is smaller than 1, `df` has no images, or
        `y_encoded` has fewer entries than `df` (raised on first iteration).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Positional view of the file names: independent of the DataFrame index
    # (label-based df["Image"][i] fails once rows are filtered or shuffled)
    # and computed once instead of on every sample.
    filenames = np.asarray(df["Image"])
    n_samples = len(filenames)

    if n_samples == 0:
        raise ValueError("df contains no images")
    if len(y_encoded) < n_samples:
        raise ValueError("y_encoded has fewer entries than df")

    i = 0

    while True:

        x_batch = []
        y_batch = []

        for _ in range(batch_size):
            x_batch.append(get_np_image(df = filenames[i], target_size = target_size))
            y_batch.append(y_encoded[i])

            # Wrap around to the first sample after the last one.
            i = (i + 1) % n_samples

        yield (np.array(x_batch),np.array(y_batch))

In [None]:
# Training schedule.
epochs = 100
verbose = 2
batch_size = 4
# Number of whole batches that fit into the training table; a final partial
# batch is not counted (integer division).
steps_per_epoch = len(train_df) // batch_size
# validation_steps = len(train_df)//batch_size

In [None]:
# Endless batch generator over the full training table (no validation split).
train_gen = custom_generator(df=train_df, y_encoded=y_encoded, batch_size=batch_size)

In [None]:
# Keras expects `class_weight` as a dict {class index: weight}, where the class
# index is the column of the one-hot target. `cw` is a plain array ordered
# like `sortd` (ascending row count), which is not the label encoder's class
# order, so re-key the weights by label and look each class up in encoder order.
weight_by_label = dict(zip(sortd.index, cw))
class_weight = {
    class_index: float(weight_by_label[label])
    for class_index, label in enumerate(label_encoder.classes_)
}

history = model.fit_generator(
    train_gen,
    steps_per_epoch = steps_per_epoch,
    class_weight = class_weight,
    epochs = epochs,
    verbose = verbose,
    callbacks = callbacks
)