In [None]:
import keras as k
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import skimage.io
import skimage.morphology as morp
from keras.callbacks import LearningRateScheduler
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, AvgPool2D, BatchNormalization, Reshape
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
from keras.utils.vis_utils import plot_model
from skimage.filters import rank
from sklearn.model_selection import train_test_split

%matplotlib inline

# Fail fast unless the exact library versions this notebook was written
# against are installed (pandas, numpy and keras, in that order).
assert all(
    module.__version__ == pinned
    for module, pinned in ((pd, '0.23.4'), (np, '1.15.4'), (k, '2.1.3'))
)

In [None]:
# Load all images. 

import os
import skimage.data
def load_data(data_dir):
    """Load every ``.jpg`` file found directly in ``data_dir`` as grayscale.

    Files are read in lexicographic order of their paths, so the returned
    list has a deterministic order that callers can line up with other
    name-sorted data.

    Parameters
    ----------
    data_dir : str
        Directory to scan (non-recursively) for names ending in ``.jpg``.

    Returns
    -------
    list
        One entry per matching file, in sorted path order, each being the
        value returned by ``skimage.io.imread(path, as_gray=True)``.
        Empty when no file matches.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when ``data_dir`` cannot be listed.
    """
    file_names = sorted(
        os.path.join(data_dir, f)
        for f in os.listdir(data_dir)
        if f.endswith(".jpg")
    )

    # Read through skimage.io: the `skimage.data.imread` alias used before is
    # not available in newer scikit-image releases, while `skimage.io.imread`
    # accepts the same `as_gray` flag.
    return [skimage.io.imread(f, as_gray=True) for f in file_names]

# Read the cropped training images and report how many were found.
images = load_data(data_dir='../data/train/cropped')
print(len(images))

In [None]:
# Explore shapes: print the dimensions of the first three loaded images,
# i.e. as they come off disk, before any resizing is applied.
for image in images[:3]:
    print('image.shape[before]', image.shape)

In [None]:
# Resize every image to a common 250x250 grid so that all entries share
# one shape.
import skimage.transform
images250 = [skimage.transform.resize(img, (250, 250)) for img in images]

In [None]:
# Confirm the transformation: print the dimensions of the first three
# entries of images250 (the copies resized to 250x250).
for image in images250[:3]:
    print('image.shape[after]', image.shape)
    

In [None]:
from PIL import Image

# Preview the first three resized images, one figure each.
for image in images250[:3]:
    # Scale to 8-bit for PIL -- assumes the resized pixels are floats in
    # [0, 1]; TODO confirm for the installed scikit-image version.
    image = (image * 255).astype(np.uint8)
    im = Image.fromarray(image)
    plt.figure()
    # A 2-D (single-channel) image is colour-mapped by imshow. Pin a gray map
    # with a fixed 0-255 range so the preview is shown in true grayscale and
    # is not contrast-stretched per image.
    plt.imshow(im, cmap='gray', vmin=0, vmax=255)
    plt.show()



In [None]:
# Load the label table and order it by image name, so that row i matches
# the i-th entry of a name-sorted image list.
data = (
    pd.read_csv("../data/train.csv")
    .sort_values("Img_Name")
    .reset_index(drop=True)
)
labels = data["Label"]

labels.head()

In [None]:
#categorize labels..

import numpy as np
from keras.utils.np_utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Turn labels and resized images into arrays. The images get a trailing
# channel axis: (n_images, 250, 250) --> (n_images, 250, 250, 1).
y = np.array(labels)
X = np.array(images250)[:, :, :, np.newaxis]

num_categories = 6

# Map raw labels to integer ids, then one-hot encode them into
# num_categories columns.
label_encoder = LabelEncoder()
y_labels_encoded = label_encoder.fit_transform(y)
y_labels_categorized = to_categorical(y_labels_encoded, num_classes=num_categories)

# Sanity check: all three lengths should agree.
print('len(X)', len(X))
print('len(y)', len(y))
print('len(y_labels_categorized)', len(y_labels_categorized))

In [None]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_labels_categorized,
    test_size=0.2,
    random_state=42,
)

In [None]:
input_shape = (250, 250, 1)

# Small CNN: a single conv/pool stage feeding a dense classifier head with
# one softmax output per category.
# NOTE(review): the following cell rebinds `model`, so in top-to-bottom
# execution this network is summarised but never trained.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu',
           input_shape=input_shape, data_format='channels_last'),
    MaxPool2D(pool_size=(2, 2)),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(num_categories, activation='softmax'),
])

model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=['accuracy'])

model.summary()

In [None]:
# MODEL #1 -- Basic Non-convolutional
# Baseline: flatten each 250x250x1 image and pass it through two
# 128-unit ReLU layers into a 6-way softmax.

model = Sequential([
    Flatten(input_shape=(250, 250, 1)),
    Dense(units=128, activation="relu"),
    Dense(units=128, activation="relu"),
    Dense(units=6, activation="softmax"),
])
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Train the current model: 10 epochs over the training split, 32 samples
# per batch.
model.fit(x=X_train, y=y_train, batch_size=32, epochs=10)

In [None]:
# Score the current model on the held-out split. The model was compiled
# with metrics=['accuracy'], so evaluate() is expected to return
# [loss, accuracy] rather than a single number, despite the variable name
# -- NOTE(review): confirm against the installed Keras version.
accuracy = model.evaluate(x=X_test,y=y_test,batch_size=32)
accuracy

In [None]:
# MODEL #2 -- Convolutional test
# Three conv/pool stages (32, 32, 64 filters) followed by a small dense
# head that classifies each image into one of 6 categories.

model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=(250,250,1), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))

model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
# The targets are one-hot vectors with exactly one active class per image,
# so the output must be a softmax distribution over the 6 classes. A sigmoid
# output would score each class as an independent yes/no decision.
model.add(Dense(6, activation='softmax'))

# categorical_crossentropy matches one-hot single-label targets. With
# binary_crossentropy the 'accuracy' metric is computed element-wise over
# the 6 outputs and looks far better than the real classification accuracy.
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

model.summary()

In [None]:
# Train the current model: 10 epochs over the training split, 32 samples
# per batch.
model.fit(x=X_train, y=y_train, batch_size=32, epochs=10)

In [None]:
# Score the current model on the held-out split. The model was compiled
# with metrics=['accuracy'], so evaluate() is expected to return
# [loss, accuracy] rather than a single number, despite the variable name
# -- NOTE(review): confirm against the installed Keras version.
accuracy = model.evaluate(x=X_test,y=y_test,batch_size=32)
accuracy