In [3]:
import tensorflow as tf
import keras
import cv2
import os
import pandas as pd

# image
from skimage.transform import resize

# Utilities
from keras.preprocessing.image import load_img, img_to_array, save_img
from tensorflow.keras.utils import to_categorical

# Pre-trained model
from tensorflow.keras.applications import resnet50, densenet

# Architectures
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Flatten, Dense, Dropout, BatchNormalization

# Optimizers, metrics, initializers
from tensorflow.keras.optimizers import SGD, Adagrad, Adadelta, RMSprop, Adam

In [4]:
from tensorflow.keras.applications import * #Efficient Net included here
from tensorflow.keras import models
from tensorflow.keras import layers
from keras.preprocessing.image import ImageDataGenerator
import os
import shutil
import pandas as pd
from sklearn import model_selection
from tqdm import tqdm
from tensorflow.keras import optimizers
import tensorflow as tf

In [5]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.precision', 4)

#### Get Dataframe that contain image-name, class-name, and class-id

In [7]:
datafolder = '../data/manual-clusters/20220430/categorized/'

newlabels = []

for label in os.listdir(datafolder):
    for image in os.listdir(datafolder + label):
        newlabels.append({
            'filename': datafolder + label + "/" + image,
            'classname': label,
            
        })
        
newlabels = pd.DataFrame(newlabels)
newlabels['class_id'] = newlabels['classname']
newlabels.replace({"class_id":{"01":0, "02-round-end":1,"03-mib":2,"04":3,"05-hole-flat":4,
                               "06":5,"07-honeycomb":6,"08":7,"09":8,"10-honeycombhollow":9,
                               "11-longthin":10}}, inplace=True)

#### Prepare the dataset. Get data into folders

In [8]:
TRAIN_IMAGES_PATH = '../efNet/images/train'
VAL_IMAGES_PATH = '../efNet/images/val'
os.makedirs(TRAIN_IMAGES_PATH, exist_ok = True)
os.makedirs(VAL_IMAGES_PATH, exist_ok = True)

In [9]:
classes = set(newlabels['class_id'])
# Create directories for each class.
for class_id in [x for x in range(len(classes))]:
    os.makedirs(os.path.join(TRAIN_IMAGES_PATH, str(class_id)), exist_ok = True)
    os.makedirs(os.path.join(VAL_IMAGES_PATH, str(class_id)), exist_ok = True)

In [10]:
def preproccess_data(df, images_path):
    for column, row in df.iterrows():
        class_id = row['class_id']
        shutil.copy(row['filename'], os.path.join(images_path, str(class_id)))
        
#Split the dataset into 80% training and 20% validation
df_train, df_valid = model_selection.train_test_split(newlabels, test_size=0.2, random_state=42, shuffle=True)
#run the  function on each of them
preproccess_data(df_train, TRAIN_IMAGES_PATH)
preproccess_data(df_valid, VAL_IMAGES_PATH)

#### augmentations

In [6]:
input_shape = (224,224,3)
conv_base = EfficientNetB0(weights="imagenet", include_top=False, input_shape=input_shape)

In [11]:
NUMBER_OF_CLASSES = 11

model = models.Sequential()
model.add(conv_base)
model.add(layers.GlobalMaxPooling2D(name="gap"))
#avoid overfitting
model.add(layers.Dropout(rate=0.2, name="dropout_out"))
# Set NUMBER_OF_CLASSES to the number of your final predictions.
model.add(layers.Dense(NUMBER_OF_CLASSES, activation="softmax", name="fc_out"))
#conv_base.trainable = False

In [12]:
height,width=224,224
batch_size = 16

train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=45,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode="nearest",
)

test_datagen = ImageDataGenerator(rescale=1.0 / 255)
train_generator = train_datagen.flow_from_directory(
    # This is the target directory
    TRAIN_IMAGES_PATH,
    # All images will be resized to target height and width.
    target_size=(height, width),
    batch_size=batch_size,
    # Since we use categorical_crossentropy loss, we need categorical labels
    class_mode="categorical",
)

validation_generator = test_datagen.flow_from_directory(
    VAL_IMAGES_PATH,
    target_size=(height, width),
    batch_size=batch_size,
    class_mode="categorical",
)

model.compile(
    loss="categorical_crossentropy",
    optimizer=optimizers.RMSprop(lr=2e-5),
    metrics=[tf.keras.metrics.TopKCategoricalAccuracy(k=2), "acc"], #'acc'
)

Found 746 images belonging to 11 classes.
Found 187 images belonging to 11 classes.


  super(RMSprop, self).__init__(name, **kwargs)


In [13]:
NUMBER_OF_TRAINING_IMAGES = 746
NUMBER_OF_VALIDATION_IMAGES = 187
epochs = 10

history = model.fit(
    train_generator,
    steps_per_epoch=NUMBER_OF_TRAINING_IMAGES // batch_size,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=NUMBER_OF_VALIDATION_IMAGES // batch_size,
    verbose=1,
    workers=4,
    use_multiprocessing=False
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [14]:
score = model.evaluate(validation_generator, verbose=1)
print("%s%s: %.2f%%" % ("evaluate ",model.metrics_names[1], score[1]*100))

evaluate top_k_categorical_accuracy: 54.55%
