In [7]:
import numpy as np
import pandas as pd
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Flatten, Dense, Dropout
from keras.optimizers import RMSprop
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from pathlib import Path
from keras.layers import Conv2D, MaxPooling2D
import json
from keras.utils import to_categorical

In [2]:
# Random augmentations applied to every image drawn through `train_datagen`.
_augment_kwargs = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Training-side generator: multiplies pixel values by 1/255, applies the
# augmentations above, and reserves 20% of the files as a validation subset.
train_datagen = ImageDataGenerator(
    rescale=1/255,
    validation_split=0.2,
    **_augment_kwargs
)

# Test-side generator: rescaling only, no augmentation and no split.
test_datagen = ImageDataGenerator(rescale=1/255)

# Images per batch. Per the author's experiments, 128 offered the best
# trade-off between model training time and batch volatility.
batch_size = 128

In [3]:
# Root folder containing one sub-directory per product class.
train_dir = 'D:/Downloads/Copy of shopee-product-detection-dataset/train_1st/train_1st/'

# Training subset: shuffled (flow_from_directory default) and augmented by
# `train_datagen`.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training',
)

# Validation subset: must NOT go through the augmenting generator, otherwise
# val_loss / val_accuracy are measured on randomly distorted images and
# fluctuate for reasons unrelated to the model (which also misleads
# EarlyStopping / ReduceLROnPlateau). A dedicated generator with the same
# rescale factor and the same validation_split picks the same held-out files
# (the split is positional within each class folder, not random) but applies
# no random transforms. Keep rescale / validation_split in sync with
# `train_datagen`.
val_datagen = ImageDataGenerator(rescale=1/255, validation_split=0.2)
validation_generator = val_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation',
    shuffle=False,  # fixed order: batches line up with validation_generator.labels
)

Found 19368 images belonging to 10 classes.
Found 4836 images belonging to 10 classes.


In [4]:
from keras.applications import VGG16

# Convolutional base of VGG16 initialised with ImageNet weights.
# `include_top=False` leaves out the fully-connected classifier layers at the
# top of the network (not just the final softmax), so a task-specific head can
# be attached to the convolutional output.
# NOTE(review): the image generators in this notebook rescale pixels by 1/255,
# whereas Keras documents `keras.applications.vgg16.preprocess_input` as the
# expected input preparation for these weights -- confirm this is intentional.
pretrained_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)

In [5]:
# Extract VGG16 features in a fixed, reproducible order.
#
# `train_generator` shuffles its files (flow_from_directory default) and
# applies random augmentation, so predicting on it directly returns rows whose
# order does not correspond to `train_generator.labels`. Iterate over the same
# directory and the same 80/20 split with an un-augmented, unshuffled generator
# instead, so row i of each feature array belongs to file i / label i of the
# matching subset.
#
# NOTE: both results are held fully in memory; the training array has one
# VGG16 feature map per training image.
_feature_datagen = ImageDataGenerator(
    rescale=1/255,          # keep in sync with train_datagen
    validation_split=0.2,   # keep in sync with train_datagen
)
_feature_flow_kwargs = dict(
    directory=train_generator.directory,
    target_size=train_generator.target_size,
    batch_size=batch_size,
    class_mode=None,   # images only; predict() does not need targets
    shuffle=False,     # preserve file order
)

vgg_features_train = pretrained_model.predict(
    _feature_datagen.flow_from_directory(subset='training', **_feature_flow_kwargs)
)
vgg_features_val = pretrained_model.predict(
    _feature_datagen.flow_from_directory(subset='validation', **_feature_flow_kwargs)
)

In [8]:
# One-hot encode the integer class id of every file in each subset.
# `num_classes` is passed explicitly: by default to_categorical sizes the
# output from the largest label it sees, so a subset that happens to lack the
# highest-index class would otherwise get a narrower matrix than the other.
# Note that `.labels` follows the generator's file order, not the (shuffled)
# order in which batches are yielded.
_n_classes = len(train_generator.class_indices)
train_target = to_categorical(train_generator.labels, num_classes=_n_classes)
val_target = to_categorical(validation_generator.labels, num_classes=_n_classes)

In [9]:
# Freeze the pretrained convolutional base before compiling. The new Dense head
# starts from random weights; if the base stayed trainable, the large early
# gradients from that head (with Adam at its default learning rate) would be
# back-propagated into the ImageNet weights and could wipe out the pretrained
# features. To fine-tune later, set `pretrained_model.trainable = True` and
# re-compile with a much smaller learning rate.
pretrained_model.trainable = False

# Classifier: VGG16 feature maps -> flatten -> 256-unit hidden layer with light
# dropout -> 10-way softmax (one unit per class found by flow_from_directory).
model = Sequential([
    pretrained_model,
    Flatten(),
    Dense(256, activation='relu', name='Dense_Intermediate'),
    Dropout(0.1, name='Dropout_Regularization'),
    Dense(10, activation='softmax', name='Output'),
])

# Multi-class, one-hot targets (class_mode='categorical') -> categorical
# cross-entropy; accuracy is reported for monitoring.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)



In [10]:
import os  # retained: other cells may rely on this import

# Inverse-frequency class weights, keyed by the class index Keras trains with.
#
# The mapping and the counts are taken from the generator itself rather than
# from os.listdir(): listdir returns entries in arbitrary order, so using its
# enumerate() position as the class index can attach a weight to the wrong
# class, and counting raw directory entries also includes the validation files.
_class_indices = train_generator.class_indices            # class name -> index
_images_per_index = np.bincount(
    train_generator.classes, minlength=len(_class_indices)
)

# class name -> number of training images in that class
labels_count = {
    name: int(_images_per_index[idx]) for name, idx in _class_indices.items()
}
total_count = sum(labels_count.values())

# class index -> weight; rarer classes receive proportionally larger weights
class_weights = {
    idx: total_count / labels_count[name] for name, idx in _class_indices.items()
}

In [None]:
# Train for up to 20 epochs with class weighting.
#
# * `Model.fit` accepts generators directly; `fit_generator` is deprecated.
# * One epoch = one full pass over each generator. `len(generator)` is the
#   number of batches including the final partial one. The previous
#   `validation_steps` was computed from the *training* file count, which made
#   every validation run loop over the validation subset roughly four times.
# * EarlyStopping stops after 3 epochs without improvement and restores the
#   best weights; ReduceLROnPlateau lowers the learning rate after 2 such
#   epochs.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=20,
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    class_weight=class_weights,
    callbacks=[
        EarlyStopping(patience=3, restore_best_weights=True),
        ReduceLROnPlateau(patience=2),
    ],
)

Epoch 1/20
Epoch 2/20
Epoch 3/20