In [2]:
!unzip "/content/drive/My Drive/shopee-product-detection-dataset.zip" -d /content

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/shopee-product-detection-dataset/train/train/22/34efaaa7ea5f479ed6f2316d55eba39e.jpg  
  inflating: /content/shopee-product-detection-dataset/train/train/22/645f91cdb35b90cccfa18331bfd4da8e.jpg  
  inflating: /content/shopee-product-detection-dataset/train/train/22/2336e72422c9c6aa3040bdc13933cef0.jpg  
  inflating: /content/shopee-product-detection-dataset/train/train/22/99ce5f9e867fdcd6a3e8ea3ee3cfebb9.jpg  
  inflating: /content/shopee-product-detection-dataset/train/train/22/178b23fc1bbef25bd79d78e465ca82c1.jpg  
  inflating: /content/shopee-product-detection-dataset/train/train/22/b59c957cef2b75013d5954e1fe24c157.jpg  
  inflating: /content/shopee-product-detection-dataset/train/train/22/0f8d30760f99be74201599d4ef58791d.jpg  
  inflating: /content/shopee-product-detection-dataset/train/train/22/3a2bc9fb1ea91b3124834b4fd2bc907a.jpg  
  inflating: /content/shopee-product-detection-dataset/train/tr

In [None]:
%tensorflow_version 2.x

import tensorflow as tf

device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

In [4]:
!pip install -U efficientnet

Collecting efficientnet
  Downloading https://files.pythonhosted.org/packages/28/91/67848a143b54c331605bfba5fd31cf4e9db13d2e429d103fe807acc3bcf4/efficientnet-1.1.0-py3-none-any.whl
Installing collected packages: efficientnet
Successfully installed efficientnet-1.1.0


In [11]:
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.layers import InputLayer, Input
from tensorflow.keras.layers import Conv2D, Dense, Flatten, Dropout, Activation
from tensorflow.keras.layers import BatchNormalization, Reshape, MaxPooling2D, GlobalAveragePooling2D, GlobalMaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications import DenseNet201, ResNet152V2
import efficientnet.tfkeras as enet
from shutil import copyfile, move
import os
import pandas as pd
import numpy as np

In [6]:
# Load the train and test index files. Both columns are read as text so that
# category labels keep any leading zeros instead of being parsed as integers.
index_dtypes = {"filename": str, "category": str}
train_idx, test_idx = (
    pd.read_csv(csv_path, dtype=index_dtypes)
    for csv_path in (
        "/content/shopee-product-detection-dataset/train.csv",
        "/content/shopee-product-detection-dataset/test.csv",
    )
)

In [7]:
# Distinct category labels, in order of first appearance in train.csv;
# each one is used below as a sub-directory name.
class_dir = train_idx["category"].unique().tolist()

In [8]:
# Copy every image listed in train.csv into sorted_training/<category>/.
src = "/content/shopee-product-detection-dataset/train/train/"
dst = "/content/shopee-product-detection-dataset/sorted_training/"

# exist_ok=True keeps the cell re-runnable: os.mkdir would raise
# FileExistsError on the second execution.
os.makedirs(dst, exist_ok=True)
for cl in class_dir:
    os.makedirs(os.path.join(dst, cl), exist_ok=True)

# Iterate over the two needed columns directly instead of iterrows(), which
# builds a Series object for every row.
for category, filename in zip(train_idx["category"], train_idx["filename"]):
    copyfile(
        os.path.join(src, category, filename),
        os.path.join(dst, category, filename),
    )

In [9]:
# Hold out 10% of the training images by moving them from sorted_training/
# into sorted_validation/<category>/.
# Absolute paths are used (as everywhere else in this notebook) so the cell
# does not depend on the current working directory being /content.
src = "/content/shopee-product-detection-dataset/sorted_training/"
dst = "/content/shopee-product-detection-dataset/sorted_validation/"

# exist_ok=True keeps the cell re-runnable.
os.makedirs(dst, exist_ok=True)
for cl in class_dir:
    os.makedirs(os.path.join(dst, cl), exist_ok=True)

# Fixed random_state: the same rows are selected on every run.
validation_df = train_idx.sample(n=len(train_idx) // 10, random_state=2020)

for category, filename in zip(validation_df["category"], validation_df["filename"]):
    src_path = os.path.join(src, category, filename)
    dst_path = os.path.join(dst, category, filename)
    # Already moved by a previous execution of this cell: nothing to do.
    # A file missing from both locations still raises inside move().
    if os.path.exists(dst_path) and not os.path.exists(src_path):
        continue
    move(src_path, dst_path)

In [15]:
# Input geometry and batch size shared by every generator in this notebook.
batch_size = 32
height = width = 300
input_shape = (height, width, 3)
num_classes = 42

train_data_dir = "/content/shopee-product-detection-dataset/sorted_training"
val_data_dir = "/content/shopee-product-detection-dataset/sorted_validation"

# Options common to the training and validation directory iterators.
flow_options = dict(
    target_size=(height, width),
    batch_size=batch_size,
    class_mode='categorical',
)

# Training images: scaled by 1/255 and randomly flipped horizontally.
# The remaining augmentations are kept here, disabled, for experimentation.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    horizontal_flip=True,
    # rotation_range=40,
    # width_shift_range=0.2,
    # height_shift_range=0.2,
    # shear_range=0.2,
    # zoom_range=0.2,
    # fill_mode="nearest"
)
train_generator = train_datagen.flow_from_directory(
    train_data_dir, shuffle=True, **flow_options
)

# Validation images: scaling only, and a fixed (unshuffled) order.
validation_datagen = ImageDataGenerator(rescale=1. / 255)
validation_generator = validation_datagen.flow_from_directory(
    val_data_dir, shuffle=False, **flow_options
)

Found 94853 images belonging to 42 classes.
Found 10539 images belonging to 42 classes.


In [34]:
# EfficientNet-B3 backbone with ImageNet weights and no classification head.
# `classes` only applies when include_top=True, so it is not passed here.
base_model = enet.EfficientNetB3(
    weights='imagenet', include_top=False, input_shape=input_shape
)

# Trainability policy, set in a single pass:
#   * layers from index 345 onwards are trainable;
#   * before that, only BatchNormalization layers are trainable.
# (The earlier blanket "freeze everything" loop was immediately overwritten
# by the loops that followed it, so it has been dropped.)
for index, layer in enumerate(base_model.layers):
    layer.trainable = index >= 345 or isinstance(layer, BatchNormalization)

# base_model = ResNet152V2(weights = 'imagenet', include_top = False, classes = num_classes, input_shape = input_shape)
# Classification head: global average pooling -> dropout -> softmax.
x = base_model.output
x = GlobalAveragePooling2D()(x)
# let's add a fully-connected layer
# x = Dense(1024, activation='relu')(x)
x = Dropout(0.2)(x)
predictions = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

model.compile(
    # optimizer=tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9),
    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy', metrics=["accuracy"]
)

# model.summary()

# model.summary()

In [36]:
# Halve the learning rate once val_loss has gone one epoch without improving.
lr_schedule = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=1, verbose=1)
# Keep only the checkpoint with the lowest val_loss seen so far.
checkpoint = ModelCheckpoint(filepath='best_model.h5', monitor='val_loss', save_best_only=True)
callbacks = [lr_schedule, checkpoint]

In [37]:
# Train for `epochs` full passes over the training generator, validating on
# the complete validation generator after each epoch.
epochs = 10
fit_options = dict(
    validation_data=validation_generator,
    epochs=epochs,
    verbose=1,
    steps_per_epoch=len(train_generator),
    validation_steps=len(validation_generator),
    callbacks=callbacks,
)
model.fit(train_generator, **fit_options)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 5/10
Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 6/10
Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 7/10
Epoch 00007: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 8/10
Epoch 00008: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
Epoch 9/10
Epoch 00009: ReduceLROnPlateau reducing learning rate to 1.5625000742147677e-05.
Epoch 10/10
Epoch 00010: ReduceLROnPlateau reducing learning rate to 7.812500371073838e-06.


<tensorflow.python.keras.callbacks.History at 0x7f71339d3240>

In [None]:
# Print "<index> <layer name>" for every backbone layer, one per line, to
# help choose the index at which fine-tuning should start.
print("\n".join(
    "{} {}".format(position, backbone_layer.name)
    for position, backbone_layer in enumerate(base_model.layers)
))

In [22]:
# Mark every layer of the model as trainable. The previous version split the
# layers at index 562 but assigned True on both sides, so one pass over all
# layers is equivalent.
# NOTE(review): if the first 562 layers were meant to stay frozen, that
# intent is not reflected here; confirm.
for layer in model.layers:
    layer.trainable = True

In [28]:
# Replace the in-memory model with the one stored in this checkpoint file.
model = load_model(filepath="/content/best_model_79.h5")

In [29]:
# Re-compile for fine-tuning with a smaller learning rate (1e-4).
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model.compile(
    loss=tf.keras.losses.categorical_crossentropy,
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)

In [30]:
# Freeze the first 345 layers and fine-tune everything after them.
for layer in model.layers[:345]:
    layer.trainable = False
for layer in model.layers[345:]:
    layer.trainable = True

# Keras only guarantees that changes to `trainable` take effect after the
# model is compiled again, so compile here, after the flags are final.
model.compile(
    loss=tf.keras.losses.categorical_crossentropy,
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)

In [None]:
epochs = 10
model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=epochs,
    verbose=1,
    steps_per_epoch=len(train_generator),
    validation_steps=len(validation_generator),
    callbacks=callbacks
)

In [None]:
# !cp "/content/drive/My Drive/best_model_b5.h5" "/content" 

In [None]:
# Overwrite the current model's weights with those stored in this file
# (path is relative to the working directory).
# NOTE(review): the file name hints at a different backbone than the B3 model
# built in this notebook; confirm the architectures match before loading.
model.load_weights(filepath="best_model_b5.h5")

In [38]:
# Directory iterator over the unlabeled test images, in a fixed order.
test_data_dir = "/content/shopee-product-detection-dataset/test"

test_datagen = ImageDataGenerator(rescale=1. / 255)

test_generator = test_datagen.flow_from_directory(
    test_data_dir,
    # Reuse the training geometry so test preprocessing cannot drift from it.
    target_size=(height, width),
    batch_size=batch_size,
    # The test set has no labels: yield image batches only.
    class_mode=None,
    shuffle=False
)

Found 12192 images belonging to 1 classes.


In [39]:
# Delete test images whose file name ends in "(1).jpg" (presumably stray
# duplicate copies; verify against test.csv). Generator file names are
# relative to the test directory, so they are joined back onto it.
remove_list = [name for name in test_generator.filenames if name.endswith('(1).jpg')]
for name in remove_list:
    os.remove(os.path.join("/content/shopee-product-detection-dataset/test/", name))

In [40]:
# Rebuild the test iterator so its file list reflects the files on disk now.
test_data_dir = "/content/shopee-product-detection-dataset/test"

test_datagen = ImageDataGenerator(rescale=1. / 255)

test_generator = test_datagen.flow_from_directory(
    test_data_dir,
    # Reuse the training geometry so test preprocessing cannot drift from it.
    target_size=(height, width),
    batch_size=batch_size,
    # The test set has no labels: yield image batches only.
    class_mode=None,
    # Keep a fixed order so predictions line up with test_generator.filenames.
    shuffle=False
)

Found 12186 images belonging to 1 classes.


In [41]:
# Class probabilities for every test image, in test_generator order.
# Model.predict accepts generators directly; predict_generator is deprecated.
pred = model.predict(test_generator, verbose=1)

Instructions for updating:
Please use Model.predict, which supports generators.


In [42]:
# Index of the highest-scoring class for each prediction row.
y_classes = np.argmax(pred, axis=-1)

In [43]:
# Submission template: one row per test image, both columns read as text.
test_df = pd.read_csv(
    "/content/shopee-product-detection-dataset/test.csv",
    dtype=dict(filename=str, category=str),
)

In [44]:
# Preview the first rows of the submission template.
test_df.head()

Unnamed: 0,filename,category
0,fd663cf2b6e1d7b02938c6aaae0a32d2.jpg,43
1,c7fd77508a8c355eaab0d4e10efd6b15.jpg,43
2,127f3e6d6e3491b2459812353f33a913.jpg,43
3,5ca4f2da11eda083064e6c36f37eeb81.jpg,43
4,46d681a542f2c71be017eef6aae23313.jpg,43


In [45]:
# Preview the generator's file order; paths are relative to test_data_dir.
test_generator.filenames[:5]

['test/0004b03ad7eabfb3989727c461310a84.jpg',
 'test/0004d34271d0a0253f8a95d0746bae05.jpg',
 'test/000d4ad3d0a372fd6eea2fee6bf738ef.jpg',
 'test/0012f125ad07123e69468ae1d1d59de4.jpg',
 'test/0015831b249b856e569e883a9dc6d4fa.jpg']

In [46]:
# Order the template rows by file name (ascending) so they can be matched
# against the order in which the generator lists the files.
test_df_2 = test_df.sort_values("filename")

In [47]:
# Preview the sorted template to compare against the generator's file order.
test_df_2.head()

Unnamed: 0,filename,category
485,0004b03ad7eabfb3989727c461310a84.jpg,43
6397,0004d34271d0a0253f8a95d0746bae05.jpg,43
2181,000d4ad3d0a372fd6eea2fee6bf738ef.jpg,43
2626,0012f125ad07123e69468ae1d1d59de4.jpg,43
2433,0015831b249b856e569e883a9dc6d4fa.jpg,43


In [48]:
# The predictions are written positionally, so the sorted template must list
# exactly the same files, in the same order, as the generator produced them.
# Fail loudly instead of silently attaching predictions to the wrong images.
generator_names = [os.path.basename(name) for name in test_generator.filenames]
if generator_names != test_df_2["filename"].tolist():
    raise ValueError("test_df_2 rows are not aligned with test_generator.filenames")
test_df_2["category"] = y_classes

In [49]:
# Convert the predicted class indices to text so they can be zero-padded.
test_df_2["category"] = test_df_2["category"].astype(str)

In [50]:
def cek(x):
    """Zero-pad a single-digit label to two characters.

    Returns "0" + x when x is one of the strings "0" through "9"
    (e.g. "7" -> "07"); any other value is returned unchanged.
    """
    single_digits = tuple(str(digit) for digit in range(10))
    return "0" + x if x in single_digits else x
# Zero-pad every predicted label via cek.
test_df_2["category"] = test_df_2["category"].map(cek)

In [51]:
# Display the finished submission table (file name and predicted category).
test_df_2

Unnamed: 0,filename,category
485,0004b03ad7eabfb3989727c461310a84.jpg,12
6397,0004d34271d0a0253f8a95d0746bae05.jpg,16
2181,000d4ad3d0a372fd6eea2fee6bf738ef.jpg,40
2626,0012f125ad07123e69468ae1d1d59de4.jpg,22
2433,0015831b249b856e569e883a9dc6d4fa.jpg,12
...,...,...
2128,ffef3b0e7505855ba5794ebab9aff719.jpg,09
7200,ffef86f7872cd020992e00b7cf6585c7.jpg,20
2286,fff8fdfc178e3e94414fc10c69271f88.jpg,10
2848,fff9ee1d7f920b920dd603631d24bc07.jpg,23


In [52]:
# Write the submission file with a header row and without the index column.
test_df_2.to_csv("submission_11.csv", index=False)