In [1]:
import pandas as pd
import numpy as np
import os 

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

import warnings
warnings.filterwarnings("ignore")

import tensorflow as tf

np.random.seed(0)
tf.random.set_seed(0)

In [2]:
!pip install efficientnet
import efficientnet.tfkeras  as efn
from kaggle_datasets import KaggleDatasets

AUTO = tf.data.experimental.AUTOTUNE

def TPU():
    # Detect hardware, return appropriate distribution strategy
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection. No parameters necessary if TPU_NAME environment variable is set. On Kaggle this is always the case.
        print('Running on TPU ', tpu.master())
    except ValueError:
        tpu = None

    if tpu:
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)
        strategy = tf.distribute.experimental.TPUStrategy(tpu)
    else:
        strategy = tf.distribute.get_strategy() # default distribution strategy in Tensorflow. Works on CPU and single GPU.

    print("REPLICAS: ", strategy.num_replicas_in_sync)
    return strategy


strategy = TPU()

Collecting efficientnet
  Downloading efficientnet-1.1.0-py3-none-any.whl (18 kB)
Installing collected packages: efficientnet
Successfully installed efficientnet-1.1.0
You should consider upgrading via the '/opt/conda/bin/python3.7 -m pip install --upgrade pip' command.[0m
Running on TPU  grpc://10.0.0.2:8470
REPLICAS:  8


In [3]:
GCS_DS_PATH = KaggleDatasets().get_gcs_path()

In [4]:
train_data = pd.read_csv('../input/plant-pathology-2020-fgvc7/train.csv')
test_data = pd.read_csv('../input/plant-pathology-2020-fgvc7/test.csv')

train_paths = train_data['image_id'].apply(lambda x: os.path.join(GCS_DS_PATH , 'images' , x + '.jpg')).values
test_paths = test_data['image_id'].apply(lambda x: os.path.join(GCS_DS_PATH , 'images' , x + '.jpg')).values

train_labels = train_data.iloc[:,1:].values

In [5]:
n_classes = 4
BATCH_SIZE = 8 * strategy.num_replicas_in_sync
img_size = 800
EPOCHS = 100
FOLDS = 5
SEED = 42

In [6]:
def decode_image(filename, label=None, image_size=(img_size, img_size)):
    bits = tf.io.read_file(filename)
    image = tf.image.decode_jpeg(bits, channels=3)
    image = tf.cast(image, tf.float32) / 255.0
    image = tf.image.resize(image, image_size)
    if label is None:
        return image
    else:
        return image, label
    
def data_augment(image, label=None, seed=2020):
    image = tf.image.random_flip_left_right(image, seed=seed)
    image = tf.image.random_flip_up_down(image, seed=seed)
           
    if label is None:
        return image
    else:
        return image, label
    
    
def prepare_train(train_paths, train_labels):
    data = (
        tf.data.Dataset
        .from_tensor_slices((train_paths, train_labels))
        .map(decode_image, num_parallel_calls=AUTO)
        .map(data_augment, num_parallel_calls=AUTO)
        .repeat()
        .shuffle(512)
        .batch(BATCH_SIZE)
        .prefetch(AUTO)
    )
    return data

def prepare_val(val_paths, val_labels):
    data = (
        tf.data.Dataset
        .from_tensor_slices((val_paths, val_labels))
        .map(decode_image, num_parallel_calls=AUTO)
        .batch(BATCH_SIZE)
        .prefetch(AUTO)
    )
    return data

def prepare_test(test_paths):
    data = (
        tf.data.Dataset
        .from_tensor_slices((test_paths))
        .map(decode_image, num_parallel_calls=AUTO)
        .batch(BATCH_SIZE)
    )
    return data

In [7]:
def get_model():
    base_model = efn.EfficientNetB7(weights='imagenet', include_top=False, pooling='avg', input_shape=(img_size, img_size, 3))
    x = base_model.output
    predictions = Dense(n_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    model.compile(optimizer='adam', loss='categorical_crossentropy',metrics=['accuracy'])
    return model

def Callbacks():
    erl = EarlyStopping(monitor='val_loss', patience=11, verbose=1, mode='min', restore_best_weights=True)
    rdc = ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=2, verbose=1, mode='min')
    return [erl,rdc]

In [8]:
skf = StratifiedKFold(n_splits=FOLDS, shuffle=True, random_state=SEED)
test_pred = []
val_roc_auc = []
# all_history = []

for i, (train_idx, val_idx) in enumerate(skf.split(train_paths, train_labels.argmax(1))):
    print(); print('#'*25)
    print('### FOLD',i+1)
    print('#'*25)
    X_train, X_val = train_paths[train_idx], train_paths[val_idx]
    y_train, y_val = train_labels[train_idx], train_labels[val_idx]
    
    strategy = TPU()
    with strategy.scope():
        model = get_model()
        history = model.fit(
                    prepare_train(X_train,y_train),
                    steps_per_epoch=y_train.shape[0] // BATCH_SIZE,
                    validation_data=prepare_val(X_val, y_val),
                    validation_steps=y_val.shape[0] // BATCH_SIZE,
                    callbacks=Callbacks(),
                    epochs=EPOCHS,
                    verbose=1
                )

    test_pred.append(model.predict(prepare_test(test_paths), verbose=1))
    val_roc_auc.append(roc_auc_score(y_val,model.predict(prepare_val(X_val, y_val), verbose=1)))
    
#     all_history.append(history)
#     model.save('{}_model.h5'.format(i+1))


#########################
### FOLD 1
#########################
Running on TPU  grpc://10.0.0.2:8470
REPLICAS:  8
Downloading data from https://github.com/Callidior/keras-applications/releases/download/efficientnet/efficientnet-b7_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5
Train for 22 steps, validate for 5 steps
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 00007: ReduceLROnPlateau reducing learning rate to 0.0003000000142492354.
Epoch 8/100
Epoch 9/100
Epoch 00009: ReduceLROnPlateau reducing learning rate to 9.000000427477062e-05.
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 00014: ReduceLROnPlateau reducing learning rate to 2.700000040931627e-05.
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 00024: ReduceLROnPlateau reducing learning rate to 8.100000013655517e-06.
Epoch 25/100
Epoch 26/100
Epoch 00026: ReduceLROn

Epoch 00043: early stopping

#########################
### FOLD 2
#########################
Running on TPU  grpc://10.0.0.2:8470
REPLICAS:  8
Train for 22 steps, validate for 5 steps
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 00004: ReduceLROnPlateau reducing learning rate to 0.0003000000142492354.
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 00010: ReduceLROnPlateau reducing learning rate to 9.000000427477062e-05.
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 00016: ReduceLROnPlateau reducing learning rate to 2.700000040931627e-05.
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 00025: ReduceLROnPlateau reducing learning rate to 8.100000013655517e-06.
Epoch 26/100
Epoch 27/100
Epoch 00027: ReduceLROnPlateau reducing learning rate to 2.429999949526973e-06.
Epoch 28/100
Epoch 29/100
Epoch 00029: ReduceLROnPlateau reducing learning 

Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 00020: ReduceLROnPlateau reducing learning rate to 2.700000040931627e-05.
Epoch 21/100
Epoch 22/100
Epoch 00022: ReduceLROnPlateau reducing learning rate to 8.100000013655517e-06.
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 00027: ReduceLROnPlateau reducing learning rate to 2.429999949526973e-06.
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 00033: ReduceLROnPlateau reducing learning rate to 7.289999985005124e-07.
Epoch 34/100
Epoch 35/100
Epoch 00035: ReduceLROnPlateau reducing learning rate to 2.1870000637136398e-07.
Epoch 36/100
Epoch 37/100
Epoch 00037: ReduceLROnPlateau reducing learning rate to 6.561000276406048e-08.
Epoch 38/100
Epoch 39/100
Epoch 00039: ReduceLROnPlateau reducing learning rate to 1.9683000829218145e-08.
Epoch 40/100
Epoch 41/100
Epoch 42/1

Epoch 00051: early stopping

#########################
### FOLD 4
#########################
Running on TPU  grpc://10.0.0.2:8470
REPLICAS:  8
Train for 22 steps, validate for 5 steps
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 00003: ReduceLROnPlateau reducing learning rate to 0.0003000000142492354.
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 00009: ReduceLROnPlateau reducing learning rate to 9.000000427477062e-05.
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 00018: ReduceLROnPlateau reducing learning rate to 2.700000040931627e-05.
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 00022: ReduceLROnPlateau reducing learning rate to 8.100000013655517e-06.
Epoch 23/100
Epoch 24/100
Epoch 00024: ReduceLROnPlateau reducing learning rate to 2.429999949526973e-06.
Epoch 25/100
Epoch 26/100
Epoch 00026: ReduceLROnPlateau reducing learning rate to 7.289999985005124e-07.
Epoch 27

Epoch 12/100
Epoch 00012: ReduceLROnPlateau reducing learning rate to 9.000000427477062e-05.
Epoch 13/100
Epoch 14/100
Epoch 00014: ReduceLROnPlateau reducing learning rate to 2.700000040931627e-05.
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 00022: ReduceLROnPlateau reducing learning rate to 8.100000013655517e-06.
Epoch 23/100
Epoch 24/100
Epoch 00024: ReduceLROnPlateau reducing learning rate to 2.429999949526973e-06.
Epoch 25/100
Epoch 26/100
Epoch 00026: ReduceLROnPlateau reducing learning rate to 7.289999985005124e-07.
Epoch 27/100
Epoch 28/100
Epoch 00028: ReduceLROnPlateau reducing learning rate to 2.1870000637136398e-07.
Epoch 29/100
Epoch 30/100
Epoch 00030: ReduceLROnPlateau reducing learning rate to 6.561000276406048e-08.
Epoch 31/100
Epoch 00031: early stopping


In [9]:
val_roc_auc

[0.9901811213134308,
 0.9718367444639124,
 0.987488303037211,
 0.9936622123717895,
 0.9799314720281768]

In [10]:
all_test = 0
for i in range(FOLDS):
    all_test += test_pred[i]

In [11]:
all_models = all_test/FOLDS
all_models

array([[8.0598482e-05, 7.4621278e-04, 9.9905378e-01, 1.1943842e-04],
       [8.6567215e-06, 2.0499721e-03, 9.9790019e-01, 4.1136547e-05],
       [7.0974611e-06, 6.9595946e-05, 8.6242189e-06, 9.9991465e-01],
       ...,
       [2.2846107e-05, 1.3209066e-03, 9.9864310e-01, 1.3154573e-05],
       [9.9969274e-01, 1.3020242e-04, 9.9524426e-05, 7.7571822e-05],
       [1.4123447e-05, 7.8365151e-03, 7.1370378e-06, 9.9214220e-01]],
      dtype=float32)

In [12]:
best_2_models = test_pred[0]*.7 + test_pred[3]*.3
best_2_models

array([[8.4195359e-05, 1.4213498e-03, 9.9836200e-01, 1.3245229e-04],
       [9.7644688e-06, 3.0584361e-03, 9.9689764e-01, 3.4145713e-05],
       [4.9493756e-06, 6.5372711e-05, 1.6407637e-05, 9.9991322e-01],
       ...,
       [4.7722435e-05, 3.5397331e-03, 9.9638212e-01, 3.0432406e-05],
       [9.9966782e-01, 1.5272264e-04, 8.8977496e-05, 9.0458336e-05],
       [9.5645573e-06, 1.1003704e-02, 1.2077676e-06, 9.8898554e-01]],
      dtype=float32)

In [13]:
# best_2_models gives me better score on LB
sumb = pd.read_csv('../input/plant-pathology-2020-fgvc7/sample_submission.csv')
sumb.iloc[:,1:] = best_2_models 
# sumb.iloc[:,1:] = all_models

In [14]:
sumb

Unnamed: 0,image_id,healthy,multiple_diseases,rust,scab
0,Test_0,0.000084,0.001421,0.998362,0.000132
1,Test_1,0.000010,0.003058,0.996898,0.000034
2,Test_2,0.000005,0.000065,0.000016,0.999913
3,Test_3,0.999269,0.000022,0.000639,0.000070
4,Test_4,0.000008,0.001409,0.998358,0.000225
...,...,...,...,...,...
1816,Test_1816,0.000001,0.000538,0.999370,0.000091
1817,Test_1817,0.000002,0.003084,0.000376,0.996538
1818,Test_1818,0.000048,0.003540,0.996382,0.000030
1819,Test_1819,0.999668,0.000153,0.000089,0.000090


In [15]:
sumb.to_csv('submission.csv', index=False)