In [1]:
import cv2
import os
import time, gc
import numpy as np
import pandas as pd

import tensorflow as tf

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Lambda, Input, GlobalAveragePooling2D
from math import ceil

# Install EfficientNet
!pip install '../input/grapheme-sub/efficientnet-1.0.0-py3-none-any.whl'
import efficientnet.tfkeras as efn

Processing /kaggle/input/grapheme-sub/efficientnet-1.0.0-py3-none-any.whl
Installing collected packages: efficientnet
Successfully installed efficientnet-1.0.0


In [2]:
# Configuration
# Size of the stored images; FACTOR scales it to the size used as model input.
HEIGHT, WIDTH = 137, 236
FACTOR = 1.0
HEIGHT_NEW, WIDTH_NEW = int(HEIGHT * FACTOR), int(WIDTH * FACTOR)
# Channel count of the model input and number of images per inference batch.
CHANNELS, BATCH_SIZE = 1, 16
# Normalisation pair applied by the data generator: (pixel / 255 - stats[0]) / stats[1].
stats = (0.0692, 0.2051)
# Directory that holds the test parquet files.
DIR = '../input/bengaliai-cv19'

## Image Preprocessing

In [3]:
def get_img(img, org_width, org_height):
    """Invert an image, stretch its brightest pixel to 255 and reshape it to 2-D.

    Args:
        img: array of pixel values with ``org_height * org_width`` elements.
            The inversion is ``255 - img``, so values are assumed to lie in
            0..255 (TODO confirm against the parquet schema).
        org_width: width of the returned image.
        org_height: height of the returned image.

    Returns:
        ``np.uint8`` array of shape ``(org_height, org_width)``. A blank input
        (every pixel 255) yields an all-zero image.
    """
    # Invert
    img = 255 - np.asarray(img)
    # Normalize
    peak = img.max()
    if peak == 0:
        # Blank image: nothing to stretch. Without this guard 255.0 / 0 gives
        # inf, 0 * inf gives NaN, and the uint8 cast of NaN is undefined.
        img = np.zeros(img.shape, dtype=np.uint8)
    else:
        img = (img * (255.0 / peak)).astype(np.uint8)
    return img.reshape(org_height, org_width)

## Create Model

In [4]:
def create_model(backbone, input_shape):
    """Build a three-output softmax classifier on top of ``backbone``.

    Args:
        backbone: callable accepting ``input_shape``, ``include_top`` and
            ``weights`` keyword arguments and returning a Keras model
            (the notebook passes EfficientNet constructors).
        input_shape: shape of one input image, forwarded to ``backbone``.

    Returns:
        A Keras ``Model`` whose outputs are, in order, the softmax layers
        ``'gra'`` (168 units), ``'vow'`` (11 units) and ``'cons'`` (7 units).
        The backbone is created with ``weights=None``.
    """
    feature_extractor = backbone(input_shape=input_shape, include_top=False, weights=None)
    pooled = GlobalAveragePooling2D()(feature_extractor.output)

    # (units, layer name) for every head; list order is the model's output order.
    head_specs = ((168, 'gra'), (11, 'vow'), (7, 'cons'))
    heads = [
        Dense(units, activation='softmax', name=layer_name)(pooled)
        for units, layer_name in head_specs
    ]

    return Model(feature_extractor.input, heads)

In [5]:

# Shape of one model input image: (height, width, channels), taken from the constants cell.
# Also used as the default `input_shape` of load_single_model.
input_shape = (HEIGHT_NEW,WIDTH_NEW,CHANNELS)

In [6]:

#model.summary()

## Data Generator

In [7]:
class TestDataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that yields normalised image batches without labels.

    Each batch is an array of shape ``(n, *img_size)`` where ``n`` is
    ``batch_size`` (the last batch may be smaller). Every image is divided by
    255 and then normalised with the module-level ``stats`` pair as
    ``(x - stats[0]) / stats[1]``.

    Args:
        X: indexable collection of images; each image must hold exactly
            ``img_size[0] * img_size[1]`` values (no resizing is performed).
        batch_size: number of images per batch.
        img_size: ``(height, width, channels)`` of one generated sample. The
            single-channel source image is repeated ``channels`` times.
        *args, **kwargs: accepted for call-site compatibility and ignored.
    """

    def __init__(self, X, batch_size = 16, img_size = (512, 512, 3), *args, **kwargs):
        # Let the Keras base class initialise its own state (newer Keras
        # versions warn when this call is missing).
        super().__init__()
        self.X = X
        self.indices = np.arange(len(self.X))
        self.batch_size = batch_size
        self.img_size = tuple(img_size)

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        return int(ceil(len(self.X) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index`` as a float32 array."""
        indices = self.indices[index*self.batch_size:(index+1)*self.batch_size]
        return self.__data_generation(indices)

    def __data_generation(self, indices):
        """Assemble and normalise the images selected by ``indices``."""
        # float32 matches the dtype the images are computed in; the default
        # float64 buffer would only double the memory of every batch.
        X = np.empty((len(indices), *self.img_size), dtype=np.float32)
        # Shape images from this instance's img_size rather than module
        # globals, so the constructor argument is actually honoured.
        channels = self.img_size[-1]

        for i, index in enumerate(indices):
            image = np.asarray(self.X[index])
            image = np.stack((image,) * channels, axis=-1).reshape(self.img_size)
            X[i] = (image.astype(np.float32) / 255.0 - stats[0]) / stats[1]
        return X

## Predict and Submission

In [8]:
def load_single_model(backbone, path, input_shape=input_shape):
    """Create the multi-head model for ``backbone`` and load weights from ``path``.

    Args:
        backbone: backbone constructor forwarded to ``create_model``.
        path: weights file handed to the model's ``load_weights``.
        input_shape: input image shape; defaults to the module-level
            ``input_shape`` as it was when this function was defined.

    Returns:
        The model with the weights loaded.
    """
    net = create_model(backbone, input_shape)
    net.load_weights(path)
    return net

In [9]:
# Load Model Weights
# One EfficientNetB4 checkpoint per entry; the trailing notes are the author's CV / LB records.
weight_paths = [
    '../input/grapheme-sub/best_avg_recall_single_mul_addaug_EF4_Fold_1.h5',         # cv9846 lb9763 2020 ef4 fold1
    '../input/grapheme-sub/best_avg_recall_single_mul_addaug_EF4_Fold_2.h5',         # cv9839 lb? 2020 ef4 fold2
    '../input/grapheme-sub/best_avg_recall_single_mul_addaug_EF4_Fold_4_0.9838.h5',  # lb? 2020 ef4 fold4
    '../input/grapheme-sub/best_avg_recall_single_mul_addaug_EF4_Fold_0_0.9846.h5',  # lb? 2020 ef4 fold0
    '../input/grapheme-sub/best_avg_recall_single_mul_addaug_EF4_Fold_3_0.9831.h5',  # lb? 2020 ef4 fold3
]
models = [load_single_model(efn.EfficientNetB4, weight_path) for weight_path in weight_paths]
#models.append(load_single_model(efn.EfficientNetB5, '../input/grapheme-sub/best_avg_recall_single_mul_addaug_EF5_Fold_0.h5'))  #cv9816 lb? 2021 ef5 fold0

In [10]:
# Create Submission File
# Target columns, in the same order as the model outputs they are paired with.
tgt_cols = ['grapheme_root','vowel_diacritic','consonant_diacritic']

# Create Predictions
row_ids, targets = [], []

# Loop through Test Parquet files (X)
for file_idx in range(0, 4):
    # Read Parquet file
    df = pd.read_parquet(os.path.join(DIR, 'test_image_data_'+str(file_idx)+'.parquet'))
    # Get Image Id values
    image_ids = df['image_id'].values
    # Pull all pixel rows out in one go (much faster than a .loc lookup per row)
    pixels = df.drop(['image_id'], axis = 1).values
    del df

    # Pre-process every row into a 2-D image
    X = [get_img(row, WIDTH, HEIGHT) for row in pixels]
    del pixels

    # Data_Generator
    data_generator_test = TestDataGenerator(X, batch_size = BATCH_SIZE, img_size = (HEIGHT_NEW, WIDTH_NEW, CHANNELS))

    # Predict: one list of per-head probability arrays for every model.
    # NOTE(review): predict_generator is deprecated in newer TensorFlow releases
    # in favour of model.predict; kept because the runtime version is not visible here.
    preds = []
    for model in models:
        preds.append(model.predict_generator(data_generator_test, verbose = 1))

    # Average every head over ALL loaded models (not a hard-coded five) and
    # take the most likely class per image.
    labels_per_head = []
    for head_idx in range(min(len(preds[0]), len(tgt_cols))):
        mean_prob = sum(model_preds[head_idx] for model_preds in preds) / len(preds)
        labels_per_head.append(np.argmax(mean_prob, axis=1))

    # Emit one (row_id, target) pair per image and target column, image-major
    for image_id, image_labels in zip(image_ids, np.stack(labels_per_head, axis=1)):
        for col, label in zip(tgt_cols, image_labels):
            row_ids.append(str(image_id)+'_'+col)
            targets.append(label)

    # Cleanup
    del X, preds, labels_per_head
    gc.collect()



In [11]:
# Create and Save Submission File
# Two columns: the "<image_id>_<target column>" key and the predicted class index.
submission_columns = ['row_id', 'target']
submit_df = pd.DataFrame(
    dict(zip(submission_columns, (row_ids, targets))),
    columns=submission_columns,
)
submit_df.to_csv('submission.csv', index=False)
# Preview the first rows of the written file contents.
print(submit_df.head(40))

                         row_id  target
0          Test_0_grapheme_root       3
1        Test_0_vowel_diacritic       0
2    Test_0_consonant_diacritic       0
3          Test_1_grapheme_root      93
4        Test_1_vowel_diacritic       2
5    Test_1_consonant_diacritic       0
6          Test_2_grapheme_root      19
7        Test_2_vowel_diacritic       0
8    Test_2_consonant_diacritic       0
9          Test_3_grapheme_root     115
10       Test_3_vowel_diacritic       0
11   Test_3_consonant_diacritic       0
12         Test_4_grapheme_root      55
13       Test_4_vowel_diacritic       4
14   Test_4_consonant_diacritic       0
15         Test_5_grapheme_root     115
16       Test_5_vowel_diacritic       2
17   Test_5_consonant_diacritic       0
18         Test_6_grapheme_root     147
19       Test_6_vowel_diacritic       9
20   Test_6_consonant_diacritic       5
21         Test_7_grapheme_root     137
22       Test_7_vowel_diacritic       7
23   Test_7_consonant_diacritic       0
