Import important libraries

In [30]:
import librosa
import pandas as pd
import pickle
import numpy as np
import os
from random import shuffle
import keras
import multiprocessing
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from keras import losses, models, optimizers
from keras.layers import Dense, Input, Convolution2D, BatchNormalization, Flatten, MaxPool2D, Activation, Reshape, Dropout
from keras.utils import plot_model, Sequence
from keras.layers.merge import concatenate
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import tensorflow as tf

Class for dumping and loading pickle files of exceedingly large sizes

Reads and writes the data in large chunks (each below 2 GiB) rather than in one single call

In [31]:
class MacOSFile(object):
    """File-object wrapper that splits very large reads and writes into chunks.

    ``read`` and ``write`` are overridden so that no single call on the
    wrapped file object moves more than ``MAX_CHUNK`` bytes. Every other
    attribute is forwarded to the wrapped file object, so an instance can be
    passed to ``pickle.dump`` / ``pickle.load`` in place of the file itself.
    """

    # Largest number of bytes handed to the wrapped file in a single call.
    # The parentheses matter: ``1 << 31 - 1`` parses as ``1 << 30``.
    MAX_CHUNK = (1 << 31) - 1

    def __init__(self, f):
        """Wrap the already-opened binary file object ``f``."""
        self.f = f

    def __getattr__(self, item):
        """Forward attributes not defined on the wrapper to the wrapped file."""
        # ``__getattr__`` only runs when normal lookup fails. If ``f`` itself
        # is missing (instance created without ``__init__``), looking it up
        # again here would recurse forever, so fail cleanly instead.
        if item == "f":
            raise AttributeError(item)
        return getattr(self.f, item)

    def read(self, n):
        """Read up to ``n`` bytes from the wrapped file.

        Requests of at most ``MAX_CHUNK`` bytes are passed straight through.
        Larger requests are served in several chunked reads and returned as a
        ``bytearray``; if the file ends early, only the bytes actually read
        are returned.
        """
        if n <= self.MAX_CHUNK:
            return self.f.read(n)

        buffer = bytearray(n)
        idx = 0
        while idx < n:
            chunk = self.f.read(min(n - idx, self.MAX_CHUNK))
            if not chunk:
                # End of file: stop instead of advancing past missing data.
                break
            # Advance by what was actually returned, which may be less than
            # what was requested.
            buffer[idx:idx + len(chunk)] = chunk
            idx += len(chunk)
        if idx < n:
            del buffer[idx:]
        return buffer

    def write(self, buffer):
        """Write ``buffer`` to the wrapped file in chunks, logging progress.

        Returns the total number of bytes in ``buffer``.
        """
        n = len(buffer)
        print("writing total_bytes=%s..." % n, flush=True)
        idx = 0
        while idx < n:
            batch_size = min(n - idx, self.MAX_CHUNK)
            print("writing bytes [%s, %s)... " % (idx, idx + batch_size), end="", flush=True)
            self.f.write(buffer[idx:idx + batch_size])
            print("done.", flush=True)
            idx += batch_size
        return n


def pickle_dump(obj, file_path):
    """Pickle ``obj`` into the file at ``file_path``.

    The file is opened for binary writing and wrapped in ``MacOSFile`` before
    being handed to ``pickle.dump`` with the highest available protocol.
    Returns whatever ``pickle.dump`` returns.
    """
    with open(file_path, "wb") as handle:
        chunked_writer = MacOSFile(handle)
        result = pickle.dump(obj, chunked_writer, protocol=pickle.HIGHEST_PROTOCOL)
    return result


def pickle_load(file_path):
    """Unpickle and return the object stored in the file at ``file_path``.

    The file is opened for binary reading and wrapped in ``MacOSFile`` before
    being handed to ``pickle.load``.
    """
    # Unpickling can execute arbitrary code: only use this on trusted files.
    with open(file_path, "rb") as handle:
        chunked_reader = MacOSFile(handle)
        loaded = pickle.load(chunked_reader)
    return loaded

Load test image dataset

In [32]:
# Test images, unpickled through the chunked reader defined above.
dataset = pickle_load(file_path='./data/test_image.pkl')

# Analyse the dataset 

In [33]:
# Container type, number of samples, and length of the first sample,
# one value per line.
print(type(dataset), len(dataset), len(dataset[0]), sep="\n")

<class 'list'>
2000
784


Reshape the dataset and the images

In [34]:
# Turn every flat sample into its own 28x28 array.
X = [np.reshape(np.array(flat_image), (28, 28)) for flat_image in dataset]

Check that the dataset is in correct shape

In [35]:
# Number of images, then the shape of the first one, one value per line.
print(len(X), X[0].shape, sep="\n")

2000
(28, 28)


Finally, convert the list of images into a single array, scale the pixel values as was done when training the model, and add the channel axis

In [36]:
# Collapse the list of per-image arrays into one array and report its shape.
X = np.array(X)
print(np.shape(X))

(2000, 28, 28)


In [37]:
# Divide every pixel value by 255.
# Caution: X is overwritten, so re-running this cell scales the data again.
X = np.divide(X, 255)

In [38]:
# Add the trailing single-channel axis that the network's Input layer expects.
# -1 lets NumPy infer the number of images instead of hard-coding 2000, so the
# cell also works for a test set of any other size.
X = np.reshape(X, (-1, 28, 28, 1))

Redefining model architecture to load the weights

In [39]:
def model_definition(learning_rate):
    """Build and compile the CNN whose saved weights are loaded later on.

    Layout, in order:
      * three 3x3 "same"-padded convolution stages with 32, 64 and 128
        filters, each followed by batch normalisation and ReLU;
      * max pooling after the first two stages only, and dropout after every
        stage (0.25, 0.25, 0.4);
      * a dense head of 256, 64 and 16 ReLU units with dropout (0.5, 0.4,
        0.3), batch-normalised after the first two dense layers;
      * a 4-way softmax output.

    Args:
        learning_rate: passed as the first positional argument to
            ``keras.optimizers.RMSprop``.

    Returns:
        The compiled Keras model. The layer summary is printed as a side
        effect.
    """
    # Named ``inputs`` so the built-in ``input`` is not shadowed. The Input
    # layer fixes the shape, so the conv layer needs no ``input_shape``.
    inputs = Input(shape=(28, 28, 1))

    # Convolution stage 1
    x = Convolution2D(32, (3, 3), padding="same")(inputs)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPool2D()(x)
    x = Dropout(0.25)(x)

    # Convolution stage 2
    x = Convolution2D(64, (3, 3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPool2D()(x)
    x = Dropout(0.25)(x)

    # Convolution stage 3 (no pooling)
    x = Convolution2D(128, (3, 3), padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(0.4)(x)

    x = Flatten()(x)

    # Dense head
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)

    x = Dense(64, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.4)(x)

    x = Dense(16, activation='relu')(x)
    x = Dropout(0.3)(x)

    output = Dense(4, activation='softmax')(x)

    model = models.Model(inputs=inputs, outputs=output)

    opt = keras.optimizers.RMSprop(learning_rate)

    model.compile(optimizer=opt, loss=losses.categorical_crossentropy, metrics=['acc'])

    # summary() prints the table itself and returns None, so wrapping it in
    # print() would emit a stray "None" line.
    model.summary()

    return model

In [40]:
# Rebuild the architecture so the saved weights can be loaded into it.
model = model_definition(learning_rate=0.001)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
batch_normalization_6 (Batch (None, 28, 28, 32)        128       
_________________________________________________________________
activation_4 (Activation)    (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 14, 14, 32)        0         
_________________________________________________________________
dropout_7 (Dropout)          (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 14, 14, 64)        18496     
__________

Load the weights of the model

In [41]:
# Weights file to load into the model built above.
weights_path = './data/model-90-89.h5'
model.load_weights(weights_path)

Get the predictions from the model. The output has shape (batch_size x 4): one softmax score per class for every image

In [42]:
# Run the network on the preprocessed images. The model ends in a
# Dense(4, softmax) layer, so each prediction row holds four class scores.
y_pred = model.predict(X)

We load the dictionary which will give us the correct class from the encoded class.

We traverse the predicted NumPy array and choose the index with the maximum softmax value as the output.

We collect the decoded classes in the `y_out` list and the corresponding image indices in the `image_idx` list.

In [43]:
# Lookup from encoded class index to the original class label, stored under
# the 'dict' key of the training dataset pickle. It is deliberately not bound
# to the name `dict`, which would shadow the built-in type for every later cell.
# Unpickling can execute arbitrary code: only load trusted files.
label_lookup = pickle_load('./data/dataset_large.pkl')['dict']

# Column index of the highest softmax score in each prediction row.
# np.argmax returns the first maximum on ties and works for any number of
# classes.
predicted_idx = np.argmax(y_pred, axis=1)

# Running image index (0, 1, 2, ...) and the decoded class for each image.
image_idx = list(range(len(y_pred)))
y_out = [label_lookup[int(class_idx)] for class_idx in predicted_idx]

Store the results in a dataframe

In [44]:
# One row per image: its running index and its predicted class.
submission_columns = {'image_idx': image_idx, 'class': y_out}
df_out = pd.DataFrame(data=submission_columns)

In [45]:
# Preview the first five rows of the result.
df_out.head(n=5)

Unnamed: 0,image_idx,class
0,0,0
1,1,6
2,2,0
3,3,0
4,4,0


Save the dataframe as a csv file

In [46]:
# NOTE(review): with the default settings the DataFrame index is written as an
# extra unnamed first column next to image_idx. Confirm the submission format
# expects it; otherwise pass index=False.
submission_path = './data/hitkul_submission.csv'
df_out.to_csv(submission_path)