In [2]:
import tensorflow as tf
import numpy as np 
import os
import matplotlib.pyplot as plt
from glob import glob
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator





In [3]:
# Root folders of the training and test images.
TRAIN_DIR = "../input/exml-prelims-task-1/train/train"
TEST_DIR = "../input/exml-prelims-task-1/test/test"

# Training PNGs live in sub-folders of TRAIN_DIR. Without recursive=True, "**" behaves
# like "*", so this pattern matches PNG files exactly one directory level below TRAIN_DIR.
train_glob = os.path.join(TRAIN_DIR, "**/*.png")
trainImagesList = glob(train_glob)

# Test PNGs are matched directly inside TEST_DIR.
test_glob = os.path.join(TEST_DIR, "*.png")
testImagesList = glob(test_glob)





In [5]:
# Report the size of the training and test sets.
n_train_images = len(trainImagesList)
n_test_images = len(testImagesList)
print('number of training images', n_train_images)
print('number of testing images', n_test_images)

In [7]:
# The images are resized from (1600px x 1200px) to (256px x 256px) and converted to grayscale.
# That is, images are fed into the CNN with the input shape (256, 256, 1).

# The CNN starts with a Conv2D + MaxPool2D pair whose convolution uses a large (11,11) filter,
# because a large filter is better at extracting the important features of the hand.
# It is followed by Conv2D + MaxPool2D pairs with filter sizes of (7,7) and (5,5), and then
# three more pairs with a filter size of (3,3).

# Then comes a Flatten layer followed by Dropout and Dense layers; the hidden Dense layers use ReLU activation.
# The Dropout layers are added to prevent the model from overfitting the training data.

# The final layer is a single neuron with activation function as sigmoid



def build_model():
    """Assemble the (uncompiled) CNN for binary classification.

    The network takes 256x256 single-channel images and consists of six
    Conv2D (ReLU, 'same' padding) + MaxPool2D stages followed by a small
    dense head that ends in a single sigmoid unit.

    Returns:
        A tf.keras Sequential model that has not been compiled yet.
    """
    # (number of filters, kernel size, pooling size/stride) for each conv stage.
    conv_stages = [
        (32, (11, 11), 2),
        (48, (7, 7), 2),
        (64, (5, 5), 2),
        (128, (3, 3), 4),
        (128, (3, 3), 2),
        (196, (3, 3), 2),
    ]

    net = tf.keras.models.Sequential()
    for stage_idx, (n_filters, kernel_size, pool) in enumerate(conv_stages):
        # Only the very first layer declares the input shape.
        first_layer_kwargs = {'input_shape': (256, 256, 1)} if stage_idx == 0 else {}
        net.add(tf.keras.layers.Conv2D(n_filters, kernel_size, padding='same',
                                       activation='relu', **first_layer_kwargs))
        net.add(tf.keras.layers.MaxPool2D(pool, pool))

    # Classification head: light dropout around the first dense layer, sigmoid output.
    net.add(tf.keras.layers.Flatten())
    net.add(tf.keras.layers.Dropout(0.01))
    net.add(tf.keras.layers.Dense(256, activation='relu'))
    net.add(tf.keras.layers.Dropout(0.01))
    net.add(tf.keras.layers.Dense(64, activation='relu'))
    net.add(tf.keras.layers.Dense(1, activation='sigmoid'))
    return net


model = build_model()
model.summary()

# Binary cross-entropy loss with the Adam optimizer (default settings); track accuracy.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss="binary_crossentropy",
    metrics=['acc'],
)

# The loss is binary_crossentropy because this is a binary classification problem.
# The Adam optimizer is used for the following reasons:
#   1. it combines the properties of both AdaGrad and RMSProp
#   2. it is easy to initialise and adjusts the learning rate automatically


In [8]:
# ImageDataGenerator.flow_from_directory labels the images automatically from the
# names of the folders they are stored in.
# Pixel values are multiplied by 1/255 before being fed to the network.
train_datagen = ImageDataGenerator(rescale=1 / 255)

train_data = train_datagen.flow_from_directory(
    directory=TRAIN_DIR,
    class_mode='binary',      # binary classification -> a single 0/1 label per image
    color_mode='grayscale',   # load the images as single-channel grayscale
    target_size=(256, 256),   # resize every image to the 256x256 input of the network
    batch_size=100,
)

In [9]:
# Learning-rate decay is used so that, once training gets close to a minimum, the
# optimizer does not overshoot it.
# After every epoch the learning rate is multiplied by `gamma`
# (e.g. gamma=0.9 shrinks it by 10% per epoch).
class LearningRateDecay(tf.keras.callbacks.Callback):
    """Callback that decays the learning rate geometrically and stops on a tiny loss.

    Args:
        gamma: Factor the optimizer's learning rate is multiplied by at the end
            of every epoch.
        *args: Forwarded to the base ``Callback`` constructor.
        loss_threshold: Keyword-only. Training is stopped as soon as the loss
            reported for an epoch falls below this value (default ``1e-5``).
        **kwargs: Forwarded to the base ``Callback`` constructor.
    """

    def __init__(self, gamma, *args, loss_threshold=1e-5, **kwargs):
        self.gamma = gamma
        self.loss_threshold = loss_threshold
        super().__init__(*args, **kwargs)

    def on_epoch_end(self, epoch, logs=None):
        """Scale the learning rate by ``gamma`` and stop training on a very small loss.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metrics dict supplied by Keras; may be ``None`` or lack ``'loss'``.
        """
        optimizer = self.model.optimizer
        lr = float(tf.keras.backend.get_value(optimizer.learning_rate))
        tf.keras.backend.set_value(optimizer.learning_rate, lr * self.gamma)

        # `logs` can be None and 'loss' can be absent; never compare None with a float.
        loss = (logs or {}).get('loss')
        if loss is not None and loss < self.loss_threshold:
            self.model.stop_training = True
            print(f'\nLoss {loss:.2e} is below {self.loss_threshold:.0e}; stopping training early.')
# Train for 20 epochs; the callback is constructed with gamma=0.9.
lr_decay = LearningRateDecay(0.9)
model.fit(train_data, epochs=20, verbose=1, callbacks=[lr_decay])

In [11]:
# Save the trained model to 'saved_model/my_model'.
model.save(filepath='saved_model/my_model')

In [12]:
# Write the test-set predictions to a CSV file with the columns "id,AspectofHand".
from tqdm import tqdm

# TEST_DIR is defined once in the path-configuration cell near the top of the notebook.
OUTPUT_FILE = "./sub2.csv"

# Open the output once (mode "w" also guarantees a fresh file on re-runs) instead of
# re-opening it in append mode for every image.
with open(OUTPUT_FILE, "w") as out_file:
    out_file.write("id,AspectofHand\n")

    # Sorted so the row order is deterministic across runs.
    for fname in tqdm(sorted(os.listdir(TEST_DIR))):
        img_path = os.path.join(TEST_DIR, fname)

        # Preprocess exactly like the training generator: grayscale, 256x256, scaled by 1/255.
        img = img_to_array(load_img(img_path, color_mode='grayscale', target_size=(256, 256)))
        img = img / 255.0

        # Add the batch dimension: (256, 256, 1) -> (1, 256, 256, 1).
        pred = model.predict(np.expand_dims(img, axis=0), verbose=0)

        # The sigmoid output is a probability; threshold it at 0.5 rather than truncating
        # it with int(), which would turn every value below 1.0 into 0.
        # NOTE(review): confirm the meaning of 0/1 against train_data.class_indices.
        label = int(pred[0, 0] > 0.5)

        # The id is the file name without its extension.
        image_id = os.path.splitext(fname)[0]
        out_file.write(f"{image_id},{label}\n")