In [1]:
#importing all the major libraries
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import os
import cv2

In [2]:
#read the training labels CSV into a dataframe
labels = pd.read_csv(filepath_or_buffer="C:/Users/Dell/content/eye_gender_data/Training_set.csv")

In [3]:
#preview the first five rows of the labels table
labels.head(n=5)

Unnamed: 0,filename,label
0,Image_1.jpg,male
1,Image_2.jpg,female
2,Image_3.jpg,female
3,Image_4.jpg,female
4,Image_5.jpg,male


In [4]:
#pair every labelled filename with its full path inside the train folder
file_paths = [
    [image_name, 'C:/Users/Dell/content/eye_gender_data/train/' + image_name]
    for image_name in labels['filename']
]

In [5]:
#to verify that every labelled image is actually present on disk.
#file_paths is derived from labels, so the two lengths are equal by construction;
#checking that each path resolves to a real file is the check that can fail.
missing_train_files = [path for _, path in file_paths if not os.path.isfile(path)]
if not missing_train_files:
    print('Number of labels i.e. ', len(labels), 'matches the number of filenames i.e. ', len(file_paths))
else:
    #report how many files are missing and show a few of them to ease debugging
    print('Number of labels does not match the number of filenames:',
          len(missing_train_files), 'image file(s) not found, e.g.', missing_train_files[:5])

Number of labels i.e.  9220 matches the number of filenames i.e.  9220


In [6]:
#build a dataframe with one row per image: its filename and its full path
images = pd.DataFrame(data=file_paths, columns=['filename', 'filepaths'])
images.head(n=5)

Unnamed: 0,filename,filepaths
0,Image_1.jpg,C:/Users/Dell/content/eye_gender_data/train/Im...
1,Image_2.jpg,C:/Users/Dell/content/eye_gender_data/train/Im...
2,Image_3.jpg,C:/Users/Dell/content/eye_gender_data/train/Im...
3,Image_4.jpg,C:/Users/Dell/content/eye_gender_data/train/Im...
4,Image_5.jpg,C:/Users/Dell/content/eye_gender_data/train/Im...


In [7]:
#attach each image's label to its path by joining the two frames on filename
train_data = images.merge(labels, on='filename', how='inner')
train_data.head(n=5)

Unnamed: 0,filename,filepaths,label
0,Image_1.jpg,C:/Users/Dell/content/eye_gender_data/train/Im...,male
1,Image_2.jpg,C:/Users/Dell/content/eye_gender_data/train/Im...,female
2,Image_3.jpg,C:/Users/Dell/content/eye_gender_data/train/Im...,female
3,Image_4.jpg,C:/Users/Dell/content/eye_gender_data/train/Im...,female
4,Image_5.jpg,C:/Users/Dell/content/eye_gender_data/train/Im...,male


In [8]:
#encode the string labels as integers in one pass: male -> 0, female -> 1.
#An explicit lookup through Series.map avoids the object-to-int downcast that
#chained Series.replace calls rely on (deprecated in recent pandas).
#Values that are already encoded pass through unchanged, so re-running the cell
#is harmless; astype raises if any value cannot be converted to an integer.
label_codes = {"male": 0, "female": 1}
train_data.label = train_data.label.map(lambda value: label_codes.get(value, value)).astype("int64")

In [9]:
#preview the first five rows after encoding the labels
train_data.head(n=5)

Unnamed: 0,filename,filepaths,label
0,Image_1.jpg,C:/Users/Dell/content/eye_gender_data/train/Im...,0
1,Image_2.jpg,C:/Users/Dell/content/eye_gender_data/train/Im...,1
2,Image_3.jpg,C:/Users/Dell/content/eye_gender_data/train/Im...,1
3,Image_4.jpg,C:/Users/Dell/content/eye_gender_data/train/Im...,1
4,Image_5.jpg,C:/Users/Dell/content/eye_gender_data/train/Im...,0


In [10]:
#confirm the label column now holds integers
train_data['label'].dtype

dtype('int64')

In [11]:
#data preprocessing on training data
data = [] #list of [resized grayscale image, label] pairs
image_size = 100 #every image is resized to image_size x image_size pixels
for filepath, image_label in zip(train_data['filepaths'], train_data['label']):

    img_array = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE) #gray scale conversion
    if img_array is None:
        #cv2.imread returns None instead of raising when a file is missing or cannot be decoded;
        #stop here with the offending path rather than failing later inside cv2.resize
        raise FileNotFoundError('Could not read image (missing or corrupt file): ' + str(filepath))

    new_img_array = cv2.resize(img_array, (image_size, image_size))
    data.append([new_img_array, image_label])

In [12]:
#inspect one [image, label] pair produced by the preprocessing loop
data[4]

[array([[177, 180, 180, ..., 120, 131, 139],
        [178, 180, 178, ...,  92, 108, 121],
        [177, 177, 175, ...,  69,  85,  98],
        ...,
        [153, 156, 158, ..., 188, 189, 189],
        [152, 154, 157, ..., 188, 189, 190],
        [150, 153, 156, ..., 189, 190, 191]], dtype=uint8),
 0]

In [13]:
#shuffling the data in place.
#A generator with a fixed seed makes the order identical on every run, so results
#can be reproduced after a kernel restart; it also leaves numpy's global random state untouched.
np.random.default_rng(42).shuffle(data)

In [14]:
#separate images and labels: each entry of data is an [image, label] pair,
#so collect the first items into x and the second items into y as numpy arrays
x = np.array([pair[0] for pair in data])
y = np.array([pair[1] for pair in data])

In [19]:
#count how many samples carry each distinct label value
np.unique(y, return_counts=True)

(array([0, 1], dtype=int64), array([5058, 4162], dtype=int64))

In [20]:
#add a trailing single-channel axis so each sample has shape (100, 100, 1)
x = np.reshape(x, (-1, 100, 100, 1))

In [21]:
#hold out 20% of the samples for validation; the fixed random_state keeps the split repeatable
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [26]:
#scale pixel values to the [0, 1] range by dividing by 255.
#Only arrays that still hold raw uint8 pixels are scaled, so re-running this cell
#cannot divide the data a second time; float32 uses half the memory of the
#float64 array that a plain division would produce.
if X_train.dtype == np.uint8:
    X_train = X_train.astype(np.float32) / 255
if X_val.dtype == np.uint8:
    X_val = X_val.astype(np.float32) / 255

In [27]:
# Defining the model: two Conv2D + MaxPooling2D stages followed by a small dense classifier
cnn = tf.keras.models.Sequential()

# convolutional stages operating on 100x100 single-channel input
cnn.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(100, 100, 1)))
cnn.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
cnn.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu'))
cnn.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))

# classifier head: flatten, one hidden dense layer, one sigmoid output unit
cnn.add(tf.keras.layers.Flatten())
cnn.add(tf.keras.layers.Dense(64, activation='relu'))
cnn.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [28]:
#configure training: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric
cnn.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [64]:
#train on the training split for 5 epochs using batches of 10 samples
cnn.fit(
    x=X_train,
    y=y_train,
    batch_size=10,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x2cdd51c0b80>

In [65]:
#score the trained model on the held-out validation split
cnn.evaluate(x=X_val, y=y_val)



[0.6598443388938904, 0.9034706950187683]

In [31]:
#read the CSV that lists the test image filenames and preview its first rows
test_labels = pd.read_csv(filepath_or_buffer="C:/Users/Dell/content/eye_gender_data/Testing_set.csv")
test_labels.head(n=5)

Unnamed: 0,filename
0,Image_1.jpg
1,Image_2.jpg
2,Image_3.jpg
3,Image_4.jpg
4,Image_5.jpg


In [32]:
#pair every test filename with its full path inside the test folder
file_paths = [
    [image_name, 'C:/Users/Dell/content/eye_gender_data/test/' + image_name]
    for image_name in test_labels['filename']
]

In [33]:
#verifying that every listed test image is actually present on disk.
#file_paths is derived from test_labels, so the two lengths are equal by construction;
#checking that each path resolves to a real file is the check that can fail.
missing_test_files = [path for _, path in file_paths if not os.path.isfile(path)]
if not missing_test_files:
    print('Number of image names i.e. ', len(test_labels), 'matches the number of file paths i.e. ', len(file_paths))
else:
    #report how many files are missing and show a few of them to ease debugging
    print('Number of image names does not match the number of filepaths:',
          len(missing_test_files), 'image file(s) not found, e.g.', missing_test_files[:5])

Number of image names i.e.  2305 matches the number of file paths i.e.  2305


In [34]:
#build a dataframe with one row per test image: its filename and its full path
test_images = pd.DataFrame(data=file_paths, columns=['filename', 'filepaths'])
test_images.head(n=5)

Unnamed: 0,filename,filepaths
0,Image_1.jpg,C:/Users/Dell/content/eye_gender_data/test/Ima...
1,Image_2.jpg,C:/Users/Dell/content/eye_gender_data/test/Ima...
2,Image_3.jpg,C:/Users/Dell/content/eye_gender_data/test/Ima...
3,Image_4.jpg,C:/Users/Dell/content/eye_gender_data/test/Ima...
4,Image_5.jpg,C:/Users/Dell/content/eye_gender_data/test/Ima...


In [35]:
#preprocessing test data
testing_data = [] #list of resized grayscale test images
image_size = 100 #every image is resized to image_size x image_size pixels
for filepath in test_images['filepaths']:
    img_array = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE) #load as a single-channel grayscale image
    if img_array is None:
        #cv2.imread returns None instead of raising when a file is missing or cannot be decoded;
        #stop here with the offending path rather than failing later inside cv2.resize
        raise FileNotFoundError('Could not read image (missing or corrupt file): ' + str(filepath))
    new_img_array = cv2.resize(img_array, (image_size, image_size))
    testing_data.append(new_img_array)

In [36]:
#turn the list of test images into a single numpy array
testing_data = np.asarray(testing_data)

In [37]:
#add a trailing single-channel axis so each test sample has shape (100, 100, 1)
testing_data = np.reshape(testing_data, (-1, 100, 100, 1))

In [49]:
#scale test pixel values to the [0, 1] range by dividing by 255.
#Only an array that still holds raw uint8 pixels is scaled, so re-running this cell
#cannot divide the data a second time; float32 uses half the memory of the
#float64 array that a plain division would produce.
if testing_data.dtype == np.uint8:
    testing_data = testing_data.astype(np.float32) / 255

In [66]:
#run the trained model on the preprocessed test images
pred = cnn.predict(x=testing_data)

In [67]:
#inspect the raw model output for one test image
pred[8]

array([0.05805859], dtype=float32)

In [71]:
#turn each model output into a class label: 0 when the value is below 0.75, otherwise 1
#NOTE(review): 0.75 is stricter than the usual 0.5 cut-off for a sigmoid output - confirm it is intentional
prediction = np.where(pred.ravel() < 0.75, 0, 1).tolist()

In [72]:
#inspect the class label assigned to one test image
prediction[5]

0

In [73]:
#pair each test filename with its predicted label and write the table to CSV without the index column
res = test_images[['filename']].assign(label=prediction)
res.to_csv("sub_morph4.csv", index=False)

In [None]:
#the CSV file obtained above was then explicitly converted into the format accepted for submission