In [4]:
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models

base_dir = 'wec-intelligence-sig-2024-recruitment-task-cv/train_images'
train_dir = os.path.join(base_dir, 'train_images')

# Image parameters and Batch size
img_height, img_width = 250, 250
batch_size = 32

# Data generators with Augmentation details
# Validation split is kept as 0.2
train_datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    rescale=1.0 / 255.0,
    validation_split=0.2 
)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary', 
    subset='training',   
    seed=42              
)

validation_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary', 
    subset='validation', 
    seed=42          
)

Found 1280 images belonging to 2 classes.
Found 320 images belonging to 2 classes.


In [7]:
from tensorflow.keras import initializers

# CNN Model Architecture
model = models.Sequential()

# Weight initialisation is set to he
# First tried without using any weight initialisation but it was randomly guessing and the val accuracy was hovering between 47 to 52.
# After it increased to 80 in one random training thought abt weight initialisations and tried he
model.add(layers.Conv2D(32, (3, 3), activation='relu', 
                        kernel_initializer=initializers.HeNormal(), 
                        input_shape=(img_height, img_width, 3)))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

model.add(layers.Conv2D(64, (3, 3), activation='relu', 
                        kernel_initializer=initializers.HeNormal()))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

model.add(layers.Conv2D(128, (3, 3), activation='relu', 
                        kernel_initializer=initializers.HeNormal()))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu', 
                       kernel_initializer=initializers.HeNormal()))
model.add(layers.Dense(1, activation='sigmoid'))

# Compilation
# Used adam at first but achieved better accuracy in RMSprop
model.compile(optimizer='RMSprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.summary()

In [27]:
# Training
# Has already been trained for abt 35 epochs before this.
# Kept running the train for 10 epoch per train to fine tune. Early Stopping could have been tried but since the val loss was fluctuating a lot in the beginning had doubts abt what patience to use so did it manually.
epochs = 5
history = model.fit(
    train_generator,
    epochs=epochs,
    validation_data=validation_generator
)

Epoch 1/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 3s/step - accuracy: 0.9462 - loss: 0.1567 - val_accuracy: 0.9281 - val_loss: 0.1529
Epoch 2/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 2s/step - accuracy: 0.9585 - loss: 0.1194 - val_accuracy: 0.9438 - val_loss: 0.1357
Epoch 3/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 2s/step - accuracy: 0.9375 - loss: 0.1777 - val_accuracy: 0.9563 - val_loss: 0.1336
Epoch 4/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 2s/step - accuracy: 0.9422 - loss: 0.1387 - val_accuracy: 0.9625 - val_loss: 0.1190
Epoch 5/5
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m102s[0m 2s/step - accuracy: 0.9592 - loss: 0.1352 - val_accuracy: 0.9656 - val_loss: 0.0885


In [28]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array

test_dir = 'wec-intelligence-sig-2024-recruitment-task-cv/test_images/test_images' 

# Image parameters
img_height, img_width = 250, 250
batch_size = 32

def load_and_preprocess_image(image_path):
    img = load_img(image_path, target_size=(img_height, img_width))
    img_array = img_to_array(img) 
    img_array = img_array / 255.0
    return img_array

results = {'ID': [], 'TARGET': []}

for filename in os.listdir(test_dir):
    if filename.endswith('.jpg') or filename.endswith('.png'):
        image_id = filename.split('_')[-1].split('.')[0] 
        image_path = os.path.join(test_dir, filename)

        img_array = load_and_preprocess_image(image_path)
        img_array = np.expand_dims(img_array, axis=0)

        prediction = model.predict(img_array)
        predicted_class = 1 if prediction[0][0] > 0.5 else 0

        results['ID'].append(image_id)
        results['TARGET'].append(predicted_class)

results_df = pd.DataFrame(results)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63

In [29]:
#Since fake train directory is first while using flow from directory it is labeled as 0 and real is labeled as 1 so I swap it to get the correct labels.
results_df['TARGET']=1-results_df['TARGET']

In [30]:
results_df['TARGET'].value_counts()

TARGET
1    240
0    160
Name: count, dtype: int64

In [31]:
results_df

Unnamed: 0,ID,TARGET
0,1,0
1,10,0
2,100,0
3,101,1
4,102,1
...,...,...
395,95,0
396,96,1
397,97,0
398,98,1


In [32]:
results_df.to_csv('output_tf2.csv', index=False)

In [26]:
# Save the model in HDF5 format
model.save('tf_model.h5')

