In [9]:
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models

# Set directories
base_dir = 'wec-intelligence-sig-2024-recruitment-task-cv/train_images'
train_dir = os.path.join(base_dir, 'train_images')

# Image parameters
img_height, img_width = 250, 250  # Change according to your image size
batch_size = 32

# Data generators for training and validation with split ratio of 0.2
train_datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    rescale=1.0 / 255.0,
    validation_split=0.2  # Use 20% for validation
)

# Create train generator
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',  # Binary classification
    subset='training',     # Set as training data
    seed=42               # Seed for reproducibility
)

# Create validation generator
validation_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',  # Binary classification
    subset='validation',   # Set as validation data
    seed=42               # Seed for reproducibility
)

Found 1280 images belonging to 2 classes.
Found 320 images belonging to 2 classes.


In [10]:
# Build CNN model
model = models.Sequential()

# Convolutional layer + MaxPooling layer
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Flatten the output
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))  # Binary output

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.summary()

In [11]:
# Train the model
epochs = 10
history = model.fit(
    train_generator,
    epochs=epochs,
    validation_data=validation_generator
)

Epoch 1/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 3s/step - accuracy: 0.4961 - loss: 0.7796 - val_accuracy: 0.5031 - val_loss: 0.6897
Epoch 2/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 3s/step - accuracy: 0.5225 - loss: 0.6921 - val_accuracy: 0.6344 - val_loss: 0.6506
Epoch 3/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 3s/step - accuracy: 0.6206 - loss: 0.6571 - val_accuracy: 0.6094 - val_loss: 0.6831
Epoch 4/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 3s/step - accuracy: 0.5788 - loss: 0.6691 - val_accuracy: 0.6625 - val_loss: 0.6521
Epoch 5/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 3s/step - accuracy: 0.6493 - loss: 0.6430 - val_accuracy: 0.7188 - val_loss: 0.5487
Epoch 6/10
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 2s/step - accuracy: 0.6922 - loss: 0.5603 - val_accuracy: 0.7344 - val_loss: 0.5965
Epoch 7/10
[1m40/40[0m [32m━━━━━

In [12]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array

# Set the test directory path
test_dir = 'wec-intelligence-sig-2024-recruitment-task-cv/test_images/test_images'  # Change this to your actual test directory path

# Image parameters
img_height, img_width = 250, 250  # Same as used in training
batch_size = 32

# Function to load and preprocess test images
def load_and_preprocess_image(image_path):
    img = load_img(image_path, target_size=(img_height, img_width))  # Load image
    img_array = img_to_array(img)  # Convert to array
    img_array = img_array / 255.0  # Rescale to [0, 1]
    return img_array

# Prepare the DataFrame to hold image IDs and predictions
results = {'ID': [], 'TARGET': []}

# Iterate through test images in the test directory
for filename in os.listdir(test_dir):
    if filename.endswith('.jpg') or filename.endswith('.png'):  # Adjust extensions as needed
        # Extract image ID from filename (assuming format 'test_image_imageid')
        image_id = filename.split('_')[-1].split('.')[0]  # Extract the image ID part
        image_path = os.path.join(test_dir, filename)

        # Load and preprocess the image
        img_array = load_and_preprocess_image(image_path)
        img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension

        # Make prediction
        prediction = model.predict(img_array)
        predicted_class = 1 if prediction[0][0] > 0.5 else 0  # Threshold for binary classification

        # Append the results
        results['ID'].append(image_id)
        results['TARGET'].append(predicted_class)

# Convert results into a DataFrame
results_df = pd.DataFrame(results)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 202ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4

In [16]:
#Since fake train directory is first while using flow from directory it is labeled as 0 and real is labeled as 1 so I swap it to get the correct labels.
results_df['TARGET']=1-results_df['TARGET']

In [17]:
results_df['TARGET'].value_counts()

TARGET
1    279
0    121
Name: count, dtype: int64

In [19]:
results_df

Unnamed: 0,ID,TARGET
0,1,1
1,10,1
2,100,0
3,101,1
4,102,1
...,...,...
395,95,1
396,96,1
397,97,0
398,98,1


In [20]:
results_df.to_csv('output_tf1.csv', index=False)