In [10]:
import numpy as np
import pandas as pd
import os
import cv2
import random
import tensorflow as tf
from tensorflow import keras
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

# Seed Python's, NumPy's and TensorFlow's random number generators with the
# same value so that repeated runs start from the same random state
SEED = 42
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(SEED)

# Read the labeled dataset from its CSV file and randomize the row order
labeled_csv_path = '/kaggle/input/labeled-data12/Labeled-set - Sheet1 (12).csv'
data = shuffle(pd.read_csv(labeled_csv_path))

# Define image dimensions
img_width, img_height = 128, 128

# Load and preprocess the labeled images.
# Each row of `data` supplies a file name ('id') inside the image folder and
# its label ('class'). Every image is resized to img_width x img_height and
# scaled to [0, 1]; the results are stacked into X, with labels in y.
train_image_folder = '/kaggle/input/hat-or-not-hat-that-is-the-question/test_set/test_set'
X = []
y = []

# Only the 'id' and 'class' columns are needed, so iterate over them directly
for image_id, label in zip(data['id'], data['class']):
    img_path = os.path.join(train_image_folder, image_id)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals a missing or undecodable file by returning None
        # instead of raising; fail here with the offending path rather than
        # with an opaque assertion inside cv2.resize.
        raise OSError(f"Could not read image file: {img_path}")
    # cv2.resize takes the target size as (width, height)
    img = cv2.resize(img, (img_width, img_height))
    X.append(img / 255.0)  # Normalize pixel values to [0, 1]
    y.append(label)

X = np.array(X)
y = np.array(y)

# Hold out 20% of the samples for validation; the fixed random_state keeps
# the partition the same from run to run
split_arrays = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = split_arrays

# Create a CNN model: two Conv2D + MaxPooling2D stages, followed by a dense
# layer and a single sigmoid output unit (one probability per image).
# The images are produced by cv2.resize(img, (img_width, img_height)), which
# returns arrays of shape (img_height, img_width, 3), so the input shape is
# given rows (height) first.
model = keras.Sequential([
    keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(img_height, img_width, 3)),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Conv2D(64, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D((2, 2)),
    keras.layers.Flatten(),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])

# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# reported as the metric
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit on the training split for 10 epochs with batches of 32 samples, passing
# the validation split so it is evaluated alongside training
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_val, y_val),
)

# Write the trained weights to disk
model.save_weights('hat_classification_model.h5')

# Predict a class for every file in the test image folder and write the
# results to 'test_predictions.csv' with columns 'id' (file name without its
# extension) and 'class' ("Hat" / "No Hat").
test_image_folder = '/kaggle/input/hat-or-not-hat-that-is-the-question/test_set/test_set'

# List the folder exactly once and derive both the ids and the predictions
# from that single listing. os.listdir returns entries in arbitrary order, so
# two separate calls are not guaranteed to line up; sorting additionally makes
# the output file deterministic.
test_files = sorted(os.listdir(test_image_folder))
test_ids = [os.path.splitext(image_id)[0] for image_id in test_files]
test_predictions = []

# Run the model on small batches instead of one predict() call per image
predict_batch_size = 32
for start in range(0, len(test_files), predict_batch_size):
    batch = []
    for image_id in test_files[start:start + predict_batch_size]:
        img_path = os.path.join(test_image_folder, image_id)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None for files it cannot read or decode
            raise OSError(f"Could not read image file: {img_path}")
        # Same preprocessing as for the training images
        img = cv2.resize(img, (img_width, img_height))
        batch.append(img / 255.0)  # Normalize pixel values to [0, 1]

    # predict() returns one sigmoid output per image; flatten to 1-D scores
    scores = model.predict(np.array(batch), verbose=0).ravel()
    test_predictions.extend("Hat" if score >= 0.5 else "No Hat" for score in scores)

# Create a DataFrame for the test predictions
test_data = {'id': test_ids, 'class': test_predictions}
test_df = pd.DataFrame(test_data)

# Save the test predictions to a CSV file
test_df.to_csv('test_predictions.csv', index=False)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
