# Kaggle 4

## Imports

In [None]:
import os
import numpy as np
import pandas as pd

from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.preprocessing.image import ImageDataGenerator

## Paths to Images

In [None]:
# Root folders of the training and test images (relative paths, resolved
# against the notebook's working directory).
train_data_dir, test_data_dir = 'images_train', 'images_test'

In [None]:
# VGG16 convolutional base initialised with ImageNet weights. The original
# fully connected classifier is left off (include_top=False) so a new head can
# be attached; inputs are 150x150 images with 3 channels.
base_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(150, 150, 3),
)

In [None]:
# Keep every pre-trained VGG16 layer fixed during training so that only the
# layers added on top of it are updated.
for pretrained_layer in base_model.layers:
    pretrained_layer.trainable = False

In [None]:
# Classification head stacked on the convolutional features:
#   1. global average pooling reduces each feature map to a single value,
#   2. a 256-unit ReLU dense layer,
#   3. a single sigmoid unit giving the binary-class probability.
pooled_features = GlobalAveragePooling2D()(base_model.output)
hidden = Dense(units=256, activation='relu')(pooled_features)
predictions = Dense(units=1, activation='sigmoid')(hidden)

In [None]:
# Assemble the end-to-end network: it takes the VGG16 base's input tensor and
# emits the output of the custom sigmoid head.
model = Model(
    inputs=base_model.input,
    outputs=predictions,
)

In [None]:
# Configure training: binary cross-entropy matches the single sigmoid output,
# optimised with Adam (default settings); accuracy is reported as a metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Define the data generators for training and testing.
# The ImageNet VGG16 weights expect inputs prepared by `vgg16.preprocess_input`
# (RGB -> BGR plus per-channel ImageNet mean subtraction, no scaling). A plain
# 1/255 rescale feeds the frozen convolutional base values far outside the
# range it was trained on, so use the matching preprocessing function instead.
# The same preprocessing must be applied to both training and test images.
from tensorflow.keras.applications.vgg16 import preprocess_input

train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [None]:
# Stream labelled training batches from train_data_dir: images are resized to
# 150x150, delivered 32 at a time, with binary (0/1) labels that Keras derives
# from the class subfolders.
train_generator = train_datagen.flow_from_directory(
    directory=train_data_dir,
    class_mode='binary',
    batch_size=32,
    target_size=(150, 150),
)

In [None]:
# Stream unlabelled test batches in a fixed order. class_mode=None yields
# images only (no labels are available), and shuffle=False keeps the prediction
# rows aligned with test_generator.filenames.
# NOTE: per the Keras docs, even with class_mode=None the images still have to
# sit inside a subfolder of test_data_dir to be discovered.
test_generator = test_datagen.flow_from_directory(
    directory=test_data_dir,
    shuffle=False,
    class_mode=None,
    batch_size=32,
    target_size=(150, 150),
)

## Model

In [None]:
# Fit the model on the training generator for 10 epochs. Only training data is
# used; no validation data is supplied.
model.fit(x=train_generator, epochs=10)

In [None]:
# # Create a Sequential model
# model = Sequential()

# # Add convolutional layers
# model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(img_width, img_height, 3)))
# model.add(MaxPooling2D((2, 2)))
# model.add(Conv2D(64, (3, 3), activation='relu'))
# model.add(MaxPooling2D((2, 2)))
# model.add(Conv2D(128, (3, 3), activation='relu'))
# model.add(MaxPooling2D((2, 2)))
# model.add(Conv2D(128, (3, 3), activation='relu'))
# model.add(MaxPooling2D((2, 2)))

In [None]:
# # Flatten the output for fully connected layers
# model.add(Flatten())

In [None]:
# # Add fully connected layers
# model.add(Dense(512, activation='relu'))
# model.add(Dense(1, activation='sigmoid'))

In [None]:
# # Compile the model
# model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

# # Compile the model with SGD optimizer
# #model.compile(optimizer=SGD(learning_rate=0.01, momentum=0.9), loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# # Train the model
# model.fit(train_generator, epochs=10)

## Test Predictions

In [None]:
# Run the trained model over every test batch and collect its sigmoid outputs.
# NOTE: this rebinds `predictions`, which earlier named the model's output
# tensor. Re-running the model-creation cell after this one would pass these
# prediction values to Model(...) instead of the tensor.
predictions = model.predict(x=test_generator)

In [None]:
# Relative paths of the test images, in the order the generator yields them
# (shuffle=False), so they line up with the rows of the prediction array.
test_filenames = test_generator.filenames

# Each image ID is the integer at the start of the file name, e.g.
# '<subfolder>/123.jpg' -> 123. os.path.basename strips the folder part using
# the platform's own separator (Keras builds these paths with the OS
# separator, so splitting on '/' breaks on Windows) and also copes with any
# depth of subfolder nesting.
image_ids = [
    int(os.path.basename(filename).split('.')[0])
    for filename in test_filenames
]

In [None]:
# Turn the sigmoid outputs into hard labels for the submission
# (intended encoding: 1 for kitchen sink, 0 for bathroom sink).
#   - np.round maps each probability to the nearest integer (0 or 1),
#   - reshape(-1) flattens the model's one-column output to a 1-D vector,
#   - `1 - ...` flips the labels.
# NOTE(review): the flip presumes the class indices Keras assigned to the
# training folders are the opposite of the submission encoding; confirm
# against train_generator.class_indices.
rounded = np.round(predictions)
raw_labels = rounded.astype(int).reshape(-1)
predicted_classes = 1 - raw_labels

## Create CSV

In [None]:
# Submission table: one row per test image, pairing its ID with its predicted
# class. Columns appear in the order 'id', 'class'.
submission_columns = {
    'id': image_ids,
    'class': predicted_classes,
}
df = pd.DataFrame(submission_columns)

In [None]:
# Write the submission table to disk; index=False omits the DataFrame's row
# index so the file contains only the 'id' and 'class' columns.
df.to_csv(path_or_buf='image_pretrained.csv', index=False)