In [3]:

# Dataset root: the 'train', 'validation' and 'test' subdirectories used by
# every step below are joined onto this path.
# NOTE: the run output recorded at the end of this file failed with
# FileNotFoundError for '<base_dir>/train', so this directory must exist (and
# contain the splits) before the script is run.
base_dir = "/content/cats_vs_dogs_small"

import os
import random
import shutil
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications import VGG16

# STEP 1: baseline convnet trained from scratch with light augmentation.
# NOTE(review): the banner says 1,000 training images, but steps_per_epoch=15
# at batch_size=20 draws 300 images per epoch -- confirm which is intended.
print("\nStep 1: Train from scratch with 1,000 training images")

# Locations of the three dataset splits under base_dir.
train_dir, validation_dir, test_dir = (
    os.path.join(base_dir, _split) for _split in ('train', 'validation', 'test')
)

# Training images are multiplied by 1/255 and randomly perturbed; validation
# and test images are only multiplied by 1/255.
_augment_s1 = dict(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
)
train_datagen_s1 = ImageDataGenerator(rescale=1./255, **_augment_s1)
test_val_datagen_s1 = ImageDataGenerator(rescale=1./255)

# All three directory iterators share the same image size, batch size and
# binary label mode.
_flow_args_s1 = dict(target_size=(150, 150), batch_size=20, class_mode='binary')
train_gen_s1 = train_datagen_s1.flow_from_directory(train_dir, **_flow_args_s1)
val_gen_s1 = test_val_datagen_s1.flow_from_directory(validation_dir, **_flow_args_s1)
test_gen_s1 = test_val_datagen_s1.flow_from_directory(test_dir, **_flow_args_s1)

# Three conv/pool stages (32 -> 64 -> 128 filters) followed by a dense head
# with dropout and a single sigmoid output unit.
_layers_s1 = [
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D(2, 2),
]
for _n_filters in (64, 128):
    _layers_s1.append(layers.Conv2D(_n_filters, (3, 3), activation='relu'))
    _layers_s1.append(layers.MaxPooling2D(2, 2))
_layers_s1.extend([
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid'),
])
model_s1 = models.Sequential(_layers_s1)

model_s1.compile(
    optimizer=optimizers.Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# 15 epochs of 15 training batches each, validating on 15 batches per epoch.
history_s1 = model_s1.fit(
    train_gen_s1,
    epochs=15,
    steps_per_epoch=15,
    validation_data=val_gen_s1,
    validation_steps=15,
)

# Report accuracy on the test split.
test_loss_s1, test_acc_s1 = model_s1.evaluate(test_gen_s1)
print(f"Step 1 Test Accuracy: {test_acc_s1:.4f}")

# STEP 2: same from-scratch convnet as step 1, with stronger augmentation,
# 25 batches per epoch and 20 epochs.
# NOTE(review): the banner says "MORE training images", yet this step reads
# the same 'train' directory as step 1 -- confirm the dataset is meant to
# differ, or that the larger steps_per_epoch is what "more" refers to.
print("\nStep 2: Train from scratch with MORE training images")

train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')

# Augmentation applies to the training stream only; validation and test
# images are just multiplied by 1/255.
train_datagen_s2 = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    rotation_range=30,
    shear_range=0.2,
    zoom_range=0.3,
    width_shift_range=0.2,
    height_shift_range=0.2,
)
test_val_datagen_s2 = ImageDataGenerator(rescale=1./255)


def _flow_s2(datagen, directory):
    """Return a 150x150, batch-of-20, binary-label iterator over directory."""
    return datagen.flow_from_directory(
        directory,
        target_size=(150, 150),
        batch_size=20,
        class_mode='binary'
    )


train_gen_s2 = _flow_s2(train_datagen_s2, train_dir)
val_gen_s2 = _flow_s2(test_val_datagen_s2, validation_dir)
test_gen_s2 = _flow_s2(test_val_datagen_s2, test_dir)

# Convolutional feature extractor: 32 -> 64 -> 128 filters, each stage
# followed by max pooling.
_features_s2 = [
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D(2, 2),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),
]
# Classifier head: flatten, 512-unit dense layer, dropout, sigmoid output.
_head_s2 = [
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid'),
]
model_s2 = models.Sequential(_features_s2 + _head_s2)

model_s2.compile(
    optimizer=optimizers.Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

history_s2 = model_s2.fit(
    train_gen_s2,
    epochs=20,
    steps_per_epoch=25,
    validation_data=val_gen_s2,
    validation_steps=25,
)

# Report accuracy on the test split.
test_loss_s2, test_acc_s2 = model_s2.evaluate(test_gen_s2)
print(f"Step 2 Test Accuracy (larger training set): {test_acc_s2:.4f}")

#Step 3
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import EarlyStopping

# Step 3 setup: dataset root and its three splits.
base_dir = "/content/cats_vs_dogs_small"
train_dir, validation_dir, test_dir = [
    os.path.join(base_dir, _split_s3)
    for _split_s3 in ('train', 'validation', 'test')
]

# Same augmentation ranges as the step-2 pipeline; validation and test images
# are only multiplied by 1/255.
_augment_s3 = {
    'rotation_range': 30,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.3,
    'horizontal_flip': True,
}
train_datagen_s3 = ImageDataGenerator(rescale=1./255, **_augment_s3)
test_val_datagen_s3 = ImageDataGenerator(rescale=1./255)

# Shared iterator settings: 150x150 images, batches of 20, binary labels.
_flow_args_s3 = {'target_size': (150, 150), 'batch_size': 20, 'class_mode': 'binary'}
train_gen_s3 = train_datagen_s3.flow_from_directory(train_dir, **_flow_args_s3)
val_gen_s3 = test_val_datagen_s3.flow_from_directory(validation_dir, **_flow_args_s3)
test_gen_s3 = test_val_datagen_s3.flow_from_directory(test_dir, **_flow_args_s3)

# From-scratch convnet: three conv/pool stages, then a dense head with
# dropout and a single sigmoid output unit.
model_s3 = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D(2, 2),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D(2, 2),
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid'),
])

model_s3.compile(
    optimizer=optimizers.Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Stop once val_loss has gone 3 epochs without improving and restore the
# best weights seen.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Up to 20 epochs of 25 training batches, validating on 25 batches per epoch.
_fit_args_s3 = dict(
    steps_per_epoch=25,
    epochs=20,
    validation_data=val_gen_s3,
    validation_steps=25,
    callbacks=[early_stop],
)
history_s3 = model_s3.fit(train_gen_s3, **_fit_args_s3)

# Report accuracy on the test split.
test_loss_s3, test_acc_s3 = model_s3.evaluate(test_gen_s3)
print(f"Step 3 Test Accuracy (optimized training size): {test_acc_s3:.4f}")

#STEP 4
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications import VGG16

# Step 4: transfer learning with a frozen, ImageNet-pretrained VGG16 base.
# Imported here because only this step needs VGG16's input preprocessing.
from tensorflow.keras.applications.vgg16 import preprocess_input

base_dir = "/content/cats_vs_dogs_small"
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')

# The ImageNet weights expect inputs prepared by vgg16.preprocess_input, so
# that function replaces the 1/255 rescale used by the from-scratch steps.
# Rescaling to 1/255 instead would feed the frozen base inputs unlike those it
# was trained on.
train_datagen_s4 = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.3,
    horizontal_flip=True
)
# Validation and test images get the same preprocessing, without augmentation.
test_val_datagen_s4 = ImageDataGenerator(preprocessing_function=preprocess_input)

train_gen_s4 = train_datagen_s4.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary'
)
val_gen_s4 = test_val_datagen_s4.flow_from_directory(
    validation_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary'
)
test_gen_s4 = test_val_datagen_s4.flow_from_directory(
    test_dir,
    target_size=(150, 150),
    batch_size=20,
    class_mode='binary'
)

# Convolutional base without its classifier top; frozen so that only the new
# dense head is trained.
conv_base = VGG16(weights='imagenet', include_top=False, input_shape=(150, 150, 3))
conv_base.trainable = False

# New classifier head on top of the frozen base: flatten, 256-unit dense
# layer, dropout, single sigmoid output unit.
model_s4 = models.Sequential([
    conv_base,
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])

model_s4.compile(
    loss='binary_crossentropy',
    optimizer=optimizers.Adam(learning_rate=1e-4),
    metrics=['accuracy']
)

# Stop once val_loss has gone 3 epochs without improving and restore the
# best weights seen.
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Up to 20 epochs of 25 training batches, validating on 25 batches per epoch.
history_s4 = model_s4.fit(
    train_gen_s4,
    steps_per_epoch=25,
    epochs=20,
    validation_data=val_gen_s4,
    validation_steps=25,
    callbacks=[early_stop]
)

# Report accuracy on the test split.
test_loss_s4, test_acc_s4 = model_s4.evaluate(test_gen_s4)
print(f"Step 4 Test Accuracy (Pretrained VGG16): {test_acc_s4:.4f}")

import matplotlib.pyplot as plt

def plot_all_steps(histories, titles):
    """Plot training and validation curves for several runs on one figure.

    Two stacked panels are drawn on a single 16x10 figure: accuracy on top
    and loss below. Each run contributes a solid line for its training
    metric and a dashed line for the matching ``val_`` metric, then the
    figure is shown.

    Args:
        histories: Objects whose ``.history`` mapping holds the 'accuracy',
            'val_accuracy', 'loss' and 'val_loss' series.
        titles: Legend prefixes, paired with ``histories`` by position.
    """
    # (subplot index, metric key, panel title, y-axis label)
    panels = (
        (1, 'accuracy', 'Model Accuracy Across Steps', 'Accuracy'),
        (2, 'loss', 'Model Loss Across Steps', 'Loss'),
    )

    plt.figure(figsize=(16, 10))

    for position, metric, heading, y_label in panels:
        plt.subplot(2, 1, position)
        for run, name in zip(histories, titles):
            plt.plot(run.history[metric], label=f'{name} - Train')
            plt.plot(run.history['val_' + metric], linestyle='--', label=f'{name} - Val')
        plt.title(heading)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.show()

# Overlay the learning curves of all four experiments, pairing each training
# history with its legend label.
_runs = [
    (history_s1, "Step 1: Scratch (1,000 images)"),
    (history_s2, "Step 2: Scratch (More images)"),
    (history_s3, "Step 3: Scratch (Optimized size)"),
    (history_s4, "Step 4: Pretrained VGG16"),
]
plot_all_steps(
    histories=[_run_history for _run_history, _ in _runs],
    titles=[_run_title for _, _run_title in _runs],
)




Step 1: Train from scratch with 1,000 training images


FileNotFoundError: [Errno 2] No such file or directory: '/content/cats_vs_dogs_small/train'