## <b>Simple CNN</b>

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Paths to your data
train_dir = r"C:\Users\ASUS\.cache\kagglehub\datasets\samuelcortinhas\cats-and-dogs-image-classification\versions\4\train"
test_dir  = r"C:\Users\ASUS\.cache\kagglehub\datasets\samuelcortinhas\cats-and-dogs-image-classification\versions\4\test"

# Image data generators.
# Random augmentation is applied to the training subset only.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,  # split train into train + validation
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

# Validation images are only rescaled, never augmented, so validation metrics
# are measured on unmodified images. validation_split must match the value used
# by train_datagen so both subsets are carved out at the same split point.
val_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

test_datagen = ImageDataGenerator(rescale=1./255)

# Generators
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
    subset='training'
)

validation_generator = val_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
    subset='validation'
)

# shuffle=False keeps the test order stable so predictions line up with filenames.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

# Build CNN model: three Conv/MaxPool stages followed by a dense classifier.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
model.add(MaxPooling2D(2, 2))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))  # single probability for binary classification

# Compile the model
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train the model.
# Step counts use floor division, so a trailing partial batch is not counted.
train_steps = train_generator.samples // train_generator.batch_size
val_steps = validation_generator.samples // validation_generator.batch_size
history = model.fit(
    train_generator,
    epochs=15,
    steps_per_epoch=train_steps,
    validation_data=validation_generator,
    validation_steps=val_steps,
)

# Evaluate on test set
test_loss, test_acc = model.evaluate(test_generator)
print(f"Test Accuracy: {test_acc*100:.2f}%")

## <b>CNN + Transformer Hybrid</b>

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import os

# -------------------------------
# Paths
# -------------------------------
train_dir = r"C:\Users\ASUS\.cache\kagglehub\datasets\samuelcortinhas\cats-and-dogs-image-classification\versions\4\train"
test_dir  = r"C:\Users\ASUS\.cache\kagglehub\datasets\samuelcortinhas\cats-and-dogs-image-classification\versions\4\test"

# -------------------------------
# Data Augmentation
# -------------------------------
# Random augmentation is applied to the training subset only.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2
)

# Validation images are only rescaled, never augmented, so validation metrics
# are measured on unmodified images. validation_split must match the value used
# by train_datagen so both subsets are carved out at the same split point.
val_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
    subset='training'
)

validation_generator = val_datagen.flow_from_directory(
    train_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
    subset='validation'
)

# shuffle=False keeps the test order stable so predictions line up with filenames.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(150, 150),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

# -------------------------------
# Transfer Learning: MobileNetV2
# -------------------------------
# NOTE(review): 150x150 is presumably not one of the resolutions MobileNetV2
# ships ImageNet weights for; Keras is expected to warn and fall back to the
# 224x224 weights. Confirm, or switch the generators and this shape to 160x160.
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(150,150,3))
base_model.trainable = False  # Freeze pre-trained layers

inputs = tf.keras.Input(shape=(150, 150, 3))
# The generators in this cell deliver pixels in [0, 1] (rescale=1./255), but the
# MobileNetV2 ImageNet weights were trained on inputs in [-1, 1]. Map the range
# inside the model so the saved model carries its own preprocessing.
x = tf.keras.layers.Rescaling(scale=2.0, offset=-1.0)(inputs)
# training=False keeps the frozen BatchNormalization layers in inference mode.
x = base_model(x, training=False)

# Classification head on top of the frozen feature extractor.
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(1, activation='sigmoid')(x)

model = Model(inputs=inputs, outputs=output)

# Compile model
model.compile(optimizer=Adam(learning_rate=1e-4),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# -------------------------------
# Train the model
# -------------------------------
# Step counts use floor division, so a trailing partial batch is not counted.
train_steps = train_generator.samples // train_generator.batch_size
val_steps = validation_generator.samples // validation_generator.batch_size
history = model.fit(
    train_generator,
    epochs=15,
    steps_per_epoch=train_steps,
    validation_data=validation_generator,
    validation_steps=val_steps,
)

# -------------------------------
# Evaluate on test set
# -------------------------------
test_metrics = model.evaluate(test_generator)
test_loss, test_acc = test_metrics
print(f"Test Accuracy: {test_acc*100:.2f}%")

# -------------------------------
# Optional: Save model
# -------------------------------
# Written to the current working directory in HDF5 format.
model.save("cats_vs_dogs_mobilenetv2.h5")

## <b>Vision Transformer (ViT)</b>

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

# -------------------------------
# Paths
# -------------------------------
train_dir = r"C:\Users\ASUS\.cache\kagglehub\datasets\samuelcortinhas\cats-and-dogs-image-classification\versions\4\train"
test_dir  = r"C:\Users\ASUS\.cache\kagglehub\datasets\samuelcortinhas\cats-and-dogs-image-classification\versions\4\test"

# -------------------------------
# Data Augmentation
# -------------------------------
# Random augmentation is applied to the training subset only.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2
)

# Validation images are only rescaled, never augmented, so validation metrics
# are measured on unmodified images. validation_split must match the value used
# by train_datagen so both subsets are carved out at the same split point.
val_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),  # ViT usually expects 224x224
    batch_size=16,
    class_mode='binary',
    subset='training'
)

validation_generator = val_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),
    batch_size=16,
    class_mode='binary',
    subset='validation'
)

# shuffle=False keeps the test order stable so predictions line up with filenames.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(224, 224),
    batch_size=16,
    class_mode='binary',
    shuffle=False
)

# -------------------------------
# Vision Transformer Model
# -------------------------------
# Intended to load a pre-trained ViT backbone and freeze it.
# NOTE(review): `tf.keras.applications` does not appear to export a
# `VisionTransformer` class in its documented model list, so this call is
# expected to raise AttributeError regardless of the TensorFlow version.
# Confirm, and if so obtain the backbone from a dedicated ViT package instead.
vit_base = tf.keras.applications.VisionTransformer(
    include_top=False,
    weights='imagenet21k',  # intended: weights pre-trained on ImageNet-21k
    input_shape=(224,224,3),
    include_preprocessing=False
)

vit_base.trainable = False  # Freeze base for transfer learning

# Add custom classification head.
# NOTE(review): GlobalAveragePooling2D requires a 4-D feature map; a ViT
# backbone typically emits a 3-D token sequence - verify the backbone's output
# rank before relying on this pooling layer.
x = tf.keras.layers.GlobalAveragePooling2D()(vit_base.output)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(1, activation='sigmoid')(x)  # single probability for the binary task

model = Model(inputs=vit_base.input, outputs=output)

# Compile model
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# -------------------------------
# Train the model
# -------------------------------
# Step counts use floor division, so a trailing partial batch is not counted.
train_steps = train_generator.samples // train_generator.batch_size
val_steps = validation_generator.samples // validation_generator.batch_size
history = model.fit(
    train_generator,
    epochs=15,
    steps_per_epoch=train_steps,
    validation_data=validation_generator,
    validation_steps=val_steps,
)

# -------------------------------
# Evaluate on test set
# -------------------------------
test_metrics = model.evaluate(test_generator)
test_loss, test_acc = test_metrics
print(f"Test Accuracy: {test_acc*100:.2f}%")

# -------------------------------
# Save model
# -------------------------------
# Written to the current working directory in HDF5 format.
model.save("cats_vs_dogs_vit.h5")

## <b>Self-Supervised Learning (SSL) CNN/ViT</b>

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam

# -------------------------------
# Paths
# -------------------------------
train_dir = r"C:\Users\ASUS\.cache\kagglehub\datasets\samuelcortinhas\cats-and-dogs-image-classification\versions\4\train"
test_dir  = r"C:\Users\ASUS\.cache\kagglehub\datasets\samuelcortinhas\cats-and-dogs-image-classification\versions\4\test"

# -------------------------------
# Data Augmentation
# -------------------------------
# Random augmentation is applied to the training subset only.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2
)

# Validation images are only rescaled, never augmented, so validation metrics
# are measured on unmodified images. validation_split must match the value used
# by train_datagen so both subsets are carved out at the same split point.
val_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224,224),
    batch_size=16,
    class_mode='binary',
    subset='training'
)

validation_generator = val_datagen.flow_from_directory(
    train_dir,
    target_size=(224,224),
    batch_size=16,
    class_mode='binary',
    subset='validation'
)

# shuffle=False keeps the test order stable so predictions line up with filenames.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(224,224),
    batch_size=16,
    class_mode='binary',
    shuffle=False
)

# -------------------------------
# Load Pre-trained Vision Transformer
# -------------------------------
# Intended to load a pre-trained ViT backbone and freeze it for the first stage.
# NOTE(review): `tf.keras.applications` does not appear to export a
# `VisionTransformer` class in its documented model list, so this call is
# expected to raise AttributeError regardless of the TensorFlow version.
# Confirm, and if so obtain the backbone from a dedicated ViT package instead.
vit_base = tf.keras.applications.VisionTransformer(
    include_top=False,
    weights='imagenet21k',
    input_shape=(224,224,3)
)

vit_base.trainable = False  # Freeze base for initial training

# -------------------------------
# Add Custom Head
# -------------------------------
# NOTE(review): GlobalAveragePooling2D requires a 4-D feature map; a ViT
# backbone typically emits a 3-D token sequence - verify the backbone's output
# rank before relying on this pooling layer.
x = GlobalAveragePooling2D()(vit_base.output)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
output = Dense(1, activation='sigmoid')(x)  # single probability for the binary task

model = Model(inputs=vit_base.input, outputs=output)

# -------------------------------
# Compile and Train Top Layers
# -------------------------------
# First-stage compile: only the new head is trainable at this point.
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Step counts are shared by both training stages. Floor division means a
# trailing partial batch is not counted.
train_steps = train_generator.samples // train_generator.batch_size
val_steps = validation_generator.samples // validation_generator.batch_size

# Stage 1: train only the classification head while the base is frozen.
print("Training top layers...")
history = model.fit(
    train_generator,
    epochs=5,  # Train only top layers first
    steps_per_epoch=train_steps,
    validation_data=validation_generator,
    validation_steps=val_steps,
)

# -------------------------------
# Fine-tune the Base Model
# -------------------------------
# Stage 2: make the base trainable, then re-freeze everything except its
# last 50 layers.
vit_base.trainable = True
for layer in vit_base.layers[:-50]:
    layer.trainable = False

# Trainability changes only take effect after recompiling; a 10x smaller
# learning rate is used for this stage.
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=1e-5),
    metrics=['accuracy'],
)

print("Fine-tuning base model...")
history_fine = model.fit(
    train_generator,
    epochs=10,  # Fine-tune for more epochs
    steps_per_epoch=train_steps,
    validation_data=validation_generator,
    validation_steps=val_steps,
)

# -------------------------------
# Evaluate on Test Set
# -------------------------------
test_metrics = model.evaluate(test_generator)
test_loss, test_acc = test_metrics
print(f"Test Accuracy: {test_acc*100:.2f}%")

# -------------------------------
# Save Fine-tuned Model
# -------------------------------
model.save("cats_vs_dogs_vit_finetuned.h5")

## <b>Graph-based Methods</b>

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Input, 
    GlobalAveragePooling2D, Reshape, LayerNormalization, MultiHeadAttention, Add
)
from tensorflow.keras.optimizers import Adam

# -------------------------------
# Paths
# -------------------------------
train_dir = r"C:\Users\ASUS\.cache\kagglehub\datasets\samuelcortinhas\cats-and-dogs-image-classification\versions\4\train"
test_dir  = r"C:\Users\ASUS\.cache\kagglehub\datasets\samuelcortinhas\cats-and-dogs-image-classification\versions\4\test"

# -------------------------------
# Data Augmentation
# -------------------------------
# Random augmentation is applied to the training subset only.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2
)

# Validation images are only rescaled, never augmented, so validation metrics
# are measured on unmodified images. validation_split must match the value used
# by train_datagen so both subsets are carved out at the same split point.
val_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(128,128),  # Smaller size for CNN+Transformer
    batch_size=16,
    class_mode='binary',
    subset='training'
)

validation_generator = val_datagen.flow_from_directory(
    train_dir,
    target_size=(128,128),
    batch_size=16,
    class_mode='binary',
    subset='validation'
)

# shuffle=False keeps the test order stable so predictions line up with filenames.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(128,128),
    batch_size=16,
    class_mode='binary',
    shuffle=False
)

# -------------------------------
# CNN + Transformer Hybrid Model
# -------------------------------
def cnn_transformer(input_shape=(128,128,3), num_heads=4, ff_dim=128, num_classes=1):
    """Build a small CNN followed by a single Transformer encoder block.

    Three Conv2D/MaxPooling2D stages (32, 64, 128 filters) each halve the
    spatial resolution. The resulting feature map is flattened into a token
    sequence, passed through one self-attention + feed-forward block with
    residual connections and layer normalisation, average-pooled and classified.

    Args:
        input_shape: (height, width, channels) of the input images.
        num_heads: number of attention heads.
        ff_dim: hidden width of the feed-forward sub-layer. It is also passed
            as the per-head `key_dim` of the attention layer.
        num_classes: number of output units. 1 produces a sigmoid probability
            (binary classification); values above 1 produce a softmax
            distribution over the classes.

    Returns:
        An uncompiled Keras `Model`.
    """
    inputs = Input(shape=input_shape)

    # --- CNN feature extractor: each stage halves height and width ---
    x = inputs
    for filters in (32, 64, 128):
        x = Conv2D(filters, (3,3), activation='relu', padding='same')(x)
        x = MaxPooling2D((2,2))(x)

    # --- Prepare for Transformer: (H, W, C) -> (H*W tokens, C) ---
    _, height, width, channels = x.shape
    x = Reshape((height * width, channels))(x)

    # --- Transformer Encoder Block ---
    # Multi-head self-attention with residual connection (post-norm).
    attn_output = MultiHeadAttention(num_heads=num_heads, key_dim=ff_dim)(x, x)
    x = Add()([x, attn_output])
    x = LayerNormalization()(x)

    # Position-wise feed-forward, projected back to the token width so the
    # residual addition is shape-compatible.
    ff = Dense(ff_dim, activation='relu')(x)
    ff = Dense(channels)(ff)
    x = Add()([x, ff])
    x = LayerNormalization()(x)

    # --- Classification Head ---
    # Tokens are restored to the spatial layout and averaged over all positions.
    x = GlobalAveragePooling2D()(Reshape((height, width, channels))(x))
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    # A sigmoid over several units would not give a class distribution, so
    # multi-class heads use softmax; the default single-unit head is unchanged.
    final_activation = 'sigmoid' if num_classes == 1 else 'softmax'
    outputs = Dense(num_classes, activation=final_activation)(x)

    return Model(inputs, outputs)

# Instantiate the hybrid model with its default hyper-parameters.
model = cnn_transformer()

# Compile model
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)

# -------------------------------
# Train the model
# -------------------------------
# Step counts use floor division, so a trailing partial batch is not counted.
train_steps = train_generator.samples // train_generator.batch_size
val_steps = validation_generator.samples // validation_generator.batch_size
history = model.fit(
    train_generator,
    epochs=20,
    steps_per_epoch=train_steps,
    validation_data=validation_generator,
    validation_steps=val_steps,
)

# -------------------------------
# Evaluate on Test Set
# -------------------------------
test_metrics = model.evaluate(test_generator)
test_loss, test_acc = test_metrics
print(f"Test Accuracy: {test_acc*100:.2f}%")

# -------------------------------
# Save Model
# -------------------------------
# Written to the current working directory in HDF5 format.
model.save("cats_vs_dogs_cnn_transformer.h5")

## <b>Capsule Networks (CapsNet)</b>

In [None]:
import os
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow_addons as tfa

# -------------------------------
# Paths & parameters
# -------------------------------
# Dataset locations (absolute, machine-specific paths).
train_dir = r"C:\Users\ASUS\.cache\kagglehub\datasets\samuelcortinhas\cats-and-dogs-image-classification\versions\4\train"
test_dir  = r"C:\Users\ASUS\.cache\kagglehub\datasets\samuelcortinhas\cats-and-dogs-image-classification\versions\4\test"

IMG_SIZE = 128  # side length (pixels) images are resized to; also the encoder input size
BATCH_SIZE = 16  # batch size for the SSL dataset and the fine-tuning generators
EPOCHS_SSL = 10  # number of self-supervised pretraining epochs
EPOCHS_FINETUNE = 15  # number of supervised fine-tuning epochs

# -------------------------------
# Self-Supervised Data Augmentation
# -------------------------------
def augment(image):
    """Return one randomly augmented view of `image`.

    Args:
        image: image tensor with 3 channels and pixel values on the 0-255
            scale (as produced by decoding and resizing in `preprocess`).

    Returns:
        float32 tensor of shape (IMG_SIZE, IMG_SIZE, 3) with values in [0, 1].
    """
    # Scale to [0, 1] first: the brightness delta is added to the pixel values,
    # so a delta of 0.3 is only meaningful on the unit scale (on a 0-255 scale
    # it is visually a no-op).
    image = tf.cast(image, tf.float32) / 255.0
    image = tf.image.random_flip_left_right(image)

    # Upscale slightly before cropping; cropping an image that is already
    # IMG_SIZE x IMG_SIZE to the same size would never change it.
    enlarged = IMG_SIZE + IMG_SIZE // 8
    image = tf.image.resize(image, [enlarged, enlarged])
    image = tf.image.random_crop(image, size=[IMG_SIZE, IMG_SIZE, 3])

    image = tf.image.random_brightness(image, 0.3)
    image = tf.image.random_contrast(image, 0.8, 1.2)
    # Brightness/contrast jitter can push values outside the valid range.
    return tf.clip_by_value(image, 0.0, 1.0)

def preprocess(image_path, label):
    """Load an image file and produce two augmented views of it.

    Args:
        image_path: path of the image file to read.
        label: accepted so the function can be mapped over (path, label)
            pairs; it is not used.

    Returns:
        A pair of tensors, each the result of a separate `augment` call on the
        same decoded and resized image.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, [IMG_SIZE, IMG_SIZE])
    first_view = augment(resized)
    second_view = augment(resized)
    return first_view, second_view

# Create dataset for SSL: collect every image file below train_dir.
# Class sub-folders are only used to locate files; labels are not needed.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

image_paths = []
for class_dir in sorted(os.listdir(train_dir)):
    class_path = os.path.join(train_dir, class_dir)
    if not os.path.isdir(class_path):
        # Skip stray files next to the class folders instead of crashing on listdir.
        continue
    for fname in sorted(os.listdir(class_path)):
        if fname.lower().endswith(IMAGE_EXTENSIONS):
            image_paths.append(os.path.join(class_path, fname))

if not image_paths:
    raise FileNotFoundError(f"No image files found under {train_dir!r}")

# Dummy labels keep the (path, label) element structure expected by preprocess.
labels = [0]*len(image_paths)
dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))
# Shuffle the (cheap) path strings with a buffer covering the whole dataset
# before decoding, so every epoch sees a full reshuffle without buffering images.
dataset = dataset.shuffle(len(image_paths))
dataset = dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
dataset = dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# -------------------------------
# CNN + Transformer Encoder
# -------------------------------
def get_encoder():
    """Build the encoder used for contrastive pretraining.

    A three-stage CNN (32, 64, 128 filters, each followed by 2x2 max pooling)
    feeds one Transformer encoder block; the result is average-pooled and
    passed through a two-layer projection head.

    Returns:
        A Keras model named "encoder" mapping (IMG_SIZE, IMG_SIZE, 3) images
        to 64-dimensional embeddings.
    """
    image_in = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

    # CNN backbone
    feat = layers.Conv2D(32, (3,3), activation='relu', padding='same')(image_in)
    feat = layers.MaxPooling2D((2,2))(feat)
    feat = layers.Conv2D(64, (3,3), activation='relu', padding='same')(feat)
    feat = layers.MaxPooling2D((2,2))(feat)
    feat = layers.Conv2D(128, (3,3), activation='relu', padding='same')(feat)
    feat = layers.MaxPooling2D((2,2))(feat)

    # Flatten the spatial grid into a token sequence for the Transformer.
    _, grid_h, grid_w, channels = feat.shape
    tokens = layers.Reshape((grid_h*grid_w, channels))(feat)

    # Transformer encoder: self-attention, then feed-forward, each with a
    # residual connection followed by layer normalisation.
    attended = layers.MultiHeadAttention(num_heads=4, key_dim=128)(tokens, tokens)
    tokens = layers.Add()([tokens, attended])
    tokens = layers.LayerNormalization()(tokens)

    hidden = layers.Dense(128, activation='relu')(tokens)
    hidden = layers.Dense(channels)(hidden)
    tokens = layers.Add()([tokens, hidden])
    tokens = layers.LayerNormalization()(tokens)

    # Restore the spatial layout, pool globally, then apply the projection head.
    grid = layers.Reshape((grid_h, grid_w, channels))(tokens)
    pooled = layers.GlobalAveragePooling2D()(grid)
    projected = layers.Dense(128, activation='relu')(pooled)
    embedding = layers.Dense(64)(projected)

    return models.Model(image_in, embedding, name="encoder")

encoder = get_encoder()

# -------------------------------
# Contrastive Loss (NT-Xent)
# -------------------------------
def nt_xent_loss(z_i, z_j, temperature=0.5):
    """Normalised temperature-scaled cross-entropy (NT-Xent) loss.

    Row k of `z_i` and row k of `z_j` are embeddings of two views of the same
    image (a positive pair). Every other embedding in the combined batch acts
    as a negative.

    Args:
        z_i: (batch, dim) embeddings of the first views.
        z_j: (batch, dim) embeddings of the second views.
        temperature: softmax temperature applied to the cosine similarities.

    Returns:
        Scalar tensor: mean loss over all 2 * batch anchors.
    """
    z_i = tf.math.l2_normalize(z_i, axis=1)
    z_j = tf.math.l2_normalize(z_j, axis=1)

    batch_size = tf.shape(z_i)[0]
    embeddings = tf.concat([z_i, z_j], axis=0)  # (2N, dim)

    # Cosine similarity between every pair of (unit-length) embeddings.
    logits = tf.matmul(embeddings, embeddings, transpose_b=True) / temperature
    # An embedding must not count as its own candidate: push the diagonal to a
    # very negative value so it vanishes in the softmax.
    logits = logits - 1e9 * tf.eye(2 * batch_size, dtype=logits.dtype)

    # The positive for anchor k is k + N (first half) or k - N (second half).
    positives = tf.concat(
        [tf.range(batch_size, 2 * batch_size), tf.range(batch_size)], axis=0
    )
    per_anchor = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=positives, logits=logits
    )
    return tf.reduce_mean(per_anchor)

# -------------------------------
# Self-Supervised Pretraining
# -------------------------------
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

# Custom training loop: each batch holds two augmented views of the same
# images; the encoder is updated to minimise the contrastive loss between them.
for epoch in range(EPOCHS_SSL):
    for x1, x2 in dataset:
        with tf.GradientTape() as tape:
            z1 = encoder(x1, training=True)
            z2 = encoder(x2, training=True)
            loss = nt_xent_loss(z1, z2)
        encoder_vars = encoder.trainable_variables
        grads = tape.gradient(loss, encoder_vars)
        optimizer.apply_gradients(zip(grads, encoder_vars))
    # The reported value is the loss of the last batch of the epoch.
    print(f"[SSL] Epoch {epoch+1}, Loss: {loss.numpy():.4f}")

# -------------------------------
# Fine-tuning on Labeled Data
# -------------------------------
# Add classification head on top of the encoder's output embedding.
inputs = encoder.input
x = layers.Dense(256, activation='relu')(encoder.output)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1, activation='sigmoid')(x)
ssl_model = models.Model(inputs, outputs)

ssl_model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(1e-4),
    metrics=['accuracy'],
)

# Data generators (rescaling only). All three share the same image size,
# batch size and label mode.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
flow_options = dict(
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='binary',
)
train_gen = datagen.flow_from_directory(train_dir, subset='training', **flow_options)
val_gen = datagen.flow_from_directory(train_dir, subset='validation', **flow_options)
test_gen = datagen.flow_from_directory(test_dir, shuffle=False, **flow_options)

# Fine-tune
ssl_model.fit(train_gen, epochs=EPOCHS_FINETUNE, validation_data=val_gen)

# -------------------------------
# Evaluate on Test Set
# -------------------------------
test_metrics = ssl_model.evaluate(test_gen)
test_loss, test_acc = test_metrics
print(f"Test Accuracy: {test_acc*100:.2f}%")

# -------------------------------
# Save model
# -------------------------------
ssl_model.save("cats_dogs_cnn_transformer_ssl.h5")

## <b>NAS / Attention-Augmented CNNs</b>

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from spektral.layers import GCNConv
from spektral.data import Graph
from spektral.models import GCN
from sklearn.neighbors import kneighbors_graph

# -------------------------------
# Parameters
# -------------------------------
IMG_SIZE = 128
BATCH_SIZE = 16
K = 5  # Number of neighbors in graph

train_dir = r"C:\Users\ASUS\.cache\kagglehub\datasets\samuelcortinhas\cats-and-dogs-image-classification\versions\4\train"

# -------------------------------
# Step 1: Extract CNN Features
# -------------------------------
# The ImageNet ResNet50 weights expect the network's own preprocessing, not
# pixels rescaled to [0, 1]; apply resnet50.preprocess_input to every image so
# the extracted features match what the network was trained on.
datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.resnet50.preprocess_input
)
# shuffle=False keeps the feature rows aligned with train_gen.classes.
train_gen = datagen.flow_from_directory(
    train_dir, target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE, class_mode='binary', shuffle=False
)

# pooling='avg' makes the network return one feature vector per image.
cnn = ResNet50(weights='imagenet', include_top=False, pooling='avg', input_shape=(IMG_SIZE,IMG_SIZE,3))

features = cnn.predict(train_gen)
labels = train_gen.classes

print("Feature shape:", features.shape)  # (num_samples, 2048)

# -------------------------------
# Step 2: Build Graph (k-NN)
# -------------------------------
# Connect every image to its K nearest neighbours in feature space
# (include_self=True also puts a self-loop on every node).
A = kneighbors_graph(features, n_neighbors=K, mode='connectivity', include_self=True)
A = A.toarray().astype('float32')

# k-NN relations are directional; make the graph undirected so that an edge
# i->j also carries information j->i.
A = np.maximum(A, A.T)

# Symmetric normalisation D^-1/2 A D^-1/2, the propagation matrix a GCN layer
# expects. Every degree is >= 1 thanks to the self-loops, so the division is safe.
node_degree = A.sum(axis=1)
inv_sqrt_degree = 1.0 / np.sqrt(node_degree)
A = (A * inv_sqrt_degree[:, None] * inv_sqrt_degree[None, :]).astype('float32')

# -------------------------------
# Step 3: GCN for Node Classification
# -------------------------------
X = features.astype('float32')
y = tf.keras.utils.to_categorical(labels, 2)

# Simple GCN model. The whole graph is fed as a single "batch": the batch axis
# of both inputs is the node axis.
# tf.keras.layers is referenced explicitly because this cell does not import
# `layers`; relying on a name left over from another cell breaks a fresh run.
inputs = tf.keras.layers.Input(shape=(X.shape[1],))
A_input = tf.keras.layers.Input(shape=(X.shape[0],), sparse=False)

x = GCNConv(64, activation='relu')([inputs, A_input])
x = GCNConv(32, activation='relu')([x, A_input])
outputs = GCNConv(2, activation='softmax')([x, A_input])

gcn_model = tf.keras.Model([inputs, A_input], outputs)
gcn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# -------------------------------
# Step 4: Train
# -------------------------------
# batch_size equals the number of nodes so each step sees the full graph.
# shuffle=False is required: shuffling would permute the rows of X and A but
# not the columns of A, scrambling which neighbours each node is connected to.
gcn_model.fit([X, A], y, epochs=20, batch_size=X.shape[0], shuffle=False, verbose=1)

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models, backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np

# -------------------------------
# Parameters
# -------------------------------
IMG_SIZE = 128
BATCH_SIZE = 16
EPOCHS = 20

train_dir = r"C:\Users\ASUS\.cache\kagglehub\datasets\samuelcortinhas\cats-and-dogs-image-classification\versions\4\train"
test_dir = r"C:\Users\ASUS\.cache\kagglehub\datasets\samuelcortinhas\cats-and-dogs-image-classification\versions\4\test"

# -------------------------------
# Data Generators
# -------------------------------
# One rescaling-only generator serves all three splits; the flow options the
# splits have in common are gathered in a single dict.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
flow_options = dict(
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='binary',
)
train_gen = datagen.flow_from_directory(train_dir, subset='training', **flow_options)
val_gen = datagen.flow_from_directory(train_dir, subset='validation', **flow_options)
test_gen = datagen.flow_from_directory(test_dir, shuffle=False, **flow_options)

# -------------------------------
# Capsule Network Layers
# -------------------------------
def squash(vectors, axis=-1):
    """Capsule "squash" non-linearity.

    Rescales each vector along `axis` by |v|^2 / (1 + |v|^2) / sqrt(|v|^2 + eps),
    which keeps its direction and maps its length into [0, 1).

    Args:
        vectors: tensor of capsule vectors.
        axis: axis holding the components of each vector.

    Returns:
        Tensor of the same shape as `vectors`.
    """
    sq_norm = K.sum(K.square(vectors), axis, keepdims=True)
    # Epsilon guards the square root against a zero-length vector.
    factor = sq_norm / (1 + sq_norm) / K.sqrt(sq_norm + K.epsilon())
    return factor * vectors

class CapsuleLayer(layers.Layer):
    """Capsule layer with routing-by-agreement.

    Input:  (batch, num_input_capsules, input_dim)
    Output: (batch, num_capsules, dim_capsule)

    A single transformation matrix, shared by all input capsules, maps each
    input capsule to one prediction vector per output capsule. The predictions
    are combined by iterative dynamic routing.

    Args:
        num_capsules: number of output capsules.
        dim_capsule: dimensionality of each output capsule.
        routings: number of routing iterations.
    """

    def __init__(self, num_capsules, dim_capsule, routings=3, **kwargs):
        super(CapsuleLayer, self).__init__(**kwargs)
        self.num_capsules = num_capsules
        self.dim_capsule = dim_capsule
        self.routings = routings

    def build(self, input_shape):
        # The weight matrix multiplies the LAST axis of the input (the capsule
        # vector), so its first dimension must be input_shape[-1]. Using
        # input_shape[1] (the number of input capsules) makes the product in
        # call() fail with a shape mismatch.
        self.W = self.add_weight(
            name='W',
            shape=[input_shape[-1], self.num_capsules * self.dim_capsule],
            initializer='glorot_uniform',
            trainable=True,
        )
        super(CapsuleLayer, self).build(input_shape)

    def call(self, inputs):
        # Prediction vectors: (batch, n_in, num_capsules * dim_capsule)
        u_hat = tf.tensordot(inputs, self.W, axes=1)
        u_hat = tf.reshape(u_hat, (-1, inputs.shape[1], self.num_capsules, self.dim_capsule))

        # Routing logits, one per (input capsule, output capsule) pair.
        b = tf.zeros_like(u_hat[..., 0])
        for i in range(self.routings):
            # Each input capsule distributes its vote across the output capsules.
            c = tf.nn.softmax(b, axis=2)
            s = tf.reduce_sum(c[..., None] * u_hat, axis=1)
            v = squash(s)
            if i < self.routings - 1:
                # Agreement between predictions and current outputs raises the logits.
                b += tf.reduce_sum(u_hat * tf.expand_dims(v, 1), axis=-1)
        return v

    def get_config(self):
        # Required so a model containing this layer can be serialised by
        # model.save() and rebuilt with custom_objects={'CapsuleLayer': CapsuleLayer}.
        config = super(CapsuleLayer, self).get_config()
        config.update({
            'num_capsules': self.num_capsules,
            'dim_capsule': self.dim_capsule,
            'routings': self.routings,
        })
        return config

# -------------------------------
# Build the Model
# -------------------------------
inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
x = layers.Conv2D(64, kernel_size=5, strides=1, activation='relu')(inputs)
x = layers.Conv2D(128, kernel_size=5, strides=1, activation='relu')(x)
# NOTE(review): with 128x128 inputs the two unpadded, stride-1 5x5 convolutions
# leave a 120x120x128 map, i.e. 230,400 eight-dimensional input capsules. The
# routing tensors scale with that count; consider strided convolutions if
# memory becomes a problem.
x = layers.Reshape((-1, 8))(x)  # Flatten into capsules
caps = CapsuleLayer(num_capsules=10, dim_capsule=16, routings=3)(x)
# Capsule length = Euclidean norm over the capsule dimension. The epsilon keeps
# the gradient of the square root finite when a capsule has zero length
# (same guard as in squash()).
out = layers.Lambda(lambda z: K.sqrt(K.sum(K.square(z), 2) + K.epsilon()))(caps)
out = layers.Dense(1, activation='sigmoid')(out)

model = models.Model(inputs, out)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# -------------------------------
# Train the Model
# -------------------------------
model.fit(train_gen, validation_data=val_gen, epochs=EPOCHS)

# -------------------------------
# Evaluate
# -------------------------------
test_loss, test_acc = model.evaluate(test_gen)
print(f"Test Accuracy: {test_acc*100:.2f}%")

# -------------------------------
# Save Model
# -------------------------------
model.save("cats_dogs_capsnet.h5")

## <b>Pretrained CNN / ViT Fine-Tuning</b>

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# -------------------------------
# Parameters
# -------------------------------
IMG_SIZE = 128
BATCH_SIZE = 16
EPOCHS = 20

train_dir = r"C:\Users\ASUS\.cache\kagglehub\datasets\samuelcortinhas\cats-and-dogs-image-classification\versions\4\train"
test_dir = r"C:\Users\ASUS\.cache\kagglehub\datasets\samuelcortinhas\cats-and-dogs-image-classification\versions\4\test"

# -------------------------------
# Data
# -------------------------------
# One rescaling-only generator serves all three splits; the flow options the
# splits have in common are gathered in a single dict.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
flow_options = dict(
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='binary',
)
train_gen = datagen.flow_from_directory(train_dir, subset='training', **flow_options)
val_gen = datagen.flow_from_directory(train_dir, subset='validation', **flow_options)
test_gen = datagen.flow_from_directory(test_dir, shuffle=False, **flow_options)

# -------------------------------
# Attention-Augmented Conv Block
# -------------------------------
def attention_augmented_conv(x, filters, kernel_size, num_heads=4):
    """Convolution followed by a residual self-attention branch.

    The convolution output is flattened into height*width tokens, passed
    through multi-head self-attention, reshaped back to the spatial layout and
    added to the convolution output.

    Args:
        x: 4-D feature map (batch, height, width, channels) with static
            height and width.
        filters: number of convolution filters (also the per-head key_dim).
        kernel_size: convolution kernel size.
        num_heads: number of attention heads.

    Returns:
        Tensor with the same shape as the convolution output.

    Note:
        Self-attention runs over height*width tokens, so its cost grows with
        the square of the number of spatial positions.
    """
    # Convolution
    features = layers.Conv2D(filters, kernel_size, padding='same', activation='relu')(x)

    # Flatten the spatial grid into a token sequence for attention.
    _, height, width, channels = features.shape
    tokens = layers.Reshape((height*width, channels))(features)

    # Multi-head self-attention over all spatial positions.
    attended = layers.MultiHeadAttention(num_heads=num_heads, key_dim=channels)(tokens, tokens)

    # Back to the image layout, then residual connection.
    attended_map = layers.Reshape((height, width, channels))(attended)
    return layers.Add()([features, attended_map])

# -------------------------------
# Model
# -------------------------------
# Two attention-augmented conv stages (each followed by 2x2 max pooling),
# one plain convolution, global pooling and a small dense classifier.
# NOTE(review): the first attention block runs at full input resolution
# (IMG_SIZE * IMG_SIZE tokens); check that this fits in memory.
inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
x = layers.MaxPooling2D()(attention_augmented_conv(inputs, 32, 3))
x = layers.MaxPooling2D()(attention_augmented_conv(x, 64, 3))
x = layers.Conv2D(128, 3, activation='relu', padding='same')(x)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(64, activation='relu')(x)
outputs = layers.Dense(1, activation='sigmoid')(x)

model = models.Model(inputs, outputs)
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

# -------------------------------
# Train
# -------------------------------
model.fit(train_gen, epochs=EPOCHS, validation_data=val_gen)

# -------------------------------
# Evaluate
# -------------------------------
test_metrics = model.evaluate(test_gen)
test_loss, test_acc = test_metrics
print(f"Test Accuracy: {test_acc*100:.2f}%")

# -------------------------------
# Save Model
# -------------------------------
model.save("cats_dogs_attention_augmented.h5")

| **Model**                                  | **Architecture / Type**                                         | **Strengths**                                                                         | **Weaknesses**                                                          | **Use Case**                                                     |
| ------------------------------------------ | --------------------------------------------------------------- | ------------------------------------------------------------------------------------- | ----------------------------------------------------------------------- | ---------------------------------------------------------------- |
| **Simple CNN**                             | Sequential CNN (Conv + Pool + FC)                               | Easy to implement, fast to train on small datasets                                    | Limited feature learning, may underfit complex images                   | Basic image classification tasks, educational purposes           |
| **CNN + Transformer Hybrid**               | CNN backbone + Vision Transformer layers                        | Captures both local (CNN) and global (Transformer) features, better accuracy          | More complex, heavier, longer training                                  | Medium-scale image classification with structured patterns       |
| **Vision Transformer (ViT)**               | Pure Transformer on image patches                               | Captures long-range dependencies, scales well with large datasets                     | Needs large datasets for good performance, computationally heavy        | Large-scale image classification, fine-grained image recognition |
| **Self-Supervised Learning (SSL) CNN/ViT** | Pretext task (e.g., rotation prediction, contrastive learning)  | Learns powerful representations without labels, can improve downstream classification | Requires careful pretext design, slower training                        | Pretraining on unlabeled data, semi-supervised classification    |
| **Graph-based Methods**                    | Images as nodes/features + GNN                                  | Captures relationships between samples, can incorporate structured info               | Less common for raw images, requires graph construction                 | Image classification with relational or structural data          |
| **Capsule Networks (CapsNet)**             | Capsules + dynamic routing                                      | Preserves spatial hierarchies, robust to affine transformations                       | Complex, slower to train, less mature                                   | Small-scale datasets with complex spatial features               |
| **NAS / Attention-Augmented CNNs**         | CNN optimized via Neural Architecture Search + attention layers | Potentially high accuracy, automatic architecture design                              | Very computationally expensive                                          | High-performance image classification when compute is available  |
| **Pretrained CNN / ViT Fine-Tuning**       | ResNet, EfficientNet, or ViT pretrained on ImageNet             | High accuracy even on small datasets, fast convergence                                | Less flexible for completely new domains, may require domain adaptation | Transfer learning for medium-scale image classification          |