# Basic CNN

In [3]:
import gc

import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping

2025-04-13 20:47:45.302539: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-13 20:47:45.575480: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744595265.674944  126833 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744595265.702675  126833 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1744595265.921545  126833 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [None]:
# Enable memory growth so TensorFlow allocates GPU memory on demand instead of
# reserving all of it at once.
physical_devices = tf.config.list_physical_devices("GPU")
if physical_devices:
    print(f"Found {len(physical_devices)} GPU(s)")
    for device in physical_devices:
        try:
            tf.config.experimental.set_memory_growth(device, True)
            print(f"Memory growth set to True for {device}")
        except RuntimeError as err:
            # Memory growth has to be configured before the GPU is initialized;
            # re-running this cell in a live kernel would otherwise raise here.
            print(f"Could not set memory growth for {device}: {err}")
else:
    print("No GPU found, using CPU")

Found 1 GPU(s)
Memory growth set to True for PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [None]:
# Seed Python's `random`, NumPy and TensorFlow in one call for reproducibility
# (tf.random.set_seed alone only seeds TensorFlow's own generators).
SEED = 42
tf.keras.utils.set_random_seed(SEED)

In [None]:
# Use mixed precision to reduce memory usage.
# Best effort: if the policy cannot be applied, training continues with the
# default policy and the reason is reported.
try:
    policy = tf.keras.mixed_precision.Policy("mixed_float16")
    tf.keras.mixed_precision.set_global_policy(policy)
    print("Using mixed precision policy")
except Exception as err:
    # `Exception` (not a bare `except`) so KeyboardInterrupt/SystemExit still propagate.
    print("Mixed precision not supported or enabled")
    print(f"Reason: {err}")

Using mixed precision policy


## Loading the Imagenette Dataset

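The data is downloaded with TensorFlow Datasets (TFDS). This project uses the 160px version of Imagenette, in which the shorter side of each image has been resized to 160 pixels; because the other side varies, the images are resized to exactly 160x160 pixels during preprocessing. This smaller dataset allows for faster training while still providing a meaningful image classification challenge.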

In [None]:
print("Loading Imagenette dataset...")
# `as_supervised` yields (image, label) pairs; `info` carries the split and
# label metadata used in the following cells.
dataset, info = tfds.load(
    "imagenette/160px",
    with_info=True,
    as_supervised=True,
)

Loading Imagenette dataset...


I0000 00:00:1744505192.342440   43720 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5563 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


In [None]:
# Report how many examples each split contains
for split_label, split_key in (("Training", "train"), ("Validation", "validation")):
    print(f"{split_label} samples:", info.splits[split_key].num_examples)

Training samples: 12894
Validation samples: 500


In [None]:
# Label metadata taken from the dataset info
label_feature = info.features["label"]
num_classes = label_feature.num_classes
class_names = label_feature.names

# Raw (not yet preprocessed) splits
train_ds, valid_ds = dataset["train"], dataset["validation"]

## Preprocessing

In [None]:
# Target size that every image is resized to during preprocessing;
# passed as the `size` argument of tf.image.resize.
TARGET_SIZE = (160, 160)

Preprocess the images:
- Resize to 160x160.
- Normalize the pixel values to the [0, 1] range.
- One-hot encode the labels.

The training dataset is shuffled and split into mini-batches; the validation dataset is only batched, not shuffled. Prefetching is used to prepare the next batch while the current one is being processed.

In [None]:
def preprocess_data(image, label):
    """Resize and rescale one image and one-hot encode its label.

    Args:
        image: Image tensor; resized to ``TARGET_SIZE`` so that inputs of
            varying dimensions end up with a consistent shape.
        label: Class index (presumably an integer tensor from TFDS; confirm
            against the dataset).

    Returns:
        Tuple ``(image, one_hot_label)``: the resized image as float32 divided
        by 255, and a one-hot vector of depth ``num_classes``.
    """
    resized = tf.image.resize(image, TARGET_SIZE)
    # Scale to [0, 1]; assumes 8-bit pixel values in the source images.
    scaled = tf.cast(resized, tf.float32) / 255.0
    one_hot_label = tf.one_hot(label, num_classes)
    return scaled, one_hot_label

In [None]:
BATCH_SIZE = 16
SHUFFLE_BUFFER = 1000
AUTOTUNE = tf.data.AUTOTUNE

# Both pipelines are built from the raw `dataset` splits instead of re-assigning
# `train_ds`/`valid_ds` from themselves. This keeps the cell idempotent:
# re-running it no longer applies `preprocess_data` to already preprocessed
# (one-hot encoded) data.
train_ds = (
    dataset["train"]
    .map(preprocess_data, num_parallel_calls=AUTOTUNE)
    .shuffle(SHUFFLE_BUFFER)
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)  # prepare the next batch while the current one is in use
)

# The validation split is only batched, not shuffled.
valid_ds = (
    dataset["validation"]
    .map(preprocess_data, num_parallel_calls=AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)

## Building the Model

### Architecture Overview

The model consists of three convolutional blocks followed by two fully connected layers:

1. **Input Layer**: Accepts images of size 160×160 pixels with 3 color channels (RGB)

2. **Three Convolutional Blocks**, each containing:
   - A convolutional layer that extracts features
   - A max pooling layer that reduces spatial dimensions

3. **Classifier Head** with two fully connected layers:
   - A hidden layer with 256 neurons 
   - An output layer with `num_classes` neurons (using softmax activation for classification)

### Layer-by-Layer Breakdown

- **First Block**:
  - Conv2D: 32 filters of size 3×3, ReLU activation, same padding
  - MaxPooling2D: 2×2 pool size (reduces dimensions by half)
  - Output shape: 80×80×32

- **Second Block**:
  - Conv2D: 64 filters of size 3×3, ReLU activation, same padding
  - MaxPooling2D: 2×2 pool size
  - Output shape: 40×40×64

- **Third Block**:
  - Conv2D: 128 filters of size 3×3, ReLU activation, same padding
  - MaxPooling2D: 2×2 pool size
  - Output shape: 20×20×128

- **Flatten**: Converts the 3D feature maps to 1D vector
  - Output shape: 51,200 (20×20×128)

- **Dense Layer**: 256 neurons with ReLU activation
  - Output shape: 256

- **Output Layer**: `num_classes` neurons with softmax activation
  - Output shape: num_classes


In [None]:
def build_cnn_model(input_shape=None, n_classes=None):
    """Build the three-block CNN classifier (uncompiled).

    Args:
        input_shape: Shape of one input image as (height, width, channels).
            Defaults to ``(*TARGET_SIZE, 3)``.
        n_classes: Number of output classes. Defaults to the notebook-level
            ``num_classes``.

    Returns:
        A ``Sequential`` model: three Conv2D + MaxPooling2D blocks (32, 64 and
        128 filters), followed by Flatten, Dense(256, ReLU) and a softmax
        output layer.
    """
    if input_shape is None:
        input_shape = (*TARGET_SIZE, 3)
    if n_classes is None:
        n_classes = num_classes

    return models.Sequential([
        # Explicit Input layer instead of `input_shape=` on the first Conv2D,
        # which newer Keras versions warn about.
        layers.Input(shape=input_shape),

        # First Convolutional Block
        layers.Conv2D(32, (3, 3), activation="relu", padding="same"),
        layers.MaxPooling2D((2, 2)),

        # Second Convolutional Block
        layers.Conv2D(64, (3, 3), activation="relu", padding="same"),
        layers.MaxPooling2D((2, 2)),

        # Third Convolutional Block
        layers.Conv2D(128, (3, 3), activation="relu", padding="same"),
        layers.MaxPooling2D((2, 2)),

        # Fully Connected Layers
        layers.Flatten(),
        layers.Dense(256, activation="relu"),
        # Keep the softmax output in float32: with the global mixed_float16
        # policy a float16 softmax can be numerically unstable.
        layers.Dense(n_classes, activation="softmax", dtype="float32"),
    ])

In [None]:
# Instantiate the network, then configure its optimizer, loss and metric
print("Building and compiling the model...")
model = build_cnn_model()

adam = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(optimizer=adam, loss="categorical_crossentropy", metrics=["accuracy"])

model.summary()

Building and compiling the model...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


```mermaid
graph TD
    Input[Input Image: 160×160×3] --> Conv1
    
    subgraph "Convolutional Block 1"
    Conv1[Conv2D: 32 filters, 3×3, ReLU] --> Pool1[MaxPooling2D: 2×2]
    end
    
    Pool1 --> Conv2
    
    subgraph "Convolutional Block 2"
    Conv2[Conv2D: 64 filters, 3×3, ReLU] --> Pool2[MaxPooling2D: 2×2]
    end
    
    Pool2 --> Conv3
    
    subgraph "Convolutional Block 3"
    Conv3[Conv2D: 128 filters, 3×3, ReLU] --> Pool3[MaxPooling2D: 2×2]
    end
    
    Pool3 --> Flat[Flatten: 20×20×128 → 51,200]
    Flat --> Dense1[Dense: 256 neurons, ReLU]
    Dense1 --> Output[Dense: num_classes, Softmax]
    
    style Input fill:#f9f9f9,stroke:#333,stroke-width:2px
    style Conv1 fill:#d9edf7,stroke:#31708f,stroke-width:1px
    style Pool1 fill:#d9edf7,stroke:#31708f,stroke-width:1px
    style Conv2 fill:#d9edf7,stroke:#31708f,stroke-width:1px
    style Pool2 fill:#d9edf7,stroke:#31708f,stroke-width:1px
    style Conv3 fill:#d9edf7,stroke:#31708f,stroke-width:1px
    style Pool3 fill:#d9edf7,stroke:#31708f,stroke-width:1px
    style Flat fill:#fcf8e3,stroke:#8a6d3b,stroke-width:1px
    style Dense1 fill:#dff0d8,stroke:#3c763d,stroke-width:1px
    style Output fill:#dff0d8,stroke:#3c763d,stroke-width:1px
```

In [None]:
# Write the weights to disk only on epochs where val_loss reaches a new minimum
# (fewer writes than checkpointing every epoch)
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    "best_model.weights.h5",
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

In [None]:
# Stop training once val_loss has not improved for 5 epochs, and restore the
# best weights, to limit overfitting
early_stopping = EarlyStopping(
    restore_best_weights=True,
    patience=5,
    monitor="val_loss",
    verbose=1,
)

In [None]:
# Halve the learning rate after 3 epochs without val_loss improvement,
# never going below 1e-5
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    patience=3,
    factor=0.5,
    min_lr=1e-5,
    verbose=1,
)

In [None]:
# Upper bound on the number of training epochs passed to model.fit
epochs = 50
print("Training the model...")

Training the model...


In [None]:
# Peek at a single training batch to check its shape and in-memory size.
# `take(1)` yields at most one batch, so no explicit `break` is needed.
for images, _ in train_ds.take(1):
    batch_mib = images.numpy().nbytes / (1024 * 1024)
    print(f"Batch shape: {images.shape}")
    print(f"Memory footprint of batch: ~{batch_mib:.2f} MB")

2025-04-12 19:46:34.457181: I tensorflow/core/kernels/data/tf_record_dataset_op.cc:387] The default buffer size is 262144, which is overridden by the user specified `buffer_size` of 8388608


Batch shape: (16, 160, 160, 3)
Memory footprint of batch: ~4.69 MB


2025-04-12 19:46:34.766979: W tensorflow/core/kernels/data/cache_dataset_ops.cc:916] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


In [None]:
# Fit the model; `history` keeps the per-epoch metrics used for plotting and
# reporting in later cells
training_callbacks = [early_stopping, reduce_lr, checkpoint_callback]
history = model.fit(
    train_ds,
    epochs=epochs,
    validation_data=valid_ds,
    callbacks=training_callbacks,
    verbose=2,  # Less output to console
)

Epoch 1/50


I0000 00:00:1744505196.167171   43841 service.cc:152] XLA service 0x7f53c0004c10 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1744505196.167218   43841 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 4060 Laptop GPU, Compute Capability 8.9
2025-04-12 19:46:36.230491: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1744505203.204782   43841 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1744505201.411625   43841 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.






Epoch 1: val_loss improved from inf to 1.05909, saving model to best_model.weights.h5
806/806 - 26s - 32ms/step - accuracy: 0.4669 - loss: 1.5907 - val_accuracy: 0.6680 - val_loss: 1.0591 - learning_rate: 1.0000e-03
Epoch 2/50

Epoch 2: val_loss improved from 1.05909 to 0.93624, saving model to best_model.weights.h5
806/806 - 10s - 13ms/step - accuracy: 0.6698 - loss: 1.0110 - val_accuracy: 0.6840 - val_loss: 0.9362 - learning_rate: 1.0000e-03
Epoch 3/50

Epoch 3: val_loss did not improve from 0.93624
806/806 - 10s - 12ms/step - accuracy: 0.7783 - loss: 0.6896 - val_accuracy: 0.7000 - val_loss: 0.9933 - learning_rate: 1.0000e-03
Epoch 4/50

Epoch 4: val_loss did not improve from 0.93624
806/806 - 9s - 12ms/step - accuracy: 0.8738 - loss: 0.3827 - val_accuracy: 0.6640 - val_loss: 1.2718 - learning_rate: 1.0000e-03
Epoch 5/50

Epoch 5: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.

Epoch 5: val_loss did not improve from 0.93624
806/806 - 9s - 12ms/step - accuracy: 0

In [None]:
def plot_metrics(history, output_path="training_history.png"):
    """Plot training/validation loss and accuracy curves and save them to a file.

    The figure is written to ``output_path`` and then closed, so nothing is
    rendered inline by this function.

    Args:
        history: Object whose ``history`` attribute maps the keys ``"loss"``,
            ``"val_loss"``, ``"accuracy"`` and ``"val_accuracy"`` to per-epoch
            values (here, the object returned by ``model.fit``).
        output_path: Where to save the figure.

    Raises:
        KeyError: If one of the four expected metric keys is missing.
    """
    metrics = history.history
    # (history key, axis label, panel title) for each subplot
    panels = (
        ("loss", "Loss", "Training and Validation Loss"),
        ("accuracy", "Accuracy", "Training and Validation Accuracy"),
    )

    fig, axes = plt.subplots(1, 2, figsize=(12, 4))
    try:
        for ax, (key, ylabel, title) in zip(axes, panels):
            ax.plot(metrics[key], label=f"Training {ylabel}")
            ax.plot(metrics[f"val_{key}"], label=f"Validation {ylabel}")
            ax.set_xlabel("Epoch")
            ax.set_ylabel(ylabel)
            ax.legend()
            ax.set_title(title)

        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)


# Plot and save metrics
plot_metrics(history)

![Loss and Accuracy](training_history.png)

In [None]:
# Evaluate the model on the validation set.
# Only the train and validation splits are used in this notebook, so the
# numbers below are validation metrics and are labelled as such (they were
# previously printed as "Test" metrics).
print("Evaluating the model...")
# The `test_*` variable names are kept in case other cells refer to them.
test_loss, test_accuracy = model.evaluate(valid_ds, verbose=2)
print(f"Validation Loss (evaluated model): {test_loss:.4f}")
print(f"Validation Accuracy (evaluated model): {test_accuracy:.4f}")

# Generate a report
history_metrics = history.history
val_losses = history_metrics["val_loss"]
# EarlyStopping is configured with restore_best_weights=True, so the evaluated
# model may correspond to the best epoch rather than the last one; report both.
best_val_loss = min(val_losses)
best_epoch = val_losses.index(best_val_loss) + 1  # epochs are 1-based in the training log

print("\n--- Model Report ---")
print("Architecture:")
model.summary()
print("\nTraining Results:")
print(f"Final Training Loss: {history_metrics['loss'][-1]:.4f}")
print(f"Final Training Accuracy: {history_metrics['accuracy'][-1]:.4f}")
print(f"Final Validation Loss: {val_losses[-1]:.4f}")
print(f"Final Validation Accuracy: {history_metrics['val_accuracy'][-1]:.4f}")
print(f"Best Validation Loss: {best_val_loss:.4f} (epoch {best_epoch})")
print(f"Validation Accuracy (evaluated model): {test_accuracy:.4f}")
print(f"\nTraining stopped after {len(history_metrics['loss'])} epochs")


Evaluating the model...
32/32 - 0s - 5ms/step - accuracy: 0.6840 - loss: 0.9362
Test Loss: 0.9362
Test Accuracy: 0.6840

--- Model Report ---
Architecture:



Training Results:
Final Training Loss: 0.0282
Final Training Accuracy: 0.9932
Final Validation Loss: 1.9474
Final Validation Accuracy: 0.6860
Test Accuracy: 0.6840

Training stopped after 7 epochs


In [None]:
# Clean up to free memory
try:
    del model
except NameError:
    # Already deleted (e.g. this cell was run twice); nothing left to release.
    pass

# Reset Keras' global state; this is useful whether or not a GPU is present.
tf.keras.backend.clear_session()
gc.collect()