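# Training using the Keras Subclassing API
A `keras.Model` subclass that stores metadata (the class names) in its config, so the metadata is saved and restored together with the model.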

a) Training

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications.xception import Xception, preprocess_input
from tensorflow.keras.utils import (
    image_dataset_from_directory,
    img_to_array,
    load_img,
    register_keras_serializable,
)

2024-12-24 15:33:26.063240: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1735072406.092373   32711 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1735072406.103218   32711 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-24 15:33:26.130993: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
tf.__version__

'2.18.0'

In [3]:
# Incrementally use GPU memory as needed instead of allocating all at once.
# Every detected GPU is configured (TensorFlow expects the same setting on all
# of them), and a machine without a GPU simply skips the loop instead of
# failing on `gpus[0]`.
gpus = tf.config.list_physical_devices("GPU")
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [4]:
@register_keras_serializable(package="Custom")
class CustomModel(keras.Model):
    """Xception-based image classifier that carries user metadata in its config.

    A frozen, ImageNet-initialised Xception backbone (without its top) feeds a
    global-average-pooling layer, a ReLU dense layer, dropout and a final dense
    layer that returns raw logits (no softmax is applied).

    Args:
        input_size: Height and width of the square, 3-channel input images.
        num_classes: Number of units in the output layer.
        size_inner: Number of units in the hidden dense layer.
        droprate: Dropout rate applied after the hidden dense layer.
        metadata: Optional user data (for example ``{"class_names": [...]}``).
            It is stored as-is and returned by ``get_config``.
            NOTE(review): presumably it must be serializable by Keras for
            saving to work -- confirm before storing non-plain objects.
        **kwargs: Forwarded unchanged to ``keras.Model``.
    """

    def __init__(
        self,
        input_size,
        num_classes,
        size_inner=100,
        droprate=0.5,
        metadata=None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can reproduce them.
        self.input_size = input_size
        self.num_classes = num_classes
        self.size_inner = size_inner
        self.droprate = droprate
        self.metadata = metadata  # Metadata for saving class names and input size

        # Base Model (Xception), used as a frozen feature extractor
        self.base_model = Xception(
            weights="imagenet",
            include_top=False,
            input_shape=(input_size, input_size, 3),
        )
        self.base_model.trainable = False

        # Layers for the custom classification head
        self.global_pool = layers.GlobalAveragePooling2D()
        self.inner_dense = layers.Dense(size_inner, activation="relu")
        self.dropout = layers.Dropout(droprate)
        self.output_layer = layers.Dense(num_classes)

    def call(self, inputs, training=False):
        """Run the forward pass and return the output layer's logits.

        Args:
            inputs: Batch of images passed to the Xception backbone.
            training: Forwarded to the dropout layer only.
        """
        # The backbone is always called with training=False, whatever the
        # caller passes, so it runs in inference mode during fit() as well.
        x = self.base_model(inputs, training=False)
        x = self.global_pool(x)
        x = self.inner_dense(x)
        x = self.dropout(x, training=training)
        return self.output_layer(x)

    def get_config(self):
        """Return the base config extended with the constructor arguments."""
        # Include metadata in the configuration
        config = super().get_config()
        config.update(
            {
                "input_size": self.input_size,
                "num_classes": self.num_classes,
                "size_inner": self.size_inner,
                "droprate": self.droprate,
                "metadata": self.metadata,
            }
        )
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild a model from a dict produced by ``get_config``.

        The passed ``config`` is left unmodified. ``metadata`` is an ordinary
        keyword argument of ``__init__`` that defaults to ``None``, so it does
        not need to be popped out of the dict first.
        """
        return cls(**config)

In [5]:
def make_model(
    input_size, learning_rate, class_names, metadata, size_inner=100, droprate=0.5
):
    """Build and compile a ``CustomModel``.

    Args:
        input_size: Height and width of the square input images.
        learning_rate: Learning rate given to the Adam optimizer.
        class_names: Sequence of class names; only its length is used here, to
            size the output layer.
        metadata: Passed through to ``CustomModel`` unchanged.
        size_inner: Units in the hidden dense layer.
        droprate: Dropout rate.

    Returns:
        The compiled model (Adam, categorical cross-entropy on logits,
        ``"accuracy"`` metric).
    """
    classifier = CustomModel(
        input_size=input_size,
        num_classes=len(class_names),
        size_inner=size_inner,
        droprate=droprate,
        metadata=metadata,
    )
    # from_logits=True because the model's output layer has no activation.
    classifier.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss=keras.losses.CategoricalCrossentropy(from_logits=True),
        metrics=["accuracy"],
    )
    return classifier

In [6]:
# Best parameters: hidden-layer size, dropout rate and learning rate
best_size, best_droprate, best_lr = 100, 0.2, 0.001
# Side length (in pixels) of the square images fed to the model
input_size = 299

In [7]:
# Data augmentation pipeline, built from Keras preprocessing layers.
# The comments relate each layer to the ImageDataGenerator option it replaces.
data_augmentation = keras.Sequential(
    layers=[
        # Equivalent to vertical_flip=True
        layers.RandomFlip(mode="vertical"),
        # Approximation of shear_range (a small random rotation, not a true shear)
        layers.RandomRotation(factor=0.01),
        # Equivalent to zoom_range=0.1
        layers.RandomZoom(height_factor=0.1),
    ]
)

I0000 00:00:1735072414.445530   32711 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 119 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:02:00.0, compute capability: 6.1


In [9]:
def _prepare_train_batch(images, labels):
    """Apply Xception preprocessing, then the augmentation layers, to a batch."""
    return data_augmentation(preprocess_input(images)), labels


def _prepare_eval_batch(images, labels):
    """Apply Xception preprocessing only (no augmentation) to a batch."""
    return preprocess_input(images), labels


# Training split: one-hot labels, batches of 32
train_ds = image_dataset_from_directory(
    directory="../data/clothing-dataset-small/train",
    label_mode="categorical",
    batch_size=32,
    image_size=(input_size, input_size),
)

# Read the class names before .map(): they are taken from the dataset object
# returned by image_dataset_from_directory, not from the mapped dataset.
class_names = train_ds.class_names
metadata = {"class_names": class_names}

train_ds = train_ds.map(
    _prepare_train_batch,
    num_parallel_calls=tf.data.AUTOTUNE,
)

# Validation split: same settings, but unshuffled and without augmentation
val_ds = image_dataset_from_directory(
    directory="../data/clothing-dataset-small/validation",
    label_mode="categorical",
    batch_size=32,
    image_size=(input_size, input_size),
    shuffle=False,
)

val_ds = val_ds.map(
    _prepare_eval_batch,
    num_parallel_calls=tf.data.AUTOTUNE,
)

Found 3068 files belonging to 10 classes.
Found 341 files belonging to 10 classes.


In [9]:
# Prefetch for performance: let tf.data choose the buffer size for both splits
train_ds, val_ds = (
    split.prefetch(buffer_size=tf.data.AUTOTUNE) for split in (train_ds, val_ds)
)

In [10]:
# Save a .keras file only when validation accuracy improves; the epoch number
# and the validation accuracy are formatted into the file name.
checkpoint = keras.callbacks.ModelCheckpoint(
    filepath="xception_final_{epoch:02d}_{val_accuracy:.3f}.keras",
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
)

In [None]:
# Build the model with the selected hyperparameters and train it, writing a
# checkpoint through the `checkpoint` callback.
model = make_model(
    input_size=input_size,
    learning_rate=best_lr,
    class_names=class_names,
    metadata=metadata,
    size_inner=best_size,
    droprate=best_droprate,
)
history = model.fit(
    x=train_ds,
    validation_data=val_ds,
    epochs=50,
    callbacks=[checkpoint],
    verbose=1,
)

Epoch 1/50


I0000 00:00:1735058092.550747   19213 service.cc:148] XLA service 0x77e45443b440 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1735058092.550778   19213 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce GTX 1080 Ti, Compute Capability 6.1
2024-12-24 11:34:52.863898: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1735058093.708177   19213 cuda_dnn.cc:529] Loaded cuDNN version 90300
2024-12-24 11:34:56.069448: W external/local_xla/xla/tsl/framework/bfc_allocator.cc:306] Allocator (GPU_0_bfc) ran out of memory trying to allocate 10.09GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2024-12-24 11:34:56.887660: W external/local_xla/xla/tsl/framework/bfc_allocator.cc:378] Garbage collection: deall

[1m 1/96[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m23:54[0m 15s/step - accuracy: 0.1562 - loss: 2.2959

I0000 00:00:1735058101.726702   19213 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m95/96[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 136ms/step - accuracy: 0.5614 - loss: 1.3392

2024-12-24 11:35:18.880615: W external/local_xla/xla/tsl/framework/bfc_allocator.cc:306] Allocator (GPU_0_bfc) ran out of memory trying to allocate 9.84GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
E0000 00:00:1735058119.725974   19209 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1735058119.954156   19209 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1735058121.288767   19209 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1735058121.553803   19209 gpu_timer.cc:82] Delay kernel timed out: measured tim

[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 262ms/step - accuracy: 0.5625 - loss: 1.3359

2024-12-24 11:35:32.346825: W external/local_xla/xla/tsl/framework/bfc_allocator.cc:306] Allocator (GPU_0_bfc) ran out of memory trying to allocate 9.40GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
E0000 00:00:1735058132.825025   19212 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1735058133.050873   19212 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1735058133.704786   19212 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1735058133.966677   19212 gpu_timer.cc:82] Delay kernel timed out: measured tim

[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 377ms/step - accuracy: 0.5636 - loss: 1.3328 - val_accuracy: 0.8270 - val_loss: 0.5355
Epoch 2/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 145ms/step - accuracy: 0.7812 - loss: 0.6670 - val_accuracy: 0.8446 - val_loss: 0.5066
Epoch 3/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 144ms/step - accuracy: 0.8110 - loss: 0.5693 - val_accuracy: 0.8534 - val_loss: 0.4379
Epoch 4/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 142ms/step - accuracy: 0.8251 - loss: 0.5136 - val_accuracy: 0.8446 - val_loss: 0.4294
Epoch 5/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 143ms/step - accuracy: 0.8238 - loss: 0.5128 - val_accuracy: 0.8504 - val_loss: 0.4419
Epoch 6/50
[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 142ms/step - accuracy: 0.8705 - loss: 0.4282 - val_accuracy: 0.8416 - val_loss: 0.4255
Epoch 7/50
[1m96/96[0m [32m━━━

# Using the model

In [None]:
# Restore a checkpoint written during training. The file name encodes the epoch
# and validation accuracy of one particular run, so adjust it to a file that
# exists locally.
model = keras.models.load_model(filepath="xception_final_14_0.871.keras")

In [None]:
# Recover the class names and the input size from the restored model itself,
# rather than from variables left over from training.
class_names = model.metadata["class_names"]
input_size = model.input_size

a) Test dataset evaluation

In [None]:
# Load the test split. The path uses the same "../data" root as the train and
# validation splits, so all three resolve from the same working directory.
test_ds = image_dataset_from_directory(
    "../data/clothing-dataset-small/test",
    image_size=(input_size, input_size),
    batch_size=32,
    label_mode="categorical",
    shuffle=False,
)

# Apply the same Xception preprocessing as the validation split (no augmentation)
test_ds = test_ds.map(
    lambda x, y: (preprocess_input(x), y),
    num_parallel_calls=tf.data.AUTOTUNE,
)

In [None]:
# Evaluate on the test split; the result is unpacked as (loss, accuracy).
test_metrics = model.evaluate(test_ds)
loss, accuracy = test_metrics

print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

b) Single-image prediction

In [None]:
# Load one test image at the model's input size. The path uses the same
# "../data" root as the training and validation datasets.
path = "../data/clothing-dataset-small/test/shoes/0dd87e47-ca85-4d5c-9fd1-59f5a01eb656.jpg"
img = load_img(path, target_size=(input_size, input_size))
x = img_to_array(img)
# Add a leading batch axis so the single image forms a batch of one
X = tf.expand_dims(x, 0)
X.shape

In [None]:
# Display the loaded image as the cell output
img

In [None]:
# Preprocess into a new variable and leave the raw batch `X` untouched, so that
# re-running this cell does not apply preprocess_input a second time.
X_preprocessed = preprocess_input(X)
pred = model.predict(X_preprocessed)

In [None]:
# Index of the largest score for the single image, mapped to its class name
predicted_idx = int(tf.argmax(pred[0]))
predicted_class = class_names[predicted_idx]
predicted_class

In [None]:
# Pair every class name with the model's output score for the image
{name: score for name, score in zip(class_names, pred[0])}