## AlexNet

Mô hình AlexNet


| Lớp | Loại                      | Kích thước đầu vào | Kích thước đầu ra | Ghi chú              |
| --- | ------------------------- | ------------------ | ----------------- | -------------------- |
| 1   | Conv(11×11, 96, stride=4) | 227×227×3          | 55×55×96          | Đặc trưng thấp       |
| 2   | MaxPool(3×3, stride=2)    | 55×55×96           | 27×27×96          | Giảm kích thước      |
| 3   | Conv(5×5, 256, pad=2)     | 27×27×96           | 27×27×256         | Đặc trưng trung bình |
| 4   | MaxPool(3×3, stride=2)    | 27×27×256          | 13×13×256         |                      |
| 5   | Conv(3×3, 384, pad=1)     | 13×13×256          | 13×13×384         |                      |
| 6   | Conv(3×3, 384, pad=1)     | 13×13×384          | 13×13×384         |                      |
| 7   | Conv(3×3, 256, pad=1)     | 13×13×384          | 13×13×256         |                      |
| 8   | MaxPool(3×3, stride=2)    | 13×13×256          | 6×6×256           |                      |
| 9   | FC (4096)                 | 9216               | 4096              |                      |
| 10  | FC (4096)                 | 4096               | 4096              |                      |
| 11  | FC (1000)                 | 4096               | 1000              | Softmax output       |


In [1]:
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Dropout, BatchNormalization
# from tensorflow.keras.optimizers import Adam
# import tensorflow as tf

# def AlexNet(input_shape=(224, 224, 3), classes=1000, learning_rate=0.0001, drop_rate=0.5):
#     """
#     Optimized implementation of AlexNet architecture using Sequential API.

#     Arguments:
#         input_shape -- shape of the input images (default: (224, 224, 3))
#         classes     -- number of output classes (default: 1000)
#         learning_rate -- learning rate for Adam optimizer (default: 0.0001)
#         drop_rate   -- dropout rate for fully connected layers (default: 0.5)

#     Returns:
#         model       -- compiled Keras Sequential model
#     """
#     # Initialize Adam optimizer
#     adam = Adam(learning_rate=learning_rate)
    
#     # Create Sequential model
#     model = Sequential(name="AlexNet")
    
#     # Layer 1: Conv + MaxPool + BatchNorm
#     model.add(Conv2D(
#         filters=96,
#         kernel_size=(11, 11),
#         strides=(4, 4),
#         activation="relu",
#         padding="same",
#         kernel_initializer="he_normal",
#         input_shape=input_shape
#     ))
#     model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
#     model.add(BatchNormalization())
    
#     # Layer 2: Conv + MaxPool + BatchNorm
#     model.add(Conv2D(
#         filters=256,
#         kernel_size=(5, 5),
#         strides=(1, 1),
#         activation="relu",
#         padding="same",
#         kernel_initializer="he_normal"
#     ))
#     model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
#     model.add(BatchNormalization())
    
#     # Layer 3: Conv
#     model.add(Conv2D(
#         filters=384,
#         kernel_size=(3, 3),
#         strides=(1, 1),
#         activation="relu",
#         padding="same",
#         kernel_initializer="he_normal"
#     ))
    
#     # Layer 4: Conv
#     model.add(Conv2D(
#         filters=384,
#         kernel_size=(3, 3),
#         strides=(1, 1),
#         activation="relu",
#         padding="same",
#         kernel_initializer="he_normal"
#     ))
    
#     # Layer 5: Conv + MaxPool + BatchNorm
#     model.add(Conv2D(
#         filters=256,
#         kernel_size=(3, 3),
#         strides=(1, 1),
#         activation="relu",
#         padding="same",
#         kernel_initializer="he_normal"
#     ))
#     model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
#     model.add(BatchNormalization())
    
#     # Fully connected layers
#     model.add(Flatten())
#     model.add(Dense(4096, activation="relu", kernel_initializer="he_normal"))
#     model.add(Dropout(drop_rate))
#     model.add(Dense(4096, activation="relu", kernel_initializer="he_normal"))
#     model.add(Dropout(drop_rate))
#     model.add(Dense(classes, activation="softmax"))
    
#     # Compile model
#     model.compile(
#         loss="categorical_crossentropy",
#         optimizer=adam,
#         metrics=["accuracy"]
#     )
    
#     return model



In [2]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.datasets import cifar10
import numpy as np

def AlexNet(input_shape=(32, 32, 3), classes=10, learning_rate=0.0001, drop_rate=0.5):
    """
    Build and compile a reduced AlexNet-style CNN (with BatchNormalization) for small images.

    The network stacks five stride-1, "same"-padded ReLU convolutions; the first,
    second and fifth are each followed by a 2x2/stride-2 max-pool and a
    BatchNormalization layer. Two 1024-unit dense layers with dropout and a
    softmax classifier follow the flattened feature map.

    Arguments:
        input_shape -- shape of the input images (default: (32, 32, 3) for CIFAR-10)
        classes     -- number of output classes (default: 10 for CIFAR-10)
        learning_rate -- learning rate for Adam optimizer (default: 0.0001)
        drop_rate   -- dropout rate for fully connected layers (default: 0.5)

    Returns:
        model       -- compiled Keras Sequential model (categorical cross-entropy
                       loss, Adam optimizer, accuracy metric)
    """
    # One entry per convolution stage:
    # (filters, kernel_size, followed by MaxPool + BatchNorm?)
    conv_stages = (
        (64, (5, 5), True),
        (128, (3, 3), True),
        (192, (3, 3), False),
        (192, (3, 3), False),
        (128, (3, 3), True),
    )

    model = Sequential(name="AlexNet_CIFAR10")

    # Declare the input with an explicit Input object instead of passing
    # `input_shape=` to the first Conv2D; the latter is the discouraged
    # pattern for Sequential models and triggers a UserWarning in recent Keras.
    model.add(tf.keras.Input(shape=input_shape))

    # Feature extractor
    for filters, kernel_size, downsample in conv_stages:
        model.add(Conv2D(
            filters=filters, kernel_size=kernel_size, strides=(1, 1),
            activation="relu", padding="same", kernel_initializer="he_normal"
        ))
        if downsample:
            model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
            model.add(BatchNormalization())

    # Fully connected classifier head
    model.add(Flatten())
    for _ in range(2):
        model.add(Dense(1024, activation="relu", kernel_initializer="he_normal"))
        model.add(Dropout(drop_rate))
    model.add(Dense(classes, activation="softmax"))

    # Softmax output + categorical cross-entropy: labels must be one-hot encoded.
    model.compile(
        loss="categorical_crossentropy",
        optimizer=Adam(learning_rate=learning_rate),
        metrics=["accuracy"]
    )

    return model

2025-10-08 05:23:30.063737: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1759901010.231800      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1759901010.287738      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


---
## Áp dụng mô hình

### Chuẩn bị dữ liệu

In [3]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.datasets import cifar10
import numpy as np

In [4]:
# Load CIFAR-10 as (train, test) pairs of image and integer-label arrays.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Cast to float32 and scale pixel values to [0, 1].
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

# One-hot encode the labels (10 classes) to match the model's
# categorical cross-entropy loss.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)

# On-the-fly augmentation for the training stream: small rotations and
# shifts plus horizontal flips; pixels exposed by a transform are filled
# with the nearest edge value.
datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)
# NOTE: `datagen.fit(x_train)` is intentionally not called. It is only
# required when featurewise_center, featurewise_std_normalization or
# zca_whitening is enabled; none are, so it would only add an extra
# pass over the training array without affecting the generated batches.

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 0us/step


### Train mô hình

In [5]:
# Build the compiled CIFAR-10 network and print its layer-by-layer summary.
model = AlexNet(
    input_shape=(32, 32, 3),
    classes=10,
    learning_rate=0.0001,
    drop_rate=0.5,
)
model.summary()

I0000 00:00:1759901056.370241      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1759901056.370905      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [6]:
# Training hyperparameters
batch_size = 64
epochs = 50

# Training callbacks:
#   - stop when val_loss has not improved for 10 epochs and restore the best weights
#   - keep on disk only the checkpoint with the best val_accuracy
#   - multiply the learning rate by 0.2 after 5 epochs without val_loss
#     improvement, never going below 1e-6
callbacks = [
    EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True,
    ),
    ModelCheckpoint(
        'alexnet_cifar10_best.h5',
        monitor='val_accuracy',
        save_best_only=True,
    ),
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=5,
        min_lr=1e-6,
    ),
]

# Fit on augmented training batches.
# NOTE(review): the test split is used as validation data here, so early
# stopping / checkpoint selection see the test set — confirm this is intended.
history = model.fit(
    datagen.flow(x_train, y_train, batch_size=batch_size),
    validation_data=(x_test, y_test),
    epochs=epochs,
    callbacks=callbacks,
    verbose=1,
)


Epoch 1/50


  self._warn_if_super_not_called()
I0000 00:00:1759901062.365754      59 service.cc:148] XLA service 0x7e46800093b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1759901062.366479      59 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1759901062.366583      59 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1759901062.880744      59 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m  5/782[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m24s[0m 32ms/step - accuracy: 0.0911 - loss: 4.4701

I0000 00:00:1759901067.508173      59 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 41ms/step - accuracy: 0.2540 - loss: 2.4621 - val_accuracy: 0.4722 - val_loss: 1.4598 - learning_rate: 1.0000e-04
Epoch 2/50
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 34ms/step - accuracy: 0.4225 - loss: 1.5972 - val_accuracy: 0.5434 - val_loss: 1.2807 - learning_rate: 1.0000e-04
Epoch 3/50
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 34ms/step - accuracy: 0.4816 - loss: 1.4359 - val_accuracy: 0.5328 - val_loss: 1.3357 - learning_rate: 1.0000e-04
Epoch 4/50
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 34ms/step - accuracy: 0.5204 - loss: 1.3310 - val_accuracy: 0.5455 - val_loss: 1.3169 - learning_rate: 1.0000e-04
Epoch 5/50
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 34ms/step - accuracy: 0.5563 - loss: 1.2418 - val_accuracy: 0.6167 - val_loss: 1.0975 - learning_rate: 1.0000e-04
Epoch 6/50
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━