# Cardboard Box Defect Detection Using Ensemble Stacking

In [29]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
from PIL import Image
from skimage import io, color, feature, filters
from skimage.feature import graycomatrix, graycoprops
from skimage import data
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import KFold
import tensorflow as tf
import tensorflow_addons as tfa
import os

In [2]:
print(tf.test.is_gpu_available())

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
True


In [3]:
os.chdir("../")

### A. Data Preparation

In [4]:
CLASSES = {
    "carton box": "0",
    "opened carton box": "1",
    "wet carton box": "2",
    "cracked carton box": "3"
}

In [5]:
def assign_label(row, CLASSES):
    for class_name, label in CLASSES.items():
        if row[class_name]:
            return label 
        
    return None

In [6]:
def load_dataset(split, BASE_DIR = "data", FILE_NAME = "_classes.csv", CLASSES = CLASSES):
    '''
    Load the dataset from the directory, remove multi-class, and return a DataFrame with filenames and labels.
    '''
    csv_path = os.path.join(BASE_DIR, split, FILE_NAME)

    df = pd.read_csv(csv_path)

    df["is_multiple_class"] = (df["carton box"] + df["cracked carton box"] + df["opened carton box"] + df["wet carton box"]) > 1
    
    df = df[~df["is_multiple_class"]]

    df["filename"] = df["filename"].apply(lambda x: os.path.join(BASE_DIR, split, x))
    df["label"] = df.apply(assign_label, axis = 1, args = (CLASSES, )) 

    return df[["filename", "label"]]

In [7]:
train_df = load_dataset("train")
valid_df = load_dataset("valid")
test_df = load_dataset("test")

In [8]:
train_category_count = train_df["label"].value_counts().reset_index()
train_category_count.columns = ["label", "count"]
train_category_count["label"] = train_category_count["label"].map({v: k for k, v in CLASSES.items()})
train_category_count.sort_values(by = "count", ascending = False, inplace = True)
train_category_count

Unnamed: 0,label,count
0,carton box,1254
1,cracked carton box,984
2,opened carton box,312
3,wet carton box,270


In [9]:
valid_category_count = valid_df["label"].value_counts().reset_index()
valid_category_count.columns = ["label", "count"]
valid_category_count["label"] = valid_category_count["label"].map({v: k for k, v in CLASSES.items()})
valid_category_count.sort_values(by = "count", ascending = False, inplace = True)
valid_category_count

Unnamed: 0,label,count
0,carton box,113
1,cracked carton box,98
2,opened carton box,32
3,wet carton box,22


In [10]:
test_category_count = test_df["label"].value_counts().reset_index()
test_category_count.columns = ["label", "count"]
test_category_count["label"] = test_category_count["label"].map({v: k for k, v in CLASSES.items()})
test_category_count.sort_values(by = "count", ascending = False, inplace = True)
test_category_count

Unnamed: 0,label,count
0,carton box,67
1,cracked carton box,55
2,wet carton box,6
3,opened carton box,4


### B. Data Preprocessing

##### 1. Combining Validation and Testing Dataset

Since testing dataset have very low amount of wet carton box and opened carton box class, the validation and testing dataset will be combined first to increase accuracy stability. 

In [11]:
test_df = pd.concat([valid_df, test_df])

In [12]:
test_category_count = test_df["label"].value_counts().reset_index()
test_category_count.columns = ["label", "count"]
test_category_count["label"] = test_category_count["label"].map({v: k for k, v in CLASSES.items()})
test_category_count.sort_values(by = "count", ascending = False, inplace = True)
test_category_count

Unnamed: 0,label,count
0,carton box,180
1,cracked carton box,153
2,opened carton box,36
3,wet carton box,28


##### 2. Image Augmentation

In [13]:
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale = 1./255,
    rotation_range = 30,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    horizontal_flip = True,
    vertical_flip = True,
    brightness_range = [0.4, 1.5],
    zoom_range = 0.3,
    shear_range = 0.2,
    fill_mode = "nearest",
    validation_split = 0.2
)

In [14]:
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale = 1./255
)

### C. Exploratory Data Analysis (EDA)

##### 1. RGB  Analysis

In [15]:
def calculate_rgb_stats(df):
    '''
    Calculate the mean, standard deviation, and coefficeint of variation (CV) for RGB channels of images in the DataFrame.
    Returns a DataFrame with these statistics for each label.
    '''
    rgb_dict = {}

    for label in df["label"].unique():
        subset = df[df["label"] == label]
        r_vals, g_vals, b_vals = [], [], []

        for file in subset["filename"]:
            img = Image.open(file).convert("RGB")
            img_np = np.array(img)
            
            r_vals.append(np.mean(img_np[:, :, 0]))
            g_vals.append(np.mean(img_np[:, :, 1]))
            b_vals.append(np.mean(img_np[:, :, 2]))

        rgb_dict[label] = {
            "R_mean": np.mean(r_vals),
            "G_mean": np.mean(g_vals),
            "B_mean": np.mean(b_vals),
            "R_std": np.std(r_vals),
            "G_std": np.std(g_vals),
            "B_std": np.std(b_vals),
            "R_CV": np.std(r_vals) / np.mean(r_vals) if np.mean(r_vals) != 0 else 0,
            "G_CV": np.std(g_vals) / np.mean(g_vals) if np.mean(g_vals) != 0 else 0,
            "B_CV": np.std(b_vals) / np.mean(b_vals) if np.mean(b_vals) != 0 else 0
        }

    return pd.DataFrame.from_dict(rgb_dict, orient="index")

In [16]:
all_df = pd.concat([train_df, valid_df, test_df])
rgb_results = calculate_rgb_stats(all_df)

In [19]:
label_to_name = {v: k for k, v in CLASSES.items()}
rgb_results["label"] = rgb_results.index.map(label_to_name)
rgb_results = rgb_results.reset_index(drop = True)

In [20]:
rgb_results

Unnamed: 0,R_mean,G_mean,B_mean,R_std,G_std,B_std,R_CV,G_CV,B_CV,label
0,207.10599,188.17914,169.185145,33.370517,34.808467,38.162431,0.161128,0.184975,0.225566,opened carton box
1,198.995598,181.990249,161.690462,40.992066,41.698371,43.404778,0.205995,0.229124,0.268444,wet carton box
2,176.772568,161.778089,146.701919,43.458303,41.705282,42.054956,0.245843,0.257793,0.286669,cracked carton box
3,214.551047,195.737669,175.546176,29.291017,32.448195,36.789169,0.136522,0.165774,0.20957,carton box


##### 2. Texture Analysis

In [31]:
def calculate_texture_features(df, distances=[1], angles=[0]):
    """
    Calculate GLCM texture features and edge density for each image class.
    Returns a DataFrame with mean texture stats for each label.
    """
    texture_dict = {}

    for label in df["label"].unique():
        subset = df[df["label"] == label]
        contrast_vals, homogeneity_vals, entropy_vals, edge_density_vals = [], [], [], []

        for file in subset["filename"]:
            # Load and convert to grayscale
            img = Image.open(file).convert("L")
            img_np = np.array(img)

            # --- 1. GLCM Features ---
            glcm = graycomatrix(img_np, 
                                distances=distances, 
                                angles=angles, 
                                levels=256, 
                                symmetric=True, 
                                normed=True)

            contrast_vals.append(graycoprops(glcm, 'contrast')[0, 0])
            homogeneity_vals.append(graycoprops(glcm, 'homogeneity')[0, 0])
            
            # Entropy calculation
            glcm_prob = glcm / np.sum(glcm)
            entropy = -np.sum(glcm_prob * np.log2(glcm_prob + 1e-10))
            entropy_vals.append(entropy)

            # --- 2. Edge Density ---
            edges = feature.canny(img_np, sigma=1.5)
            edge_density_vals.append(np.sum(edges) / edges.size)

        texture_dict[label] = {
            "GLCM_Contrast": np.mean(contrast_vals),
            "GLCM_Homogeneity": np.mean(homogeneity_vals),
            "GLCM_Entropy": np.mean(entropy_vals),
            "Edge_Density": np.mean(edge_density_vals)
        }

    return pd.DataFrame.from_dict(texture_dict, orient="index")


In [32]:
texture_result = calculate_texture_features(all_df)

In [33]:
texture_result["label"] = texture_result.index.map(label_to_name)
texture_result = texture_result.reset_index(drop = True)

In [34]:
texture_result

Unnamed: 0,GLCM_Contrast,GLCM_Homogeneity,GLCM_Entropy,Edge_Density,label
0,1205.705737,0.422274,9.671843,0.064789,opened carton box
1,1191.949486,0.374575,10.254663,0.076353,wet carton box
2,1126.399506,0.337308,11.229209,0.076896,cracked carton box
3,1199.602262,0.440705,9.242963,0.059439,carton box


### D. Modeling 

In [23]:
def split_data(train_datagen, train_df, test_datagen, test_df, SIZE = (224, 224)):
    train_set = train_datagen.flow_from_dataframe(
        train_df,
        x_col = "filename",
        y_col = "label",
        batch_size = 32,
        target_size = SIZE,
        shuffle = True,
        color_mode = "rgb",
        subset = "training"
    )   

    valid_set = train_datagen.flow_from_dataframe(
        train_df,
        x_col = "filename",
        y_col = "label",
        batch_size = 32,
        target_size = SIZE,
        shuffle = False,
        color_mode = "rgb",
        subset = "validation"
    )   

    test_set = test_datagen.flow_from_dataframe(
        test_df,
        x_col = "filename",
        y_col = "label",
        batch_size = 32,
        target_size = SIZE,
        shuffle = False,
        color_mode = "rgb"
    )

    return train_set, valid_set, test_set

In [24]:
train_set, valid_set, test_set = split_data(train_datagen, train_df, test_datagen, test_df, SIZE = (299, 299))

Found 2256 validated image filenames belonging to 4 classes.
Found 564 validated image filenames belonging to 4 classes.
Found 397 validated image filenames belonging to 4 classes.


##### 1. Baseline CNN 

In [25]:
cnn = tf.keras.models.Sequential()

# Convulational Layer 1
cnn.add(tf.keras.layers.Conv2D(
    filters = 32, 
    kernel_size = 3, 
    input_shape = [299, 299, 3], 
    kernel_regularizer = tf.keras.regularizers.l2(0.00005),
    padding = "same"
))
cnn.add(tf.keras.layers.BatchNormalization())
cnn.add(tf.keras.layers.ReLU())

# Pooling Layer 1
cnn.add(tf.keras.layers.MaxPool2D(
    pool_size = 2, 
    strides = 2
))

# Convulational Layer 2
cnn.add(tf.keras.layers.Conv2D(
    filters = 32, 
    kernel_size = 3, 
    kernel_regularizer = tf.keras.regularizers.l2(0.00005),
    padding = "same"
))
cnn.add(tf.keras.layers.BatchNormalization())
cnn.add(tf.keras.layers.ReLU())

# Pooling Layer 2
cnn.add(tf.keras.layers.MaxPool2D(
    pool_size = 2, 
    strides = 2
))

# Flatten Layer 
cnn.add(tf.keras.layers.Flatten())

# Fully Connected Layer 1
cnn.add(tf.keras.layers.Dense(
    units = 128,
    kernel_regularizer = tf.keras.regularizers.l2(0.00005)
))
cnn.add(tf.keras.layers.BatchNormalization())
cnn.add(tf.keras.layers.ReLU())

# Droput Layer
cnn.add(tf.keras.layers.Dropout(0.3))

# Output Layer
cnn.add(tf.keras.layers.Dense(
    units = 4, 
    activation = "softmax"
))

In [26]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor = "val_loss", 
    patience = 10, 
    restore_best_weights = True
)

In [27]:
cnn.compile(
    optimizer = tf.keras.optimizers.Adam(
        learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate = 1e-4,
            decay_steps = 1000,
            decay_rate = 0.95
        )
    ), 
    loss = "categorical_crossentropy", 
    metrics = [tfa.metrics.F1Score(num_classes = 4)]
    # metrics = [tf.keras.metrics.F1Score()]
)

In [28]:
cnn_history = cnn.fit(
    train_set, 
    validation_data = valid_set, 
    epochs = 100, 
    callbacks = [early_stopping]
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100


In [None]:
cnn.save("models/baseline_cnn.keras")

##### 2. ResNet-50

In [30]:
resnet50 = tf.keras.applications.resnet50.ResNet50(
    input_shape = (299, 299, 3),
    include_top = False,
    weights = "imagenet"
)

In [31]:
for layer in resnet50.layers:
    layer.trainable = False

In [32]:
resnet50_model = tf.keras.models.Sequential()

# Layers From ResNet50
resnet50_model.add(resnet50)

# Flatten Layer
resnet50_model.add(tf.keras.layers.Flatten())

# Fully Connected Layer
resnet50_model.add(tf.keras.layers.Dense(
    units = 128,
    kernel_regularizer = tf.keras.regularizers.l2(0.00005)
))
resnet50_model.add(tf.keras.layers.BatchNormalization())
resnet50_model.add(tf.keras.layers.ReLU())

# Dropout Layer
resnet50_model.add(tf.keras.layers.Dropout(0.3))

# Output Layer
resnet50_model.add(tf.keras.layers.Dense(
    units = 4, 
    activation = "softmax"
))

In [34]:
resnet50_model.compile(
    optimizer = tf.keras.optimizers.Adam(
        learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate = 1e-4,
            decay_steps = 1000,
            decay_rate = 0.95
        )
    ), 
    loss = "categorical_crossentropy", 
    metrics = [tfa.metrics.F1Score(num_classes = 4)]
)

In [37]:
resnet50_history = resnet50_model.fit(
    train_set, 
    validation_data = valid_set, 
    epochs = 100, 
    callbacks = [early_stopping]
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100


In [77]:
resnet50_model.save("models/resnet50.keras")

##### 3. Inception V3

In [39]:
inceptionv3 = tf.keras.applications.inception_v3.InceptionV3(
    input_shape = (299, 299, 3),
    include_top = False,
    weights = "imagenet"
)

In [40]:
for layer in inceptionv3.layers:
    layer.trainable = False

In [41]:
inceptionv3_model = tf.keras.models.Sequential()

# Layers From Inception V3
inceptionv3_model.add(inceptionv3)

# Flatten Layer
inceptionv3_model.add(tf.keras.layers.Flatten())

# Fully Connected Layer
inceptionv3_model.add(tf.keras.layers.Dense(
    units = 128,
    kernel_regularizer = tf.keras.regularizers.l2(0.00005)
))
inceptionv3_model.add(tf.keras.layers.BatchNormalization())
inceptionv3_model.add(tf.keras.layers.ReLU())

# Dropout Layer
inceptionv3_model.add(tf.keras.layers.Dropout(0.3))

# Output Layer
inceptionv3_model.add(tf.keras.layers.Dense(
    units = 4, 
    activation = "softmax"
))

In [42]:
inceptionv3_model.compile(
    optimizer = tf.keras.optimizers.Adam(
        learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate = 1e-4,
            decay_steps = 1000,
            decay_rate = 0.95
        )
    ), 
    loss = "categorical_crossentropy", 
    metrics = [tfa.metrics.F1Score(num_classes = 4)]
)

In [43]:
inceptionv3_history = inceptionv3_model.fit(
    train_set, 
    validation_data = valid_set, 
    epochs = 100, 
    callbacks = [early_stopping]
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100


In [83]:
inceptionv3_model.save("models/inceptionv3.h5")

##### 4. Ensemble Stacking

In [14]:
import numpy as np

In [15]:
baseline_cnn_model = tf.keras.models.load_model("models/baseline_cnn.keras")
resnet50_model = tf.keras.models.load_model("models/resnet50.keras")
inceptionv3_model = tf.keras.models.load_model("models/inceptionv3.keras")

a. Stacking Model 1

- Base Learner: ResNet-50
- Meta Learner: Inception V3

In [None]:
def create_stacking_model_1(resnet50_model, inceptionv3_model):
    # Step 1: Input for both models (same input shape)
    input_layer = tf.keras.layers.Input(shape = (299, 299, 3))

    # Step 2: Pass input through ResNet50 base model
    resnet50_output = resnet50_model(input_layer)

    # Step 3: Pass the same input through InceptionV3 model (meta-learner)
    inceptionv3_output = inceptionv3_model(input_layer)

    # Step 4: Flatten both outputs
    resnet50_output_flattened = tf.keras.layers.Flatten()(resnet50_output)
    inceptionv3_output_flattened = tf.keras.layers.Flatten()(inceptionv3_output)

    # Step 5: Concatenate the flattened outputs of both models
    combined_features = tf.keras.layers.Concatenate()([resnet50_output_flattened, inceptionv3_output_flattened])

    # Step 6: Final classification layer (Softmax for multi-class classification)
    final_output = tf.keras.layers.Dense(4, activation = "softmax")(combined_features)

    # Create the stacking model
    stacking_model = tf.keras.models.Model(inputs = input_layer, outputs = final_output)
    
    return stacking_model


In [51]:
stacking_model_1 = create_stacking_model_1(resnet50_model, inceptionv3_model)

In [52]:
stacking_model_1.compile(
    optimizer = tf.keras.optimizers.Adam(
        learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate = 1e-4,
            decay_steps = 1000,
            decay_rate = 0.95
        )
    ), 
    loss = "categorical_crossentropy", 
    metrics = [tfa.metrics.F1Score(num_classes = 4)]
)

In [53]:
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor = "val_loss", 
    patience = 10, 
    restore_best_weights = True
)

In [54]:
stacking_model_1_history = stacking_model_1.fit(
    train_set,
    validation_data = valid_set,
    epochs = 100,
    callbacks = [early_stopping]
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
stacking_model_1.save("models/stacking_model_1.h5")

b. Stacking Model-2

- Base Learner: Inception V3
- Meta Learner: ResNet-50

In [66]:
def create_stacking_model_2(resnet50_model, inceptionv3_model):
    # Step 1: Input for both models (same input shape)
    input_layer = tf.keras.layers.Input(shape = (299, 299, 3))

    # Step 2: Pass input through InceptionV3 base model
    inceptionv3_output = inceptionv3_model(input_layer)

    # Step 3: Pass the same input through ResNet50 model (meta-learner)
    resnet50_output = resnet50_model(input_layer)

    # Step 4: Flatten both outputs
    inceptionv3_output_flattened = tf.keras.layers.Flatten()(inceptionv3_output)
    resnet50_output_flattened = tf.keras.layers.Flatten()(resnet50_output)

    # Step 5: Concatenate the flattened outputs of both models
    combined_features = tf.keras.layers.Concatenate()([inceptionv3_output_flattened, resnet50_output_flattened])

    # Step 6: Final classification layer (Softmax for multi-class classification)
    final_output = tf.keras.layers.Dense(4, activation = "softmax")(combined_features)

    # Create the stacking model
    stacking_model = tf.keras.models.Model(inputs = input_layer, outputs = final_output)
    
    return stacking_model    

In [67]:
stacking_model_2 = create_stacking_model_2(resnet50_model, inceptionv3_model)

In [68]:
stacking_model_2.compile(
    optimizer = tf.keras.optimizers.Adam(
        learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(
            initial_learning_rate = 1e-4,
            decay_steps = 1000,
            decay_rate = 0.95
        )
    ), 
    loss = "categorical_crossentropy", 
    metrics = [tfa.metrics.F1Score(num_classes = 4)]
)

In [69]:
stacking_model_2_history = stacking_model_2.fit(
    train_set,
    validation_data = valid_set,
    epochs = 100,
    callbacks = [early_stopping]
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
stacking_model_2.save("models/stacking_model_2.keras")

### E. Model Evaluation

In [None]:
import numpy as np


In [61]:
y_true = test_set.classes
classes = ["carton box", "opened carton box", "wet carton box", "cracked carton box"]

In [62]:
y_true_one_hot = tf.keras.utils.to_categorical(y_true, num_classes = 4)

##### 1. Baseline CNN

In [48]:
y_prob_baseline_cnn = cnn.predict(test_set)



In [49]:
y_pred_baseline_cnn = np.argmax(y_prob_baseline_cnn, axis = 1)

In [50]:
print(classification_report(y_true, y_pred_baseline_cnn, target_names = classes))

                    precision    recall  f1-score   support

        carton box       0.74      0.91      0.82       180
 opened carton box       0.15      0.06      0.08        36
    wet carton box       0.52      0.54      0.53        28
cracked carton box       0.81      0.71      0.76       153

          accuracy                           0.73       397
         macro avg       0.56      0.55      0.55       397
      weighted avg       0.70      0.73      0.71       397



In [51]:
print(confusion_matrix(y_true, y_pred_baseline_cnn))

[[164   5   1  10]
 [ 24   2   0  10]
 [  7   1  15   5]
 [ 26   5  13 109]]


##### 2. ResNet-50

In [52]:
y_prob_resnet50 = resnet50_model.predict(test_set)



In [53]:
y_pred_resnet50 = np.argmax(y_prob_resnet50, axis = 1)

In [54]:
print(classification_report(y_true, y_pred_resnet50, target_names = classes))

                    precision    recall  f1-score   support

        carton box       0.72      0.83      0.77       180
 opened carton box       0.25      0.17      0.20        36
    wet carton box       0.29      0.07      0.11        28
cracked carton box       0.74      0.78      0.76       153

          accuracy                           0.70       397
         macro avg       0.50      0.46      0.46       397
      weighted avg       0.66      0.70      0.67       397



In [55]:
print(confusion_matrix(y_true, y_pred_resnet50))

[[149   8   3  20]
 [ 19   6   0  11]
 [ 14   2   2  10]
 [ 24   8   2 119]]


##### 3. Inception V3

In [56]:
y_prob_inceptionv3 = inceptionv3_model.predict(test_set)



In [57]:
y_pred_inceptionv3 = np.argmax(y_prob_inceptionv3, axis = 1)

In [58]:
print(classification_report(y_true, y_pred_inceptionv3, target_names = classes))

                    precision    recall  f1-score   support

        carton box       0.87      0.75      0.81       180
 opened carton box       0.33      0.47      0.39        36
    wet carton box       0.63      0.68      0.66        28
cracked carton box       0.78      0.82      0.80       153

          accuracy                           0.75       397
         macro avg       0.66      0.68      0.66       397
      weighted avg       0.77      0.75      0.76       397



In [59]:
print(confusion_matrix(y_true, y_pred_inceptionv3))

[[135  29   3  13]
 [  4  17   1  14]
 [  1   0  19   8]
 [ 15   5   7 126]]


##### 4. Ensemble Stacking

a. Stacking Model-1

In [57]:
y_prob_stacking_model_1 = stacking_model_1.predict(test_set)



In [58]:
y_pred_stacking_model_1 = np.argmax(y_prob_stacking_model_1, axis = 1)

In [63]:
print(classification_report(y_true, y_pred_stacking_model_1, target_names = classes))

                    precision    recall  f1-score   support

        carton box       0.86      0.88      0.87       180
 opened carton box       0.59      0.28      0.38        36
    wet carton box       0.75      0.64      0.69        28
cracked carton box       0.76      0.86      0.80       153

          accuracy                           0.80       397
         macro avg       0.74      0.66      0.69       397
      weighted avg       0.79      0.80      0.79       397



In [65]:
print(confusion_matrix(y_true, y_pred_stacking_model_1))

[[158   5   1  16]
 [  7  10   0  19]
 [  3   0  18   7]
 [ 15   2   5 131]]


b. Stacking Model-2

In [72]:
y_prob_stacking_model_2 = stacking_model_2.predict(test_set)



In [73]:
y_pred_stacking_model_2 = np.argmax(y_prob_stacking_model_2, axis = 1)

In [74]:
print(classification_report(y_true, y_pred_stacking_model_2, target_names = classes))

                    precision    recall  f1-score   support

        carton box       0.77      0.93      0.84       180
 opened carton box       0.00      0.00      0.00        36
    wet carton box       0.00      0.00      0.00        28
cracked carton box       0.73      0.87      0.80       153

          accuracy                           0.76       397
         macro avg       0.38      0.45      0.41       397
      weighted avg       0.63      0.76      0.69       397



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [75]:
print(confusion_matrix(y_true, y_pred_stacking_model_2))

[[167   0   0  13]
 [ 20   0   0  16]
 [  9   0   0  19]
 [ 20   0   0 133]]


### F. Conclusion

<table>
    <thead>
        <td> Model </td>
        <td> F1-Score </td>
        <td> Precision </td>
        <td> Recall </td>
        <td> Accuracy </td>
    </thead>
    <tbody>
        <tr>
            <td> Baseline CNN </td>
            <td> 71% </td>
            <td> 70% </td>
            <td> 73% </td>   
            <td> 73% </td>      
        </tr>
        <tr>
            <td> ResNet-50 </td>
            <td> 67% </td>
            <td> 66% </td>
            <td> 70% </td>   
            <td> 67% </td>      
        </tr>
        <tr>
            <td> Inception V3 </td>
            <td> 76% </td>
            <td> 77% </td>
            <td> 75% </td>   
            <td> 75% </td>      
        </tr> 
        <tr>
            <td> Stacking Model-1 </td>
            <td> 79% </td>
            <td> 79% </td>
            <td> 80% </td>   
            <td> 80% </td>      
        </tr>   
        <tr>
            <td> Stacking Model-2 </td>
            <td> 69% </td>
            <td> 63% </td>
            <td> 76% </td>   
            <td> 76% </td>      
        </tr>                  
    </tbody>
</table>