In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import DenseNet121, ResNet50, InceptionV3
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
def load_dataset(data_dir, img_size=(224, 224), batch_size=32):
    datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
    train_gen = datagen.flow_from_directory(
        data_dir, target_size=img_size, batch_size=batch_size, subset='training', class_mode='categorical')
    val_gen = datagen.flow_from_directory(
        data_dir, target_size=img_size, batch_size=batch_size, subset='validation', class_mode='categorical')
    return train_gen, val_gen

In [3]:
def apply_pca(X_train, n_components=100):
    pca = PCA(n_components=n_components)
    X_train_pca = pca.fit_transform(X_train)
    return X_train_pca, pca

In [4]:
def create_model(base_model, n_classes):
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(n_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    return model

def load_models(n_classes):
    googlenet_base = InceptionV3(weights='imagenet', include_top=False)
    densenet_base = DenseNet121(weights='imagenet', include_top=False)
    resnet_base = ResNet50(weights='imagenet', include_top=False)

    googlenet = create_model(googlenet_base, n_classes)
    densenet = create_model(densenet_base, n_classes)
    resnet = create_model(resnet_base, n_classes)

    return googlenet, densenet, resnet

In [5]:
def train_model(model, train_gen, val_gen, epochs=15):
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    model.fit(train_gen, validation_data=val_gen, epochs=epochs)
    return model

In [6]:
def ensemble_predict(models, X_test):
    predictions = [model.predict(X_test) for model in models]
    avg_predictions = np.mean(predictions, axis=0)
    final_predictions = np.argmax(avg_predictions, axis=1)
    return final_predictions

In [7]:
def save_models(models, model_names):
    for model, name in zip(models, model_names):
        model.save(f'{name}.h5')

In [8]:
if __name__ == "__main__":
    data_dir = '/kaggle/input/whole-proper-dataset-for-freshness-detection/FRESHNESSDATASET(PROPER)'

    # Load and preprocess the dataset
    train_gen, val_gen = load_dataset(data_dir)
    n_classes = len(train_gen.class_indices)

    # Load models
    googlenet, densenet, resnet = load_models(n_classes)

    # Train models
    googlenet = train_model(googlenet, train_gen, val_gen)
    densenet = train_model(densenet, train_gen, val_gen)
    resnet = train_model(resnet, train_gen, val_gen)
    
    # Save models
    save_models([googlenet, densenet, resnet], ['googlenet', 'densenet', 'resnet'])

    # Test accuracy using ensemble

    X_test, y_test = next(val_gen)  # Correct way to fetch the next batch from DirectoryIterator
    y_pred = ensemble_predict([googlenet, densenet, resnet], X_test)
    accuracy = accuracy_score(np.argmax(y_test, axis=1), y_pred)

    print(f'Ensemble Test Accuracy: {accuracy * 100:.2f}%')

Found 14927 images belonging to 26 classes.
Found 3715 images belonging to 26 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m87910968/87910968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m29084464/29084464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/15


  self._warn_if_super_not_called()
I0000 00:00:1729561746.301579      77 service.cc:145] XLA service 0x7d76dc004af0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1729561746.301631      77 service.cc:153]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0


[1m  1/467[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m9:14:26[0m 71s/step - accuracy: 0.0312 - loss: 3.4448

I0000 00:00:1729561780.619554      77 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m111/467[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m2:53[0m 488ms/step - accuracy: 0.3955 - loss: 2.0923



[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m249s[0m 382ms/step - accuracy: 0.5453 - loss: 1.4435 - val_accuracy: 0.2398 - val_loss: 3.8621
Epoch 2/15
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 175ms/step - accuracy: 0.7693 - loss: 0.6335 - val_accuracy: 0.5728 - val_loss: 1.6158
Epoch 3/15
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 172ms/step - accuracy: 0.8341 - loss: 0.4659 - val_accuracy: 0.6587 - val_loss: 1.1823
Epoch 4/15
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 173ms/step - accuracy: 0.8447 - loss: 0.4191 - val_accuracy: 0.6721 - val_loss: 1.1135
Epoch 5/15
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 174ms/step - accuracy: 0.8606 - loss: 0.3667 - val_accuracy: 0.7052 - val_loss: 1.1629
Epoch 6/15
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 173ms/step - accuracy: 0.8815 - loss: 0.3220 - val_accuracy: 0.6778 - val_loss: 1.1952
Epoch 7/15
[1m467/




[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m168s[0m 241ms/step - accuracy: 0.5451 - loss: 1.4811 - val_accuracy: 0.0328 - val_loss: 8.2027
Epoch 2/15
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 185ms/step - accuracy: 0.8103 - loss: 0.5163 - val_accuracy: 0.4156 - val_loss: 2.1525
Epoch 3/15
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 186ms/step - accuracy: 0.8491 - loss: 0.4017 - val_accuracy: 0.6945 - val_loss: 0.8757
Epoch 4/15
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 187ms/step - accuracy: 0.8795 - loss: 0.3036 - val_accuracy: 0.6471 - val_loss: 1.1718
Epoch 5/15
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 184ms/step - accuracy: 0.8976 - loss: 0.2710 - val_accuracy: 0.6124 - val_loss: 1.4868
Epoch 6/15
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 186ms/step - accuracy: 0.9074 - loss: 0.2334 - val_accuracy: 0.6584 - val_loss: 1.2560
Epoch 7/15
[1m467/4

In [9]:
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, log_loss
import numpy as np


In [10]:
def evaluate_model(y_test, y_pred, num_classes):
    # Flatten the one-hot encoded test labels
    y_test_flat = np.argmax(y_test, axis=1)
    
    # Calculate metrics
    accuracy = accuracy_score(y_test_flat, y_pred)
    precision = precision_score(y_test_flat, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test_flat, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test_flat, y_pred, average='weighted', zero_division=0)

    # Ensure that y_pred is properly encoded as a one-hot matrix based on num_classes
    y_pred_one_hot = np.eye(num_classes)[y_pred]
    
    # Truncate y_test and y_pred_one_hot to the number of unique classes in the predictions
    truncated_y_test = y_test[:, :num_classes]  # Truncate y_test to match the number of classes in y_pred
    truncated_y_pred_one_hot = y_pred_one_hot[:, :num_classes]  # Truncate predictions if needed

    # Calculate ROC AUC and Log Loss if applicable
    try:
        roc_auc = roc_auc_score(truncated_y_test, truncated_y_pred_one_hot, multi_class='ovr', average='weighted')
        log_loss_value = log_loss(truncated_y_test, truncated_y_pred_one_hot)
    except ValueError:
        roc_auc = np.nan
        log_loss_value = np.nan
        print("ROC AUC and Log Loss cannot be calculated due to shape mismatch.")
      # Print metrics
    print(f"Accuracy: {accuracy * 100:.2f}%")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"ROC AUC: {roc_auc:.4f}")
    print(f"Log Loss: {log_loss_value:.4f}")
    
    return accuracy, precision, recall, f1, roc_auc, log_loss_value

In [11]:
# Example call
# Assuming X_test and y_test are the test data from the validation set
X_test, y_test = next(val_gen)  # Get the test data from validation generator

# Assuming y_pred is the predicted class indices from the ensemble model
y_pred = ensemble_predict([googlenet, densenet, resnet], X_test)

# Number of classes in the dataset
num_classes = len(train_gen.class_indices)

# Call the evaluate_model function
accuracy, precision, recall, f1, roc_auc, log_loss_value = evaluate_model(y_test, y_pred, num_classes)

# The function will print metrics and return the values

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
ROC AUC and Log Loss cannot be calculated due to shape mismatch.
Accuracy: 78.12%
Precision: 0.8073
Recall: 0.7812
F1 Score: 0.7812
ROC AUC: nan
Log Loss: nan
