In [None]:
import os
import zipfile
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt

In [None]:
# Unpack the dataset archive into a local working directory.
# `extraction_path` is reused further down as the image root directory.
zip_file = "resized_data (1).zip"
extraction_path = "./resized_data"

# The context manager closes the archive handle even if extraction fails.
with zipfile.ZipFile(zip_file) as archive:
    archive.extractall(path=extraction_path)

In [None]:
# Dataset location and training parameters shared by the cells below.
data_dir = extraction_path  # image root: the folder the archive was extracted into

# Images are resized to a square 224x224 target; the same values are used for
# the generators' target_size and the model's input_shape.
img_height = img_width = 224

batch_size = 32   # images per generator batch
num_classes = 4   # width of the softmax output layer
epochs = 10       # maximum epochs per fold (early stopping may end training sooner)

In [None]:
# Augmenting generator: MobileNetV2 input preprocessing plus random flips,
# zooms and small rotations.
# NOTE(review): validation_split is configured, but no `subset=` is requested
# in the visible cells, so it appears to have no effect - confirm before removing.
datagen = ImageDataGenerator(
    validation_split=0.2,
    rotation_range=10,
    zoom_range=0.2,
    horizontal_flip=True,
    preprocessing_function=preprocess_input,
)

# Fold splitter: k shuffled folds with a fixed seed so the split is repeatable.
k = 5
kf = KFold(n_splits=k, random_state=42, shuffle=True)

# Scan the dataset directory once. The resulting iterator's list of file paths
# is what gets split into folds below.
directory_flow_options = dict(
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True,
)
image_generator = datagen.flow_from_directory(data_dir, **directory_flow_options)

# NumPy array so the integer index arrays produced by KFold can select paths directly.
filepaths = np.asarray(image_generator.filepaths)

# K-fold cross-validation.
# For every fold: build train/validation generators, build a fresh
# MobileNetV2-based classifier, train it, then evaluate it on the held-out
# fold. Evaluation happens INSIDE the loop so that every fold contributes a
# score to `accuracy_per_fold` (and the fold counter advances per fold).

# Validation images must not be randomly flipped/zoomed/rotated, otherwise
# val_loss (used by early stopping) and the reported accuracy become noisy.
# This generator applies only the MobileNetV2 input preprocessing.
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# The class label of an image is the name of its parent directory.
all_labels = [os.path.basename(os.path.dirname(path)) for path in filepaths]

# Fix the class list once so every generator uses the same label -> index
# mapping, even if a fold happens to miss a class (KFold is not stratified).
class_names = sorted(set(all_labels))
if len(class_names) != num_classes:
    raise ValueError(
        f"Found {len(class_names)} classes {class_names} but num_classes={num_classes}"
    )

# Reset on every run of this cell so re-running does not accumulate stale scores.
accuracy_per_fold = []

for fold_no, (train_idx, val_idx) in enumerate(kf.split(filepaths), start=1):
    # Split file paths (and their labels) into training and validation sets
    train_df = pd.DataFrame({
        "filename": filepaths[train_idx],
        "class": [all_labels[i] for i in train_idx],
    })
    val_df = pd.DataFrame({
        "filename": filepaths[val_idx],
        "class": [all_labels[i] for i in val_idx],
    })

    # Training generator: augmented and shuffled
    train_gen = datagen.flow_from_dataframe(
        dataframe=train_df,
        x_col="filename",
        y_col="class",
        classes=class_names,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=True
    )
    # Validation generator: no augmentation, fixed order
    val_gen = val_datagen.flow_from_dataframe(
        dataframe=val_df,
        x_col="filename",
        y_col="class",
        classes=class_names,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False
    )

    # Define the model: ImageNet-pretrained MobileNetV2 backbone + small dense head.
    # A new model is built per fold so no weights leak between folds.
    base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(img_height, img_width, 3))
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    # Freeze base layers so only the new head is trained
    for layer in base_model.layers:
        layer.trainable = False

    # Compile the model
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Early stopping: stop after 3 epochs without val_loss improvement and
    # roll back to the best weights seen.
    early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

    # Train the model
    print(f"Training for fold {fold_no}...")
    history = model.fit(
        train_gen,
        validation_data=val_gen,
        epochs=epochs,
        callbacks=[early_stopping],
        verbose=1
    )

    # Evaluate this fold's model on its held-out data; scores = [loss, accuracy]
    scores = model.evaluate(val_gen, verbose=0)
    print(f"Fold {fold_no} - Accuracy: {scores[1]*100:.2f}%")
    accuracy_per_fold.append(scores[1])

In [None]:
# Report the mean of the accuracies collected in `accuracy_per_fold`.
print(f"Average Accuracy: {np.mean(accuracy_per_fold)*100:.2f}%")

# Learning curves for the most recent `history` object, i.e. the last fold trained.
fig, ax = plt.subplots(figsize=(10, 5))
curves = (
    ('accuracy', 'Training Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
)
for metric_key, curve_label in curves:
    ax.plot(history.history[metric_key], label=curve_label)
ax.set_title('Training and Validation Accuracy (Last Fold)')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()