In [98]:
import numpy as np
from pathlib import Path
import cv2
import time
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import classification_report
from tensorflow.keras import backend as K
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [100]:
# Define the path to the dataset
dataset_path = Path("D:/SVM project/Images")

# Set up ImageDataGenerators for training, validation, and testing
train_datagen = ImageDataGenerator(
    rescale=1./255,  # Normalize pixel values to [0, 1]
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

val_datagen = ImageDataGenerator(
    rescale=1./255  # Normalize pixel values to [0, 1]
)

# Define the parameters for the flow_from_directory method
batch_size = 12
img_height, img_width = 128, 128

# Create generators
train_generator = train_datagen.flow_from_directory(
    directory=dataset_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='sparse',  # Use 'sparse' for integer labels
    shuffle=True
)

# Split the training data into training and validation sets
# First, generate a small sample of data to estimate the validation split
train_samples = train_generator.samples
train_steps_per_epoch = np.ceil(train_samples / batch_size)

# Split training data for validation
split_ratio = 0.8
val_samples = int(train_samples * (1 - split_ratio))
train_samples = train_samples - val_samples

# Define validation generator
val_generator = val_datagen.flow_from_directory(
    directory=dataset_path,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='sparse',  # Use 'sparse' for integer labels
    shuffle=False
)

# Define test generator similarly
test_generator = val_datagen.flow_from_directory(
    directory=dataset_path,  # Use a separate test directory if you have one
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='sparse',  # Use 'sparse' for integer labels
    shuffle=False
)

print("Data Generators Setup Complete")

Found 20580 images belonging to 120 classes.
Found 20580 images belonging to 120 classes.
Found 20580 images belonging to 120 classes.
Data Generators Setup Complete


In [104]:
# Load a pre-trained VGG16 model for feature extraction
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(img_height, img_width, 3))
base_model.trainable = False

def extract_features(generator, model):
    features = []
    labels = []
    start_time = time.time()  # Start time for progress tracking
    
    # Determine the total number of batches if possible
    total_batches = len(generator) if hasattr(generator, 'n') else None
    
    while True:
        try:
            batch_images, batch_labels = next(generator)
            batch_features = model.predict(batch_images)
            batch_features = batch_features.reshape((batch_features.shape[0], -1))
            features.append(batch_features)
            labels.append(batch_labels)
            
            # Clear session to free memory
            K.clear_session()
            
            # Progress update
            if total_batches is not None:
                elapsed_time = time.time() - start_time
                estimated_time = (elapsed_time / (len(features))) * (total_batches - len(features))
                print(f"Processing batch {len(features)}/{total_batches} - Estimated time left: {estimated_time:.2f} seconds")
        
        except StopIteration:
            # End of generator
            break
    
    return np.concatenate(features), np.concatenate(labels)

# Extract features for training and validation sets
train_features, train_labels = extract_features(train_generator, base_model)
val_features, val_labels = extract_features(val_generator, base_model)

# Save features and labels for later use
np.save('D:/SVM project/train_features.npy', train_features)
np.save('D:/SVM project/train_labels.npy', train_labels)
np.save('D:/SVM project/val_features.npy', val_features)
np.save('D:/SVM project/val_labels.npy', val_labels)

print("Feature extraction complete and data saved.")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 769ms/step
Processing batch 1/1715 - Estimated time left: 2473.52 seconds
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 643ms/step
Processing batch 2/1715 - Estimated time left: 2096.19 seconds
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 598ms/step
Processing batch 3/1715 - Estimated time left: 1933.79 seconds
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 614ms/step
Processing batch 4/1715 - Estimated time left: 1865.49 seconds
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 593ms/step
Processing batch 5/1715 - Estimated time left: 1832.41 seconds
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 790ms/step
Processing batch 6/1715 - Estimated time left: 1874.82 seconds
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 754ms/step
Processing batch 7/1715 - Estimated time left: 1889.79 seconds
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

MemoryError: Unable to allocate 2.25 MiB for an array with shape (12, 128, 128, 3) and data type float32