In [5]:
# Install kagglehub
!pip install -q kagglehub

import kagglehub
import shutil
import os

# Download the dataset (downloads to a temp path by default)
dataset_path = kagglehub.dataset_download("pulavendranselvaraj/oasis-dataset")

# Define target path in /content
target_path = "/content/oasis_dataset"

# Copy downloaded files to /content
shutil.copytree(dataset_path, target_path, dirs_exist_ok=True)

print("Dataset saved to:", target_path)
print("Files:", os.listdir(target_path))


Dataset saved to: /content/oasis_dataset
Files: ['input']


In [6]:
import os

print("Inside /content/oasis_dataset:")
print(os.listdir("/content/oasis_dataset"))


Inside /content/oasis_dataset:
['input']


In [7]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Set image dimensions and batch size
img_size = (208, 176)
batch_size = 32

# Load datasets with 80/20 split
data_dir = "/content/oasis_dataset/input"

train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="training",
    seed=42,
    image_size=img_size,
    batch_size=batch_size,
    color_mode='grayscale',
    label_mode='int'
)

val_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    subset="validation",
    seed=42,
    image_size=img_size,
    batch_size=batch_size,
    color_mode='grayscale',
    label_mode='int'
)

Found 9488 files belonging to 4 classes.
Using 7591 files for training.
Found 9488 files belonging to 4 classes.
Using 1897 files for validation.


In [10]:
#Define preprocessing function
def preprocess_dataset(dataset, base_model):
    all_features = []
    all_labels = []

    for images, labels in dataset:
        #Convert grayscale images to RGB
        images = tf.image.grayscale_to_rgb(images)
        #Preprocess the images for VGG16
        images = preprocess_input(images)
        #Extract features
        features = base_model.predict(images)
        all_features.append(features)
        all_labels.append(labels.numpy())

    return np.vstack(all_features), np.concatenate(all_labels)

#Initialize VGG16 model for feature extraction
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(*img_size, 3), pooling='avg')

#Extract features and labels
train_features, train_labels = preprocess_dataset(train_dataset, base_model)
test_features, test_labels = preprocess_dataset(val_dataset, base_model)

#Flatten the labels if they're one-hot encoded
train_labels = np.argmax(train_labels, axis=1) if train_labels.ndim > 1 else train_labels
test_labels = np.argmax(test_labels, axis=1) if test_labels.ndim > 1 else test_labels

#Split the training data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(train_features, train_labels, test_size=0.2, random_state=42)

#Scale the feature data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
test_features = scaler.transform(test_features)



Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 0us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 224ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 171ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 171ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 167ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 167ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 164ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [11]:
#Train SVM classifier
svm = SVC(kernel='linear', C=1.0, random_state=42)
svm.fit(X_train, y_train)

#Validate the SVM classifier
val_predictions = svm.predict(X_val)
val_accuracy = accuracy_score(y_val, val_predictions)
print(f'Validation Accuracy: {val_accuracy:.4f}')

#Evaluate the SVM classifier on the test set
test_predictions = svm.predict(test_features)
test_accuracy = accuracy_score(test_labels, test_predictions)
print(f'Test Accuracy: {test_accuracy:.4f}')

Validation Accuracy: 0.9598
Test Accuracy: 0.9605


In [12]:
import pickle

# Save the trained SVM model to a file
with open('/content/svm_model.pkl', 'wb') as file:
    pickle.dump(svm, file)

print("SVM model saved as 'svm_model.pkl' in /content/")


SVM model saved as 'svm_model.pkl' in /content/
