In [1]:
import cv2
import numpy as np
import os
from sklearn.cluster import KMeans
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Locate the dataset root and show which split folders it contains
folder_path = 'Bone_Fracture_Binary_Classification'
print("Folders in 'Bone_Fracture_Binary_Classification':", os.listdir(folder_path))

# One sub-directory per split, unpacked in train / val / test order
subfolder_path_train, subfolder_path_val, subfolder_path_test = (
    os.path.join(folder_path, split_name) for split_name in ('train', 'val', 'test')
)

# Show the class folders found inside each split
print("Files in 'train':", os.listdir(subfolder_path_train))
print("Files in 'val':", os.listdir(subfolder_path_val))
print("Files in 'test':", os.listdir(subfolder_path_test))


Folders in 'Bone_Fracture_Binary_Classification': ['test', 'train', 'val']
Files in 'train': ['fractured', 'not fractured']
Files in 'val': ['fractured', 'not fractured']
Files in 'test': ['fractured', 'not fractured']


In [3]:
#Preprocessing Functions
def preprocess_image(img):
    """Turn a decoded image into the 224x224 grayscale input used for SIFT.

    Pipeline: grayscale conversion -> resize to 224x224 -> CLAHE contrast
    enhancement -> 3x3 Gaussian blur.

    Parameters
    ----------
    img : numpy.ndarray
        Decoded image. The callers in this notebook pass the result of
        ``cv2.imread`` (3-channel BGR). Single-channel arrays (2-D or
        H x W x 1) and 4-channel BGRA arrays are accepted as well.

    Returns
    -------
    numpy.ndarray
        2-D array of shape (224, 224) holding the enhanced, blurred image.

    Raises
    ------
    ValueError
        If ``img`` is None, empty, or has an unsupported channel layout.
    """
    if img is None or getattr(img, "size", 0) == 0:
        raise ValueError("preprocess_image expects a non-empty image array")

    # Step 1: Convert to gray for SIFT (skip the conversion when the input
    # is already single-channel, which cvtColor(BGR2GRAY) would reject).
    if img.ndim == 2:
        gray = img
    elif img.ndim == 3 and img.shape[2] == 1:
        # Drop the trailing channel axis; copy so OpenCV gets contiguous data.
        gray = np.ascontiguousarray(img[:, :, 0])
    elif img.ndim == 3 and img.shape[2] == 3:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    elif img.ndim == 3 and img.shape[2] == 4:
        gray = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
    else:
        raise ValueError(f"Unsupported image shape: {img.shape}")

    # Step 2: Resize images to a fixed 224x224 grid
    resized = cv2.resize(gray, (224, 224))

    # Step 3: Image enhancement using CLAHE
    # (Contrast Limited Adaptive Histogram Equalization)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(resized)

    # Step 4: Use a Gaussian blur to remove noise
    denoised = cv2.GaussianBlur(enhanced, (3, 3), 0)

    return denoised

In [4]:
# Apply Preprocessing on data

def load_and_preprocess_images(folder_path):
    """Read and preprocess every image of both classes under ``folder_path``.

    Only files whose name ends in .jpg, .png or .jpeg (case-insensitive) are
    considered, and files that ``cv2.imread`` cannot decode are skipped.

    Returns a pair ``(images, labels)`` of NumPy arrays. The label is 1 for
    images from the 'fractured' folder and 0 for 'not fractured'.
    """
    allowed_suffixes = ('.jpg', '.png', '.jpeg')
    images, labels = [], []

    for class_name, label in (('fractured', 1), ('not fractured', 0)):
        class_dir = os.path.join(folder_path, class_name)

        for entry in os.listdir(class_dir):
            # Ignore anything that does not look like an image file
            if not entry.lower().endswith(allowed_suffixes):
                continue

            raw = cv2.imread(os.path.join(class_dir, entry))
            if raw is None:
                continue

            images.append(preprocess_image(raw))
            labels.append(label)

    return np.array(images), np.array(labels)

In [5]:
# Load and preprocess each split; the generator is consumed in train, val, test order
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    load_and_preprocess_images(os.path.join(folder_path, split_name))
    for split_name in ('train', 'val', 'test')
)

# Report array shapes as a quick sanity check
print(f"Train set: {X_train.shape}, Labels: {y_train.shape}")
print(f"Val set:   {X_val.shape}, Labels: {y_val.shape}")
print(f"Test set:  {X_test.shape}, Labels: {y_test.shape}")

Train set: (9243, 224, 224), Labels: (9243,)
Val set:   (829, 224, 224), Labels: (829,)
Test set:  (506, 224, 224), Labels: (506,)


In [6]:
# SIFT Feature Extraction

def extract_sift_features(folder_path):
    """Summarise SIFT output for every image below ``folder_path``.

    Each sub-directory of ``folder_path`` is treated as a label. For every
    readable image in it, the number of keypoints and the shape of the
    descriptor array are recorded; the descriptors themselves are not kept.

    Returns a nested dict of the form
    ``{label: {image_name: {"Keypoints Numbers": int, "Descriptors Shape": tuple}}}``.
    Entries that are not directories are reported and skipped. Images whose
    processing raises are reported and skipped.
    """
    detector = cv2.SIFT_create()
    summary = {}

    for label in os.listdir(folder_path):
        label_path = os.path.join(folder_path, label)

        if not os.path.isdir(label_path):
            print(f"Skipping {label_path}")
            continue

        per_image = summary[label] = {}

        for img_name in os.listdir(label_path):
            img_path = os.path.join(label_path, img_name)

            try:
                img = cv2.imread(img_path)
                # Unreadable files simply produce no entry
                if img is not None:
                    kps, desc = detector.detectAndCompute(preprocess_image(img), None)
                    # SIFT yields no descriptor array when nothing is detected
                    desc_shape = (0, 0) if desc is None else desc.shape
                    per_image[img_name] = {
                        "Keypoints Numbers": len(kps),
                        "Descriptors Shape": desc_shape,
                    }
            except Exception as e:
                # Best effort: report the failure and move on to the next image
                print(f"Failed to process image: {img_path} — {e}")

    print("SIFT feature extraction completed.")
    return summary


In [7]:
# Apply SIFT feature extraction to every split.
# Each split keeps its own summary so that one call does not overwrite the
# result of the previous one.
features_train = extract_sift_features("Bone_Fracture_Binary_Classification/train")
features_test = extract_sift_features("Bone_Fracture_Binary_Classification/test")
features_val = extract_sift_features("Bone_Fracture_Binary_Classification/val")

# Backward compatibility: `features` used to end up holding the summary of the
# last split processed (val), so keep that binding.
features = features_val

SIFT feature extraction completed.
SIFT feature extraction completed.
SIFT feature extraction completed.


In [8]:
def extract_all_sift_descriptors(folder_path):
    """Collect the SIFT descriptors of every image below ``folder_path``.

    Each sub-directory of ``folder_path`` is scanned; files that
    ``cv2.imread`` cannot decode and images without descriptors are skipped.

    Parameters
    ----------
    folder_path : str
        Directory whose sub-directories contain the images.

    Returns
    -------
    numpy.ndarray
        2-D array with one descriptor per row (all images stacked). When no
        descriptor is found the result has zero rows but still has the
        descriptor width as its second dimension.
    """
    sift = cv2.SIFT_create()
    per_image_descriptors = []

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order stable between runs, which matters for seeded clustering on the
    # stacked descriptors.
    for label in sorted(os.listdir(folder_path)):
        label_path = os.path.join(folder_path, label)

        if not os.path.isdir(label_path):
            continue

        for img_name in sorted(os.listdir(label_path)):
            img_path = os.path.join(label_path, img_name)
            img = cv2.imread(img_path)
            if img is None:
                continue

            preprocessed = preprocess_image(img)
            _, descriptors = sift.detectAndCompute(preprocessed, None)
            if descriptors is not None and len(descriptors) > 0:
                # Keep one array per image instead of one Python object per
                # descriptor row; they are stacked once at the end.
                per_image_descriptors.append(descriptors)

    if not per_image_descriptors:
        # Preserve the 2-D layout even when nothing was extracted
        # (SIFT descriptors are 32-bit floats).
        return np.empty((0, sift.descriptorSize()), dtype=np.float32)

    return np.vstack(per_image_descriptors)

# Extract descriptors from training images only; this array is what the
# KMeans visual vocabulary is fitted on in the following cell.
all_descriptors = extract_all_sift_descriptors(subfolder_path_train)
print("Total extracted descriptors:", all_descriptors.shape)


Total extracted descriptors: (1399195, 128)


In [None]:
# Vocabulary size: number of visual words (k-means clusters)
K = 100

# fit() returns the estimator itself, so construction and training are chained
kmeans = KMeans(n_clusters=K, random_state=42).fit(all_descriptors)

# Each cluster centre is one visual word
visual_words = kmeans.cluster_centers_
print("Visual vocabulary shape:", visual_words.shape)

Visual vocabulary shape: (100, 128)


In [None]:
def compute_bow_histogram(img, sift, kmeans):
    """Encode one image as a bag-of-visual-words histogram.

    Parameters
    ----------
    img : numpy.ndarray
        Decoded image; it is passed through ``preprocess_image`` first.
    sift : object
        Feature extractor providing ``detectAndCompute(image, mask)``.
    kmeans : object
        Fitted clustering model providing ``n_clusters`` and ``predict``.

    Returns
    -------
    numpy.ndarray
        1-D integer array of length ``kmeans.n_clusters``; entry ``i`` counts
        the descriptors assigned to visual word ``i``. All zeros when the
        image yields no descriptors.
    """
    # Take the vocabulary size from the model that is actually used, not from
    # a notebook-level global that may be out of sync with it.
    n_words = kmeans.n_clusters

    preprocessed = preprocess_image(img)
    _, descriptors = sift.detectAndCompute(preprocessed, None)

    if descriptors is None or len(descriptors) == 0:
        # Empty histogram, same integer dtype as np.bincount produces
        return np.zeros(n_words, dtype=np.intp)

    labels = kmeans.predict(descriptors)
    return np.bincount(labels, minlength=n_words)

In [11]:
def extract_bow_features(folder_path):
    """Build bag-of-visual-words features for both classes under ``folder_path``.

    Every file in the 'fractured' and 'not fractured' folders that
    ``cv2.imread`` can decode is turned into a histogram with
    ``compute_bow_histogram``. The clustering model is read from the
    notebook-level ``kmeans`` variable.

    Returns a pair ``(X, y)`` of NumPy arrays: one histogram per image and the
    matching labels (1 for 'fractured', 0 for 'not fractured').
    """
    detector = cv2.SIFT_create()
    histograms, targets = [], []

    for class_name, label in (('fractured', 1), ('not fractured', 0)):
        class_dir = os.path.join(folder_path, class_name)

        for img_name in os.listdir(class_dir):
            image = cv2.imread(os.path.join(class_dir, img_name))
            # Files that cannot be decoded contribute nothing
            if image is not None:
                histograms.append(compute_bow_histogram(image, detector, kmeans))
                targets.append(label)

    return np.array(histograms), np.array(targets)

In [12]:
# Convert images to BoW histograms (splits are processed in train, val, test order).
# Note: y_train / y_val / y_test are rebound here to the labels returned
# alongside the histograms.
(x_train, y_train), (x_val, y_val), (x_test, y_test) = map(
    extract_bow_features,
    (subfolder_path_train, subfolder_path_val, subfolder_path_test),
)

# Report feature-matrix shapes as a sanity check
print(f"BoW Train Set: {x_train.shape}, Labels: {y_train.shape}")
print(f"BoW Validation Set: {x_val.shape}, Labels: {y_val.shape}")
print(f"BoW Test Set: {x_test.shape}, Labels: {y_test.shape}")

BoW Train Set: (9243, 100), Labels: (9243,)
BoW Validation Set: (829, 100), Labels: (829,)
BoW Test Set: (506, 100), Labels: (506,)


In [13]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix

# Train a support vector classifier (library-default settings) on the BoW
# histograms; fit() returns the estimator, so it is chained onto construction
svc = SVC().fit(x_train, y_train)

# Predict on the validation split first, then on the test split
y_val_pred, y_pred = svc.predict(x_val), svc.predict(x_test)

In [14]:
# Evaluate accuracy on both held-out splits, then report them
val_accuracy = accuracy_score(y_val, y_val_pred)
accuracy = accuracy_score(y_test, y_pred)

print("Validation Accuracy: ", val_accuracy)
print("Test Accuracy: ", accuracy)

Validation Accuracy:  0.9360675512665863
Test Accuracy:  0.950592885375494


In [15]:
# Confusion Matrix
# Layout follows scikit-learn's convention: rows are true labels, columns are
# predicted labels, in sorted label order. In this notebook label 0 is
# 'not fractured' and label 1 is 'fractured' (see extract_bow_features).
print(f'Validation Confusion Matrix: \n{confusion_matrix(y_val, y_val_pred)}\n')
print(f'Test Confusion Matrix: \n{confusion_matrix(y_test, y_pred)}')

Validation Confusion Matrix: 
[[474  18]
 [ 35 302]]

Test Confusion Matrix: 
[[250  18]
 [  7 231]]
