In [3]:
import cv2
import h5py
import numpy as np
from sklearn.cluster import KMeans
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

train_data = r'C:\Users\vrinda\Documents\GitHub\computer-vision-hands-on\Problem\dataset\wood_species_train.hdF5'

# Load data from the HDF5 file
# Load data from the HDF5 file
with h5py.File(train_data, 'r') as f:
    imgs = f['images'][:]
    labels = f['labels'][:]

print("Data shape:", imgs.shape, labels.shape)

# Split the data into training and validation sets
trainX, valX, trainY, valY = train_test_split(imgs, labels, test_size=0.2, random_state=42)

print("Training data shape:", trainX.shape, trainY.shape)
print("Validation data shape:", valX.shape, valY.shape)

# Initialize ORB detector
orb = cv2.ORB_create()

# Extract descriptors and labels for training set
trainX_descriptors = []
trainY_desc = []

for i, img in enumerate(trainX):
    image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    key, desc = orb.detectAndCompute(image, None)
    if desc is not None:
        trainX_descriptors.append(desc)
        trainY_desc.append(trainY[i])

print("Train descriptors:", len(trainX_descriptors))
print("Train labels:", len(trainY_desc))

# Perform k-means clustering on training descriptors
fitX_descriptors = np.vstack(trainX_descriptors)
kmeans = KMeans(n_clusters=10, init='k-means++')
kmeans.fit(fitX_descriptors)

# Generate histograms for training set
hist_list = []
for descriptors in trainX_descriptors:
    pred = kmeans.predict(descriptors)
    histogram, _ = np.histogram(pred, bins=10)
    hist_list.append(histogram)

print("Histograms:", len(hist_list))

# Train SVM classifier
clf = LinearSVC(max_iter=200000)
clf.fit(hist_list, trainY_desc)

# Extract descriptors and labels for validation set
valX_descriptors = []
valY_desc = []

for i, img in enumerate(valX):
    image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    key, desc = orb.detectAndCompute(image, None)
    if desc is not None:
        valX_descriptors.append(desc)
        valY_desc.append(valY[i])

print("Validation descriptors:", len(valX_descriptors))
print("Validation labels:", len(valY_desc))

# Generate histograms for validation set
val_hist_list = []
for descriptors in valX_descriptors:
    pred = kmeans.predict(descriptors)
    histogram, _ = np.histogram(pred, bins=10)
    val_hist_list.append(histogram)

print("Validation histograms:", len(val_hist_list))

# Predict labels for validation set using the trained classifier
val_predictions = clf.predict(val_hist_list)

# Calculate accuracy for validation set
val_accuracy = accuracy_score(valY_desc, val_predictions)
print("Validation accuracy:", val_accuracy * 100)



Data shape: (5708, 200, 200, 3) (5708,)
Training data shape: (4566, 200, 200, 3) (4566,)
Validation data shape: (1142, 200, 200, 3) (1142,)
Train descriptors: 3861
Train labels: 3861




Histograms: 3861




Validation descriptors: 969
Validation labels: 969
Validation histograms: 969
Validation accuracy: 30.753353973168213
