In [None]:
"""
Load data module
The dataset is divided into training and testing sets, each containing positive and negative samples

Dataset
│
├── train                # Training dataset
│   ├── pos              # Positive samples
│   └── neg              # Negative samples
│
└── test                 # Testing dataset
    ├── pos              # Positive samples
    └── neg              # Negative samples
"""

import os
import cv2

# Read every image in a directory as grayscale and resize it to a common size
def traversal_data(path, image_size):
    """Load all readable images found directly inside *path*.

    Each directory entry is read with ``cv2.imread`` in grayscale mode.
    Entries that OpenCV cannot decode (``imread`` returns ``None``) are
    skipped silently; the remaining images are resized to *image_size*.

    Args:
        path: Directory whose entries are loaded.
        image_size: Target size handed to ``cv2.resize`` as its ``dsize``
            argument.

    Returns:
        A list of resized grayscale images, in ``os.listdir`` order.
    """
    # Lazy generator: each file is read only when the comprehension asks for it
    decoded = (
        cv2.imread(os.path.join(path, entry), cv2.IMREAD_GRAYSCALE)
        for entry in os.listdir(path)
    )
    return [cv2.resize(gray, image_size) for gray in decoded if gray is not None]

# Load the positive and negative sample folders of one dataset split
def load_dataset(pos_dir, neg_dir, image_size):
    """Load both classes of a dataset split.

    Args:
        pos_dir: Directory holding the positive samples.
        neg_dir: Directory holding the negative samples.
        image_size: Size every image is resized to (forwarded to
            ``traversal_data``).

    Returns:
        A ``(positive, negative)`` tuple of image lists.
    """
    # Positive folder is traversed first, then the negative one
    return tuple(traversal_data(folder, image_size) for folder in (pos_dir, neg_dir))

# Dataset root and the size every sample is resized to (passed to cv2.resize as dsize)
dataset = r"C:\Users\Xiaolong\Desktop\peopleDet\DataSet"
image_size = (32, 96)
train, test = (os.path.join(dataset, split) for split in ('train', 'test'))

# Load the pos/neg folders of the training split, then of the testing split
train_positive, train_negative = load_dataset(
    *(os.path.join(train, label) for label in ('pos', 'neg')), image_size)
test_positive, test_negative = load_dataset(
    *(os.path.join(test, label) for label in ('pos', 'neg')), image_size)

# Report how many samples were loaded: first line is the training set, second the testing set
print(f"Loaded {len(train_positive)} positive samples and {len(train_negative)} negative samples.")
print(f"Loaded {len(test_positive)} positive samples and {len(test_negative)} negative samples.")

In [None]:
"""
Data preprocessing: Extract HOG features and generate corresponding labels, stored in X and y respectively, using default HOG parameters.
Train a non-linear SVM model, select the best model parameters through cross-validation, and save the corresponding model.
"""

import numpy as np
from skimage.feature import hog
from itertools import chain
from sklearn.svm import LinearSVC, SVC
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
import joblib

# positive: samples containing human bodies; negative: samples without human bodies
# Extract HOG features (default HOG parameters) for every image. Positives are
# chained before negatives, so the label vector is all ones followed by all zeros.
X_train = np.array([hog(im) for im in chain(train_positive, train_negative)])
y_train = np.hstack([np.ones(len(train_positive)), np.zeros(len(train_negative))])
X_test = np.array([hog(im) for im in chain(test_positive, test_negative)])
y_test = np.hstack([np.ones(len(test_positive)), np.zeros(len(test_negative))])

# Data standardization: the scaler is fitted on the training features only and
# then applied unchanged to the test features. It is saved so the same
# transformation can be re-applied at detection time.
# NOTE(review): the scaler is fitted on the whole training set before the
# cross-validation split, so CV scores may be slightly optimistic.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
joblib.dump(scaler, r"model\scaler.pkl")

# model = LinearSVC()  # Linear
model = SVC()  # Kernel (linear or rbf) is selected by the grid search below

# Select the best model parameters through cross-validation and save the model.
# gamma is a kernel coefficient that the linear kernel does not use, so each
# kernel gets its own sub-grid. A single combined grid would refit every linear
# candidate once per gamma value with identical results.
regularization_values = [0.1, 1, 10, 100]  # Regularization parameter C
param_grid = [
    {
        'kernel': ['linear'],
        'C': regularization_values,
    },
    {
        'kernel': ['rbf'],
        'C': regularization_values,
        'gamma': ['scale', 'auto', 0.1, 0.01, 0.001],  # Kernel function parameter
    },
]
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='accuracy')

# Train
grid_search.fit(X_train, y_train)
# Output the best parameters and the best score
print("Best parameters:", grid_search.best_params_)
print("Best cross-validation score: {:.2f}".format(grid_search.best_score_))
# Evaluate the best estimator (refitted on the full training set) on the held-out test set
model_best_svm = grid_search.best_estimator_
print("Test set score: {:.2f}".format(model_best_svm.score(X_test, y_test)))

joblib.dump(model_best_svm, r"model\model_best_svm.pkl")

In [None]:
"""
Using a sliding window mechanism to traverse different sized images, 
extract HOG features from the current window, 
calculate distances using the SVM model, and filter out redundant boxes through NMS.
"""
import cv2
import joblib
import matplotlib.pyplot as plt
import numpy as np
import joblib
from skimage.feature import hog
from imutils.object_detection import non_max_suppression

# Sliding window
def sliding_window(image, step_size, window_size):
    """Yield every full-size window of *image*, row by row.

    Args:
        image: Array indexed as ``image[row, column]``.
        step_size: Stride, in pixels, applied along both axes.
        window_size: ``(width, height)`` of the window.

    Yields:
        ``(x, y, window)`` tuples where ``(x, y)`` is the top-left corner and
        ``window`` is the slice ``image[y:y + height, x:x + width]``. Only
        positions where the window fits entirely inside the image are
        produced, so nothing is yielded when the image is smaller than the
        window.
    """
    win_w = window_size[0]
    win_h = window_size[1]
    # Exclusive upper bounds for the top-left corner so the window never overruns
    last_row = image.shape[0] - win_h + 1
    last_col = image.shape[1] - win_w + 1
    for top in range(0, last_row, step_size):
        bottom = top + win_h
        for left in range(0, last_col, step_size):
            yield (left, top, image[top:bottom, left:left + win_w])

# Pyramid
def pyramid(image, scale=1.2, min_size=(64, 128)):
    """Yield *image* followed by successively smaller copies of it.

    The first item is the input image itself. After that, while the current
    level is strictly larger than *min_size* in both dimensions, it is shrunk
    by *scale* and the shrunken level is yielded. Because the size test is
    made before shrinking, the last level produced may be smaller than
    *min_size*.

    Args:
        image: Array indexed as ``image[row, column]``.
        scale: Factor each level is divided by; must be greater than 1,
            otherwise the levels would never shrink and the loop would not
            terminate.
        min_size: ``(width, height)`` below which no further shrinking is done.

    Yields:
        The pyramid levels, largest first.

    Raises:
        ValueError: If *scale* is not greater than 1. As this is a generator,
            the error surfaces when the first level is requested.
    """
    if scale <= 1:
        raise ValueError(f"scale must be greater than 1, got {scale}")

    min_width, min_height = min_size[0], min_size[1]
    yield image
    while image.shape[0] > min_height and image.shape[1] > min_width:
        # cv2.resize takes the target size as (width, height)
        shrunk_size = (int(image.shape[1] / scale), int(image.shape[0] / scale))
        image = cv2.resize(image, shrunk_size, interpolation=cv2.INTER_LINEAR)
        yield image

# Map boxes found on a pyramid level back to the original image size
def adjust_detection_boxes(detections):
    """Multiply each box by its own scale factor and truncate to integers.

    Args:
        detections: Iterable of ``(x1, y1, x2, y2, scale)`` tuples.

    Returns:
        A list of ``(x1, y1, x2, y2)`` integer tuples, one per input box,
        where every coordinate is ``int(coordinate * scale)``.
    """
    return [
        tuple(int(coord * factor) for coord in (left, top, right, bottom))
        for (left, top, right, bottom, factor) in detections
    ]

# Multi-scale sliding window detection
def detect(image, svm, scaler, window_size, step_size, scale, min_size, dis_thre):
    """Run the HOG + SVM classifier over every window of an image pyramid.

    Args:
        image: Image to search.
        svm: Fitted classifier exposing ``decision_function``.
        scaler: Fitted feature scaler exposing ``transform``.
        window_size: ``(width, height)`` of the sliding window.
        step_size: Sliding-window stride in pixels.
        scale: Shrink factor between pyramid levels (forwarded to ``pyramid``).
        min_size: Pyramid stopping size (forwarded to ``pyramid``).
        dis_thre: A window is kept when its decision-function value exceeds
            this threshold.

    Returns:
        A list of ``(x1, y1, x2, y2)`` integer boxes expressed in the
        coordinates of the input *image*.
    """
    win_w, win_h = window_size[0], window_size[1]
    expected_shape = (win_h, win_w)
    hits = []
    for level in pyramid(image, scale, min_size):
        # Factor that maps this level's coordinates back to the input image;
        # it is derived from the height ratio and applied to both axes.
        level_factor = image.shape[0] / float(level.shape[0])
        for (left, top, patch) in sliding_window(level, step_size, window_size):
            if patch.shape[:2] == expected_shape:
                # HOG features with default parameters, standardized with the fitted scaler
                descriptor = scaler.transform([hog(patch)])
                # Signed distance of the sample to the SVM decision plane
                distance = svm.decision_function(descriptor)
                if distance > dis_thre:
                    hits.append((left, top, left + win_w, top + win_h, level_factor))
    return adjust_detection_boxes(hits)

# Remove overlapping boxes with non-maximum suppression
def apply_nms(detections, overlap_thresh=0.1):
    """Suppress redundant detection boxes.

    Args:
        detections: Iterable of ``(x1, y1, x2, y2)`` boxes.
        overlap_thresh: Overlap threshold forwarded to
            ``non_max_suppression`` as ``overlapThresh``.

    Returns:
        Whatever ``non_max_suppression`` returns for the boxes; no scores are
        supplied (``probs=None``).
    """
    boxes = np.array(
        [[left, top, right, bottom] for (left, top, right, bottom) in detections]
    )
    return non_max_suppression(boxes, probs=None, overlapThresh=overlap_thresh)

# Test process
imgp = r"test_imgs\1.jpeg"
image_bgr = cv2.imread(imgp)
# cv2.imread reports a missing or undecodable file by returning None rather
# than raising, so fail here with a message that names the path.
if image_bgr is None:
    raise FileNotFoundError(f"Could not read image: {imgp}")
image_gray = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
Scaler = joblib.load(r"model\scaler.pkl")
SVM_model = joblib.load(r"model\model_best_svm.pkl")

window_size = (32, 96)  # Sliding window size, must be consistent with the size of the feature extracted during training
step_size = 8           # Sliding window step size
min_size = (64, 128)    # Pyramid stops shrinking once a level is no larger than this (width, height)
scale = 1.2             # Scaling factor for the image pyramid
dis_thre = 1.0          # Threshold: distance of the sample point to the decision plane

detections = detect(image_gray, SVM_model, Scaler, window_size, step_size, scale, min_size, dis_thre)
final_detections = apply_nms(detections)

# Draw the surviving boxes in green on the colour image
for (x, y, x_end, y_end) in final_detections:
    # Cast to plain ints: the boxes may come back as NumPy integer scalars
    cv2.rectangle(image_bgr, (int(x), int(y)), (int(x_end), int(y_end)), (0, 255, 0), 2)
# matplotlib expects RGB channel order, OpenCV images are BGR
plt.imshow(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))
plt.show()