In [4]:
import numpy as np
import os
import cv2
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model
import time  # To track detection time

# Load a pre-trained CNN for feature extraction
base_model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

# Function to load images and labels from the dataset
def load_images_and_labels(data_dir):
    images = []
    labels = []
    for label in os.listdir(data_dir):
        folder_path = os.path.join(data_dir, label)
        if os.path.isdir(folder_path):
            for img_name in os.listdir(folder_path):
                img_path = os.path.join(folder_path, img_name)
                img = cv2.imread(img_path)
                img = cv2.resize(img, (224, 224))  # Resize for ResNet50
                img = preprocess_input(img)  # Preprocess for ResNet50
                images.append(img)
                labels.append(label)
    return np.array(images), np.array(labels)

# Load the dataset
data_dir = 'Dataset'  # Update this with your dataset path
images, labels = load_images_and_labels(data_dir)

# Extract features using the pre-trained CNN
features = base_model.predict(images)

# Encode the labels using LabelEncoder
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels_encoded, test_size=0.2, random_state=42)

# Create the XGBoost classifier
xgb_clf = xgb.XGBClassifier(objective='multi:softmax', num_class=len(np.unique(y_train)))

# Train the XGBoost model
xgb_clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = xgb_clf.predict(X_test)

# Evaluate the model's accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Real-time object detection using camera
cap = cv2.VideoCapture(0)  # Open the webcam

detected_counts = {}
label_names = label_encoder.classes_

# Initialize counts for each class (excluding "null")
for label in label_names:
    detected_counts[label] = 0

# Variables to keep track of detection time
current_label = "null"
start_time = time.time()
detection_threshold = 3  # seconds

while True:
    ret, frame = cap.read()  # Read a frame from the camera
    if not ret:
        break

    # Preprocess the frame for ResNet50
    img = cv2.resize(frame, (224, 224))
    img = preprocess_input(img)
    img = np.expand_dims(img, axis=0)

    # Extract features from the frame using ResNet50
    features = base_model.predict(img)

    # Predict the class using XGBoost
    prediction = xgb_clf.predict(features)

    # Get the predicted label
    predicted_label = label_encoder.inverse_transform(prediction)[0]

    # Check if the prediction is confident enough, else assign "null"
    confidence = np.max(xgb_clf.predict_proba(features))
    if confidence < 0.5:  # You can adjust this threshold for "null" detection
        predicted_label = "null"

    # Check if the detected label has changed
    if predicted_label != current_label:
        # Reset the timer if a new label is detected
        current_label = predicted_label
        start_time = time.time()
    else:
        # Calculate how long the current label has been detected
        elapsed_time = time.time() - start_time

        # Count the object only if it's detected for more than the threshold and it's not "null"
        if elapsed_time > detection_threshold and current_label != "null":
            detected_counts[current_label] += 1
            # Reset the timer after counting
            start_time = time.time()

    # Display the result on the screen
    cv2.putText(frame, f"Detected: {predicted_label}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv2.imshow('Object Detection', frame)

    # Quit if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()  # Release the camera
cv2.destroyAllWindows()  # Close all OpenCV windows

# Print the total counts of each detected object, excluding "null"
for label, count in detected_counts.items():
    print(f"{label}: {count}")


[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m120s[0m 2s/step
Accuracy: 91.47%
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 110ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 90ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m