In [1]:
import mediapipe as mp
import cv2
import time  # For defining fps 

In [2]:
# Drawing helpers used to overlay detection results (e.g. 3D bounding boxes) on frames.
mp_drawing = mp.solutions.drawing_utils
# MediaPipe's Objectron solution, used for 3D object detection.
mp_objectron = mp.solutions.objectron

In [3]:
# Index of the capture device to open; 0 is normally the system's default camera.
CAMERA_INDEX = 0
# Handle to the camera stream; frames are pulled from it with cap.read().
cap = cv2.VideoCapture(CAMERA_INDEX)

In [4]:
# Real-time 3D object detection on the webcam stream.
#
# Each iteration grabs one frame from `cap`, runs Objectron on it, draws the
# detected 3D bounding boxes and object axes, overlays the per-frame FPS and
# shows the result in an OpenCV window. Press ESC to stop.
#
# The camera and the window are released in `finally`, so they are cleaned up
# even if detection or drawing raises part-way through.
ESC_KEY = 27  # key code returned by cv2.waitKey for the Escape key

try:
    with mp_objectron.Objectron(
        static_image_mode=False,       # input is a video stream, not unrelated still images
        max_num_objects=2,             # maximum number of objects to detect per frame
        min_detection_confidence=0.5,  # detections scoring below this are discarded
        min_tracking_confidence=0.8,   # minimum confidence for tracked landmarks
        model_name="Cup",              # which object category model to load
    ) as objectron:

        while cap.isOpened():
            success, image = cap.read()
            if not success:
                # No frame was delivered (camera unplugged, stream ended, ...).
                # `image` is None in that case, so stop instead of crashing in cvtColor.
                break

            # Time only the processing of this frame (detection + drawing).
            start = time.perf_counter()

            # MediaPipe expects RGB input, OpenCV delivers BGR.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Marking the array read-only lets MediaPipe take it by reference
            # instead of copying it, which improves performance.
            image.flags.writeable = False
            results = objectron.process(image)

            # Make the frame writable again and convert back to BGR for drawing/display.
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            if results.detected_objects:
                for detected_object in results.detected_objects:
                    # 2D projection of the 3D bounding box.
                    mp_drawing.draw_landmarks(
                        image, detected_object.landmarks_2d, mp_objectron.BOX_CONNECTIONS)
                    # Object axes derived from the estimated rotation and translation.
                    mp_drawing.draw_axis(
                        image, detected_object.rotation, detected_object.translation)

            end = time.perf_counter()
            totalTime = end - start
            # Instantaneous FPS for this frame; guard against a zero-length interval.
            fps = 1 / totalTime if totalTime > 0 else 0

            cv2.putText(image, f'FPS: {int(fps)}', (20, 70),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 2)
            cv2.imshow('MediaPipe Objectron', image)

            # waitKey also services the GUI event loop; exit on ESC.
            if cv2.waitKey(5) & 0xFF == ESC_KEY:
                break
finally:
    cap.release()
    cv2.destroyAllWindows()