In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the model
# weight files referenced later under /content/drive/MyDrive are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch
import numpy as np
import cv2
# import pafy
from time import time


class ObjectDetection:
    """
    Runs two custom YOLOv7 models over a video file using OpenCV.

    The first model detects bike riders in every frame. Each rider is cropped,
    resized and passed to a second model that looks for helmets, bare heads
    and number plates. Riders are drawn on the output video (green when a
    helmet is found, red otherwise) and the number plates of riders without a
    helmet are written to ``number_plates/``.
    """

    # Side length in pixels of the square rider crop fed to the second model.
    CROP_SIZE = 640
    # Folder that receives number-plate crops of riders without a helmet.
    PLATE_DIR = 'number_plates'
    # Class ids of the helmet / number-plate model, as used by the checks below
    # (0 = helmet, 1 = no helmet, 2 = number plate).
    HELMET, NO_HELMET, NUMBER_PLATE = 0, 1, 2
    # Minimum confidences for accepting a detection of each kind.
    RIDER_CONF = 0.5
    HELMET_CONF = 0.3
    NO_HELMET_CONF = 0.5
    PLATE_CONF = 0.5

    def __init__(self, directory, out_file, bike_rider_model, helmet_npr_model):
        """
        Stores the input/output locations and the model weight paths.
        :param directory: Path of the input video handed to cv2.VideoCapture.
        :param out_file: A valid output file name for the annotated video.
        :param bike_rider_model: Path of the bike-rider YOLOv7 weights.
        :param helmet_npr_model: Path of the helmet / number-plate YOLOv7 weights.
        """
        self.directory = directory
        self.bike_rider_model = bike_rider_model
        self.helmet_npr_model = helmet_npr_model
        self.out_file = out_file
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print("\n\nDevice Used:", self.device)

    def load_bike_model(self, bike_rider_model):
        """
        Loads the bike-rider Yolov7 model from pytorch hub.
        :param bike_rider_model: Path of the custom weights.
        :return: Trained Pytorch model.
        """
        # NOTE: force_reload=True re-downloads the hub repository on every call.
        model_bike = torch.hub.load('WongKinYiu/yolov7', 'custom', path_or_model=bike_rider_model,
                                    force_reload=True, trust_repo=True)
        return model_bike

    def load_npr_model(self, helmet_npr_model):
        """
        Loads the helmet / number-plate Yolov7 model from pytorch hub and
        remembers its class names in ``self.classes``.
        :param helmet_npr_model: Path of the custom weights.
        :return: Trained Pytorch model.
        """
        model_npr = torch.hub.load('WongKinYiu/yolov7', 'custom', path_or_model=helmet_npr_model,
                                   force_reload=True, trust_repo=True)
        self.classes = model_npr.names
        return model_npr

    @staticmethod
    def _to_pixel_box(row, width, height):
        """
        Converts a normalised ``(x1, y1, x2, y2, ...)`` row to integer pixels.
        :param row: Indexable whose first four items are normalised corners.
        :param width: Width in pixels of the image the row refers to.
        :param height: Height in pixels of the image the row refers to.
        :return: Tuple ``(x1, y1, x2, y2)`` clamped to the image bounds.
        """
        # Clamping keeps slightly out-of-range predictions from turning into
        # negative slice indices (which would wrap around in numpy).
        x1 = min(max(int(row[0] * width), 0), width)
        y1 = min(max(int(row[1] * height), 0), height)
        x2 = min(max(int(row[2] * width), 0), width)
        y2 = min(max(int(row[3] * height), 0), height)
        return x1, y1, x2, y2

    def _save_number_plates(self, rider_crop, labels, cords):
        """
        Writes every confident number-plate detection of a rider crop to disk.
        :param rider_crop: The resized rider image the detections refer to.
        :param labels: Class ids predicted on the rider crop.
        :param cords: Matching rows of normalised box corners plus confidence.
        """
        import os  # local import so the block does not rely on file-level imports

        # The detections are normalised to the rider crop, not to the frame.
        crop_h, crop_w = rider_crop.shape[0], rider_crop.shape[1]
        for label, row in zip(labels, cords):
            confidence = float(row[4])
            if int(label) != self.NUMBER_PLATE or confidence <= self.PLATE_CONF:
                continue
            x1, y1, x2, y2 = self._to_pixel_box(row, crop_w, crop_h)
            plate = rider_crop[y1:y2, x1:x2]
            if plate.size == 0:
                continue
            # cv2.imwrite fails silently when the target folder is missing.
            os.makedirs(self.PLATE_DIR, exist_ok=True)
            file_name = f'{time()}_{confidence:.2f}.jpg'
            cv2.imwrite(os.path.join(self.PLATE_DIR, file_name), plate)

    def _classify_rider(self, rider_crop):
        """
        Runs the helmet / number-plate model on one rider crop.
        :param rider_crop: Resized image of a single bike rider.
        :return: ``HELMET``, ``NO_HELMET`` or ``None`` when nothing is confident enough.
        """
        detections = self.model_npr([rider_crop]).xyxyn[0]
        labels, cords = detections[:, -1], detections[:, :-1]

        helmet_mask = labels == self.HELMET
        if bool(helmet_mask.any()):
            # Any helmet detection suppresses the no-helmet check and the
            # number-plate capture, even when it is too weak to be reported.
            if bool((cords[helmet_mask, 4] > self.HELMET_CONF).any()):
                return self.HELMET
            return None

        bare_head = (labels == self.NO_HELMET) & (cords[:, 4] > self.NO_HELMET_CONF)
        if not bool(bare_head.any()):
            return None
        # Report the rider once, however many bare heads were found on the crop.
        self._save_number_plates(rider_crop, labels, cords)
        return self.NO_HELMET

    def score_frame(self, frame):
        """
        Takes a single frame as input, and scores the frame using both models.
        :param frame: input frame as a numpy image (height, width, channels).
        :return: Tuple of (labels, rows); labels are 0 for a rider with helmet
                 and 1 for a rider without, rows are the normalised rider boxes.
        """
        # Move both models to the target device once per frame, not per rider.
        self.model_bike.to(self.device)
        self.model_npr.to(self.device)

        riders = self.model_bike([frame]).xyxyn[0][:, :-1]
        frame_h, frame_w = frame.shape[0], frame.shape[1]

        detection_label = []
        detection_result = []
        for row in riders:
            if float(row[4]) <= self.RIDER_CONF:
                continue
            x1, y1, x2, y2 = self._to_pixel_box(row, frame_w, frame_h)
            rider_crop = frame[y1:y2, x1:x2]
            if rider_crop.size == 0:
                # cv2.resize raises on an empty image.
                continue
            rider_crop = cv2.resize(rider_crop, (self.CROP_SIZE, self.CROP_SIZE))
            label = self._classify_rider(rider_crop)
            if label is not None:
                detection_label.append(label)
                detection_result.append(row)
        return detection_label, detection_result

    def class_to_label(self, x):
        """
        For a given label value, return corresponding string label.
        :param x: numeric label
        :return: corresponding string label
        """
        return self.classes[int(x)]

    def plot_boxes(self, results, frame):
        """
        Takes a frame and its results as input, and plots the bounding boxes and label on to the frame.
        :param results: contains labels and coordinates predicted by model on the given frame.
        :param frame: Frame which has been scored.
        :return: Frame with bounding boxes and labels plotted on it.
        """
        labels, cord = results
        frame_h, frame_w = frame.shape[0], frame.shape[1]
        for label, row in zip(labels, cord):
            x1, y1, x2, y2 = self._to_pixel_box(row, frame_w, frame_h)
            # Green for riders with a helmet, red for riders without one.
            bgr = (0, 255, 0) if label == self.HELMET else (0, 0, 255)
            cv2.rectangle(frame, (x1, y1), (x2, y2), bgr, 2)
            cv2.putText(frame, self.class_to_label(label), (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.9, bgr, 2)

        return frame

    def __call__(self):
        """
        This function is called when class is executed, it runs the loop to read the video frame by frame,
        and write the output into a new file. The capture and the writer are
        released when the loop ends or fails, so the output file is finalised.
        :return: void
        """
        player = cv2.VideoCapture(self.directory)
        assert player.isOpened(), f"Could not open video: {self.directory}"

        # code for writing video from frames
        x_shape = int(player.get(cv2.CAP_PROP_FRAME_WIDTH))
        y_shape = int(player.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # NOTE(review): MJPG is usually paired with an .avi container; confirm
        # the chosen output extension accepts it.
        four_cc = cv2.VideoWriter_fourcc(*"MJPG")
        out = cv2.VideoWriter(self.out_file, four_cc, 20, (x_shape, y_shape))

        try:
            # loading custom trained yolov7 models
            self.model_bike = self.load_bike_model(self.bike_rider_model)
            self.model_npr = self.load_npr_model(self.helmet_npr_model)

            # loop till video completes prediction
            while True:
                start_time = time()
                ret, frame = player.read()
                if not ret:
                    break
                results = self.score_frame(frame)          # labels and coordinates
                frame = self.plot_boxes(results, frame)    # frame with boxes drawn
                elapsed = np.round(time() - start_time, 3)
                # A frame faster than the rounding step would divide by zero.
                fps = 1 / elapsed if elapsed > 0 else float('inf')
                print(f"Frames Per Second : {fps}")
                out.write(frame)
        finally:
            player.release()
            out.release()



# Ask where the input video lives and where the annotated copy should go.
video = input("Enter video directory: ")
pred_directory = input("Directory to save the video with file name .mp4 extension: ")

# Build the detector with the two custom weight files kept under
# /content/drive/MyDrive, then run it over the whole video.
detection = ObjectDetection(
    directory=video,
    out_file=pred_directory,
    bike_rider_model='/content/drive/MyDrive/bike_rider_v7.pt',
    helmet_npr_model='/content/drive/MyDrive/helmet_npr_v7.pt',
)
detection()



Device Used: cuda


Downloading: "https://github.com/WongKinYiu/yolov7/zipball/main" to /root/.cache/torch/hub/main.zip


Adding autoShape... 


Downloading: "https://github.com/WongKinYiu/yolov7/zipball/main" to /root/.cache/torch/hub/main.zip


Adding autoShape... 
Frames Per Second : 4.484304932735426
Frames Per Second : 5.847953216374268
Frames Per Second : 5.617977528089888
Frames Per Second : 5.88235294117647
Frames Per Second : 5.88235294117647
Frames Per Second : 5.813953488372094
Frames Per Second : 5.319148936170213
Frames Per Second : 5.9171597633136095
Frames Per Second : 6.849315068493151
Frames Per Second : 6.993006993006993
Frames Per Second : 7.194244604316546
Frames Per Second : 6.8965517241379315
Frames Per Second : 6.329113924050633
Frames Per Second : 7.246376811594202
Frames Per Second : 6.5359477124183005
Frames Per Second : 7.092198581560284
Frames Per Second : 7.142857142857142
Frames Per Second : 6.944444444444445
Frames Per Second : 4.504504504504505
Frames Per Second : 5.1020408163265305
Frames Per Second : 6.0606060606060606
Frames Per Second : 5.154639175257731
Frames Per Second : 6.0606060606060606
Frames Per Second : 4.739336492890995
Frames Per Second : 6.172839506172839
Frames Per Second : 4.739