In [1]:
import cv2
import matplotlib as plt
%matplotlib inline
import numpy as np

In [2]:
# Read the pretrained object labels, one class name per line.
# The context manager closes the file handle as soon as the names are read.
with open('coco.names.txt') as names_file:
    labels = names_file.read().strip().split('\n')

In [3]:
# Files describing the pretrained detector that is loaded in the next cell.
architecture = 'yolov3.cfg.txt'  # neural network architecture
weights = 'yolov3.weights'  # pretrained weights

In [4]:
# Build the neural network from the architecture file and its pretrained weights.
# readNetFromDarknet takes (cfgFile, darknetModel) explicitly, so loading does not
# depend on cv2.dnn.readNet guessing each file's role from its extension
# (the architecture file here ends in ".txt", not ".cfg").
model = cv2.dnn.readNetFromDarknet(architecture, weights)

In [28]:
# One bounding-box colour per class: a (len(labels), 3) array of floats in [0, 255).
# A private, fixed-seed generator keeps each class's colour the same on every run
# without touching NumPy's global random state.
colors = np.random.RandomState(42).uniform(0, 255, size=(len(labels), 3))

In [6]:
layers = model.getLayerNames()  # names of every layer in the network
# getUnconnectedOutLayers() yields 1-based indices into `layers`. Depending on the
# OpenCV version they come back either as an (N, 1) array or as a flat (N,) array,
# so flatten before indexing to support both layouts.
output_layer = [
    layers[layer_index - 1]
    for layer_index in np.asarray(model.getUnconnectedOutLayers()).flatten()
]

In [7]:
# Detection thresholds used by the processing loop.
IOU_THRESHOLD = 0.5  # overlap threshold handed to non-maximum suppression
CONFIDENCE = 0.65  # minimum class score for a prediction to be kept

In [29]:
cap = cv2.VideoCapture(0)  # capture video from the webcam (device index 0)

# Optional recording setup, currently disabled. To save the annotated video,
# uncomment this block together with the writer.write(...) and writer.release()
# lines in the processing loop below.
# frame_width = int(cap.get(3))
# frame_height = int(cap.get(4))

# size = (frame_width, frame_height)

# writer = cv2.VideoWriter('filename.avi',
#                          cv2.VideoWriter_fourcc(*'MJPG'),
#                          10, size)

In [30]:
def _decode_detections(outputs, frame_width, frame_height, min_score):
    """Convert raw network outputs into pixel-space boxes.

    Args:
        outputs: arrays returned by ``model.forward``. Each row is one prediction:
            columns 0-3 hold centre x, centre y, width and height (scaled here by
            the frame size) and columns 5 onwards hold the per-class scores.
        frame_width: width of the frame in pixels.
        frame_height: height of the frame in pixels.
        min_score: a prediction is kept only if its best class score exceeds this.

    Returns:
        ``(boxes, confidences, class_ids)`` as parallel lists, where each box is
        ``[x, y, w, h]`` with integer top-left coordinates.
    """
    boxes, confidences, class_ids = [], [], []
    for output in outputs:
        class_scores = output[:, 5:]
        best_ids = class_scores.argmax(axis=1)
        best_scores = class_scores[np.arange(len(class_scores)), best_ids]
        # Only rows that pass the threshold are visited by the Python-level loop.
        for row in np.flatnonzero(best_scores > min_score):
            center_x = int(output[row, 0] * frame_width)
            center_y = int(output[row, 1] * frame_height)
            w = int(output[row, 2] * frame_width)
            h = int(output[row, 3] * frame_height)
            # Integer top-left corner, so NMS and drawing need no float-to-int conversion.
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)
            boxes.append([x, y, w, h])
            confidences.append(float(best_scores[row]))
            class_ids.append(int(best_ids[row]))
    return boxes, confidences, class_ids


def _draw_detection(frame, box, text, color):
    """Draw one bounding box with a filled caption strip above it, in place."""
    x, y, w, h = box
    cv2.rectangle(frame, (x, y), (x + w, y + h), color=color, thickness=2)
    # The strip is measured at font scale 1.3 while the caption is drawn at 1.0,
    # so the strip comes out somewhat larger than the text it holds.
    (text_width, text_height) = cv2.getTextSize(
        text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=1.3, thickness=1)[0]
    cv2.rectangle(frame, (x, y - text_height), (x + text_width + 2, y),
                  color=color, thickness=-1)
    cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,
                fontScale=1, color=(0, 0, 0), thickness=1)


# Read frames, detect objects and show the annotated frame until the stream ends
# or 'q' is pressed. The try/finally guarantees the camera and the window are
# released even if a frame fails to process or the kernel is interrupted.
try:
    while cap.isOpened():
        grabbed, image1 = cap.read()
        if not grabbed:
            break
        Height, Width = image1.shape[:2]  # used to scale boxes to the frame
        # Convert the frame into the input format the network expects.
        blob = cv2.dnn.blobFromImage(image1, 1/255.0, (416, 416), swapRB=True, crop=False)
        model.setInput(blob)
        outputs = model.forward(output_layer)
        boxes, confidences, class_ids = _decode_detections(outputs, Width, Height, CONFIDENCE)
        # Non-maximum suppression returns the indices of the boxes to keep.
        indices = cv2.dnn.NMSBoxes(boxes, confidences, CONFIDENCE, IOU_THRESHOLD)
        # np.asarray copes with an empty result as well as (N, 1) or (N,) index arrays.
        for i in np.asarray(indices, dtype=int).flatten():
            prob = round(confidences[i], 3)  # restrict to 3 decimals
            label = labels[class_ids[i]]
            color = colors[class_ids[i]].tolist()  # plain Python floats for OpenCV
            text = label+' : '+str(prob)
            _draw_detection(image1, boxes[i], text, color)
        cv2.imshow('vid', image1)
        # writer.write(image1)
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
finally:
    # writer.release()
    cap.release()
    cv2.destroyAllWindows()