In [1]:
import cv2
import numpy as np

In [2]:
# Path of the input photo, resolved relative to the notebook's working directory.
IMAGE_PATH = 'group-of-people-talking-to-each-other-in-front-of-brown-1181360.jpg'

# cv2.imread reports a missing/unreadable file by returning None instead of
# raising, so check explicitly; otherwise the failure only surfaces as an
# opaque error inside cv2.resize.
image = cv2.imread(IMAGE_PATH)
if image is None:
    raise FileNotFoundError("Could not read image file: {!r}".format(IMAGE_PATH))

# Resize to a fixed 800x800 canvas (the source aspect ratio is not preserved).
image = cv2.resize(image, (800, 800))
image.shape

(800, 800, 3)

In [3]:
# Only the first two axes (rows, columns) are needed. Slicing avoids binding
# `_`, which at notebook top level would shadow IPython's "last output"
# variable, and also works if the array has no channel axis.
height, width = image.shape[:2]

In [4]:
# Show the resized input in a window titled "image".
cv2.imshow("image",image)
# waitKey(0): wait with no timeout for a key press before continuing.
cv2.waitKey(0)
# Close the window so it does not linger after the cell finishes.
cv2.destroyAllWindows()

In [5]:

# Load the network from the weights file and its configuration file; both
# paths are relative to the notebook's working directory.
net = cv2.dnn.readNet("yolov3.weights",'yolov3.cfg')

In [6]:
# Read the class labels, one per line. The position of a label in this list is
# used later as the class id, so lines are kept exactly as they appear (no
# filtering or reordering).
# The encoding is given explicitly so the result does not depend on the
# platform's default text encoding.
with open('coco.names', 'r', encoding='utf-8') as names_file:
    classes = names_file.read().splitlines()

In [7]:

# Display the full list of class labels read from coco.names.
classes

['person',
 'bicycle',
 'car',
 'motorbike',
 'aeroplane',
 'bus',
 'train',
 'truck',
 'boat',
 'traffic light',
 'fire hydrant',
 'stop sign',
 'parking meter',
 'bench',
 'bird',
 'cat',
 'dog',
 'horse',
 'sheep',
 'cow',
 'elephant',
 'bear',
 'zebra',
 'giraffe',
 'backpack',
 'umbrella',
 'handbag',
 'tie',
 'suitcase',
 'frisbee',
 'skis',
 'snowboard',
 'sports ball',
 'kite',
 'baseball bat',
 'baseball glove',
 'skateboard',
 'surfboard',
 'tennis racket',
 'bottle',
 'wine glass',
 'cup',
 'fork',
 'knife',
 'spoon',
 'bowl',
 'banana',
 'apple',
 'sandwich',
 'orange',
 'broccoli',
 'carrot',
 'hot dog',
 'pizza',
 'donut',
 'cake',
 'chair',
 'sofa',
 'pottedplant',
 'bed',
 'diningtable',
 'toilet',
 'tvmonitor',
 'laptop',
 'mouse',
 'remote',
 'keyboard',
 'cell phone',
 'microwave',
 'oven',
 'toaster',
 'sink',
 'refrigerator',
 'book',
 'clock',
 'vase',
 'scissors',
 'teddy bear',
 'hair drier',
 'toothbrush']

In [8]:
# Build the network input blob from the image: values scaled by 1/255,
# spatial size 416x416, zero mean subtraction, first and last colour channels
# swapped, and no cropping.
blob = cv2.dnn.blobFromImage(
    image,
    scalefactor=1/255,
    size=(416, 416),
    mean=(0, 0, 0),
    swapRB=True,
    crop=False,
)

In [9]:
# Visual sanity check of the blob: walk every sub-array of every blob entry
# and show it in a window named after its index, for up to one second each.
for sample in blob:
    for plane_index, plane in enumerate(sample):
        window_title = str(plane_index)
        cv2.imshow(window_title, plane)
        cv2.waitKey(1000)  # up to 1000 ms, or until a key is pressed
        cv2.destroyAllWindows()

In [10]:
# Feed the preprocessed blob to the network.
net.setInput(blob)
# Names of the network's unconnected output layers; these are the layers
# requested from the forward pass below.
outputLayerNames = net.getUnconnectedOutLayersNames()
# Run the forward pass; the result holds one output per requested layer.
layerOutputs = net.forward(outputLayerNames)

In [11]:
# Dump the raw forward-pass outputs for inspection.
# NOTE(review): this prints every output array in full and produces a very
# long cell output; printing only the shapes may be enough.
print(layerOutputs)

[array([[0.03081441, 0.04995409, 0.33897823, ..., 0.        , 0.        ,
        0.        ],
       [0.04263433, 0.02830686, 0.27890372, ..., 0.        , 0.        ,
        0.        ],
       [0.04988158, 0.03377305, 0.7703465 , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.954616  , 0.94869876, 0.44408756, ..., 0.        , 0.        ,
        0.        ],
       [0.95874566, 0.9653474 , 0.28929326, ..., 0.        , 0.        ,
        0.        ],
       [0.9661312 , 0.96422225, 0.7518378 , ..., 0.        , 0.        ,
        0.        ]], dtype=float32), array([[0.02230685, 0.02246624, 0.05788217, ..., 0.        , 0.        ,
        0.        ],
       [0.01082074, 0.02111425, 0.28748366, ..., 0.        , 0.        ,
        0.        ],
       [0.01957769, 0.0172476 , 0.07985993, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.9724157 , 0.9758456 , 0.05216026, ..., 0.        , 0.        ,
        0.        ],
       [0.98063505

In [12]:
# Minimum class score for a raw detection to be kept (also passed to NMS as
# its score threshold), and the overlap threshold used by non-maximum
# suppression.
CONF_THRESHOLD = 0.5
NMS_THRESHOLD = 0.4

boxes = []        # [x, y, w, h] in pixels of `image`, (x, y) = top-left corner
confidences = []  # best class score of each kept detection
class_ids = []    # index into `classes` of each kept detection

for output in layerOutputs:
    for detection in output:
        # Elements 0-3 of a row are read below as centre-x, centre-y, width
        # and height relative to the image size; elements from index 5 on
        # are the per-class scores.
        scores = detection[5:]
        class_id = int(np.argmax(scores))
        confidence = float(scores[class_id])
        if confidence > CONF_THRESHOLD:
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)

            # Convert from centre-based to top-left-based coordinates.
            x = int(center_x - (w / 2))
            y = int(center_y - (h / 2))

            boxes.append([x, y, w, h])
            confidences.append(confidence)
            class_ids.append(class_id)
print(boxes)
print(confidences)

indexes = cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD)
# NMSBoxes returns an empty tuple (which has no .flatten()) when no box
# survives, and an array whose shape depends on the OpenCV version otherwise.
# Normalising through np.array(...).reshape(-1) handles every case.
kept = np.array(indexes, dtype=int).reshape(-1)
print(kept)

font = cv2.FONT_HERSHEY_PLAIN
# One random colour per candidate box.
colors = np.random.uniform(0, 255, size=(len(boxes), 3))

# Draw the surviving boxes and their labels. Drawing happens in place on
# `image`, so re-running this cell without reloading the image stacks
# annotations on top of the previous ones.
for i in kept:
    x, y, w, h = boxes[i]
    label = str(classes[class_ids[i]])
    confidence = str(round(confidences[i], 2))
    # Pass the colour as a tuple of plain Python floats rather than a NumPy row.
    color = tuple(colors[i].tolist())
    cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
    cv2.putText(image, label + " " + confidence, (x, y + 20), font, 2, (255, 255, 255))

[[548, 165, 228, 384], [504, 259, 291, 404], [516, 362, 276, 302], [14, 447, 782, 358], [263, 218, 107, 222], [19, 254, 117, 186], [379, 212, 143, 263], [91, 366, 99, 87], [186, 375, 85, 65], [2, 324, 16, 131], [183, 378, 75, 63]]
[0.9576922655105591, 0.8126387596130371, 0.995136022567749, 0.8923608064651489, 0.9988596439361572, 0.997711181640625, 0.999811589717865, 0.9926705360412598, 0.6228459477424622, 0.5604188442230225, 0.7711459994316101]
[ 6  4  5  2  7  0  3 10  9]


In [None]:
# Show `image` again; the detection cell above draws its boxes and labels
# directly onto this array.
cv2.imshow("image",image)
# waitKey(0): wait with no timeout for a key press before continuing.
cv2.waitKey(0)
# Close the window so it does not linger after the cell finishes.
cv2.destroyAllWindows()