# 第8章　物体跟踪

## 实现基本背景差分器

In [None]:
import cv2
OPENCV_MAJOR_VERSION = int(cv2.__version__.split('.')[0])

# Side length of the square Gaussian kernel used to smooth every frame.
BLUR_RADIUS = 21
# Structuring element used for erosion.
erode_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
# Structuring element used for dilation.
dilate_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))

cap = cv2.VideoCapture(1)
try:
    # Read ten frames and keep only the last one as the reference background.
    for _ in range(10):
        success, frame = cap.read()
    if not success:
        # Raise SystemExit directly instead of calling the site-provided
        # exit() helper, which is meant for interactive sessions only.
        raise SystemExit(1)

    # The reference background: grayscale and blurred, computed once.
    gray_background = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    gray_background = cv2.GaussianBlur(gray_background,
                                       (BLUR_RADIUS, BLUR_RADIUS), 0)

    success, frame = cap.read()
    while success:
        # Give the current frame the same treatment as the background.
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        gray_frame = cv2.GaussianBlur(gray_frame,
                                      (BLUR_RADIUS, BLUR_RADIUS), 0)

        # Pixels that differ from the background by more than 40 become
        # white in the binary mask.
        diff = cv2.absdiff(gray_background, gray_frame)
        _, thresh = cv2.threshold(diff, 40, 255, cv2.THRESH_BINARY)

        # Erode then dilate the mask in place.
        cv2.erode(thresh, erode_kernel, thresh, iterations=2)
        cv2.dilate(thresh, dilate_kernel, thresh, iterations=2)

        if OPENCV_MAJOR_VERSION >= 4:
            # OpenCV 4 or a later version is being used.
            contours, hier = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
                                              cv2.CHAIN_APPROX_SIMPLE)
        else:
            # OpenCV 3 or an earlier version is being used.
            # cv2.findContours has an extra return value.
            # The extra return value is the thresholded image, which is
            # unchanged, so we can ignore it.
            _, contours, hier = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
                                                 cv2.CHAIN_APPROX_SIMPLE)

        # Draw a bounding box around every sufficiently large contour.
        for c in contours:
            if cv2.contourArea(c) > 4000:
                x, y, w, h = cv2.boundingRect(c)
                cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 255, 0), 2)

        cv2.imshow('diff', diff)
        cv2.imshow('thresh', thresh)
        cv2.imshow('detect', frame)

        k = cv2.waitKey(1)
        if k == 27:  # Escape key
            break
        success, frame = cap.read()
finally:
    # Always free the camera and close the preview windows, including on
    # early exit or error.
    cap.release()
    cv2.destroyAllWindows()

## 使用MOG背景差分器

In [None]:
import cv2
import time
# MOG2 background subtractor with shadow detection enabled.
bg_subtractor = cv2.createBackgroundSubtractorMOG2(detectShadows=True)
erode_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
dilate_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
cap = cv2.VideoCapture("data/hallway.mpg")
try:
    success, frame = cap.read()
    while success:
        # Pause one second per frame (kept from the original pacing).
        time.sleep(1)

        fg_mask = bg_subtractor.apply(frame)
        # Keep only mask values above 244; anything dimmer is dropped.
        # NOTE(review): presumably this discards the gray shadow pixels
        # produced by detectShadows=True -- confirm against the OpenCV docs.
        _, thresh = cv2.threshold(fg_mask, 244, 255, cv2.THRESH_BINARY)

        # Erode then dilate the mask in place.
        cv2.erode(thresh, erode_kernel, thresh, iterations=2)
        cv2.dilate(thresh, dilate_kernel, thresh, iterations=2)

        # findContours returns (image, contours, hierarchy) on OpenCV 3 and
        # (contours, hierarchy) on OpenCV 4+; taking the last two values
        # works with both.
        contours, hier = cv2.findContours(
            thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]

        # Draw a bounding box around every sufficiently large contour.
        for c in contours:
            if cv2.contourArea(c) > 1000:
                x, y, w, h = cv2.boundingRect(c)
                cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 255, 0), 2)

        cv2.imshow('mog', fg_mask)
        cv2.imshow('thresh', thresh)
        cv2.imshow('detection', frame)

        k = cv2.waitKey(1)
        if k == 27:  # Escape key
            break
        success, frame = cap.read()
finally:
    # Always release the video source and close the preview windows.
    cap.release()
    cv2.destroyAllWindows()

## 使用KNN背景差分器

In [None]:
import cv2
import time
# KNN background subtractor with shadow detection enabled.
bg_subtractor = cv2.createBackgroundSubtractorKNN(detectShadows=True)
erode_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 5))
dilate_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (17, 11))
cap = cv2.VideoCapture("data/traffic.flv")
try:
    success, frame = cap.read()
    while success:
        # Pause one second per frame (kept from the original pacing).
        time.sleep(1)

        fg_mask = bg_subtractor.apply(frame)
        # Keep only mask values above 244; anything dimmer is dropped.
        # NOTE(review): presumably this discards the gray shadow pixels
        # produced by detectShadows=True -- confirm against the OpenCV docs.
        _, thresh = cv2.threshold(fg_mask, 244, 255, cv2.THRESH_BINARY)

        # Erode then dilate the mask in place.
        cv2.erode(thresh, erode_kernel, thresh, iterations=2)
        cv2.dilate(thresh, dilate_kernel, thresh, iterations=2)

        # findContours returns (image, contours, hierarchy) on OpenCV 3 and
        # (contours, hierarchy) on OpenCV 4+; taking the last two values
        # works with both.
        contours, hier = cv2.findContours(
            thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]

        # Draw a bounding box around every sufficiently large contour.
        for c in contours:
            if cv2.contourArea(c) > 1000:
                x, y, w, h = cv2.boundingRect(c)
                cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 255, 0), 2)

        cv2.imshow('knn', fg_mask)
        cv2.imshow('thresh', thresh)
        cv2.imshow('detection', frame)

        k = cv2.waitKey(1)
        if k == 27:  # Escape key
            break
        success, frame = cap.read()
finally:
    # Always release the video source and close the preview windows.
    cap.release()
    cv2.destroyAllWindows()

## 使用GMG和其他背景差分器

In [None]:
import cv2
import time
# GMG background subtractor, taken from the cv2.bgsegm submodule.
bg_subtractor = cv2.bgsegm.createBackgroundSubtractorGMG()
erode_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (13, 9))
dilate_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (17, 11))
cap = cv2.VideoCapture("data/traffic.flv")
try:
    success, frame = cap.read()
    while success:
        fg_mask = bg_subtractor.apply(frame)
        # Keep only mask values above 244.
        _, thresh = cv2.threshold(fg_mask, 244, 255, cv2.THRESH_BINARY)

        # Erode then dilate the mask in place.
        cv2.erode(thresh, erode_kernel, thresh, iterations=2)
        cv2.dilate(thresh, dilate_kernel, thresh, iterations=2)

        # findContours returns (image, contours, hierarchy) on OpenCV 3 and
        # (contours, hierarchy) on OpenCV 4+; taking the last two values
        # works with both.
        contours, hier = cv2.findContours(
            thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]

        # Draw a bounding box around every sufficiently large contour.
        for c in contours:
            if cv2.contourArea(c) > 1000:
                x, y, w, h = cv2.boundingRect(c)
                cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 255, 0), 2)

        # The raw mask is shown once per frame; the original displayed the
        # same unchanged mask twice.
        cv2.imshow('gmg', fg_mask)
        cv2.imshow('thresh', thresh)
        cv2.imshow('detection', frame)

        k = cv2.waitKey(30)
        if k == 27:  # Escape key
            break
        success, frame = cap.read()
finally:
    # Always release the video source and close the preview windows.
    cap.release()
    cv2.destroyAllWindows()

## 实现MeanShift示例

In [None]:
import cv2
cap = cv2.VideoCapture(1)
try:
    # Read ten frames and keep only the last one to build the target model.
    for _ in range(10):
        success, frame = cap.read()
    if not success:
        # Raise SystemExit directly instead of calling the site-provided
        # exit() helper, which is meant for interactive sessions only.
        raise SystemExit(1)

    # Initial tracking window: one eighth of the frame in each dimension,
    # centered in the image.
    frame_h, frame_w = frame.shape[:2]
    w = frame_w // 8
    h = frame_h // 8
    x = frame_w // 2 - w // 2
    y = frame_h // 2 - h // 2
    track_window = (x, y, w, h)

    # Histogram of channel 0 (hue) of the initial window, 180 bins over
    # [0, 180), scaled to 0..255.
    roi = frame[y:y+h, x:x+w]
    hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
    mask = None
    roi_hist = cv2.calcHist([hsv_roi], [0], mask, [180], [0, 180])
    cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)

    # Stop after 10 iterations or once the window shift falls below 1.
    term_crit = (cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 10, 1)

    success, frame = cap.read()
    while success:
        # Back-project the model histogram onto the current frame.
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        back_proj = cv2.calcBackProject([hsv], [0], roi_hist, [0, 180], 1)

        # Shift the window on the back-projection and draw it.
        num_iters, track_window = cv2.meanShift(
            back_proj, track_window, term_crit)
        x, y, w, h = track_window
        cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)

        cv2.imshow("back-projection", back_proj)
        cv2.imshow("meanshift", frame)

        k = cv2.waitKey(30)
        if k == 27:  # Escape key
            break
        success, frame = cap.read()
finally:
    # Always free the camera and close the preview windows.
    cap.release()
    cv2.destroyAllWindows()

## 使用CamShift

In [None]:
import cv2
import numpy as np
cap = cv2.VideoCapture(1)
try:
    # Read ten frames and keep only the last one to build the target model.
    for _ in range(10):
        success, frame = cap.read()
    if not success:
        # Raise SystemExit directly instead of calling the site-provided
        # exit() helper, which is meant for interactive sessions only.
        raise SystemExit(1)

    # Initial tracking window: one eighth of the frame in each dimension,
    # centered in the image.
    frame_h, frame_w = frame.shape[:2]
    w = frame_w // 8
    h = frame_h // 8
    x = frame_w // 2 - w // 2
    y = frame_h // 2 - h // 2
    track_window = (x, y, w, h)

    # Histogram of channel 0 (hue) of the initial window, 180 bins over
    # [0, 180), scaled to 0..255.
    roi = frame[y:y+h, x:x+w]
    hsv_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
    mask = None
    roi_hist = cv2.calcHist([hsv_roi], [0], mask, [180], [0, 180])
    cv2.normalize(roi_hist, roi_hist, 0, 255, cv2.NORM_MINMAX)

    # Stop after 10 iterations or once the window shift falls below 1.
    term_crit = (cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS, 10, 1)

    success, frame = cap.read()
    while success:
        # Back-project the model histogram onto the current frame.
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        back_proj = cv2.calcBackProject([hsv], [0], roi_hist, [0, 180], 1)

        # CamShift returns a rotated rectangle plus the updated window.
        rotated_rect, track_window = cv2.CamShift(
            back_proj, track_window, term_crit)

        # Convert the rotated rectangle to integer corner points.
        # np.int0 was removed in NumPy 2.0, so cast explicitly instead.
        box_points = cv2.boxPoints(rotated_rect).astype(np.int32)
        cv2.polylines(frame, [box_points], True, (255, 0, 0), 2)

        cv2.imshow("back-projection", back_proj)
        cv2.imshow("camshift", frame)

        k = cv2.waitKey(30)
        if k == 27:  # Escape key
            break
        success, frame = cap.read()
finally:
    # Always free the camera and close the preview windows.
    cap.release()
    cv2.destroyAllWindows()

## 跟踪鼠标光标

In [None]:
import cv2
import numpy as np
# Black 800x800 BGR canvas on which the mouse trail and predictions are drawn.
img = np.zeros((800, 800, 3), np.uint8)

# Kalman filter with a 4-element state (x, y, vx, vy) and a 2-element
# measurement (x, y).
kalman = cv2.KalmanFilter(4, 2)
# Only the position components of the state are observed.
kalman.measurementMatrix = np.array([[1, 0, 0, 0],
                                     [0, 1, 0, 0]], np.float32)
# Constant-velocity model: x += vx and y += vy on every step.
# The original second row was [0, 1, 0, 0], which left vy out of the y
# update, so only the x axis was extrapolated from its velocity.
kalman.transitionMatrix = np.array([[1, 0, 1, 0],
                                    [0, 1, 0, 1],
                                    [0, 0, 1, 0],
                                    [0, 0, 0, 1]], np.float32)
# Process noise: identity scaled by 0.03.
kalman.processNoiseCov = np.eye(4, dtype=np.float32) * 0.03

# Most recent measurement and prediction; None until the first mouse event.
last_measurement = None
last_prediction = None

def _as_pixel(column_vector):
    """Return rows 0 and 1 of a column vector as an ``(int, int)`` tuple."""
    # Index the scalar elements explicitly: calling int() on a one-element
    # array is deprecated since NumPy 1.25.
    return int(column_vector[0, 0]), int(column_vector[1, 0])


def on_mouse_moved(event, x, y, flags, param):
    """Mouse callback: feed the cursor position to the Kalman filter and draw.

    On the first event the filter state is seeded with the cursor position.
    On every later event the filter is corrected with the new measurement,
    a prediction is made, and two line segments are drawn on the global
    ``img``: green from the previous to the current measurement, red from the
    previous to the current prediction.

    Args:
        event: Mouse event code passed by OpenCV (unused).
        x: Cursor x coordinate.
        y: Cursor y coordinate.
        flags: Event flags passed by OpenCV (unused).
        param: User data passed by OpenCV (unused).
    """
    global last_measurement, last_prediction
    measurement = np.array([[x], [y]], np.float32)
    if last_measurement is None:
        # First event: start the filter at the cursor with zero velocity.
        kalman.statePre = np.array([[x], [y], [0], [0]], np.float32)
        kalman.statePost = np.array([[x], [y], [0], [0]], np.float32)
        prediction = measurement
    else:
        kalman.correct(measurement)
        prediction = kalman.predict()
        # Green: measured path.
        cv2.line(img, _as_pixel(last_measurement), _as_pixel(measurement),
                 (0, 255, 0))
        # Red: predicted path.
        cv2.line(img, _as_pixel(last_prediction), _as_pixel(prediction),
                 (0, 0, 255))
    # Copy the prediction before storing it for the next event.
    last_prediction = prediction.copy()
    last_measurement = measurement
# Create the window and route its mouse events to the callback.
cv2.namedWindow("kalman_tracker")
cv2.setMouseCallback("kalman_tracker", on_mouse_moved)
try:
    # Redraw the canvas until Escape is pressed, then save it.
    while True:
        cv2.imshow("kalman_tracker", img)
        if cv2.waitKey(1) == 27:  # Escape key
            # imwrite reports failure through its return value rather than
            # raising, so surface it instead of ignoring it.
            if not cv2.imwrite("images/kalman.png", img):
                print("Failed to write images/kalman.png")
            break
finally:
    # Close the window even if the loop is interrupted.
    cv2.destroyAllWindows()

## 实现行人类

In [None]:
import cv2
import numpy as np

class Pedestrain():
    """A tracked pedestrian combining MeanShift with a Kalman filter.

    Each instance stores a hue histogram of its initial region, a tracking
    window that MeanShift updates every frame, and a Kalman filter with
    state (x, y, vx, vy) that smooths the window center.
    """

    def __init__(self, id, hsv_frame, track_window):
        """Initialize the tracker from the pedestrian's first appearance.

        Args:
            id: Identifier drawn next to the pedestrian.
            hsv_frame: Current frame in HSV color space.
            track_window: Initial ``(x, y, w, h)`` bounding rectangle.
        """
        self.id = id
        self.track_window = track_window
        # Stop MeanShift after 10 iterations or when the shift is below 1.
        self.term_crit = (cv2.TERM_CRITERIA_COUNT | cv2.TERM_CRITERIA_EPS,
                          10, 1)

        # 16-bin histogram of channel 0 (hue) of the initial region,
        # scaled to 0..255.
        x, y, w, h = track_window
        roi = hsv_frame[y:y+h, x:x+w]
        roi_hist = cv2.calcHist([roi], [0], None, [16], [0, 180])
        self.roi_hist = cv2.normalize(roi_hist, roi_hist, 0, 255,
                                      cv2.NORM_MINMAX)

        # Kalman filter: 4-element state, 2-element (x, y) measurement.
        self.kalman = cv2.KalmanFilter(4, 2)
        self.kalman.measurementMatrix = np.array([[1, 0, 0, 0],
                                                  [0, 1, 0, 0]], np.float32)
        # Constant-velocity model: x += vx and y += vy on every step.
        # The original second row was [0, 1, 0, 0], which left vy out of the
        # y update.
        self.kalman.transitionMatrix = np.array([[1, 0, 1, 0],
                                                 [0, 1, 0, 1],
                                                 [0, 0, 1, 0],
                                                 [0, 0, 0, 1]], np.float32)
        # Process noise: identity scaled by 0.03.
        self.kalman.processNoiseCov = np.eye(4, dtype=np.float32) * 0.03

        # Start the filter at the window center with zero velocity.
        cx = x+w/2
        cy = y+h/2
        self.kalman.statePre = np.array([[cx], [cy], [0], [0]], np.float32)
        self.kalman.statePost = np.array([[cx], [cy], [0], [0]], np.float32)

    def update(self, frame, hsv_frame):
        """Track the pedestrian in a new frame and draw the result.

        Runs MeanShift on the hue back-projection, corrects the Kalman
        filter with the resulting window center, moves the window by the
        difference between the corrected estimate and that center, then
        draws the prediction (circle), the window (rectangle), and the ID
        label onto ``frame``.

        Args:
            frame: BGR frame to draw on (modified in place).
            hsv_frame: The same frame in HSV color space.
        """
        back_proj = cv2.calcBackProject([hsv_frame], [0], self.roi_hist,
                                        [0, 180], 1)
        ret, self.track_window = cv2.meanShift(back_proj, self.track_window,
                                               self.term_crit)
        x, y, w, h = self.track_window
        center = np.array([x+w/2, y+h/2], np.float32)

        prediction = self.kalman.predict()
        estimate = self.kalman.correct(center)

        # Shift the window so it is centered on the corrected estimate.
        center_offset = estimate[:, 0][:2] - center
        self.track_window = (x + int(center_offset[0]),
                             y + int(center_offset[1]), w, h)
        x, y, w, h = self.track_window

        # Index the scalar elements explicitly: calling int() on a
        # one-element array is deprecated since NumPy 1.25.
        predicted_point = (int(prediction[0, 0]), int(prediction[1, 0]))
        cv2.circle(frame, predicted_point, 4, (255, 0, 0), -1)
        cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 255, 0), 2)
        cv2.putText(frame, f"ID: {self.id}", (x, y-5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 1,
                    cv2.LINE_AA)
    
def main():
    """Detect and track pedestrians in ``data/pedestrians.avi``.

    A KNN background subtractor finds moving regions. On the first processed
    frame that has no trackers yet, every sufficiently large contour gets a
    ``Pedestrain`` tracker with its own ID; after that, each tracker is
    updated and drawn on every frame. Escape stops playback.
    """
    cap = cv2.VideoCapture("data/pedestrians.avi")
    bg_subtractor = cv2.createBackgroundSubtractorKNN()
    history_length = 20
    bg_subtractor.setHistory(history_length)

    erode_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    dilate_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (8, 3))
    pedestrians = []
    num_history_frames_populated = 0
    try:
        while True:
            grabbed, frame = cap.read()
            if not grabbed:
                break

            fg_mask = bg_subtractor.apply(frame)
            # Feed the first history_length frames to the subtractor
            # without running detection on them.
            if num_history_frames_populated < history_length:
                num_history_frames_populated += 1
                continue

            # Binary foreground mask, eroded and dilated in place.
            _, thresh = cv2.threshold(fg_mask, 127, 255, cv2.THRESH_BINARY)
            cv2.erode(thresh, erode_kernel, thresh, iterations=2)
            cv2.dilate(thresh, dilate_kernel, thresh, iterations=2)

            # findContours returns (image, contours, hierarchy) on OpenCV 3
            # and (contours, hierarchy) on OpenCV 4+; the last two values
            # work with both.
            contours, _hierarchy = cv2.findContours(
                thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2:]
            hsv_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

            # Trackers are only created while the list is empty.
            should_initialize_pedestrians = not pedestrians
            next_id = 0
            for c in contours:
                if cv2.contourArea(c) <= 500:
                    continue
                (x, y, w, h) = cv2.boundingRect(c)
                cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 1)
                if should_initialize_pedestrians:
                    pedestrians.append(
                        Pedestrain(next_id, hsv_frame, (x, y, w, h)))
                    # Give every pedestrian a distinct ID; the original
                    # never incremented the counter, so all were ID 0.
                    next_id += 1

            for pedestrain in pedestrians:
                pedestrain.update(frame, hsv_frame)

            cv2.imshow("Pedestrains Tracked", frame)
            k = cv2.waitKey(110)
            if k == 27:  # Escape key
                break
    finally:
        # Always release the video source and close the preview window.
        cap.release()
        cv2.destroyAllWindows()
# Run the pedestrian-tracking demo when this cell is executed.
main()  