# Video Object Detection with SSD Demo

## Import

In [1]:
from __future__ import division
import os
import sys
# Put the parent of the current working directory on sys.path (only once).
# Presumably this lets the project-local imports below (`util`, `ssd`, `data`)
# resolve when the notebook is run from a subdirectory -- TODO confirm.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
import numpy as np 
import cv2
# When a CUDA device is available, make CUDA float tensors the default tensor
# type, so tensors created without an explicit type are allocated on the GPU.
if torch.cuda.is_available():
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    
import time
import os.path as osp
import pickle as pkl
import pandas as pd
import random
import matplotlib.pyplot as plt

from util import *
from ssd import build_ssd
from data import VOC_CLASSES as labels
%matplotlib inline

## Initialization

In [2]:
# Whether a CUDA device is available.
CUDA = torch.cuda.is_available()

# Input video, resolved relative to the notebook's working directory.
videofile = 'test3.avi'

# Fail fast if the video cannot be opened, then release the handle right away.
# The frame-generation cell opens its own capture, so keeping this one open
# would only hold the file until `cap` is rebound.
cap = cv2.VideoCapture(videofile)
try:
    assert cap.isOpened(), 'Cannot capture source'
finally:
    cap.release()

# Bookkeeping values; they are not read anywhere else in the visible notebook.
frames = 0
start = time.time()
top_k = 10

# Build the SSD network and load the pretrained weights.
# NOTE(review): the arguments are presumably (phase, input size, number of
# classes) -- confirm against `ssd.build_ssd`.
net = build_ssd('test', 300, 21)    # initialize SSD
net.load_weights('../weights/ssd300_mAP_77.43_v2.pth')

Loading weights into state dict...
Finished!


## Generating Images

In [44]:
# Run the SSD network on every frame of `videofile` and save one annotated
# image per frame as outputs/p<index>.jpg (index starts at 0).

# Loop-invariant settings, hoisted out of the per-frame loop.
inp_dim = 300                                   # frames are resized to inp_dim x inp_dim
conf_thresh = 0.56                              # minimum score for a detection to be drawn
colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()
# Factors that scale the box coordinates to the displayed (resized) image;
# laid out as (width, height, width, height).
scale = torch.Tensor([inp_dim, inp_dim]).repeat(2)

# plt.savefig does not create missing directories.
out_dir = 'outputs'
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)

cap = cv2.VideoCapture(videofile)
t = 0                                           # index of the next frame to save
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # End of stream (or read error): `frame` is None here, so stop
            # instead of letting cv2.resize raise.
            break

        img = cv2.resize(frame, (inp_dim, inp_dim))
        img_ = img[:, :, ::-1]                  # reversed channel order, used for display

        # Network input: float32 copy, per-channel constant subtraction, channel
        # reversal, then HWC -> CHW plus a leading batch dimension.
        img0 = img.astype(np.float32)
        img0 -= (104.0, 117.0, 123.0)
        img0 = img0[:, :, ::-1].copy()
        img0 = torch.from_numpy(img0).permute(2, 0, 1)
        img00 = Variable(img0.unsqueeze(0))
        if torch.cuda.is_available():
            img00 = img00.cuda()
        y = net(img00)

        fig = plt.figure(figsize=(12, 12))
        plt.imshow(img_)  # plot the image for matplotlib
        currentAxis = plt.gca()

        # Indexed below as detections[0, class, slot, (score, x1, y1, x2, y2)].
        detections = y.data
        max_dets = detections.size(2)
        # NOTE(review): class index 0 maps to labels[-1]; presumably it is the
        # background class and never passes the threshold -- confirm.
        for i in range(detections.size(1)):
            j = 0
            # Bound j so the loop cannot index past the last slot when every
            # slot of a class passes the threshold.
            while j < max_dets and detections[0, i, j, 0] >= conf_thresh:
                score = detections[0, i, j, 0]
                label_name = labels[i - 1]
                display_txt = '%s: %.2f' % (label_name, score)
                # scale each detection back up to the image
                pt = (detections[0, i, j, 1:] * scale).cpu().numpy()
                coords = (pt[0], pt[1]), pt[2] - pt[0] + 1, pt[3] - pt[1] + 1
                color = colors[i]
                currentAxis.add_patch(plt.Rectangle(*coords, fill=False, edgecolor=color, linewidth=2))
                currentAxis.text(pt[0], pt[1], display_txt, bbox={'facecolor': color, 'alpha': 0.5})
                j += 1

        fig.savefig(os.path.join(out_dir, 'p{0}.jpg'.format(t)))
        # Close the figure: otherwise one open figure per frame accumulates.
        plt.close(fig)
        t = t + 1
finally:
    # Always give the video handle back, even if a frame fails.
    cap.release()

## Generating Output Video

In [40]:
# Directory holding the annotated frames, relative to the working directory,
# matching where the "Generating Images" cell saves them ('outputs/p<i>.jpg').
# The trailing separator is kept because frame names are built as
# path + 'p<i>.jpg'.
path = os.path.join('outputs', '')
#imgs2video(path,'output_v.avi')
fps = 8                                         # frame rate of the output video

# Every frame is saved with the same figure size, so one frame is enough to
# determine the video dimensions; there is no need to load them all.
first_frame_file = path + 'p{0}.jpg'.format(0)
first_frame = cv2.imread(first_frame_file)
if first_frame is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise IOError('Cannot read frame image: ' + first_frame_file)
height, width, layers = first_frame.shape

In [43]:
# Assemble the saved frames (path + 'p<i>.jpg', i = 0, 1, 2, ...) into an
# MJPG-encoded AVI file.
fourcc = cv2.VideoWriter_fourcc(*'MJPG')
# The last argument is the size of the frames to be written. OpenCV expects
# (width, height); frames of any other size are not written correctly.
videoWriter = cv2.VideoWriter('saveVideo.avi', fourcc, fps, (width, height))
if not videoWriter.isOpened():
    raise IOError('Cannot open video writer for saveVideo.avi')

try:
    i = 0
    while True:
        frame = cv2.imread(path + 'p{0}.jpg'.format(i))
        if frame is None:
            # cv2.imread returns None for a missing or unreadable file, which
            # marks the end of the consecutive frame sequence.
            break
        videoWriter.write(frame)
        i += 1
finally:
    # Finalize the file even if reading or writing a frame fails.
    videoWriter.release()