# OpenCV DNN Scene Classification

- use Places365 pre-trained models to analyze scenery
- updated for OpenCV 3.4.2

In [4]:
import os, sys
from os.path import join
import cv2 as cv
# Warn only when the installed OpenCV is older than 3.4.2. Versions are compared
# numerically (major, minor, patch) so newer releases such as 3.4.10 or 4.x do
# not trigger the message, and suffixes like "-dev" are ignored.
import re
_cv_version = tuple(int(part) for part in re.findall(r'\d+', cv.__version__)[:3])
if _cv_version < (3, 4, 2):
  print('pip install opencv-python==3.4.2 or greater')
import time
import numpy as np
import imutils
%matplotlib inline
import matplotlib.pyplot as plt
%reload_ext autoreload
%autoreload 2
from operator import itemgetter
sys.path.append('/vframe/')
#from utils import imx
# from tools.config import notebooks as cfg
# import tools.utils.imx as imx

In [6]:
# Asset directories
DIR_APP = '/data_store_nas/apps/vframe/'
TEST_IMG_DIR = join(DIR_APP,'assets/img/test')
DIR_MODELS = join(DIR_APP, 'models')
# Model dir
# Paths are built with `join` (from `os.path`); this notebook never binds an
# `osp` alias, so `osp.join` would raise NameError on a fresh kernel.
model_dir = join(DIR_MODELS,'caffe/places365')

# Exactly one fp_model / fp_prototxt pair should be active at a time.

# GoogleNet 365
fp_model = join(model_dir,'googlenet_places365/googlenet_places365.caffemodel')
fp_prototxt = join(model_dir,'googlenet_places365/deploy_googlenet_places365.prototxt')

# ResNet152 365
#fp_model = join(model_dir,'resnet152_places365/resnet152_places365.caffemodel')
#fp_prototxt = join(model_dir,'resnet152_places365/deploy_resnet152_places365.prototxt')

# VGG 365
#fp_model = join(model_dir,'vgg16_places365/vgg16_places365.caffemodel')
#fp_prototxt = join(model_dir,'vgg16_places365/deploy_vgg16_places365.prototxt')


# --------------------------------------------------------------------------------
# 1365 classes
# NOTE(review): both entries below still point at *_places365 files, and the
# second mixes the resnet152 and alexnet directories under a "VGG" heading --
# confirm the intended hybrid1365 paths before enabling either one.

# ResNet152 1365
#fp_model = join(model_dir,'resnet152_places365/resnet152_places365.caffemodel')
#fp_prototxt = join(model_dir,'resnet152_places365/deploy_resnet152_places365.prototxt')

# VGG 1365
#fp_model = join(model_dir,'resnet152_places365/alexnet_places365.caffemodel')
#fp_prototxt = join(model_dir,'alexnet_places365/deploy_alexnet_places365.prototxt')


In [7]:
# 365 places
# Path to the label file for the 365-class models.
# Sample line from that file: /a/airfield 0
fp_categories = join(model_dir,'data/categories_places365.txt')

# or, 1365 places (use together with a 1365-class model)
#fp_categories = join(model_dir,'data/categories_hybrid1365.txt')

In [8]:
# Build the label tuple: for every line keep the first space-separated field
# and drop its first three characters (e.g. "/a/airfield 0" -> "airfield").
with open(fp_categories) as class_file:
    classes = tuple(
        entry.strip().split(' ')[0][3:]
        for entry in class_file
    )

In [9]:
# load image from test dir
# `join` is the os.path helper this notebook imports; `osp` is never defined.
fp_im = join(TEST_IMG_DIR,'people-01.jpg')
print(fp_im)
im = cv.imread(fp_im)
# cv.imread signals a missing or unreadable file by returning None rather than
# raising, so fail here with a clear message instead of on `im.shape`.
if im is None:
    raise FileNotFoundError('could not read image: {}'.format(fp_im))
print(im.shape)

/data_store_nas/apps/vframe/assets/img/test/people-01.jpg
(720, 1280, 3)


When calling `cv2.dnn.blobFromImage`, check the model's `deploy.prototxt` for the expected input dimensions and mean values.

This CNN requires fixed spatial dimensions for its input image(s), so each image must be resized to 224x224 pixels while mean subtraction (104, 117, 123) is applied to normalize the input. After executing this command, the "blob" has the shape (1, 3, 224, 224).

In [10]:
#help(cv.dnn.readNetFromTorch)

In [11]:
# Load the Caffe network: fp_prototxt is the deploy definition and fp_model the
# .caffemodel weights selected in the path-configuration cell.
net = cv.dnn.readNetFromCaffe(fp_prototxt, fp_model)

If loading the prototxt file raises an error, try adding this at the top of the prototxt file:
```
input: "data"
input_shape {
  dim: 1
  dim: 3
  dim: 224
  dim: 224
}
```

In [30]:
# calc FPS
# Every iteration rebuilds the input blob and runs one forward pass, so the
# reported rate covers preprocessing plus inference.
niters = 10
# perf_counter is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() can jump if the system clock is adjusted.
st = time.perf_counter()
for n in range(niters):
    blob = cv.dnn.blobFromImage(im, 1, (224, 224), (104, 117, 123))
    net.setInput(blob)
    preds = net.forward()

elapsed = time.perf_counter() - st
fps = float(niters) / elapsed
print("[INFO] FPS {:.5}".format(fps))

[INFO] FPS 67.608


In [31]:
# Rank the class indexes by score: argsort yields ascending order, so reverse
# it (highest probability first) and keep the first five entries.
ascending = np.argsort(preds[0])
descending = ascending[::-1]
idxs = descending[:5]
print(idxs)

[191 186 309 187 305]


In [22]:
# Report the top-5 predictions and annotate a copy of the input image.
dst = im.copy()
font = cv.FONT_HERSHEY_SIMPLEX
green = (0, 255, 0)
for rank, class_idx in enumerate(idxs, start=1):
    label = classes[class_idx]
    score = preds[0][class_idx]
    if rank == 1:
        # only the best prediction is drawn on the image, followed by the
        # FPS readout on the line above it
        text = "{} ({:.2f}%)".format(label, score * 100)
        cv.putText(dst, text, (20, 100), font, 1.5, green, 2)
        cv.putText(dst, '{:.2f} FPS'.format(fps), (20, 40), font, 1.5, green, 2)

    print("[INFO]{}. label: {}, probability: {:.5}".format(rank, label, score))

# # display the output image
# imx.pltimg(dst,mode='bgr')
# print('{:.2f} FPS'.format(fps))

[INFO]1. label: igloo, probability: 0.50725
[INFO]2. label: ice_floe, probability: 0.19791
[INFO]3. label: snowfield, probability: 0.072709
[INFO]4. label: ice_shelf, probability: 0.071913
[INFO]5. label: ski_slope, probability: 0.017611


In [33]:
# Select every class whose score exceeds 0.1. np.nonzero on the boolean mask
# returns the same tuple of index arrays as single-argument np.where.
pred_scores = preds[0]
top_idxs = np.nonzero(np.asarray(pred_scores) > 0.1)
print(pred_scores[top_idxs])

[0.19791022 0.50725454]


In [28]:
print(preds.shape)

# Same 0.1 threshold as the vectorized selection, written as an explicit loop
# that prints each qualifying class index next to its score.
for class_idx, score in enumerate(pred_scores):
  if not float(score) > 0.1:
    continue
  print(class_idx, score)

(1, 365)
186 0.19791022
191 0.50725454


In [42]:
# Demonstrates what a condition-only lookup returns when nothing matches: a
# one-element tuple holding an empty index array. The tuple itself is truthy,
# so `bool(b)` cannot be used to test for "no matches"; inspect b[0] instead.
a = np.array([5, 6])
b = np.nonzero(a > 10)
print(bool(b), len(b), b, b[0])

True 1 (array([], dtype=int64),) []
