In [None]:
import time
import pandas as pd

import torch
import numpy as np
from torchvision import models, transforms

import cv2
from PIL import Image

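# Reference only: a basic OpenCV capture-and-display loop, left commented out.
# Note it uses the alias `cv`, whereas this notebook imports the module as `cv2`.
#   cap = cv.VideoCapture(0)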
#   if not cap.isOpened():
#       print("Cannot open camera")
#       exit()
#   while True:
#       # Capture frame-by-frame
#       ret, frame = cap.read()
#       # if frame is read correctly ret is True
#       if not ret:
#           print("Can't receive frame (stream end?). Exiting ...")
#           break
#       # Our operations on the frame come here
#       gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
#       # Display the resulting frame
#       cv.imshow('frame', gray)
#       if cv.waitKey(1) == ord('q'):
#           break
#   # When everything done, release the capture
#   cap.release()
#   cv.destroyAllWindows()

In [None]:
# Build the list of class labels from classes.txt.
# Each non-empty line is expected to look like "<key>: <label>"; only the label
# is kept, and labels are stored in file order so that classes[i] is the label
# looked up for model output index i.
with open('classes.txt', 'r') as f:
    classes = []
    for line in f:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            # instead of failing on the unpack below.
            continue
        # Split on the FIRST ': ' only, so a label that itself contains ': '
        # stays intact rather than raising "too many values to unpack".
        key, value = line.split(': ', 1)
        classes.append(value)
# Sanity check: display one label to confirm the file was parsed.
classes[700]

In [None]:
# Open camera device 0 and request a small, fast stream.
# NOTE(review): these are requests only; the driver may ignore them and deliver
# a different resolution/frame rate. `preprocess` does not resize, so the model
# sees whatever size the camera actually produces -- confirm this is intended.
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 224)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 224)
cap.set(cv2.CAP_PROP_FPS, 36)

# Convert an HxWxC uint8 RGB frame to a normalised CxHxW float tensor.
# The mean/std values are presumably the ImageNet statistics the pretrained
# weights were trained with -- verify if the weights are ever swapped.
preprocess = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

#net = models.quantization.mobilenet_v2(pretrained=True, quantize=True)
model = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.DEFAULT)
# Switch to inference mode: a freshly constructed module is in training mode,
# where Dropout is active and BatchNorm uses per-batch statistics. With a
# single-frame batch that makes predictions noisy and wrong.
model.eval()

#torch.backends.quantized.engine = 'qnnpack'
# jit model to take it from ~20fps to ~30fps
#net = torch.jit.script(net)

In [None]:
# Live classification loop: grab a frame, classify it, print the top-1 label,
# and show a grayscale preview until 'q' is pressed in the preview window.
started = time.time()
last_logged = time.time()
frame_count = 0

try:
    # Raise instead of calling exit(): in a notebook exit() shuts the kernel
    # down, discarding the loaded model and every other variable.
    if not cap.isOpened():
        raise RuntimeError("Cannot open camera")

    # Gradients are not needed for inference; disabling them saves time/memory.
    with torch.no_grad():
        while True:
            # read frame (OpenCV delivers it in BGR channel order)
            ret, frame = cap.read()
            if not ret:
                raise RuntimeError("failed to read frame")

            # convert opencv output from BGR to RGB for the model; the original
            # BGR frame is kept for the display step below
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # preprocess
            input_tensor = preprocess(image)

            # create a mini-batch as expected by the model
            input_batch = input_tensor.unsqueeze(0)

            # run model
            output = model(input_batch)

            # Pick the most probable class directly on the tensor rather than
            # building and sorting a Python list of every class score per frame.
            probs = output[0].softmax(dim=0)
            top_vals, top_idxs = torch.topk(probs, k=1)
            for val, idx in zip(top_vals, top_idxs):
                print(f"{val.item()*100:.2f}% {classes[idx.item()]}")

            # log model performance (counters reset roughly once per second)
            frame_count += 1
            now = time.time()
            if now - last_logged > 1:
                #print(f"{frame_count / (now-last_logged)} fps")
                last_logged = now
                frame_count = 0

            # Display the resulting frame. Use the untouched BGR frame here:
            # COLOR_BGR2GRAY applied to the RGB copy would swap the red and
            # blue weights in the luminance calculation.
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            cv2.imshow('frame', gray)
            # Mask to the low byte so the comparison still works on platforms
            # where waitKey sets higher bits in its return value.
            if (cv2.waitKey(1) & 0xFF) == ord('q'):
                break
finally:
    # Always free the camera and close the preview window, including when the
    # loop ends through an exception or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()