In [1]:
import numpy as np
import cv2
from collections import deque
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

  _nan_object_mask = _nan_object_array != _nan_object_array


In [2]:
def checkTemplate(curr):
    """Classify a frame as one of the static gestures: rock, scissor or paper.

    The frame and every template image are turned into binary skin masks with
    mySkinDetect and compared with normalized cross-correlation template
    matching. The best match is outlined on a 150x150 copy of the searched
    mask, which is written to "res.jpg" and shown in the "result" window.

    curr - the current color frame, indexed as [row, col, (b, g, r)]
    Returns (score, decision): the best matching score and the gesture name.
    Raises FileNotFoundError if a template image cannot be read.
    """
    # (label, template file) pairs; order is kept so ties resolve as before
    myTemplate = [("rock", "rock.jpg"), ("scissor", "scissor.jpg"), ("paper", "paper.jpg")]
    scores = {"rock": 0, "scissor": 0, "paper": 0}
    # label -> (max_loc, (template rows, template cols), searched image)
    matches = {}

    # the skin mask of the frame does not depend on the template: compute it once
    frame_mask = mySkinDetect(curr)

    # run the frame to match all three possible cases and choose the most probable one
    for label, path in myTemplate:
        template = cv2.imread(path)
        if template is None:
            raise FileNotFoundError("Cannot read template image: " + path)
        template = mySkinDetect(template)
        temp_rows, temp_cols = template.shape[:2]

        img = frame_mask
        img_rows, img_cols = img.shape[:2]
        if temp_rows > img_rows or temp_cols > img_cols:
            # the searched image must not be smaller than the template;
            # cv2.resize takes the size as (width, height) == (cols, rows)
            img = cv2.resize(img, (round(temp_cols * 1.5), round(temp_rows * 1.5)))

        res = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED)
        _, max_val, _, max_loc = cv2.minMaxLoc(res)
        scores[label] = max_val
        matches[label] = (max_loc, (temp_rows, temp_cols), img)

    decision = max(scores, key=scores.get)
    # the score of the winning gesture (not the alphabetically largest key)
    score = scores[decision]

    print("decision", decision)

    # use the location, template size and searched image of the winning template
    max_loc, (temp_rows, temp_cols), img = matches[decision]

    # scale the rectangle with respect to the displayed image size (150,150);
    # max_loc is an (x, y) point, so x scales with the columns and y with the rows
    out_size = 150
    img_rows, img_cols = img.shape[:2]
    scale_x = out_size / img_cols
    scale_y = out_size / img_rows

    top_left = (int(round(max_loc[0] * scale_x)), int(round(max_loc[1] * scale_y)))
    bottom_right = (int(round((max_loc[0] + temp_cols) * scale_x)),
                    int(round((max_loc[1] + temp_rows) * scale_y)))

    # scale the img size to (150,150); this also gives a fresh array to draw on
    img = cv2.resize(img, (out_size, out_size))

    # Draw rectangle on the image
    cv2.rectangle(img, top_left, bottom_right, (255, 0, 0), 2)

    # write the decision text on the image
    cv2.putText(img, decision, (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
    cv2.imwrite("res.jpg", img)
    cv2.imshow("result", img)

    return (score, decision)
    

In [3]:
def checkTemplateD(curr):
    """Classify a motion image as one of the dynamic gestures: waving or non-wave.

    The image is compared against the two blob templates with normalized
    cross-correlation template matching. The best match is outlined on a
    150x150 copy of the searched image, which is written to "res.jpg" and
    shown in the "result" window.

    curr - the color image to classify (must have the same number of channels
           as the template images read by cv2.imread)
    Returns (score, decision): the best matching score and the gesture name
    ('waving' or 'non-wave').
    Raises FileNotFoundError if a template image cannot be read.
    """
    # (label, template file) pairs; the labels are the keys of "scores"
    myTemplate = [("waving", "waving_blob.png"), ("non-wave", "non_wave_blob.png")]
    scores = {'waving': 0, 'non-wave': 0}
    # label -> (max_loc, (template rows, template cols), searched image)
    matches = {}

    # run the frame to match both possible cases and choose the most probable one
    for label, path in myTemplate:
        template = cv2.imread(path)
        if template is None:
            raise FileNotFoundError("Cannot read template image: " + path)
        temp_rows, temp_cols = template.shape[:2]

        img = curr
        img_rows, img_cols = img.shape[:2]

        if temp_rows >= img_rows or temp_cols >= img_cols:
            # enlarge the searched image so the template can slide over it;
            # cv2.resize takes the size as (width, height) == (cols, rows)
            img = cv2.resize(img, (round(temp_cols * 2), round(temp_rows * 2)))

        res = cv2.matchTemplate(img, template, cv2.TM_CCOEFF_NORMED)
        _, max_val, _, max_loc = cv2.minMaxLoc(res)
        scores[label] = max_val
        matches[label] = (max_loc, (temp_rows, temp_cols), img)

    decision = max(scores, key=scores.get)
    # the score of the winning gesture (not the alphabetically largest key)
    score = scores[decision]

    # use the location, template size and searched image of the winning template
    max_loc, (temp_rows, temp_cols), img = matches[decision]

    # scale the rectangle with respect to the displayed image size (150,150);
    # max_loc is an (x, y) point, so x scales with the columns and y with the rows
    out_size = 150
    img_rows, img_cols = img.shape[:2]
    scale_x = out_size / img_cols
    scale_y = out_size / img_rows

    top_left = (int(round(max_loc[0] * scale_x)), int(round(max_loc[1] * scale_y)))
    bottom_right = (int(round((max_loc[0] + temp_cols) * scale_x)),
                    int(round((max_loc[1] + temp_rows) * scale_y)))

    # scale the img size to (150,150); this also gives a fresh array to draw on,
    # so the caller's image is never modified
    img = cv2.resize(img, (out_size, out_size))

    # Draw rectangle on the image
    cv2.rectangle(img, top_left, bottom_right, (0, 0, 255), 2)

    # write the decision text on the image
    cv2.putText(img, decision, (25, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
    cv2.imwrite("res.jpg", img)
    cv2.imshow("result", img)

    return (score, decision)
    

In [4]:
# Function that detects whether a pixel belongs to the skin based on RGB values
# src - the source color image
# dst - the destination grayscale image where skin pixels are colored white and the rest are colored black
def mySkinDetect(src):
    """Build a binary skin mask from a color image using a fixed RGB rule.

    A pixel is marked as skin when
        r > 95, g > 40, b > 20, max(r,g,b) - min(r,g,b) > 15,
        |r - g| > 15, r > g and r > b.

    src - the source color image, indexed as [row, col, (b, g, r)]
    Returns dst, a uint8 array of shape (rows, cols, 1) where skin pixels are
    255 (white) and all other pixels are 0 (black).
    Raises ValueError if src is None (e.g. an image that failed to load).
    """
    # Surveys of skin color modeling and detection techniques:
    # 1. Vezhnevets, Vladimir, Vassili Sazonov, and Alla Andreeva. "A survey on pixel-based skin color detection techniques." Proc. Graphicon. Vol. 3. 2003.
    # 2. Kakumanu, Praveen, Sokratis Makrogiannis, and Nikolaos Bourbakis. "A survey of skin-color modeling and detection methods." Pattern recognition 40.3 (2007): 1106-1122.
    if src is None:
        raise ValueError("mySkinDetect: src is None (image could not be read?)")

    pixels = np.asarray(src)
    # widen to a signed integer type so that differences such as r - g
    # cannot wrap around the way they would in uint8 arithmetic
    b = pixels[:, :, 0].astype(np.int64)
    g = pixels[:, :, 1].astype(np.int64)
    r = pixels[:, :, 2].astype(np.int64)

    spread = np.maximum(np.maximum(r, g), b) - np.minimum(np.minimum(r, g), b)
    skin = ((r > 95) & (g > 40) & (b > 20) & (spread > 15)
            & (np.abs(r - g) > 15) & (r > g) & (r > b))

    dst = np.zeros((pixels.shape[0], pixels.shape[1], 1), dtype="uint8")
    # the 2-D boolean mask selects whole pixels of the (rows, cols, 1) output
    dst[skin] = 255
    return dst

In [5]:
# Function that does frame differencing between the current frame and the previous frame
# prev - the previous color image
# curr - the current color image
# dst - the destination grayscale image where pixels are colored white if the corresponding pixel intensities in the current
# and previous image differ by more than a fixed threshold (50, measured after grayscale conversion)
def myFrameDifferencing(prev, curr):
    """Return a binary mask of the pixels that changed between two frames.

    prev - the previous color image
    curr - the current color image
    The absolute per-channel difference is converted to grayscale and then
    thresholded: pixels whose gray difference is above 50 become 255, all
    others become 0.
    """
    # For more information on operation with arrays:
    # http://docs.opencv.org/modules/core/doc/operations_on_arrays.html
    gray_delta = cv2.cvtColor(cv2.absdiff(prev, curr), cv2.COLOR_BGR2GRAY)

    # cv2.threshold returns (threshold used, thresholded image); keep the image
    binary_mask = cv2.threshold(gray_delta, 50, 255, cv2.THRESH_BINARY)[1]
    return binary_mask

In [6]:
# Function that accumulates the frame differences for a certain number of pairs of frames
# mh - vector of frame difference images
# dst - the destination grayscale image to store the accumulation of the frame difference images
def myMotionEnergy(mh):
    """Accumulate a sequence of frame-difference masks into one motion image.

    mh - sequence (list, deque, ...) of frame difference images, each of shape
         (rows, cols) or (rows, cols, 1) and all of the same size
    Returns dst, a uint8 array of shape (rows, cols, 1) that is 255 wherever
    at least one of the masks in mh is 255, and 0 elsewhere.
    Raises ValueError if mh is empty.
    """
    frames = list(mh)
    if not frames:
        raise ValueError("myMotionEnergy: mh must contain at least one frame")

    rows, cols = frames[0].shape[:2]
    moving = np.zeros((rows, cols), dtype=bool)
    for frame in frames:
        # masks may arrive as (rows, cols) or (rows, cols, 1); compare them as 2-D
        moving |= (np.asarray(frame).reshape(rows, cols) == 255)

    dst = np.zeros((rows, cols, 1), dtype="uint8")
    dst[moving] = 255
    return dst

In [None]:
def main():
    """Run the webcam gesture demo until 'q' is pressed or the stream ends.

    Prints a confusion matrix and classification report for a fixed list of
    recorded predictions, then shows the live frame, its skin mask, the frame
    difference and the accumulated motion image in separate windows. Every
    25th frame the motion image is saved to 'motion.jpg' and the current frame
    is classified with checkTemplate.

    Returns 0 on normal termination, -1 if the camera cannot be opened or the
    first frame cannot be read.
    """
    # a) Reading a stream of images from a webcamera, and displaying the video
    # open the video camera no. 0
    # for more information on reading and writing video: http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html
    cap = cv2.VideoCapture(0)

    #if not successful, exit program
    if not cap.isOpened():
        print("Cannot open the video cam")
        return -1

    # from here on the camera is open: always release it, even on early return or error
    try:
        y_true = ['paper','scissor','paper','paper','scissor','rock','paper','rock','scissor','paper','scissor','rock','rock','paper','paper','rock','rock','scissor','paper','paper']
        y_pred = ['rock','scissor','paper','paper','scissor','rock','paper','rock','scissor','paper','scissor','rock','rock','rock','rock','rock','rock','rock','rock','paper']

        print(confusion_matrix(y_true,y_pred,labels=['rock','paper','scissor']))
        print(classification_report(y_true,y_pred))

        # read a new frame from video
        success, prev_frame = cap.read()

        #if not successful, exit program
        if not success:
            print("Cannot read a frame from video stream")
            return -1
        cv2.namedWindow("frame", cv2.WINDOW_AUTOSIZE)

        prev_frame = cv2.resize(prev_frame,(150,150))

        # motion history: a sliding window over the last 5 frame-difference masks,
        # seeded with blank masks; maxlen makes append() drop the oldest entry
        blank = np.zeros((prev_frame.shape[0], prev_frame.shape[1], 1), dtype = "uint8")
        myMotionHistory = deque((blank.copy() for _ in range(5)), maxlen=5)
        frame = 0
        while True:
            #read a new frame from video
            success, curr_frame = cap.read()

            # check the read before touching the frame: on failure there is no image to resize
            if not success:
                print("Cannot read a frame from video stream")
                break

            curr_frame = cv2.resize(curr_frame,(150,150))
            frame += 1

            cv2.imshow('frame',curr_frame)

            # b) Skin color detection
            mySkin = mySkinDetect(curr_frame)
            cv2.imshow('mySkinDetect',mySkin)

            # c) Background differencing
            frameDest = myFrameDifferencing(prev_frame, curr_frame)
            cv2.imshow('myFrameDifferencing',frameDest)

            # d) Visualizing motion history
            myMotionHistory.append(frameDest)
            myMH = myMotionEnergy(myMotionHistory)
            cv2.imshow('myMotionHistory',myMH)

            # every 25th frame: snapshot the motion image and classify the current frame
            if frame % 25 == 0:
                cv2.imwrite('motion.jpg',myMH)
                frame_captured = cv2.imread('motion.jpg')
                frame_captured = cv2.resize(frame_captured,(150,150))
                cv2.imshow('frame.jpg',frame_captured)
                checkTemplate(curr_frame)

            prev_frame = curr_frame

            # wait for 'q' key press. If 'q' key is pressed, break loop
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()
        cv2.waitKey(1)
    return 0

In [None]:
# Start the webcam demo only when this code runs as the main module
# (a notebook cell or a script), not when it is imported.
if __name__ == "__main__":
    main()

[[6 0 0]
 [4 5 0]
 [1 0 4]]
              precision    recall  f1-score   support

       paper       1.00      0.56      0.71         9
        rock       0.55      1.00      0.71         6
     scissor       1.00      0.80      0.89         5

   micro avg       0.75      0.75      0.75        20
   macro avg       0.85      0.79      0.77        20
weighted avg       0.86      0.75      0.76        20

decision paper
decision rock
decision rock
decision rock
