In [1]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import pyttsx3
from keras.models import load_model
%matplotlib inline

Using TensorFlow backend.


In [2]:
# Side length, in pixels, of the square images the model works with (50 x 50).
IMAGE_SIZE = 50

# Letters the classifier can output; position i is the label for class index i.
LABELS = list("ACEHILOUVW")

In [3]:
# Text-to-speech engine used to read the composed sentence aloud.
engine = pyttsx3.init()
engine.setProperty('rate', 120)  # speech rate (words per minute per the pyttsx3 docs)

# The 'voice' property takes a voice *id string*, not a list index, so passing
# the integer 1 does not select a voice. Look up the installed voices and pick
# the second one (index 1) when it exists; otherwise keep the driver default.
available_voices = engine.getProperty('voices')
if available_voices and len(available_voices) > 1:
    engine.setProperty('voice', available_voices[1].id)


In [4]:
# File name of the saved Keras model (.h5), given relative to the working directory.
MODEL_PATH = "withbgmodelv_small_test.h5"
# Load the model once here; which_letter() uses this module-level `model` for every prediction.
model = load_model(MODEL_PATH)

In [5]:
# IMG_PATH = "data/train_data/A/A_bg_less2.jpg"

def preprocess_image(IMG_PATH):
    """
    Convert an image into the 4D array that is passed to the model.

    :param IMG_PATH: path of the image, or an already-loaded image as a numpy
        array (colour in OpenCV's BGR order, or a 2D grayscale array)
    :return: array of shape (1, IMAGE_SIZE, IMAGE_SIZE, 1) holding the
        grayscale pixels divided by 255, or None if the image could not be
        read or processed (the error is printed, not raised)
    Example:
        img_array = preprocess_image("a.jpg")
    """
    try:
        if isinstance(IMG_PATH, np.ndarray):
            img = IMG_PATH # taking numpy array of image as input
        else:
            img = cv2.imread(IMG_PATH) # taking image path as input
            # cv2.imread does not raise for a missing or unreadable file, it
            # returns None; report that here with the offending path instead
            # of letting cvtColor fail with an opaque assertion.
            if img is None:
                raise ValueError("could not read image file: {!r}".format(IMG_PATH))

        # A 2D array is already single-channel; only colour input is converted.
        if img.ndim != 2:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # Change color space to gray

        img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))

        img_array = img.reshape(IMAGE_SIZE, IMAGE_SIZE, 1) # Reshape array to l * w * channels

        img_array = img_array / 255.0

        # Add a leading batch axis, as the model expects a 4D array
        return np.expand_dims(img_array, axis = 0)

    except Exception as e:
        # Best-effort contract: report the problem and signal failure with None.
        print ("Unexpected error occured", e)
        return None


In [6]:
def which_letter(IMG_PATH):
    """
    Predict which letter an image shows.

    :param IMG_PATH: path of the image, or the image itself as a numpy array
        (it is handed to preprocess_image unchanged)
    :return: (confidence_percent, predicted_label) using the model, or None if
        preprocessing or prediction fails
    eg:
        print(which_letter("sample.jpeg"))
    """
    try:
        img_array = preprocess_image(IMG_PATH)
        if img_array is None:
            # preprocess_image has already printed what went wrong; passing
            # None on to the model would only add a second, misleading error.
            return None

        # preprocess_image yields a batch of one image, so take the single row
        # of class scores and express it in percent.
        scores = model.predict(img_array)[0] * 100
        most_likely_class_index = int(np.argmax(scores))
        return scores.max(), LABELS[most_likely_class_index]

    except Exception as e:
        print (e)
        return None
    
# conf, label = which_letter(IMG_PATH)
# print ("The predicted letter is {} with {} % confidence.".format(label,conf))

# Predictions

In [7]:
IMG_PATH = "data/train_data/A/A_bg_less2.jpg"

# which_letter returns None (not a tuple) when the image cannot be read or
# classified, so check the result before unpacking it.
prediction = which_letter(IMG_PATH)
if prediction is None:
    print ("Could not classify {}".format(IMG_PATH))
else:
    conf, label = prediction
    print ("The predicted letter is {} with {} % confidence.".format(label,conf))




The predicted letter is A with 99.99952697753906 % confidence.


# Get Frames from Webcam and translate

In [None]:
window_name = "Speech Assistant"
# frame_height / frame_width are not read in this cell; kept in case other cells use them.
frame_height, frame_width, roi_height, roi_width = 1200,1800,400,400
x_start, y_start = 200,200
sentence = ""

# Values that never change between frames are built once, outside the loop.
# lower and upper skin color (YCrCb)
skin_ycrcb_min = np.array((0,138,67))
skin_ycrcb_max = np.array((255,173,133))
kernel = np.ones((2,2), dtype = np.uint8)

cap = cv2.VideoCapture(0)
# WINDOW_NORMAL is the window-creation flag. WND_PROP_FULLSCREEN is a property
# id for get/setWindowProperty that merely shares the same numeric value.
cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)

try:
    while True:
        ret, frame = cap.read()
        # cap.read() signals failure with ret == False (never None) and no
        # usable frame; stop instead of crashing on it or spinning forever.
        if not ret:
            print ("No frame captured")
            break

        # Crop the region of interest BEFORE drawing on the frame, and copy it,
        # so the blue border never ends up in the pixels given to the classifier.
        img1 = frame[y_start: y_start + roi_height, x_start: x_start+roi_width].copy()

        # bounding box which captures ASL sign to be detected by the system
        cv2.rectangle(frame, (x_start, y_start), (x_start+roi_width, y_start+roi_height), (255,0,0),3)

        hand_bg_rm = None
        # If valid hand area is cropped (the slice is empty when the frame is
        # smaller than the ROI origin)
        if img1.shape[0] != 0 and img1.shape[1] != 0:
            img_ycrcb = cv2.cvtColor(img1, cv2.COLOR_BGR2YCR_CB)
            blur = cv2.GaussianBlur(img_ycrcb, (11,11),0)

            # detecting the hand in the bounding box
            mask = cv2.inRange(blur, skin_ycrcb_min, skin_ycrcb_max)

            # Fixes holes in foreground
            mask = cv2.dilate(mask, kernel, iterations = 1)

            # Keep only the pixels selected by the skin mask
            hand_bg_rm = cv2.bitwise_and(img1, img1, mask = mask)

            cv2.imshow("mask", mask)
            cv2.imshow("Region of Interest", hand_bg_rm)

        # Control Key
        c = cv2.waitKey(1) & 0xff

        # Speak the sentence (upper or lower case, like the other keys)
        if len(sentence) > 0 and c in (ord('s'), ord('S')):
            engine.say(sentence)
            engine.runAndWait()

        # Clear the sentence
        if c in (ord('c'), ord('C')):
            sentence = ""

        # Delete the last character
        if c in (ord('d'), ord('D')):
            sentence = sentence[:-1]

        # Put Space between words
        if c in (ord('m'), ord('M')):
            sentence += ' '

        if hand_bg_rm is not None:
            # which_letter returns None when classification fails; skip this
            # frame's label rather than crashing on tuple unpacking.
            prediction = which_letter(hand_bg_rm)
            if prediction is not None:
                conf, label = prediction

                cv2.putText(frame, label, (90,90), cv2.FONT_HERSHEY_COMPLEX, 3.0 , (0,0,0))
                # Append the currently predicted letter to the sentence
                if c in (ord('n'), ord('N')):
                    sentence += label

        cv2.putText(frame, sentence, (90, 180), cv2.FONT_HERSHEY_COMPLEX, 3.0, (0,0,0))
        cv2.imshow(window_name, frame)

        # If pressed ESC break
        if c == 27:
            break
finally:
    # Always free the camera and close the windows, even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()
        