In [1]:
from ViolaJones import *

In [2]:
import matplotlib.pyplot as plt
from skimage.color import rgb2gray
import numpy as np
import skimage.io as io
import random
import cv2
import os
import matplotlib.patches as patches
from sklearn.metrics import classification_report
from collections import defaultdict

In [3]:
train_faces_count = 1000
train_objects_count = 1000

In [4]:
faces = []
files = os.listdir('faces2')

for i in tqdm(range(len(files))):
    img = io.imread(f"faces2/{files[i]}")
    if len(img.shape) > 2:
        img = rgb2gray(img)
    img = cv2.resize(img, (19, 19))
    if (img.max() > 1):
        img = img / 255
    faces.append(img)
    
print(f'Loaded {len(faces)} Face Images')

100%|██████████| 17475/17475 [01:09<00:00, 253.15it/s]

Loaded 17475 Face Images





In [5]:
objects = []
folders = os.listdir('objects')

for i in tqdm(range(len(folders))):
    for file in os.listdir(f"objects/{folders[i]}"):
        img = io.imread(f"objects/{folders[i]}/{file}")
        img = cv2.resize(img, (19, 19))
        if len(img.shape) > 2:
            img = rgb2gray(img)

        if (img.max() > 1):
            img = img / 255

        objects.append(img)
       
print(f'Loaded {len(objects)} Objects Images')

100%|██████████| 97/97 [00:31<00:00,  3.09it/s]

Loaded 7661 Objects Images





In [6]:
X_train = faces[:train_faces_count] + objects[:train_objects_count]
y_train = [1] * train_faces_count + [0] * train_objects_count

# shuffle data
c = list(zip(X_train, y_train))
random.shuffle(c)
X_train, y_train = zip(*c)
print(len(y_train))
print(len(X_train))

2000
2000


In [7]:
X_test = faces[train_faces_count:] + objects[train_objects_count:]
y_test = [1] * (len(faces) - train_faces_count) + [0]  * (len(objects) - train_objects_count)
print(len(y_test))
print(len(X_test))

23136
23136


In [8]:
clf = ViolaJones(X_train, y_train, train_faces_count, train_objects_count)

100%|██████████| 19/19 [00:00<00:00, 97.44it/s]
100%|██████████| 51705/51705 [03:24<00:00, 253.40it/s]


In [None]:
#clf = ViolaJones.load('V1')

In [24]:
stages1 = [20]
stages2 = [20]
stages3 = [30]
stages4 = [30]
stages5 = [30, 35]
stages6 = [40]

In [25]:
clf.train(stages2)

100%|██████████| 20/20 [20:38<00:00, 61.93s/it]


In [11]:
len(clf.stages)

1

In [26]:
def evaluate(clf, data):
    correct = 0
    y_hat = []
    for x, y in data:
        y_hat.append(clf.classify(x))
        correct += 1 if clf.classify(x) == y else 0
    print("Classified %d out of %d test examples" % (correct, len(data)))
    print(f"accuracy = {(correct / len(data)) * 100}%")
    return y_hat

In [27]:
train_data =  list(zip(X_train, y_train))
test_data = list(zip(X_test, y_test))

In [28]:
y_hat = evaluate(clf, train_data)

Classified 1988 out of 2000 test examples
accuracy = 99.4%


In [29]:
unique, counts = np.unique(y_hat, return_counts=True)
dict(zip(unique, counts))

{0: 1012, 1: 988}

In [30]:
_ = evaluate(clf, test_data)

Classified 19500 out of 23136 test examples
accuracy = 84.28423236514523%


In [31]:
print(classification_report(y_test, _))

              precision    recall  f1-score   support

           0       0.65      0.99      0.78      6661
           1       1.00      0.78      0.88     16475

    accuracy                           0.84     23136
   macro avg       0.82      0.89      0.83     23136
weighted avg       0.90      0.84      0.85     23136



In [None]:
zeros = {0: 0, 1: 0, 2 : 0}

for image in faces:
    for i, stage in enumerate(clf.stages):
        stage_pred = clf.classify_stage(
            image, stage[0], stage[1], False)
        if stage_pred == 0:
            zeros[i] = zeros[i] + 1
zeros

In [32]:
clf.save('test_3', True)

In [None]:
all_stages = clf.stages.copy()
len(all_stages)

In [None]:
#clf.stages = all_stages[:2]
clf.stages = all_stages

In [33]:
def scan_image(img, clf, window_x, window_y, stride_x, stride_y, scale):
    res = []
    img = cv2.resize(img, (img.shape[1] // scale, img.shape[0] // scale))
    x, y = img.shape
    for i in range(0, x - window_x + 1, stride_x):
        for j in range(0, y - window_y + 1, stride_y):
            temp = img[i : i + window_x, j : j + window_y]
            p = clf.classify(temp)
            
            if p == 1:
                res.append((i * scale, j * scale, (i + window_x) * scale, (j + window_y) * scale))
    return res

In [38]:
def get_faces(img, clf):
    window_x = 19
    window_y = 19
    stride_x = 3
    stride_y = 3
    pred = []

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) / 255
    pred = pred + scan_image(gray, clf, window_x, window_y, stride_x, stride_y, 10)

    #img = cv2.resize(img, (150, 150))
    #window_x = 150 // 6
    #window_y = 150 // 5
    return pred #scan_image(img, clf, window_x, window_y, stride_x, stride_y, 2) 


In [39]:


cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Cannot open camera")
    exit()
while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    # if frame is read correctly ret is True
    if not ret:
        print("Can't receive frame (stream end?). Exiting ...")
        break

    #img = cv2.resize(frame, (300, 300))
    img = frame.copy()
    faces_out = get_faces(img, clf)
    #faces_out = [(200, 100, 200 + 180, 100 + 180)]
    for i, j, x, y in faces_out:
        cv2.rectangle(img, (j,i), (y, x), (255, 0, 0))

    # Display the resulting frame
    img = cv2.flip(img, 1)
    cv2.imshow('frame', img)
    if cv2.waitKey(1) == ord('a'):
        test_img = frame.copy()
    if cv2.waitKey(1) == ord('q'):
        break
# When everything done, release the capture
cap.release()
cv2.destroyAllWindows()

In [None]:
cap.release()
cv2.destroyAllWindows()

In [None]:
#220 - 640
#19 - x

(640 / 220)

In [None]:
 # Our operations on the frame come here
img = cv2.resize(frame, (300, 300))
img = img[40:-40, :]
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) / 255
faces = get_faces(gray, clf)
for i, j in faces:
    cv2.rectangle(img, (i,j), (i + 60, j + 80), (255, 0, 0))