In [1]:
# ==========================================================================
# CELL 1: DRIVER CLASS (Run this first)
# ==========================================================================
from pynq import Overlay, allocate
import numpy as np
import time

class FisherfaceDriver:
    def __init__(self, bitfile_path, max_classes=5):
        print(f"Loading overlay: {bitfile_path}")
        self.overlay = Overlay(bitfile_path)
        self.dma = self.overlay.axi_dma_0
        self.ip = self.overlay.fisherface_accel_0
        
        self.VECTOR_SIZE = 10000
        self.MAX_CLASSES = max_classes
        self.HW_SCALE = 128.0
        self.SW_BOOST = 20.0
        self.TOTAL_SCALE = self.HW_SCALE * self.SW_BOOST
        
        self.input_buffer = allocate(shape=(self.VECTOR_SIZE,), dtype=np.int32)
        self.num_classes = 0
        print(f"Driver initialized. MAX_CLASSES={self.MAX_CLASSES}")
    
    def _send_chunked(self, data_array, scale):
        flat_data = data_array.flatten().astype(np.float64)
        quantized = (flat_data * scale).astype(np.int32)
        np.copyto(self.input_buffer, quantized)
        
        CHUNK_SIZE = 4000
        for i in range(0, self.VECTOR_SIZE, CHUNK_SIZE):
            end = min(i + CHUNK_SIZE, self.VECTOR_SIZE)
            self.dma.sendchannel.transfer(self.input_buffer[i:end])
            self.dma.sendchannel.wait()
    
    def load_mean(self, mean_vector):
        assert len(mean_vector) == self.VECTOR_SIZE
        self.ip.register_map.mode = 1
        self.ip.register_map.class_id = 0
        self.ip.register_map.CTRL.AP_START = 1
        self._send_chunked(mean_vector, self.HW_SCALE)
        print("Mean vector loaded.")
    
    def load_weights(self, weight_matrix):
        if weight_matrix.shape[0] == self.VECTOR_SIZE:
            num_classes = weight_matrix.shape[1] if weight_matrix.ndim > 1 else 1
        elif weight_matrix.shape[1] == self.VECTOR_SIZE:
            weight_matrix = weight_matrix.T
            num_classes = weight_matrix.shape[1]
        else:
            raise ValueError(f"Invalid weight shape: {weight_matrix.shape}")
        
        if weight_matrix.ndim == 1:
            weight_matrix = weight_matrix.reshape(-1, 1)
            num_classes = 1
        
        if num_classes > self.MAX_CLASSES:
            print(f"WARNING: Truncating {num_classes} to {self.MAX_CLASSES} classes")
            num_classes = self.MAX_CLASSES
            weight_matrix = weight_matrix[:, :self.MAX_CLASSES]
        
        for c in range(num_classes):
            self.ip.register_map.mode = 2
            self.ip.register_map.class_id = c
            self.ip.register_map.CTRL.AP_START = 1
            self._send_chunked(weight_matrix[:, c], self.TOTAL_SCALE)
            print(f"  Class {c} weights loaded.")
        
        self.num_classes = num_classes
        print(f"All {num_classes} weight vectors loaded.")
    
    def inference(self, face_vector):
        assert len(face_vector) == self.VECTOR_SIZE
        
        self.ip.register_map.mode = 0
        self.ip.register_map.class_id = 0
        self.ip.register_map.CTRL.AP_START = 1
        
        self._send_chunked(face_vector, self.HW_SCALE)
        
        while self.ip.register_map.CTRL.AP_DONE == 0:
            pass
        
        ACC_SCALE = 4096.0
        results = np.zeros(self.num_classes, dtype=np.float64)
        OUTPUT_BASE_OFFSET = 0x20
        
        for c in range(self.num_classes):
            raw_int = self.ip.mmio.read(OUTPUT_BASE_OFFSET + c * 4)
            if raw_int >= 0x80000000:
                raw_int -= 0x100000000
            results[c] = raw_int / (ACC_SCALE * self.SW_BOOST)
        
        return results

In [23]:
"""
==========================================================================
CELL 2: LOAD MODEL & INITIALIZE HARDWARE (Run after Cell 1)
==========================================================================
"""
print("=" * 60)
print("Loading Model Data...")
print("=" * 60)

data = np.load("face_model_data.npz", allow_pickle=True)

mean_vec = data['mean_vec']
eigen_vecs = data['eigen_vecs']
train_proj_db = data['train_proj_db']
train_lbls_db = data['train_lbls_db']

# Handle label_names storage format
if 'label_keys' in data:
    keys = data['label_keys']
    vals = data['label_vals']
    label_names = dict(zip(keys, vals))
else:
    label_names = data['label_names'].item()

print(f"Model loaded:")
print(f"  - eigen_vecs shape: {eigen_vecs.shape}")
print(f"  - train_proj_db shape: {train_proj_db.shape}")
print(f"  - Classes: {list(label_names.values())}")

# Initialize driver
num_classes = eigen_vecs.shape[1] if eigen_vecs.ndim > 1 else 1
driver = FisherfaceDriver("fisherface.bit", max_classes=max(num_classes, 5))

# Load parameters to FPGA
driver.load_mean(mean_vec)
driver.load_weights(eigen_vecs)

print("=" * 60)
print("HARDWARE READY - All classes loaded!")
print("=" * 60)

Loading Model Data...
Model loaded:
  - eigen_vecs shape: (10000, 2)
  - train_proj_db shape: (1035, 2)
  - Classes: ['993', 'peidong', 'unknown']
Loading overlay: fisherface.bit
Driver initialized. MAX_CLASSES=5
Mean vector loaded to FPGA.
  Class 0 weights loaded.
  Class 1 weights loaded.
All 2 weight vectors loaded to FPGA.
HARDWARE READY - All classes loaded!


In [3]:
# Quick test - should return array of 4 values
test_face = np.random.rand(10000) * 255
result = driver.inference(test_face)
print(f"Inference result shape: {result.shape}")
print(f"Values: {result}")

Inference result shape: (2,)
Values: [-1440.72836914  -443.77095947]


In [24]:
# # ==========================================================================
# # CELL 3-BENCHMARK: TIMING ANALYSIS (Run after Cell 2)
# # ==========================================================================
# # This measures exact timing for each stage of face recognition

# import cv2
# import numpy as np
# import time

# # --------------------------------------------------------------
# # CONFIGURATION
# # --------------------------------------------------------------
# IMG_SIZE = (100, 100)
# THRESHOLD = 400.0
# SCALE_FACTOR = 1.0

# # --------------------------------------------------------------
# # SETUP
# # --------------------------------------------------------------
# cascade_path = 'haarcascade_frontalface_default.xml'
# face_cascade = cv2.CascadeClassifier(cascade_path)

# if face_cascade.empty():
#     print("ERROR: Cascade XML not found!")
# else:
#     print("Face cascade loaded.")

# def preprocess(img):
#     g = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if img.ndim == 3 else img
#     g = cv2.resize(g, IMG_SIZE, interpolation=cv2.INTER_AREA)
#     g = cv2.equalizeHist(g)
#     g = cv2.normalize(g, None, 0, 255, cv2.NORM_MINMAX)
#     return g

# # --------------------------------------------------------------
# # OPEN CAMERA
# # --------------------------------------------------------------
# print("\nOpening camera...")
# cap = cv2.VideoCapture(0, cv2.CAP_V4L2)
# cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
# cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

# if not cap.isOpened():
#     print("ERROR: Could not open camera.")
# else:
#     print("Camera opened successfully.")
    
#     # Warm up camera (discard first few frames)
#     for _ in range(10):
#         cap.read()
    
#     # ==============================================================
#     # BENCHMARK 1: Single Frame Breakdown (with face detected)
#     # ==============================================================
#     print("\n" + "=" * 60)
#     print("BENCHMARK 1: Single Frame Timing Breakdown")
#     print("=" * 60)
#     print("Waiting for face detection...")
    
#     # Find a frame with a face
#     face_found = False
#     attempts = 0
#     while not face_found and attempts < 100:
#         ret, frame = cap.read()
#         if not ret:
#             continue
#         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
#         faces = face_cascade.detectMultiScale(gray, 1.1, 5)
#         if len(faces) > 0:
#             face_found = True
#             test_frame = frame.copy()
#             (x, y, w, h) = faces[0]
#         attempts += 1
    
#     if face_found:
#         print(f"Face detected at ({x}, {y}, {w}, {h})")
#         print("-" * 60)
        
#         # Time each stage separately
        
#         # Stage 1: Haar Cascade Detection
#         t0 = time.perf_counter()
#         gray = cv2.cvtColor(test_frame, cv2.COLOR_BGR2GRAY)
#         faces = face_cascade.detectMultiScale(gray, 1.1, 5)
#         t_haar = time.perf_counter() - t0
        
#         # Stage 2: Preprocessing
#         t0 = time.perf_counter()
#         face_roi = test_frame[y:y+h, x:x+w]
#         processed_face = preprocess(face_roi)
#         flat_face = processed_face.reshape(-1).astype(np.float64)
#         t_preprocess = time.perf_counter() - t0
        
#         # Stage 3: FPGA Inference
#         t0 = time.perf_counter()
#         test_proj = driver.inference(flat_face)
#         t_fpga = time.perf_counter() - t0
        
#         # Stage 4: Distance Calculation
#         t0 = time.perf_counter()
#         dists = np.linalg.norm(train_proj_db - test_proj, axis=1)
#         min_index = np.argmin(dists)
#         min_dist = dists[min_index]
#         predicted_lbl = train_lbls_db[min_index]
#         t_distance = time.perf_counter() - t0
        
#         # Total (excluding Haar since face already detected)
#         t_after_detection = t_preprocess + t_fpga + t_distance
#         t_total = t_haar + t_after_detection
        
#         print(f"  Haar Cascade:        {t_haar*1000:8.2f} ms")
#         print(f"  Preprocessing:       {t_preprocess*1000:8.2f} ms")
#         print(f"  FPGA Inference:      {t_fpga*1000:8.2f} ms")
#         print(f"  Distance Calc:       {t_distance*1000:8.2f} ms")
#         print("-" * 60)
#         print(f"  After Detection:     {t_after_detection*1000:8.2f} ms")
#         print(f"  TOTAL (incl Haar):   {t_total*1000:8.2f} ms")
#     else:
#         print("ERROR: No face detected in 100 attempts. Please face the camera.")
    
#     # ==============================================================
#     # BENCHMARK 2: N Iterations (Full Pipeline)
#     # ==============================================================
#     for N in [10, 100]:
#         print("\n" + "=" * 60)
#         print(f"BENCHMARK 2: {N} Iterations (Full Pipeline)")
#         print("=" * 60)
        
#         successful_frames = 0
#         total_time = 0
        
#         t_haar_total = 0
#         t_preprocess_total = 0
#         t_fpga_total = 0
#         t_distance_total = 0
        
#         print(f"Processing {N} frames with face detection...")
        
#         start_all = time.perf_counter()
        
#         while successful_frames < N:
#             ret, frame = cap.read()
#             if not ret:
#                 continue
            
#             # Haar Detection
#             t0 = time.perf_counter()
#             gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
#             faces = face_cascade.detectMultiScale(gray, 1.1, 5)
#             t_haar_total += time.perf_counter() - t0
            
#             if len(faces) > 0:
#                 (x, y, w, h) = max(faces, key=lambda r: r[2] * r[3])
                
#                 if y + h < frame.shape[0] and x + w < frame.shape[1]:
#                     # Preprocessing
#                     t0 = time.perf_counter()
#                     face_roi = frame[y:y+h, x:x+w]
#                     processed_face = preprocess(face_roi)
#                     flat_face = processed_face.reshape(-1).astype(np.float64)
#                     t_preprocess_total += time.perf_counter() - t0
                    
#                     # FPGA Inference
#                     t0 = time.perf_counter()
#                     test_proj = driver.inference(flat_face)
#                     t_fpga_total += time.perf_counter() - t0
                    
#                     # Distance Calculation
#                     t0 = time.perf_counter()
#                     dists = np.linalg.norm(train_proj_db - test_proj, axis=1)
#                     min_index = np.argmin(dists)
#                     min_dist = dists[min_index]
#                     t_distance_total += time.perf_counter() - t0
                    
#                     successful_frames += 1
        
#         end_all = time.perf_counter()
#         total_time = end_all - start_all
        
#         print("-" * 60)
#         print(f"  Frames processed:    {successful_frames}")
#         print(f"  Total wall time:     {total_time*1000:8.2f} ms")
#         print("-" * 60)
#         print(f"  Haar Cascade (sum):  {t_haar_total*1000:8.2f} ms ({t_haar_total/N*1000:.2f} ms/frame)")
#         print(f"  Preprocessing (sum): {t_preprocess_total*1000:8.2f} ms ({t_preprocess_total/N*1000:.2f} ms/frame)")
#         print(f"  FPGA Inference (sum):{t_fpga_total*1000:8.2f} ms ({t_fpga_total/N*1000:.2f} ms/frame)")
#         print(f"  Distance Calc (sum): {t_distance_total*1000:8.2f} ms ({t_distance_total/N*1000:.2f} ms/frame)")
#         print("-" * 60)
#         print(f"  Avg per frame:       {total_time/N*1000:8.2f} ms")
#         print(f"  Effective FPS:       {N/total_time:8.2f} fps")
    
#     # ==============================================================
#     # SUMMARY
#     # ==============================================================
#     print("\n" + "=" * 60)
#     print("SUMMARY")
#     print("=" * 60)
#     print("The bottleneck is Haar Cascade running on ARM CPU.")
#     print("FPGA accelerates the projection step effectively.")
#     print("\nTo speed up, try:")
#     print("  - SCALE_FACTOR = 0.5 (4x faster Haar)")
#     print("  - BATCH_SIZE = 30 (faster results)")
#     print("=" * 60)
    
#     cap.release()
#     print("\nCamera released. Benchmark complete!")

In [5]:
"""
==========================================================================
CELL 3: FACE RECOGNITION WITH VOTING (Run after Cell 2)
==========================================================================
"""
import cv2
import ipywidgets as widgets
from IPython.display import display
import threading
from collections import Counter

# --------------------------------------------------------------
# CONFIGURATION
# --------------------------------------------------------------
IMG_SIZE = (100, 100)
THRESHOLD = 450.0
SCALE_FACTOR = 1.0
BATCH_SIZE = 10

# --------------------------------------------------------------
# SETUP HAAR CASCADE
# --------------------------------------------------------------
cascade_path = 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_path)

if face_cascade.empty():
    print("WARNING: Cascade XML not found!")
    class DummyCascade:
        def detectMultiScale(self, *args, **kwargs): 
            return []
    face_cascade = DummyCascade()
else:
    print("Face cascade loaded.")

# --------------------------------------------------------------
# GLOBAL VARIABLES
# --------------------------------------------------------------
current_frame = None
latest_result_box = None
latest_result_name = ""
latest_result_color = (255, 255, 255)
system_running = True
frame_lock = threading.Lock()

# --------------------------------------------------------------
# HELPER FUNCTIONS
# --------------------------------------------------------------
def preprocess(img):
    g = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if img.ndim == 3 else img
    g = cv2.resize(g, IMG_SIZE, interpolation=cv2.INTER_AREA)
    g = cv2.equalizeHist(g)
    g = cv2.normalize(g, None, 0, 255, cv2.NORM_MINMAX)
    return g

# --------------------------------------------------------------
# AI WORKER THREAD
# --------------------------------------------------------------
def ai_processing_worker():
    global latest_result_box, latest_result_name, latest_result_color, current_frame
    
    prediction_buffer = []
    print("[THREAD] FPGA AI Worker Started")
    
    while system_running:
        frame_to_process = None
        with frame_lock:
            if current_frame is not None:
                frame_to_process = current_frame.copy()
        
        if frame_to_process is None:
            time.sleep(0.05)
            continue
        
        if SCALE_FACTOR != 1.0:
            small_frame = cv2.resize(frame_to_process, (0, 0), 
                                     fx=SCALE_FACTOR, fy=SCALE_FACTOR)
        else:
            small_frame = frame_to_process
        
        gray = cv2.cvtColor(small_frame, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, 1.1, 5)
        
        if len(faces) > 0:
            largest_face = max(faces, key=lambda r: r[2] * r[3])
            (sx, sy, sw, sh) = largest_face
            
            scale_inv = 1.0 / SCALE_FACTOR
            x = int(sx * scale_inv)
            y = int(sy * scale_inv)
            w = int(sw * scale_inv)
            h = int(sh * scale_inv)
            
            if y + h < frame_to_process.shape[0] and x + w < frame_to_process.shape[1]:
                face_roi = cv2.cvtColor(frame_to_process[y:y+h, x:x+w], 
                                        cv2.COLOR_BGR2GRAY)
                processed_face = preprocess(face_roi)
                flat_face = processed_face.reshape(-1).astype(np.float64)
                
                # FPGA INFERENCE
                test_proj = driver.inference(flat_face)
                
                # Distance calculation
                dists = np.linalg.norm(train_proj_db - test_proj, axis=1)
                min_index = np.argmin(dists)
                min_dist = dists[min_index]
                predicted_lbl = train_lbls_db[min_index]
                
                if min_dist > THRESHOLD:
                    instant_name = "Unknown"
                else:
                    instant_name = label_names.get(predicted_lbl, "Unknown")
                    if instant_name.lower() == "unknown":
                        instant_name = "Unknown"
                
                # VOTING LOGIC
                prediction_buffer.append(instant_name)
                
                if len(prediction_buffer) < BATCH_SIZE:
                    latest_result_name = f"Analyzing... {len(prediction_buffer)}/{BATCH_SIZE}"
                    latest_result_color = (200, 200, 200)
                else:
                    counts = Counter(prediction_buffer)
                    if not counts:
                        winner_name, vote_count = "Unknown", 0
                    else:
                        winner_name, vote_count = counts.most_common(1)[0]
                    
#                     if winner_name == "Unknown":
#                         latest_result_name = "Unknown"
#                         latest_result_color = (0, 0, 255)
#                     elif vote_count >= 80:
#                         latest_result_name = f"{winner_name} ({vote_count}%)"
#                         latest_result_color = (0, 255, 0)
#                     elif vote_count >= 50:
#                         latest_result_name = f"Maybe {winner_name}? ({vote_count}%)"
#                         latest_result_color = (0, 255, 255)
#                     else:
#                         latest_result_name = "Unknown (Low Conf)"
#                         latest_result_color = (0, 0, 255)
                    # NEW CODE (for 10 frames):
                    if winner_name == "Unknown":
                        latest_result_name = "Unknown"
                        latest_result_color = (0, 0, 255)
                    elif vote_count >= 8:  # 8,9,10 → Recognized
                        latest_result_name = f"{winner_name} ({vote_count}/{BATCH_SIZE})"
                        latest_result_color = (0, 255, 0)
                    elif vote_count >= 5:  # 5,6,7 → Maybe
                        latest_result_name = f"Maybe {winner_name}? ({vote_count}/{BATCH_SIZE})"
                        latest_result_color = (0, 255, 255)
                    else:  # 0,1,2,3,4 → Unknown
                        latest_result_name = "Unknown (Low Conf)"
                        latest_result_color = (0, 0, 255)
                    
                    print(f">>> [RESULT] {latest_result_name}")
                    prediction_buffer = []
                
                latest_result_box = (x, y, w, h)
        else:
            prediction_buffer = []
            latest_result_box = None
        
        time.sleep(0.01)

# --------------------------------------------------------------
# MAIN DISPLAY LOOP
# --------------------------------------------------------------
image_widget = widgets.Image(format='jpeg', width=640, height=480)
display(image_widget)

cap = cv2.VideoCapture(0, cv2.CAP_V4L2)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

if not cap.isOpened():
    print("ERROR: Could not open camera.")
else:
    system_running = True
    worker_thread = threading.Thread(target=ai_processing_worker)
    worker_thread.start()
    print("[INFO] System Running! Press 'Stop' in Jupyter to end.")
    
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            
            with frame_lock:
                current_frame = frame
            
            if latest_result_box is not None:
                (x, y, w, h) = latest_result_box
                cv2.rectangle(frame, (x, y), (x+w, y+h), latest_result_color, 2)
                cv2.putText(frame, latest_result_name, (x, y-10),
                           cv2.FONT_HERSHEY_SIMPLEX, 0.8, latest_result_color, 2)
            
            _, encoded = cv2.imencode('.jpg', frame, 
                                      [int(cv2.IMWRITE_JPEG_QUALITY), 50])
            image_widget.value = encoded.tobytes()
            time.sleep(0.001)
    
    except KeyboardInterrupt:
        print("Stopping...")
    
    finally:
        system_running = False
        worker_thread.join()
        cap.release()
        print("[INFO] System Stopped")

Face cascade loaded.


Image(value=b'', format='jpeg', height='480', width='640')

[THREAD] FPGA AI Worker Started
[INFO] System Running! Press 'Stop' in Jupyter to end.
>>> [RESULT] Unknown
>>> [RESULT] peidong (8/10)
>>> [RESULT] 993 (9/10)
Stopping...
[INFO] System Stopped


LBP Cascade loaded successfully from lbpcascade_frontalface_improved.xml


Image(value=b'', format='jpeg', height='480', width='640')

[THREAD] FPGA AI Worker Started
[INFO] System Running! Press 'Stop' in Jupyter to end.


Exception in thread Thread-6 (ai_processing_worker):
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1009, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 946, in run
    self._target(*self._args, **self._kwargs)
  File "/tmp/ipykernel_3445/897820363.py", line 110, in ai_processing_worker
ValueError: operands could not be broadcast together with shapes (1074,4) (2,) 


Stopping...
[INFO] System Stopped
