In [1]:
# ----------------------------------------------------------
# Audience Analysis Script using Hailo-8, DeGirum SDK, and PiCamera2
# ----------------------------------------------------------
# Models: Face Detection (WiderFace + 5 keypoints), Age, Gender, Emotion, Embedding
# Hardware: Raspberry Pi 5 + Hailo-8 + Camera Module 3
# Filename: 000_audience_analysis_live.ipynb
# Created date: 01 July 2025
# Last modified date: 08 Aug 2025
# Version: 1.1.0
# ----------------------------------------------------------

import os
import time
import json
import uuid
import random
import logging
import numpy as np
import degirum as dg
import cv2
from picamera2 import Picamera2
from datetime import datetime
from logging.handlers import TimedRotatingFileHandler
import bme680
from scipy.optimize import linear_sum_assignment
from hailo_platform import Device
import psutil
import shutil
import sys

# ----------------------------------------------------------
# Exception Handling
# ----------------------------------------------------------
def handle_uncaught_exceptions(exc_type, exc_value, exc_traceback):
    """Log an uncaught exception at CRITICAL level, except for Ctrl-C.

    KeyboardInterrupt is forwarded to the interpreter's default hook
    instead of being written to the log.
    """
    is_interrupt = issubclass(exc_type, KeyboardInterrupt)
    if not is_interrupt:
        logger.critical("Uncaught Exception", exc_info=(exc_type, exc_value, exc_traceback))
    else:
        sys.__excepthook__(exc_type, exc_value, exc_traceback)


# Route every otherwise-unhandled exception through the handler above.
sys.excepthook = handle_uncaught_exceptions

# ----------------------------------------------------------
# Configuration
# ----------------------------------------------------------
# Show an OpenCV preview window with per-face overlays when True.
preview_camera = True
# Mirror log records to the console in addition to the rotating log file.
console_output = True

# Arguments shared by every dg.load_model() call further down.
inference_host_address = "@local"
zoo_url = "../models"
token = ""
device_type = "HAILORT/HAILO8"

# WiderFace 5-keypoint model - detects face + 5 facial landmarks
widerface_model_name = "yolov8n_relu6_widerface_kpts--640x640_quant_hailort_hailo8_1"

# Face embedding model - generates 128-dimensional feature vectors
# NOTE(review): the 128-d figure (and EMB_DIM below) is not verified anywhere in
# this file - confirm it against the model's actual output size.
face_embed_model_name = "arcface_mobilefacenet--112x112_quant_hailort_hailo8_1"

# Age estimation model
age_model_name = "yolov8n_relu6_age--256x256_quant_hailort_hailo8_1"

# Gender classification model
gender_model_name = "yolov8n_relu6_fairface_gender--256x256_quant_hailort_hailo8_1"

# Emotion recognition model
# NOTE(review): unlike the other model names this one ends in "multidevice"
# rather than "hailo8" - confirm it is the intended build for device_type.
emotion_model_name = "emotion_recognition_fer2013--64x64_quant_hailort_multidevice_1"

# Length of the zero-vector placeholder used when a face has no embedding.
EMB_DIM = 128
# Seconds a viewer must go unseen before their summary is logged and dropped.
SUMMARY_TIMEOUT_SEC = 10
# Running per-viewer statistics, keyed by the tracker-assigned viewer id.
viewer_summaries = {}

# ----------------------------------------------------------
# Logging Setup
# ----------------------------------------------------------
os.makedirs("../logs", exist_ok=True)

# File handler: rotates every hour (UTC) and keeps the four most recent backups.
handler = TimedRotatingFileHandler(
    filename="../logs/audience_analysis_live.log",
    when="H",
    interval=1,
    backupCount=4,
    utc=True,
)
handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s"))

# Drop handlers left over from a previous run of this cell before attaching ours.
logger = logging.getLogger("audience_analysis_live")
logger.setLevel(logging.INFO)
logger.handlers.clear()
logger.addHandler(handler)

if console_output:
    # Console mirror shares the file handler's formatter.
    ch = logging.StreamHandler()
    ch.setFormatter(handler.formatter)
    logger.addHandler(ch)

# ----------------------------------------------------------
# BME688 Setup
# ----------------------------------------------------------
def set_bme688_sensor(sensor):
    """Apply the oversampling, filter and gas-measurement settings to *sensor*."""
    # (setter, value) pairs, applied in this order.
    settings = (
        (sensor.set_humidity_oversample, bme680.OS_2X),
        (sensor.set_pressure_oversample, bme680.OS_4X),
        (sensor.set_temperature_oversample, bme680.OS_8X),
        (sensor.set_filter, bme680.FILTER_SIZE_3),
        (sensor.set_gas_status, bme680.ENABLE_GAS_MEAS),
    )
    for apply_setting, value in settings:
        apply_setting(value)

# Try the primary I2C address first. If constructing or configuring the sensor
# raises RuntimeError/IOError, retry once on the secondary address. A failure on
# the secondary address is not caught here and propagates to the caller.
try:
    bme_sensor = bme680.BME680(bme680.I2C_ADDR_PRIMARY)
    set_bme688_sensor(bme_sensor)
except (RuntimeError, IOError):
    bme_sensor = bme680.BME680(bme680.I2C_ADDR_SECONDARY)
    set_bme688_sensor(bme_sensor)

def read_bme688_data():
    """Return the current sensor reading as a dict of rounded values.

    Keys are ``temp_c``, ``humidity``, ``pressure_hPa`` and
    ``gas_resistance_ohms``. Every value is None when
    ``bme_sensor.get_sensor_data()`` returns a falsy result.
    """
    if not bme_sensor.get_sensor_data():
        return {"temp_c": None, "humidity": None, "pressure_hPa": None, "gas_resistance_ohms": None}

    reading = bme_sensor.data
    return {
        "temp_c": round(reading.temperature, 2),
        "humidity": round(reading.humidity, 2),
        "pressure_hPa": round(reading.pressure, 2),
        "gas_resistance_ohms": round(reading.gas_resistance, 2),
    }

# ----------------------------------------------------------
# Camera Streaming
# ----------------------------------------------------------
class CameraStream:
    """Endless frame source backed by Picamera2, capped at roughly *fps* frames/s.

    Iterating the object yields one ``capture_array()`` frame per step. Call
    ``stop()`` when done so the camera is released and can be opened again
    (for example when the notebook cell is re-run).
    """

    def __init__(self, fps=5):
        """Open, configure and start the camera.

        Args:
            fps: Upper bound on frames yielded per second.
        """
        self.picam2 = Picamera2()
        self.interval = 1.0 / fps
        try:
            self.picam2.configure(self.picam2.create_preview_configuration(
                main={"format": "RGB888"}
            ))
            self.picam2.start(show_preview=False)
        except Exception:
            # Do not leave the camera claimed when setup fails half-way.
            self.picam2.close()
            raise
        # Short pause after start; presumably lets the sensor settle - confirm.
        time.sleep(2)

    def __iter__(self):
        """Yield frames forever, sleeping as needed to honour the frame interval."""
        while True:
            # Monotonic clock: pacing must not jump when the wall clock is adjusted.
            start = time.monotonic()
            frame = self.picam2.capture_array()
            yield frame
            # Time spent by the consumer between yield and resume counts toward
            # the interval, so slow downstream processing shortens the sleep.
            elapsed = time.monotonic() - start
            time.sleep(max(0, self.interval - elapsed))

    def stop(self):
        """Stop streaming and release the camera device."""
        try:
            self.picam2.stop()
        finally:
            # stop() alone keeps the device claimed; close() releases it.
            self.picam2.close()

# ----------------------------------------------------------
# Utilities
# ----------------------------------------------------------
def cosine_distance(a: np.ndarray, b: np.ndarray) -> float:
    """Return the cosine distance ``1 - cos(a, b)`` between two vectors.

    Inputs are flattened first, so an embedding delivered with extra singleton
    dimensions compares the same as its 1-D form.

    Returns:
        A plain Python float. The maximum "unknown" distance 1.0 is returned
        when either vector has zero (or NaN) norm, when either is empty, or
        when the two vectors differ in length - e.g. a zero-vector placeholder
        compared against a real embedding of another size - instead of raising.
    """
    va = np.asarray(a, dtype=np.float64).ravel()
    vb = np.asarray(b, dtype=np.float64).ravel()
    if va.size == 0 or va.size != vb.size:
        return 1.0
    den = np.linalg.norm(va) * np.linalg.norm(vb)
    # "not den > 0" is also true for NaN, matching the zero-norm fallback.
    if not den > 0:
        return 1.0
    return float(1.0 - np.dot(va, vb) / den)

def draw_overlay(image, emo_res, age_res, gen_res):
    """Draw a box and a "gender | age | emotion" caption for each face on *image*.

    The three result lists are read in lockstep, one entry per face. The box
    coordinates are taken from the ``"bbox"`` key of each *emo_res* entry; an
    entry without a usable ``"bbox"`` (or with otherwise malformed values) is
    skipped rather than aborting the whole overlay.

    Args:
        image: Frame to draw on; modified in place.
        emo_res: Per-face dicts with ``"bbox"``, ``"label"`` and ``"score"``.
        age_res: Per-face dicts whose ``"score"`` is shown (rounded) as the age.
        gen_res: Per-face dicts with ``"label"`` and ``"score"``.

    Returns:
        The same *image* object.
    """
    for emo_r, age_r, gen_r in zip(emo_res, age_res, gen_res):
        try:
            x1, y1, x2, y2 = map(int, emo_r.get("bbox", []))
            age   = round(age_r.get("score", 0))
            g_lbl = gen_r.get("label", "")
            g_sc  = gen_r.get("score", 0.0)
            emo   = emo_r.get("label", "")
            e_sc  = emo_r.get("score", 0.0)

            label = f"{g_lbl} ({g_sc:.2f}) | Age: {age} | {emo} ({e_sc:.2f})"
            cv2.rectangle(image, (x1, y1), (x2, y2), (0,255,255), 2)
            (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.45, 1)
            # Filled strip behind the caption so the text stays readable.
            cv2.rectangle(image, (x1, y1-22), (x1+w, y1), (0,255,255), -1)
            cv2.putText(image, label, (x1, y1-5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0,0,0), 1)
        except Exception:
            # Best-effort drawing: one bad entry must not break the preview.
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit through and leaves a trace of what was skipped.
            logger.debug("Skipping overlay for malformed face result", exc_info=True)
    return image

def get_mac_address():
    """Format the 48-bit value from ``uuid.getnode()`` as ``AA:BB:CC:DD:EE:FF``."""
    node = uuid.getnode()
    # Most significant octet first.
    octets = [(node >> shift) & 0xFF for shift in (40, 32, 24, 16, 8, 0)]
    return ":".join("%02X" % octet for octet in octets)


# Resolved once at start-up and embedded in every summary record.
mac_address = get_mac_address()

def get_rpi_system_metrics():
    """Collect CPU temperature, CPU load, RAM and root-disk usage into a dict.

    ``cpu_temp_c`` is None when the thermal-zone file does not exist. The CPU
    load is sampled over 0.5 s, so this call blocks for that long.
    """
    mib = 1024 * 1024
    gib = 1024 * 1024 * 1024

    try:
        with open("/sys/class/thermal/thermal_zone0/temp", "r") as temp_file:
            raw_value = temp_file.read().strip()
            # The file value is divided by 1000 to obtain the reported figure.
            cpu_temp = int(raw_value) / 1000.0
    except FileNotFoundError:
        cpu_temp = None

    cpu_usage = psutil.cpu_percent(interval=0.5)
    memory = psutil.virtual_memory()
    root_disk = shutil.disk_usage("/")

    return {
        "cpu_temp_c": cpu_temp,
        "cpu_usage_percent": cpu_usage,
        "ram_used_mb": round(memory.used / mib, 2),
        "ram_total_mb": round(memory.total / mib, 2),
        "disk_used_gb": round(root_disk.used / gib, 2),
        "disk_total_gb": round(root_disk.total / gib, 2),
    }

def get_hailo_temp():
    """Return the first scanned Hailo device's ``ts0_temperature`` rounded to
    two decimals, or None when the scan finds no device."""
    handles = [Device(info) for info in Device.scan()]
    if not handles:
        return None
    reading = handles[0].control.get_chip_temperature()
    return round(reading.ts0_temperature, 2)

# ----------------------------------------------------------
# Head Pose / Gaze Estimation from 5 keypoints
# ----------------------------------------------------------
def _estimate_camera_matrix(height, width):
    """Build an approximate 3x3 float32 intrinsic matrix for a frame.

    The focal length is taken as 0.9 x *width* on both axes and the principal
    point as the frame centre.
    """
    K = np.eye(3, dtype=np.float32)
    K[0, 0] = K[1, 1] = 0.9 * width
    K[0, 2] = width / 2
    K[1, 2] = height / 2
    return K

def _five_point_template_3d():
    """Return the fixed (5, 3) float32 reference points used for pose fitting.

    NOTE(review): the rows presumably correspond to the detector's five
    landmarks in its output order - confirm against the model.
    """
    coords = (
        -30.0,  35.0, 30.0,
         30.0,  35.0, 30.0,
          0.0,   0.0, 60.0,
        -25.0, -35.0, 20.0,
         25.0, -35.0, 20.0,
    )
    return np.asarray(coords, dtype=np.float32).reshape(5, 3)

def head_pose_from_5pts(pts2d, frame_shape):
    """Estimate head yaw, pitch and roll in degrees from five 2-D landmarks.

    The landmarks are fitted to the 3-D template from
    ``_five_point_template_3d()`` with an approximate camera matrix.

    Args:
        pts2d: Array-like of shape (5, 2) with landmark pixel coordinates, in
            the same order as the template rows.
        frame_shape: Shape of the frame the landmarks came from; only the
            first two entries (height, width) are used.

    Returns:
        ``(yaw, pitch, roll)`` - rotation about the camera's vertical,
        horizontal and optical axes respectively - or ``(None, None, None)``
        when the pose cannot be solved.
    """
    h, w = frame_shape[:2]
    K = _estimate_camera_matrix(h, w)
    dist = np.zeros(5)
    pts3d = _five_point_template_3d()
    img_pts = np.ascontiguousarray(pts2d, dtype=np.float32)

    try:
        # SOLVEPNP_ITERATIVE without an initial guess needs at least six points
        # for a non-planar template, so five landmarks are rejected by OpenCV.
        # EPnP accepts four or more; its result then seeds an iterative
        # refinement, which is allowed once a guess is supplied.
        ok, rvec, tvec = cv2.solvePnP(pts3d, img_pts, K, dist, flags=cv2.SOLVEPNP_EPNP)
        if ok:
            ok, rvec, tvec = cv2.solvePnP(
                pts3d, img_pts, K, dist, rvec=rvec, tvec=tvec,
                useExtrinsicGuess=True, flags=cv2.SOLVEPNP_ITERATIVE,
            )
    except cv2.error:
        return None, None, None
    if not ok:
        return None, None, None

    R, _ = cv2.Rodrigues(rvec)
    # The template coordinates look Y-up with the third axis pointing toward
    # the camera, whereas the image/camera frame is Y-down, Z-forward. Folding
    # that 180-degree flip about X into R makes a frontal face map to (about)
    # the identity, so all three angles are near zero when facing the camera.
    # NOTE(review): assumes that template orientation - confirm.
    R = R @ np.diag([1.0, -1.0, -1.0])

    sy = np.sqrt(R[0,0]**2 + R[1,0]**2)
    # Decomposition R = Rz(roll) @ Ry(yaw) @ Rx(pitch):
    yaw   = np.degrees(np.arctan2(-R[2,0], sy))       # about the vertical (Y) axis
    pitch = np.degrees(np.arctan2(R[2,1], R[2,2]))    # about the horizontal (X) axis
    roll  = np.degrees(np.arctan2(R[1,0], R[0,0]))    # about the optical (Z) axis
    return yaw, pitch, roll

def gaze_on_screen_from_5pts(pts2d, frame_shape, yaw_thr=25, pitch_thr=20):
    """Decide whether a face is turned toward the screen.

    Returns ``(looking, yaw, pitch)``. *looking* is true when both
    ``abs(yaw)`` and ``abs(pitch)`` are within their thresholds. When the
    head pose is unavailable the result is ``(False, None, None)``.
    """
    yaw, pitch, _roll = head_pose_from_5pts(pts2d, frame_shape)
    if yaw is None:
        return False, None, None
    looking = abs(yaw) <= yaw_thr and abs(pitch) <= pitch_thr
    return looking, yaw, pitch

# ----------------------------------------------------------
# Viewer Tracker
# ----------------------------------------------------------
class ViewerTracker:
    """Keeps short-lived face tracks and assigns each detection a viewer id.

    Detections are matched to existing tracks by minimising a weighted sum of
    ``1 - IoU`` and embedding cosine distance. A matched pair is accepted when
    either its IoU reaches ``iou_threshold`` or its embedding distance is at
    most ``emb_threshold``; everything else starts a new track. Tracks not
    updated for more than ``timeout_sec`` seconds are dropped.
    """

    def __init__(self, iou_threshold=0.3, emb_threshold=0.4, w_iou=0.5, w_emb=0.5, timeout_sec=10):
        self.tracks = {}
        self.timeout = timeout_sec
        self.iou_thr, self.emb_thr = iou_threshold, emb_threshold
        self.w_iou, self.w_emb = w_iou, w_emb

    @staticmethod
    def _iou(a, b):
        """Intersection-over-union of two ``(x1, y1, x2, y2)`` boxes; 0 when the union is empty."""
        left, top = max(a[0], b[0]), max(a[1], b[1])
        right, bottom = min(a[2], b[2]), min(a[3], b[3])
        overlap = max(0, right - left) * max(0, bottom - top)
        area_a = (a[2] - a[0]) * (a[3] - a[1])
        area_b = (b[2] - b[0]) * (b[3] - b[1])
        union = area_a + area_b - overlap
        if union > 0:
            return overlap / union
        return 0

    def _clean(self):
        """Remove every track whose last update is older than the timeout."""
        now = time.time()
        stale = [tid for tid, trk in self.tracks.items()
                 if now - trk['last_seen'] > self.timeout]
        for tid in stale:
            del self.tracks[tid]

    def _start_track(self, bbox, emb):
        """Register a new track under a fresh 8-hex-character id; returns ``(id, True)``."""
        nid = uuid.uuid4().hex[:8]
        self.tracks[nid] = {'bbox': bbox, 'emb': emb, 'last_seen': time.time()}
        return nid, True

    def update(self, det_bboxes, det_embs):
        """Match this frame's detections to tracks.

        Returns one ``(viewer_id, is_new)`` tuple per detection, in input order.
        """
        self._clean()
        track_ids = list(self.tracks)
        n_tracks, n_dets = len(track_ids), len(det_bboxes)

        # Nothing to match against: every detection opens a track.
        if n_tracks == 0:
            return [self._start_track(bb, emb) for bb, emb in zip(det_bboxes, det_embs)]

        cost = np.zeros((n_tracks, n_dets), dtype=np.float32)
        for row, tid in enumerate(track_ids):
            known = self.tracks[tid]
            known_bbox, known_emb = known['bbox'], known['emb']
            for col, (bb, emb) in enumerate(zip(det_bboxes, det_embs)):
                overlap = self._iou(known_bbox, bb)
                distance = cosine_distance(known_emb, emb)
                cost[row, col] = self.w_iou * (1 - overlap) + self.w_emb * distance

        results = [None] * n_dets
        for row, col in zip(*linear_sum_assignment(cost)):
            tid = track_ids[row]
            known = self.tracks[tid]
            overlap = self._iou(known['bbox'], det_bboxes[col])
            distance = cosine_distance(known['emb'], det_embs[col])
            # The optimal assignment can still pair unrelated faces; gate it.
            if not (overlap >= self.iou_thr or distance <= self.emb_thr):
                continue
            known.update({
                'bbox': det_bboxes[col], 'emb': det_embs[col], 'last_seen': time.time()
            })
            results[col] = (tid, False)

        # Detections left unmatched become new viewers.
        for col in range(n_dets):
            if results[col] is None:
                results[col] = self._start_track(det_bboxes[col], det_embs[col])
        return results

# ----------------------------------------------------------
# Load Models
# ----------------------------------------------------------
# Connection settings that are identical for every model.
_model_load_kwargs = dict(
    inference_host_address=inference_host_address,
    zoo_url=zoo_url,
    token=token,
    device_type=device_type,
)

# Load all five models up front; any failure is logged and re-raised so the
# script does not start with a partial model set.
try:
    widerface_model = dg.load_model(model_name=widerface_model_name, **_model_load_kwargs)
    face_embed_model = dg.load_model(model_name=face_embed_model_name, **_model_load_kwargs)
    age_model = dg.load_model(model_name=age_model_name, **_model_load_kwargs)
    gender_model = dg.load_model(model_name=gender_model_name, **_model_load_kwargs)
    emotion_model = dg.load_model(model_name=emotion_model_name, **_model_load_kwargs)
except Exception:
    logger.exception("Failed to load model")
    raise

# ----------------------------------------------------------
# Inference Loop
# ----------------------------------------------------------
def _first_result(model, crop):
    """Run *model* on *crop* and return its first result dict, or {} when there is none."""
    if not crop.size:
        return {}
    results = model.predict(crop).results
    return results[0] if len(results) > 0 else {}


def _extract_keypoints(det):
    """Return a detection's landmarks as a float32 array, or None when unavailable.

    Looks under the keys ``"kpts"``, ``"landmarks"`` and ``"keypoints"`` in that
    order. Accepts either plain coordinates (nested pairs or a flat list of
    ten numbers, which is reshaped to (5, 2)) or a list of dicts that each
    carry their point under ``"landmark"``.
    NOTE(review): the dict layout is what the detector is expected to emit -
    confirm against the SDK's result format.
    """
    raw = None
    for key in ("kpts", "landmarks", "keypoints"):
        candidate = det.get(key)
        # Explicit None/length checks: truth-testing an array would raise.
        if candidate is not None and len(candidate) > 0:
            raw = candidate
            break
    if raw is None:
        return None
    try:
        if isinstance(raw[0], dict):
            raw = [entry["landmark"][:2] for entry in raw]
        pts = np.asarray(raw, dtype=np.float32)
    except (KeyError, IndexError, TypeError, ValueError):
        # Unusable landmarks only disable gaze estimation for this face.
        return None
    if pts.ndim == 1 and pts.size == 10:
        pts = pts.reshape(5, 2)
    return pts


def run_inference(video_source):
    """Yield per-frame face analysis results for every frame of *video_source*.

    For each frame the face detector is run, every detected face is cropped
    (after clamping its box to the frame) and passed to the emotion, age,
    gender and embedding models.

    Yields:
        ``{"frame": frame, "faces": [...]}`` where each face dict has the keys
        ``"bbox"`` (as returned by the detector), ``"kpts"`` (array or None),
        ``"emotion"``, ``"age"``, ``"gender"`` and ``"embedding"`` (first
        result dict of the respective model, or {} when it returned nothing).

    A frame whose processing raises is logged and skipped after a short pause.
    """
    for frame in video_source:
        try:
            det = widerface_model.predict(frame)
            frame_h, frame_w = frame.shape[:2]
            faces_out = []
            for det_i in det.results:
                bbox = det_i.get("bbox", None)
                if not bbox:
                    continue
                x1, y1, x2, y2 = map(int, bbox)
                x1, y1 = max(0, x1), max(0, y1)
                x2, y2 = min(frame_w, x2), min(frame_h, y2)
                # Box lies outside the frame or is degenerate after clamping.
                if x2 <= x1 or y2 <= y1:
                    continue
                crop = frame[y1:y2, x1:x2]
                kpts_np = _extract_keypoints(det_i)
                emo = _first_result(emotion_model, crop)
                age = _first_result(age_model, crop)
                gen = _first_result(gender_model, crop)
                emb = _first_result(face_embed_model, crop)
                faces_out.append({"bbox": bbox, "kpts": kpts_np, "emotion": emo, "age": age,
                                  "gender": gen, "embedding": emb})
        except Exception:
            logger.exception("Per-frame inference failed")
            time.sleep(0.2)
            continue
        yield {"frame": frame, "faces": faces_out}

# ----------------------------------------------------------
# Main Runtime
# ----------------------------------------------------------
def _record_viewer_observation(vid, face, frame_shape, now):
    """Fold one face observation for viewer *vid* into ``viewer_summaries``."""
    age_score = face["age"].get("score", 0.0)
    age_est = round(age_score)
    gender = face["gender"].get("label", "")
    gender_score = face["gender"].get("score", 0.0)
    emotion = face["emotion"].get("label", "")
    emotion_score = face["emotion"].get("score", 0.0)
    # NOTE(review): placeholder - this is a random number, not a measurement,
    # so "avg_attention_duration" in the summaries carries no real signal yet.
    attention_duration = round(random.uniform(2.0, 7.5), 1)

    gaze, yaw, pitch = False, None, None
    kpts = face["kpts"]
    if kpts is not None and kpts.shape == (5, 2):
        try:
            gaze, yaw, pitch = gaze_on_screen_from_5pts(kpts, frame_shape)
        except cv2.error:
            # A failed pose fit only means "gaze unknown" for this observation.
            logger.debug("Head-pose estimation failed for viewer %s", vid, exc_info=True)
    logger.debug("[Gaze] Viewer %s | Yaw: %s Pitch: %s | Looking: %s", vid, yaw, pitch, gaze)

    stats = viewer_summaries.setdefault(vid, {
        "viewer_id": vid, "count": 0, "age_sum": 0.0, "age_score_sum": 0.0,
        "gender_score_sum": 0.0, "emotion_score_sum": 0.0,
        "attention_duration_sum": 0.0, "gaze_count": 0,
        "gender_votes": {}, "emotion_votes": {}, "last_seen": now
    })
    stats["count"] += 1
    stats["age_sum"] += age_est
    stats["age_score_sum"] += age_score
    stats["gender_score_sum"] += gender_score
    stats["emotion_score_sum"] += emotion_score
    stats["attention_duration_sum"] += attention_duration
    stats["gaze_count"] += int(gaze)
    stats["last_seen"] = now
    stats["gender_votes"][gender] = stats["gender_votes"].get(gender, 0) + 1
    stats["emotion_votes"][emotion] = stats["emotion_votes"].get(emotion, 0) + 1


def _log_expired_viewer_summaries(now):
    """Log one JSON summary per viewer unseen for SUMMARY_TIMEOUT_SEC, then drop them."""
    expired_viewers = [
        vid for vid, stats in viewer_summaries.items()
        if now - stats["last_seen"] >= SUMMARY_TIMEOUT_SEC
    ]
    if not expired_viewers:
        return

    # Sensor and device readings are taken once per batch: the system metrics
    # call blocks while it samples CPU load, so repeating it per viewer would
    # stall the frame loop for no benefit.
    env_data = read_bme688_data()
    device_info = {
        "mac_address": mac_address,
        "hailo_temp_c": get_hailo_temp(),
        "coordinates": "3.1319N, 101.6841E",
        **get_rpi_system_metrics()
    }

    for vid in expired_viewers:
        stats = viewer_summaries[vid]
        count = stats["count"]
        summary_out = {
            "timestamp": datetime.utcnow().isoformat() + "Z",
            "device": device_info,
            "env": env_data,
            "viewer_id": vid,
            "summary": {
                "appearance_count": count,
                "avg_age_est": round(stats["age_sum"] / count, 2),
                "avg_age_score": round(stats["age_score_sum"] / count, 2),
                "avg_gender_score": round(stats["gender_score_sum"] / count, 2),
                "majority_gender": max(stats["gender_votes"], key=stats["gender_votes"].get, default=None),
                "avg_emotion_score": round(stats["emotion_score_sum"] / count, 2),
                "majority_emotion": max(stats["emotion_votes"], key=stats["emotion_votes"].get, default=None),
                "avg_attention_duration": round(stats["attention_duration_sum"] / count, 1),
                "gaze_at_screen_ratio": round(stats["gaze_count"] / count, 2)
            }
        }
        logger.info(json.dumps(summary_out))
        del viewer_summaries[vid]


video_source = CameraStream(fps=5)
tracker = ViewerTracker(iou_threshold=0.2, emb_threshold=0.6, w_iou=0.5, w_emb=0.5, timeout_sec=10)

try:
    for payload in run_inference(video_source):
        frame = payload["frame"]
        faces = payload["faces"]
        now = time.time()

        if faces:
            bboxes = [f["bbox"] for f in faces]
            # Flattened so every embedding is 1-D regardless of how the model
            # shapes its output; a zero vector stands in for a missing one.
            embs = [
                np.asarray(f["embedding"].get("data", [np.zeros(EMB_DIM)])[0], dtype=np.float32).ravel()
                if f.get("embedding") else np.zeros(EMB_DIM, dtype=np.float32)
                for f in faces
            ]
            assignments = tracker.update(bboxes, embs)

            if preview_camera:
                # draw_overlay() reads the box from the emotion entries, but the
                # emotion model only saw the crop - attach the frame-level box
                # from the face detector so the overlay is actually drawn.
                emo_res = [{**f["emotion"], "bbox": f["bbox"]} for f in faces]
                age_res = [f["age"] for f in faces]
                gen_res = [f["gender"] for f in faces]
                frame = draw_overlay(frame, emo_res, age_res, gen_res)

            for f, (vid, _is_new) in zip(faces, assignments):
                _record_viewer_observation(vid, f, frame.shape, now)

        # Expiry, preview refresh and the quit key must run on every frame,
        # including frames without faces: otherwise the last viewers' summaries
        # are never emitted once the audience has left, and the window freezes.
        _log_expired_viewer_summaries(now)

        if preview_camera:
            cv2.imshow("Audience Analysis", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
except KeyboardInterrupt:
    logger.info("Execution interrupted by user.")
finally:
    try:
        video_source.stop()
        cv2.destroyAllWindows()
        logger.info("Resources released. Exiting.")
    except Exception:
        logger.exception("Error occurred during resource cleanup.")




[0:10:22.853769855] [29189] [1;32m INFO [1;37mCamera [1;34mcamera_manager.cpp:330 [0mlibcamera v0.5.2+99-bfd68f78
[0:10:22.861131391] [29543] [1;32m INFO [1;37mRPI [1;34mpisp.cpp:720 [0mlibpisp version v1.2.1 981977ff21f3 29-04-2025 (14:13:50)
[0:10:22.863709891] [29543] [1;32m INFO [1;37mIPAProxy [1;34mipa_proxy.cpp:180 [0mUsing tuning file /usr/share/libcamera/ipa/rpi/pisp/imx708.json
[0:10:22.870858166] [29543] [1;32m INFO [1;37mCamera [1;34mcamera_manager.cpp:220 [0mAdding camera '/base/axi/pcie@1000120000/rp1/i2c@88000/imx708@1a' for pipeline handler rpi/pisp
[0:10:22.870873111] [29543] [1;32m INFO [1;37mRPI [1;34mpisp.cpp:1179 [0mRegistered camera /base/axi/pcie@1000120000/rp1/i2c@88000/imx708@1a to CFE device /dev/media0 and ISP device /dev/media2 using PiSP variant BCM2712_D0
[0:10:22.874672768] [29189] [1;32m INFO [1;37mCamera [1;34mcamera.cpp:1215 [0mconfiguring streams: (0) 640x480-RGB888/sRGB (1) 1536x864-BGGR_PISP_COMP1/RAW
[0:10:22.874767454] [2954