In [2]:
# ----------------------------------------------------------
# Audience Analysis Script using Hailo-8, DeGirum SDK, and PiCamera2
# ----------------------------------------------------------
# Models: Face Detection, Age, Gender, Emotion, Embedding
# Hardware: Raspberry Pi 5 + Hailo-8 + Camera Module 3
# Filename: 000_audience_analysis_live.ipynb
# Created date: 01 July 2025
# Last modified date: 06 July 2025
# Version: 1.0.0
# ----------------------------------------------------------

# ----------------------------------------------------------
# Sample JSON Output (Single Viewer Record)
# ----------------------------------------------------------
# {
#   "timestamp": "2025-07-13T03:42:46.614502Z",
#   "device": {
#     "mac_address": "88:A2:9E:1C:49:6F",
#     "hailo_temp_c": 51.58,
#     "coordinates": "3.1319N, 101.6841E",
#     "cpu_temp_c": 56.75,
#     "cpu_usage_percent": 2.0,
#     "ram_used_mb": 2055.5,
#     "ram_total_mb": 16219.31,
#     "disk_used_gb": 9.36,
#     "disk_total_gb": 113.76
#   },
#   "env": {
#     "temp_c": 29.24,
#     "humidity": 52.19,
#     "pressure_hPa": 1004.3,
#     "gas_resistance_ohms": 102400000.0
#   },
#   "viewer_id": "52419e40",
#   "summary": {
#     "appearance_count": 6,
#     "avg_age_est": 24.17,
#     "avg_age_score": 24.07,
#     "avg_gender_score": 0.87,
#     "majority_gender": "Female",
#     "avg_emotion_score": 0.95,
#     "majority_emotion": "neutral",
#     "avg_attention_duration": 5.4,
#     "gaze_at_screen_ratio": 0.33
#   }
# }
# 

import os
import time
import json
import uuid
import random
import logging
import numpy as np
import degirum as dg
import degirum_tools
import cv2
from picamera2 import Picamera2
from datetime import datetime
from logging.handlers import TimedRotatingFileHandler
import bme680
from scipy.optimize import linear_sum_assignment
from hailo_platform import Device
import psutil
import shutil
import sys

def handle_uncaught_exceptions(exc_type, exc_value, exc_traceback):
    """Exception hook that records uncaught exceptions in the log.

    KeyboardInterrupt (and subclasses) is handed to Python's default hook
    unchanged; every other exception is logged at CRITICAL level together
    with its traceback.
    """
    if not issubclass(exc_type, KeyboardInterrupt):
        logger.critical("Uncaught Exception", exc_info=(exc_type, exc_value, exc_traceback))
        return
    sys.__excepthook__(exc_type, exc_value, exc_traceback)


# Install the hook for the rest of the process.
sys.excepthook = handle_uncaught_exceptions

# ----------------------------------------------------------
# Configuration
# ----------------------------------------------------------
# Runtime toggles
preview_camera = False   # set True to see overlays on screen
console_output  = False  # set True to also log to console

# Arguments shared by every dg.load_model() call in this script
inference_host_address = "@local"
zoo_url               = "../models"
token                 = ""
device_type           = "HAILORT/HAILO8"

# Names of the models that are looked up in `zoo_url`
face_det_model_name   = "retinaface_mobilenet--736x1280_quant_hailort_hailo8_1"
face_embed_model_name = "arcface_mobilefacenet--112x112_quant_hailort_hailo8_1"
age_model_name        = "yolov8n_relu6_age--256x256_quant_hailort_hailo8_1"
gender_model_name     = "yolov8n_relu6_fairface_gender--256x256_quant_hailort_hailo8_1"
emotion_model_name    = "emotion_recognition_fer2013--64x64_quant_hailort_multidevice_1"
# TDDFA model (for head pose / gaze estimation)
tddfa_model_name = "tddfa_mobilenet_v1--120x120_quant_hailort_hailo8_1"

# Length of the zero-vector fallback used when an embedding result carries no
# "data" entry; adjust if your embedding is larger
EMB_DIM = 128
SUMMARY_TIMEOUT_SEC = 10  # seconds without detection to log summary
viewer_summaries = {}     # holds stats per viewer (keyed by viewer id)

# ----------------------------------------------------------
# Logging Setup
# ----------------------------------------------------------
# Log records go to ../logs; the file rotates every hour (UTC) and the four
# most recent rotated files are kept.
os.makedirs("../logs", exist_ok=True)

logger = logging.getLogger("audience_analysis_live")
logger.handlers.clear()  # drop any handlers already attached to this named logger
logger.setLevel(logging.INFO)

handler = TimedRotatingFileHandler(
    filename="../logs/audience_analysis_live.log",
    when="H",
    interval=1,
    backupCount=4,
    utc=True,
)
handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s"))
logger.addHandler(handler)

if console_output:
    # Mirror the file output on the console using the same formatter object.
    ch = logging.StreamHandler()
    ch.setFormatter(handler.formatter)
    logger.addHandler(ch)



# ----------------------------------------------------------
# BME688 Setup
# ----------------------------------------------------------
def set_bme688_sensor(sensor):
    """Apply this script's fixed measurement settings to a bme680 sensor object.

    Sets humidity, pressure and temperature oversampling, the filter size,
    and enables gas measurement, in that order.
    """
    settings = (
        (sensor.set_humidity_oversample, bme680.OS_2X),
        (sensor.set_pressure_oversample, bme680.OS_4X),
        (sensor.set_temperature_oversample, bme680.OS_8X),
        (sensor.set_filter, bme680.FILTER_SIZE_3),
        (sensor.set_gas_status, bme680.ENABLE_GAS_MEAS),
    )
    for apply_setting, value in settings:
        apply_setting(value)

# Open and configure the sensor at the primary I2C address. If construction or
# configuration raises RuntimeError/IOError, retry once at the secondary address.
try:
    bme_sensor = bme680.BME680(bme680.I2C_ADDR_PRIMARY)
    set_bme688_sensor(bme_sensor)
except (RuntimeError, IOError):
    # There is no further fallback: an error raised here propagates.
    bme_sensor = bme680.BME680(bme680.I2C_ADDR_SECONDARY)
    set_bme688_sensor(bme_sensor)

def read_bme688_data():
    """Return the current BME688 reading as a dict for the "env" record.

    Keys: temp_c, humidity, pressure_hPa, gas_resistance_ohms. Values are
    rounded to two decimals, or all None when the sensor reports that no
    data is available.
    """
    keys = ("temp_c", "humidity", "pressure_hPa", "gas_resistance_ohms")
    if not bme_sensor.get_sensor_data():
        return dict.fromkeys(keys)

    reading = bme_sensor.data
    values = (
        reading.temperature,
        reading.humidity,
        reading.pressure,
        reading.gas_resistance,
    )
    return {key: round(value, 2) for key, value in zip(keys, values)}

# ----------------------------------------------------------
# Camera Streaming Class
# ----------------------------------------------------------
class CameraStream:
    """Endless, rate-limited source of frames captured with Picamera2.

    Iterating over an instance yields one frame per `1 / fps` seconds at most.
    The time the consumer spends on a frame counts towards that interval,
    because the generator is suspended at `yield` while the consumer works.

    Args:
        fps: Upper bound on frames yielded per second; must be positive.

    Raises:
        ValueError: If `fps` is not positive.
    """

    def __init__(self, fps=5):
        # Validate before touching the hardware so an invalid fps cannot
        # leave a constructed camera object behind.
        if fps <= 0:
            raise ValueError(f"fps must be positive, got {fps!r}")
        self.interval = 1.0 / fps

        self.picam2 = Picamera2()
        self.picam2.configure(self.picam2.create_preview_configuration(
            main={"format": "RGB888"}
        ))
        self.picam2.start(show_preview=False)
        time.sleep(2)  # presumably warm-up time for the camera before the first capture

    def __iter__(self):
        while True:
            # Monotonic clock: a wall-clock step backwards would otherwise
            # make `interval - elapsed` huge and stall the stream.
            start = time.monotonic()
            frame = self.picam2.capture_array()
            yield frame
            elapsed = time.monotonic() - start
            time.sleep(max(0.0, self.interval - elapsed))

    def stop(self):
        """Stop the underlying camera."""
        self.picam2.stop()

# ----------------------------------------------------------
# Utility Functions
# ----------------------------------------------------------
def cosine_distance(a: np.ndarray, b: np.ndarray) -> float:
    """Return 1 minus the cosine similarity of `a` and `b`.

    When the product of the two norms is not positive (e.g. one vector is
    all zeros) the similarity is undefined and 1.0 is returned.
    """
    dot_product = np.dot(a, b)
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    if not norm_product > 0:
        return 1.0
    return 1.0 - (dot_product / norm_product)

def draw_overlay(image, emo_res, age_res, gen_res):
    """Draw a box and a gender / age / emotion label for each face onto `image`.

    The three result lists are consumed in lockstep, so faces beyond the
    shortest list are not drawn. Drawing is best-effort: a face whose data
    cannot be drawn is skipped and the failure is logged at DEBUG level.

    Args:
        image: Frame to draw on; it is modified in place.
        emo_res: Per-face emotion results; "bbox", "label" and "score" are read.
        age_res: Per-face age results; "score" is read and shown as the age.
        gen_res: Per-face gender results; "label" and "score" are read.

    Returns:
        The same `image` object.
    """
    for emo_r, age_r, gen_r in zip(emo_res, age_res, gen_res):
        try:
            x1, y1, x2, y2 = map(int, emo_r.get("bbox", []))
            age   = round(age_r.get("score", 0))
            g_lbl = gen_r.get("label", "")
            g_sc  = gen_r.get("score", 0.0)
            emo   = emo_r.get("label", "")
            e_sc  = emo_r.get("score", 0.0)

            label = f"{g_lbl} ({g_sc:.2f}) | Age: {age} | {emo} ({e_sc:.2f})"
            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 255), 2)
            (w, _h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.45, 1)
            # Filled strip above the box as a background for the label text
            cv2.rectangle(image, (x1, y1 - 22), (x1 + w, y1), (0, 255, 255), -1)
            cv2.putText(image, label, (x1, y1 - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 0), 1)
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt and
            # SystemExit still propagate; keep a trace of what went wrong.
            logger.debug("[Overlay] Skipping a face that could not be drawn", exc_info=True)
    return image

def get_mac_address():
    """Return `uuid.getnode()` formatted as six upper-case hex octets joined by ':'."""
    node = uuid.getnode()
    # Most significant octet first: shifts of 40, 32, ..., 0 bits
    octets = [(node >> shift) & 0xFF for shift in range(40, -1, -8)]
    return ":".join(format(octet, "02X") for octet in octets)


# Computed once at start-up and reused in every summary record
mac_address = get_mac_address()

# Raspberry Pi system metrics
def get_rpi_system_metrics():
    """Collect host CPU, RAM and root-disk metrics for the summary record.

    Note: this call blocks for about 0.5 s because the CPU usage is sampled
    over that interval.

    Returns:
        dict with keys cpu_temp_c, cpu_usage_percent, ram_used_mb,
        ram_total_mb, disk_used_gb and disk_total_gb. cpu_temp_c is None when
        the thermal file cannot be read or parsed.
    """
    bytes_per_mb = 1024 * 1024
    bytes_per_gb = 1024 * 1024 * 1024

    # CPU temperature: the sysfs value is divided by 1000 before reporting.
    # A missing, unreadable or malformed file must not abort the summary.
    try:
        with open("/sys/class/thermal/thermal_zone0/temp", "r") as f:
            cpu_temp = int(f.read().strip()) / 1000.0
    except (OSError, ValueError):
        cpu_temp = None

    # CPU usage (sampled over 0.5s)
    cpu_usage = psutil.cpu_percent(interval=0.5)

    # RAM
    mem = psutil.virtual_memory()
    ram_total_mb = round(mem.total / bytes_per_mb, 2)
    ram_used_mb = round(mem.used / bytes_per_mb, 2)

    # Disk (root filesystem)
    disk = shutil.disk_usage("/")
    disk_total_gb = round(disk.total / bytes_per_gb, 2)
    disk_used_gb = round(disk.used / bytes_per_gb, 2)

    return {
        "cpu_temp_c": cpu_temp,
        "cpu_usage_percent": cpu_usage,
        "ram_used_mb": ram_used_mb,
        "ram_total_mb": ram_total_mb,
        "disk_used_gb": disk_used_gb,
        "disk_total_gb": disk_total_gb
    }

# Get Hailo-8 temperature(s)
def get_hailo_temp():
    """Return the ts0 chip temperature of the first Hailo device found.

    Only the first entry returned by `Device.scan()` is opened; any further
    devices are ignored.

    Returns:
        The `ts0_temperature` reading rounded to two decimals (reported as
        "hailo_temp_c", so presumably degrees Celsius), or None when the scan
        finds no device.
    """
    first_device_info = next(iter(Device.scan()), None)
    if first_device_info is None:
        return None

    temp_info = Device(first_device_info).control.get_chip_temperature()
    return round(temp_info.ts0_temperature, 2)

# ----------------------------------------------------------
# Run Inference Generator
# ----------------------------------------------------------
def run_inference(video_source):
    """Yield one dict of compound-model predictions per frame of `video_source`.

    Every frame is passed to the emotion, age, gender and embedding compound
    models in that order; the results are yielded under the keys "emotion",
    "age", "gender" and "embedding".
    """
    for frame in video_source:
        emotion_out = face_emotion_model.predict(frame)
        age_out = face_age_model.predict(frame)
        gender_out = face_gender_model.predict(frame)
        embedding_out = face_embed_model_comp.predict(frame)
        yield {
            "emotion": emotion_out,
            "age": age_out,
            "gender": gender_out,
            "embedding": embedding_out,
        }

# ----------------------------------------------------------
# ViewerTracker: IoU + Embedding Matching
# ----------------------------------------------------------
class ViewerTracker:
    """Assign persistent viewer IDs to per-frame face detections.

    Each live track stores the last bounding box, the last embedding and the
    time it was last matched. New detections are matched to tracks with the
    Hungarian algorithm on a cost that mixes (1 - IoU) and embedding cosine
    distance. A proposed match is accepted when EITHER the IoU reaches
    `iou_threshold` OR the embedding distance is within `emb_threshold`.

    Args:
        iou_threshold: Minimum IoU that accepts a match on its own.
        emb_threshold: Maximum cosine distance that accepts a match on its own.
        w_iou: Weight of (1 - IoU) in the assignment cost.
        w_emb: Weight of the embedding distance in the assignment cost.
        timeout_sec: Tracks not matched for longer than this are dropped.
    """

    def __init__(self, iou_threshold=0.3, emb_threshold=0.4,
                 w_iou=0.5, w_emb=0.5, timeout_sec=10):
        self.iou_thr = iou_threshold
        self.emb_thr = emb_threshold
        self.w_iou   = w_iou
        self.w_emb   = w_emb
        self.timeout = timeout_sec
        self.tracks  = {}  # id -> {'bbox', 'emb', 'last_seen'}

    @staticmethod
    def _iou(a, b):
        """Intersection-over-union of two [x1, y1, x2, y2] boxes (0 if the union is empty)."""
        xA, yA = max(a[0], b[0]), max(a[1], b[1])
        xB, yB = min(a[2], b[2]), min(a[3], b[3])
        inter = max(0, xB - xA) * max(0, yB - yA)
        areaA = (a[2] - a[0]) * (a[3] - a[1])
        areaB = (b[2] - b[0]) * (b[3] - b[1])
        uni   = areaA + areaB - inter
        return inter / uni if uni > 0 else 0

    def _clean(self):
        """Drop tracks that have not been matched within `timeout` seconds."""
        now = time.time()
        for tid in list(self.tracks):
            if now - self.tracks[tid]['last_seen'] > self.timeout:
                del self.tracks[tid]

    def _start_track(self, bbox, emb):
        """Register a new track for a detection and return its 8-hex-char id."""
        nid = uuid.uuid4().hex[:8]
        self.tracks[nid] = {'bbox': bbox, 'emb': emb, 'last_seen': time.time()}
        return nid

    def update(self, det_bboxes, det_embs):
        """Match the current detections against live tracks.

        Args:
            det_bboxes: One [x1, y1, x2, y2] box per detection.
            det_embs: One embedding vector per detection, in the same order.

        Returns:
            A list of `(viewer_id, is_new)` tuples, one per detection, in
            detection order. Boxes and embeddings are paired positionally; if
            the two sequences differ in length the unpaired tail is ignored.
        """
        self._clean()

        # Pair once so every code path treats mismatched lengths the same way
        # (previously the matching path indexed past the shorter sequence).
        detections = list(zip(det_bboxes, det_embs))
        track_ids = list(self.tracks)
        n_tracks, n_dets = len(track_ids), len(detections)

        # no existing tracks -> all new
        if n_tracks == 0:
            return [(self._start_track(bb, emb), True) for bb, emb in detections]
        if n_dets == 0:
            return []

        # build cost matrix, remembering the raw scores for the accept test
        cost = np.zeros((n_tracks, n_dets), dtype=np.float32)
        pair_scores = {}
        for i, tid in enumerate(track_ids):
            track = self.tracks[tid]
            for j, (db, de) in enumerate(detections):
                iou_score = self._iou(track['bbox'], db)
                emb_dist  = cosine_distance(track['emb'], de)
                pair_scores[i, j] = (iou_score, emb_dist)
                cost[i, j] = self.w_iou * (1 - iou_score) + self.w_emb * emb_dist

        # Hungarian assignment
        rows, cols = linear_sum_assignment(cost)
        results = [None] * n_dets

        # accept matches under one of the thresholds
        for r, c in zip(rows, cols):
            r, c = int(r), int(c)
            tid = track_ids[r]
            iou_score, emb_dist = pair_scores[r, c]
            logger.debug("[Tracker] comparing track %s <-> det %d: IoU=%.2f, emb_dist=%.2f",
                         tid, c, iou_score, emb_dist)
            if iou_score >= self.iou_thr or emb_dist <= self.emb_thr:
                bb, emb = detections[c]
                self.tracks[tid].update({
                    'bbox': bb,
                    'emb':  emb,
                    'last_seen': time.time()
                })
                results[c] = (tid, False)
            else:
                logger.debug("[Tracker] rejecting match (IoU<%s AND emb_dist>%s)",
                             self.iou_thr, self.emb_thr)

        # unmatched -> new
        for j, (bb, emb) in enumerate(detections):
            if results[j] is None:
                results[j] = (self._start_track(bb, emb), True)

        return results

# ----------------------------------------------------------
# Gaze
# ----------------------------------------------------------
def estimate_gaze_from_tddfa(frame, bbox, model, yaw_thresh=25, pitch_thresh=20):
    """
    Estimate if the viewer is looking at the screen based on head pose from TDDFA.

    The face is cropped from `frame`, resized to 120x120 and passed to `model`.
    The "data" entry of the first result is flattened and its first two values
    are taken as yaw and pitch.
    NOTE(review): this assumes the model output starts with
    [yaw, pitch, roll, ...] in degrees - confirm against the model's
    postprocessing.

    Args:
        frame (np.ndarray): Full RGB frame
        bbox (list[int]): [x1, y1, x2, y2] face bounding box
        model (DeGirumModel): Loaded TDDFA model via DeGirum SDK
        yaw_thresh (float): Max yaw (degrees) to consider facing screen
        pitch_thresh (float): Max pitch (degrees) to consider facing screen

    Returns:
        bool: True if looking at screen
        float: yaw angle (None when it could not be estimated)
        float: pitch angle (None when it could not be estimated)
    """
    try:
        # Clamp to the frame origin: a negative index would wrap around in
        # NumPy slicing and silently crop the wrong region.
        x1, y1, x2, y2 = (max(0, int(v)) for v in bbox)
        face_crop = frame[y1:y2, x1:x2]
        if face_crop.shape[0] < 10 or face_crop.shape[1] < 10:
            return False, None, None

        # model expects 120x120; skip the resize when the crop already matches
        if tuple(face_crop.shape[:2]) != (120, 120):
            face_crop = cv2.resize(face_crop, (120, 120))
        result = model(face_crop)

        detections = result.results
        if len(detections) == 0:
            return False, None, None

        # Accept both a flat [yaw, pitch, roll, ...] list and a nested
        # [[yaw, pitch, roll, ...]] one by flattening before indexing.
        pose = np.asarray(detections[0].get("data", []), dtype=np.float64).ravel()
        if pose.size < 3:
            return False, None, None

        yaw = float(pose[0])
        pitch = float(pose[1])

        is_looking = abs(yaw) <= yaw_thresh and abs(pitch) <= pitch_thresh
        return is_looking, yaw, pitch

    except Exception as e:
        logger.debug(f"[Gaze] Error estimating gaze: {e}")
        return False, None, None

# ----------------------------------------------------------
# Load Models & Compound Pipelines
# ----------------------------------------------------------

try:
    # Arguments that are identical for every dg.load_model call below
    _load_args = dict(
        inference_host_address=inference_host_address,
        zoo_url=zoo_url,
        token=token,
        device_type=device_type,
    )

    # Face detection (first stage of every compound model below)
    face_det_model = dg.load_model(model_name=face_det_model_name, **_load_args)
    face_det_model.overlay_color = [(255, 255, 0), (0, 255, 0)]

    # Per-face models applied to the detected face crops
    face_embed_model = dg.load_model(model_name=face_embed_model_name, **_load_args)
    age_model = dg.load_model(model_name=age_model_name, **_load_args)
    gender_model = dg.load_model(model_name=gender_model_name, **_load_args)
    emotion_model = dg.load_model(model_name=emotion_model_name, **_load_args)

    # Head-pose model; called directly on face crops rather than through a compound model
    tddfa_model = dg.load_model(model_name=tddfa_model_name, **_load_args)

    # Compound pipelines: detect faces, then run the second model on each crop
    _crop_and_classify = degirum_tools.CroppingAndClassifyingCompoundModel
    face_emotion_model = _crop_and_classify(face_det_model, emotion_model)
    face_age_model = _crop_and_classify(face_det_model, age_model)
    face_gender_model = _crop_and_classify(face_det_model, gender_model)
    face_embed_model_comp = _crop_and_classify(face_det_model, face_embed_model)
except Exception:
    # Record the failure in the log file, then let it propagate.
    logger.exception("Failed to load model")
    raise

# ----------------------------------------------------------
# Main Runtime
# ----------------------------------------------------------
def _new_viewer_stats(vid, now):
    """Return an empty running-totals record for viewer `vid`."""
    return {
        "viewer_id": vid,
        "count": 0,
        "age_sum": 0.0,
        "age_score_sum": 0.0,
        "gender_score_sum": 0.0,
        "emotion_score_sum": 0.0,
        "attention_duration_sum": 0.0,
        "gaze_count": 0,
        "gender_votes": {},
        "emotion_votes": {},
        "last_seen": now
    }


def _viewer_summary(stats):
    """Collapse a viewer's running totals into the "summary" section of the JSON record."""
    count = stats["count"]
    gender_votes = stats["gender_votes"]
    emotion_votes = stats["emotion_votes"]
    return {
        "appearance_count": count,
        "avg_age_est": round(stats["age_sum"] / count, 2),
        "avg_age_score": round(stats["age_score_sum"] / count, 2),
        "avg_gender_score": round(stats["gender_score_sum"] / count, 2),
        "majority_gender": max(gender_votes, key=gender_votes.get, default=None),
        "avg_emotion_score": round(stats["emotion_score_sum"] / count, 2),
        "majority_emotion": max(emotion_votes, key=emotion_votes.get, default=None),
        "avg_attention_duration": round(stats["attention_duration_sum"] / count, 1),
        "gaze_at_screen_ratio": round(stats["gaze_count"] / count, 2)
    }


def _log_expired_viewers(now):
    """Log one JSON summary per viewer unseen for SUMMARY_TIMEOUT_SEC and forget them."""
    expired = [
        vid for vid, stats in viewer_summaries.items()
        if now - stats["last_seen"] >= SUMMARY_TIMEOUT_SEC
    ]
    if not expired:
        return

    # Sensor and device telemetry is sampled once and shared by every summary
    # emitted in this pass: get_rpi_system_metrics() alone blocks ~0.5 s.
    env_data = read_bme688_data()
    device_info = {
        "mac_address": mac_address,
        "hailo_temp_c": get_hailo_temp(),
        "coordinates": "3.1319N, 101.6841E",
        **get_rpi_system_metrics()
    }

    for vid in expired:
        stats = viewer_summaries.pop(vid)
        summary_out = {
            "timestamp": datetime.utcnow().isoformat() + "Z",
            "device": device_info,
            "env": env_data,
            "viewer_id": vid,
            "summary": _viewer_summary(stats)
        }
        logger.info(json.dumps(summary_out))


video_source = CameraStream(fps=5)
tracker = ViewerTracker(
    iou_threshold=0.2,    # a little lower to catch small overlaps
    emb_threshold=0.6,    # cosine distance of normalized embeddings lies in [0, 2]
    w_iou=0.5,
    w_emb=0.5,
    timeout_sec=10
)

try:
    with degirum_tools.Display("Audience Analysis") as disp:
        try:
            for results in run_inference(video_source):
                emo_res = results["emotion"].results
                age_res = results["age"].results
                gen_res = results["gender"].results
                emb_res = results["embedding"].results

                frame = results["emotion"].image
                now = time.time()

                if emo_res:
                    bboxes = [r["bbox"] for r in emo_res]
                    embs   = [
                        np.array(r.get("data", [np.zeros(EMB_DIM)])[0], dtype=np.float32)
                        for r in emb_res
                    ]

                    assignments = tracker.update(bboxes, embs)
                    # Only faces present in every result list can be recorded
                    n_faces = min(len(emo_res), len(age_res), len(gen_res),
                                  len(embs), len(assignments))

                    for i in range(n_faces):
                        vid, _is_new = assignments[i]
                        age_score = age_res[i].get("score", 0.0)
                        age_est = round(age_res[i].get("score", 0))
                        gender = gen_res[i].get("label", "")
                        gender_score = gen_res[i].get("score", 0.0)
                        emotion = emo_res[i].get("label", "")
                        emotion_score = emo_res[i].get("score", 0.0)
                        # NOTE(review): placeholder - this is a random number, not a
                        # measurement, yet it is reported as avg_attention_duration.
                        attention_duration = round(random.uniform(2.0, 7.5), 1)

                        gaze, yaw, pitch = estimate_gaze_from_tddfa(frame, bboxes[i], tddfa_model)
                        # yaw/pitch are None when the pose could not be estimated,
                        # so they must not go through a numeric format spec.
                        if yaw is None or pitch is None:
                            logger.debug("[Gaze] Viewer %s | head pose unavailable | Looking: %s",
                                         vid, gaze)
                        else:
                            logger.debug("[Gaze] Viewer %s | Yaw: %.2f, Pitch: %.2f | Looking: %s",
                                         vid, yaw, pitch, gaze)

                        if vid not in viewer_summaries:
                            viewer_summaries[vid] = _new_viewer_stats(vid, now)

                        stats = viewer_summaries[vid]
                        stats["count"] += 1
                        stats["age_sum"] += age_est
                        stats["age_score_sum"] += age_score
                        stats["gender_score_sum"] += gender_score
                        stats["emotion_score_sum"] += emotion_score
                        stats["attention_duration_sum"] += attention_duration
                        stats["gaze_count"] += int(gaze)
                        stats["last_seen"] = now

                        # Vote count
                        stats["gender_votes"][gender] = stats["gender_votes"].get(gender, 0) + 1
                        stats["emotion_votes"][emotion] = stats["emotion_votes"].get(emotion, 0) + 1

                # Draw only after the gaze crops were taken: the overlay is
                # painted into `frame` itself and would end up in the crops.
                if preview_camera:
                    frame = draw_overlay(frame, emo_res, age_res, gen_res)
                    disp.show(frame)

                # Runs on every frame, including frames without faces; otherwise
                # a viewer who left would only be summarized once someone else
                # walks into view.
                _log_expired_viewers(now)

                if cv2.waitKey(1) & 0xFF == ord("q"):
                    break

        except Exception:
            logger.exception("Error occurred during the inference loop")

except KeyboardInterrupt:
    logger.info("Execution interrupted by user.")
except Exception:
    logger.exception("Unhandled exception occurred in the main execution block")
finally:
    try:
        video_source.stop()
        cv2.destroyAllWindows()
        logger.info("Resources released. Exiting.")
    except Exception:
        logger.exception("Error occurred during resource cleanup.")


DegirumException: Model 'tddfa_mobilenet_v1--120x120_quant_hailort_hailo8_1' is not found in model zoo '../models'