In [None]:
# ----------------------------------------------------------
# Audience Analysis Script using Hailo-8, DeGirum SDK, and PiCamera2
# ----------------------------------------------------------
# Models: Face Detection, Age, Gender, Emotion, Embedding
# Hardware: Raspberry Pi 5 + Hailo-8 + Camera Module 3
# Filename: 000_audience_analysis_live.ipynb
# Created date: 01 July 2025
# Last modified date: 06 July 2025
# Version: 1.0.0
# ----------------------------------------------------------

# ----------------------------------------------------------
# Sample JSON Output (Single Viewer Record)
# ----------------------------------------------------------
# {
#   "timestamp": "2025-07-10T15:48:27.124567Z",
#   "location": {
#     "mac_address": "88:A2:9E:1C:49:6F",
#     "coordinates": "3.1319N, 101.6841E"
#   },
#   "env": {
#     "temp_c": 29.75,
#     "humidity": 63.12,
#     "pressure_hPa": 1008.27,
#     "gas_resistance_ohms": 12105.89
#   },
#   "viewer_id": "3c4f9a7e",
#   "is_new_viewer": true,
#   "age_est": 32,
#   "age_score": 31.78,
#   "gender": "Male",
#   "gender_score": 0.95,
#   "emotion": "happy",
#   "emotion_score": 0.91,
#   "attention_duration": 5.3,
#   "gaze_at_screen": true
# }

import os
import time
import json
import uuid
import random
import logging
from datetime import datetime, timezone
from logging.handlers import TimedRotatingFileHandler

import numpy as np
import degirum as dg
import degirum_tools
import cv2
from picamera2 import Picamera2
import bme680
from scipy.optimize import linear_sum_assignment

# ----------------------------------------------------------
# Configuration
# ----------------------------------------------------------
# Runtime switches read by the logging setup and the main loop.
preview_camera = False   # set True to see overlays on screen
console_output  = False  # set True to also log to console

# Arguments forwarded unchanged to every dg.load_model() call in this file.
inference_host_address = "@local"
zoo_url               = "../models"
token                 = ""
device_type           = "HAILORT/HAILO8"

# Model names passed as model_name= to dg.load_model().
# NOTE(review): the emotion model name ends in "multidevice" while the other
# four end in "hailo8" — confirm it loads with the device_type set above.
face_det_model_name   = "retinaface_mobilenet--736x1280_quant_hailort_hailo8_1"
face_embed_model_name = "arcface_mobilefacenet--112x112_quant_hailort_hailo8_1"
age_model_name        = "yolov8n_relu6_age--256x256_quant_hailort_hailo8_1"
gender_model_name     = "yolov8n_relu6_fairface_gender--256x256_quant_hailort_hailo8_1"
emotion_model_name    = "emotion_recognition_fer2013--64x64_quant_hailort_multidevice_1"

# Length of the all-zeros vector the main loop substitutes when an embedding
# result carries no "data" entry.
# NOTE(review): confirm this matches the embedding model's actual output size.
EMB_DIM = 128  # adjust if your embedding is larger

# ----------------------------------------------------------
# Logging Setup
# ----------------------------------------------------------
os.makedirs("../logs", exist_ok=True)
logger = logging.getLogger("audience_analysis_live")
logger.setLevel(logging.DEBUG)

# Detach AND close any handlers left over from an earlier run of this cell.
# Merely clearing the list would leave the previous log file handle open.
for _stale_handler in list(logger.handlers):
    logger.removeHandler(_stale_handler)
    _stale_handler.close()

# File log: rotated every hour (UTC), keeping the 4 most recent backups.
handler = TimedRotatingFileHandler(
    "../logs/audience_analysis_live.log",
    when="H", interval=1, backupCount=4, utc=True
)
handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s"))
logger.addHandler(handler)

# Optional console mirror that reuses the file handler's format.
if console_output:
    ch = logging.StreamHandler()
    ch.setFormatter(handler.formatter)
    logger.addHandler(ch)

# ----------------------------------------------------------
# BME688 Setup
# ----------------------------------------------------------
def set_bme688_sensor(sensor):
    """Apply this script's oversampling, filter and gas settings to *sensor*.

    The setters are invoked in the order listed below, each with a single
    constant from the ``bme680`` module.
    """
    settings = (
        ("set_humidity_oversample", bme680.OS_2X),
        ("set_pressure_oversample", bme680.OS_4X),
        ("set_temperature_oversample", bme680.OS_8X),
        ("set_filter", bme680.FILTER_SIZE_3),
        ("set_gas_status", bme680.ENABLE_GAS_MEAS),
    )
    for setter_name, value in settings:
        getattr(sensor, setter_name)(value)

# Open the sensor at the primary I2C address first. If constructing or
# configuring it raises RuntimeError/IOError, retry once at the secondary
# address. A failure at the secondary address is not caught here, so it
# propagates and stops the script.
try:
    bme_sensor = bme680.BME680(bme680.I2C_ADDR_PRIMARY)
    set_bme688_sensor(bme_sensor)
except (RuntimeError, IOError):
    bme_sensor = bme680.BME680(bme680.I2C_ADDR_SECONDARY)
    set_bme688_sensor(bme_sensor)

def read_bme688_data():
    """Return the current environment readings from the module-level sensor.

    Returns:
        A dict with keys ``temp_c``, ``humidity``, ``pressure_hPa`` and
        ``gas_resistance_ohms``. Values are rounded to two decimals when
        ``get_sensor_data()`` reports success, and are all ``None`` otherwise.
    """
    readings = {
        "temp_c": None,
        "humidity": None,
        "pressure_hPa": None,
        "gas_resistance_ohms": None,
    }
    if not bme_sensor.get_sensor_data():
        return readings

    readings["temp_c"] = round(bme_sensor.data.temperature, 2)
    readings["humidity"] = round(bme_sensor.data.humidity, 2)
    readings["pressure_hPa"] = round(bme_sensor.data.pressure, 2)
    readings["gas_resistance_ohms"] = round(bme_sensor.data.gas_resistance, 2)
    return readings

# ----------------------------------------------------------
# Camera Streaming Class
# ----------------------------------------------------------
class CameraStream:
    """Endless, rate-limited frame iterator on top of a ``Picamera2`` device."""

    def __init__(self, fps=5):
        """Create, configure and start the camera.

        Args:
            fps: Upper bound on frames yielded per second; stored as the
                minimum cycle time ``self.interval = 1.0 / fps``.
        """
        self.picam2 = Picamera2()
        self.interval = 1.0 / fps
        preview_config = self.picam2.create_preview_configuration(
            main={"format": "RGB888"}
        )
        self.picam2.configure(preview_config)
        self.picam2.start(show_preview=False)
        time.sleep(2)  # fixed pause after start() before any frame is requested

    def __iter__(self):
        """Yield frames from ``capture_array()`` forever.

        After the consumer resumes the generator, the remainder of
        ``self.interval`` (if any) is slept off, so the time the consumer
        spent on the frame counts towards the cycle.
        """
        while True:
            cycle_began = time.time()
            yield self.picam2.capture_array()
            leftover = self.interval - (time.time() - cycle_began)
            time.sleep(leftover if leftover > 0 else 0)

    def stop(self):
        """Stop the underlying camera."""
        self.picam2.stop()

# ----------------------------------------------------------
# Utility Functions
# ----------------------------------------------------------
def cosine_distance(a: np.ndarray, b: np.ndarray) -> float:
    """Return ``1 - cos(a, b)``, or 1.0 when the norm product is not positive."""
    dot = np.dot(a, b)
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    if not norm_product > 0:
        # A zero-length vector has no direction; report the neutral distance.
        return 1.0
    return 1.0 - (dot / norm_product)

def draw_overlay(image, emo_res, age_res, gen_res):
    """Draw a box and a text label on *image* for every entry of *emo_res*.

    The three result lists are read by the same index. The label has the
    form ``"<gender> (<score>) | Age: <n> | <emotion> (<score>)"`` and is
    drawn on a filled strip directly above the box.

    Drawing is best-effort: a face whose data is missing or malformed
    (no ``bbox``, fewer age/gender results than emotion results, a drawing
    error, ...) is skipped with a debug log entry and the remaining faces
    are still drawn. Only ``Exception`` subclasses are treated that way, so
    ``KeyboardInterrupt`` and ``SystemExit`` propagate to the caller.

    Args:
        image: Frame to draw on; modified in place.
        emo_res: Per-face dicts supplying ``bbox``, ``label`` and ``score``.
        age_res: Per-face dicts whose ``score`` is shown, rounded, as the age.
        gen_res: Per-face dicts supplying the gender ``label`` and ``score``.

    Returns:
        The same *image* object.
    """
    for i, emo_entry in enumerate(emo_res):
        try:
            x1, y1, x2, y2 = map(int, emo_entry.get("bbox", []))
            age   = round(age_res[i].get("score", 0))
            g_lbl = gen_res[i].get("label", "")
            g_sc  = gen_res[i].get("score", 0.0)
            emo   = emo_entry.get("label", "")
            e_sc  = emo_entry.get("score", 0.0)

            label = f"{g_lbl} ({g_sc:.2f}) | Age: {age} | {emo} ({e_sc:.2f})"
            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 255), 2)
            (w, _h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.45, 1)
            # Filled strip above the box so the text stays readable.
            cv2.rectangle(image, (x1, y1 - 22), (x1 + w, y1), (0, 255, 255), -1)
            cv2.putText(image, label, (x1, y1 - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 0), 1)
        except Exception as exc:
            # Same logger the rest of this script writes to.
            logging.getLogger("audience_analysis_live").debug(
                "draw_overlay: skipping face %d: %r", i, exc
            )
    return image

def get_mac_address():
    """Return ``uuid.getnode()`` as six colon-separated, upper-case hex octets."""
    # Keep only the low 48 bits, then render them as exactly 12 hex digits.
    hex_digits = f"{uuid.getnode() & 0xFFFFFFFFFFFF:012X}"
    return ":".join(hex_digits[pos:pos + 2] for pos in range(0, 12, 2))

# Resolved once at start-up and reused in every output record.
mac_address = get_mac_address()

# ----------------------------------------------------------
# Run Inference Generator
# ----------------------------------------------------------
def run_inference(video_source):
    """Yield one dict of model outputs per frame taken from *video_source*.

    Every frame is passed, in this order, to the emotion, age, gender and
    embedding compound models defined at module level. The yielded dict
    maps ``"emotion"``, ``"age"``, ``"gender"`` and ``"embedding"`` to the
    value returned by the corresponding ``predict(frame)`` call.
    """
    for frame in video_source:
        emotion_out = face_emotion_model.predict(frame)
        age_out = face_age_model.predict(frame)
        gender_out = face_gender_model.predict(frame)
        embedding_out = face_embed_model_comp.predict(frame)
        yield dict(
            emotion=emotion_out,
            age=age_out,
            gender=gender_out,
            embedding=embedding_out,
        )

# ----------------------------------------------------------
# ViewerTracker: IoU + Embedding Matching
# ----------------------------------------------------------
class ViewerTracker:
    """Keep viewer ids stable across frames by matching detections to tracks.

    Each :meth:`update` pairs the current detections with the known tracks
    by Hungarian assignment over the cost
    ``w_iou * (1 - IoU) + w_emb * cosine_distance``. An assigned pair is
    accepted when its IoU is at least ``iou_threshold`` OR its embedding
    distance is at most ``emb_threshold``; every other detection starts a
    new track. Tracks not matched for more than ``timeout_sec`` seconds are
    dropped at the start of the next update.
    """

    def __init__(self, iou_threshold=0.3, emb_threshold=0.4,
                 w_iou=0.5, w_emb=0.5, timeout_sec=10):
        """Store the matching parameters and start with no tracks.

        Args:
            iou_threshold: Minimum IoU that accepts an assigned pair.
            emb_threshold: Maximum cosine distance that accepts an assigned pair.
            w_iou: Weight of ``1 - IoU`` in the assignment cost.
            w_emb: Weight of the cosine distance in the assignment cost.
            timeout_sec: Seconds without a match after which a track is dropped.
        """
        self.iou_thr = iou_threshold
        self.emb_thr = emb_threshold
        self.w_iou   = w_iou
        self.w_emb   = w_emb
        self.timeout = timeout_sec
        self.tracks  = {}  # id -> {'bbox', 'emb', 'last_seen'}

    @staticmethod
    def _iou(a, b):
        """Return the intersection-over-union of two ``(x1, y1, x2, y2)`` boxes.

        Returns 0 when the union area is not positive.
        """
        xA, yA = max(a[0], b[0]), max(a[1], b[1])
        xB, yB = min(a[2], b[2]), min(a[3], b[3])
        inter = max(0, xB - xA) * max(0, yB - yA)
        areaA = (a[2] - a[0]) * (a[3] - a[1])
        areaB = (b[2] - b[0]) * (b[3] - b[1])
        uni   = areaA + areaB - inter
        return inter / uni if uni > 0 else 0

    def _clean(self):
        """Drop every track whose last match is older than ``self.timeout``."""
        now = time.time()
        for tid in list(self.tracks):
            if now - self.tracks[tid]['last_seen'] > self.timeout:
                del self.tracks[tid]

    def _start_track(self, bbox, emb):
        """Register a new track for one detection and return its 8-hex-char id."""
        nid = uuid.uuid4().hex[:8]
        self.tracks[nid] = {'bbox': bbox, 'emb': emb, 'last_seen': time.time()}
        return nid

    def update(self, det_bboxes, det_embs):
        """Match the current detections to tracks and return their viewer ids.

        Args:
            det_bboxes: Sequence of ``(x1, y1, x2, y2)`` boxes, one per detection.
            det_embs: Sequence of embedding vectors, paired with *det_bboxes*
                by position.

        Returns:
            A list of ``(viewer_id, is_new)`` tuples in detection order. If
            the two inputs differ in length, only the first
            ``min(len(det_bboxes), len(det_embs))`` detections are used.
        """
        self._clean()

        # Pair boxes and embeddings by position. A detection that lacks
        # either one cannot be scored, so surplus entries are ignored rather
        # than left as zero-cost columns that are then indexed out of range.
        M = min(len(det_bboxes), len(det_embs))
        det_bboxes = list(det_bboxes)[:M]
        det_embs   = list(det_embs)[:M]

        T = list(self.tracks)
        N = len(T)

        if M == 0:
            return []

        # no existing tracks → all new
        if N == 0:
            out = []
            for bb, emb in zip(det_bboxes, det_embs):
                out.append((self._start_track(bb, emb), True))
            return out

        # Build the cost matrix, keeping the raw scores so the acceptance
        # test below does not have to recompute them.
        cost  = np.zeros((N, M), dtype=np.float32)
        ious  = np.zeros((N, M), dtype=np.float64)
        dists = np.zeros((N, M), dtype=np.float64)
        for i, tid in enumerate(T):
            tb = self.tracks[tid]['bbox']
            te = self.tracks[tid]['emb']
            for j, (db, de) in enumerate(zip(det_bboxes, det_embs)):
                iou_score = self._iou(tb, db)
                emb_dist  = cosine_distance(te, de)
                ious[i, j]  = iou_score
                dists[i, j] = emb_dist
                cost[i, j]  = self.w_iou * (1 - iou_score) + self.w_emb * emb_dist

        # Hungarian assignment
        rows, cols = linear_sum_assignment(cost)
        results = [None] * M

        # accept matches under one of the thresholds
        for r, c in zip(rows, cols):
            tid = T[r]
            iou_score = ious[r, c]
            emb_dist  = dists[r, c]
            logger.debug(f"[Tracker] comparing track {tid} ? det {c}: IoU={iou_score:.2f}, emb_dist={emb_dist:.2f}")
            if iou_score >= self.iou_thr or emb_dist <= self.emb_thr:
                self.tracks[tid].update({
                    'bbox': det_bboxes[c],
                    'emb':  det_embs[c],
                    'last_seen': time.time()
                })
                results[c] = (tid, False)
            else:
                logger.debug(f"[Tracker] rejecting match (IoU<{self.iou_thr} AND emb_dist>{self.emb_thr})")

        # unmatched → new
        for j in range(M):
            if results[j] is None:
                results[j] = (self._start_track(det_bboxes[j], det_embs[j]), True)

        return results

# ----------------------------------------------------------
# Load Models & Compound Pipelines
# ----------------------------------------------------------
# Keyword arguments shared by every dg.load_model() call below.
_model_load_kwargs = dict(
    inference_host_address=inference_host_address,
    zoo_url=zoo_url,
    token=token,
    device_type=device_type,
)

# Face detection model (also the first stage of every compound model below)
face_det_model = dg.load_model(model_name=face_det_model_name, **_model_load_kwargs)
face_det_model.overlay_color = [(255, 255, 0), (0, 255, 0)]

# Face embedding model
face_embed_model = dg.load_model(model_name=face_embed_model_name, **_model_load_kwargs)

# Age estimation model
age_model = dg.load_model(model_name=age_model_name, **_model_load_kwargs)

# Gender classification model
gender_model = dg.load_model(model_name=gender_model_name, **_model_load_kwargs)

# Emotion recognition model
emotion_model = dg.load_model(model_name=emotion_model_name, **_model_load_kwargs)

# One compound model per attribute, each combining the shared face detector
# with one of the per-face models loaded above.
face_emotion_model = degirum_tools.CroppingAndClassifyingCompoundModel(
    face_det_model, emotion_model
)
face_age_model = degirum_tools.CroppingAndClassifyingCompoundModel(
    face_det_model, age_model
)
face_gender_model = degirum_tools.CroppingAndClassifyingCompoundModel(
    face_det_model, gender_model
)
face_embed_model_comp = degirum_tools.CroppingAndClassifyingCompoundModel(
    face_det_model, face_embed_model
)

# ----------------------------------------------------------
# Main Runtime
# ----------------------------------------------------------
video_source = CameraStream(fps=5)
tracker = ViewerTracker(
    iou_threshold=0.2,    # a little lower to catch small overlaps
    emb_threshold=0.6,    # cosine distance lies in [0, 2]
    w_iou=0.5,
    w_emb=0.5,
    timeout_sec=10
)

# Main loop: for every frame, run the four compound models, assign viewer
# ids, and write one JSON record per detected face to the log. The loop ends
# on Ctrl+C or when "q" is read by cv2.waitKey(); the camera is always
# stopped on the way out.
try:
    with degirum_tools.Display("Audience Analysis") as disp:
        for results in run_inference(video_source):
            emo_res = results["emotion"].results
            age_res = results["age"].results
            gen_res = results["gender"].results
            emb_res = results["embedding"].results
            frame   = results["emotion"].image

            if emo_res:
                bboxes = [r["bbox"] for r in emo_res]
                # A result without "data" falls back to an all-zeros vector.
                embs   = [
                    np.array(r.get("data", [np.zeros(EMB_DIM)])[0], dtype=np.float32)
                    for r in emb_res
                ]

                assignments = tracker.update(bboxes, embs)

                env_data = read_bme688_data()
                # The four result lists come from separate model runs; only
                # report faces that are present in all of them.
                min_len  = min(len(emo_res), len(age_res), len(gen_res), len(embs))

                if preview_camera:
                    frame = draw_overlay(frame, emo_res, age_res, gen_res)

                for i in range(min_len):
                    vid, is_new = assignments[i]
                    # Timezone-aware "now" rendered in the same
                    # "YYYY-MM-DDTHH:MM:SS.ffffffZ" form as before.
                    stamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"
                    out = {
                        "timestamp": stamp,
                        "location": {
                            "mac_address": mac_address,
                            "coordinates": "3.1319N, 101.6841E"
                        },
                        "env": env_data,
                        "viewer_id": vid,
                        "is_new_viewer": is_new,
                        "age_est": round(age_res[i].get("score", 0)),
                        "age_score": age_res[i].get("score", 0.0),
                        "gender": gen_res[i].get("label", ""),
                        "gender_score": gen_res[i].get("score", 0.0),
                        "emotion": emo_res[i].get("label", ""),
                        "emotion_score": emo_res[i].get("score", 0.0),
                        # The next two fields are random placeholders, not measurements.
                        "attention_duration": round(random.uniform(2.0, 7.5), 1),
                        "gaze_at_screen": random.choice([True, False])
                    }
                    logger.debug(json.dumps(out))

            # Refresh the preview and poll the quit key on EVERY frame, so
            # the window does not freeze (and "q" is not ignored) while no
            # face is in view.
            if preview_camera:
                disp.show(frame)

            if cv2.waitKey(1) & 0xFF == ord("q"):
                break

except KeyboardInterrupt:
    logger.info("Interrupted by user.")
finally:
    video_source.stop()
    cv2.destroyAllWindows()


[38:23:51.925572573] [3000998] [1;32m INFO [1;37mCamera [1;34mcamera_manager.cpp:326 [0mlibcamera v0.5.0+59-d83ff0a4
[38:23:51.932912458] [3001075] [1;32m INFO [1;37mRPI [1;34mpisp.cpp:720 [0mlibpisp version v1.2.1 981977ff21f3 29-04-2025 (14:13:50)
[38:23:51.942634886] [3001075] [1;32m INFO [1;37mRPI [1;34mpisp.cpp:1179 [0mRegistered camera /base/axi/pcie@1000120000/rp1/i2c@88000/imx708@1a to CFE device /dev/media0 and ISP device /dev/media2 using PiSP variant BCM2712_D0
[38:23:51.946335968] [3000998] [1;32m INFO [1;37mCamera [1;34mcamera.cpp:1205 [0mconfiguring streams: (0) 640x480-RGB888 (1) 1536x864-BGGR_PISP_COMP1
[38:23:51.946433505] [3001075] [1;32m INFO [1;37mRPI [1;34mpisp.cpp:1483 [0mSensor: /base/axi/pcie@1000120000/rp1/i2c@88000/imx708@1a - Selected sensor format: 1536x864-SBGGR10_1X10 - Selected CFE format: 1536x864-PC1B
