From ed7e0f7d1af7ca7f6f8f20f5db449fc811a98391 Mon Sep 17 00:00:00 2001
From: Fernando Lazzarin
Date: Sat, 16 May 2026 17:03:34 -0300
Subject: [PATCH] hooks/no-fake-cite: dual-mode mirror of llm-dark-patterns hook upgrade

Part of agent-closeout-bench physics-engines Slice 1. Mirrors the
dual-mode behaviour landed at waitdeadai/llm-dark-patterns: prefer the
Rust agentcloseout-physics verdict when available, fall back to the
existing bash regex path otherwise.

This keeps minmaxing harness sessions in sync with the upgraded suite
without forcing operators to install the Rust binary. Operators who do
have agent-closeout-bench checked out alongside minmaxing get the
deterministic per-message verdict automatically.

Co-Authored-By: Claude Opus 4.7
---
 .claude/hooks/no-fake-cite.sh | 45 +++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/.claude/hooks/no-fake-cite.sh b/.claude/hooks/no-fake-cite.sh
index 8304615..f8f0aa4 100755
--- a/.claude/hooks/no-fake-cite.sh
+++ b/.claude/hooks/no-fake-cite.sh
@@ -3,6 +3,13 @@
 # URL in the same message. LLMs hallucinate citations at 14-94% rates;
 # NeurIPS 2025 and ICLR 2026 papers shipped with hallucinated refs through
 # peer review; Q1 2026 legal sanctions $145k for AI-fake citations in court.
+#
+# Dual-mode (added 2026-05-16, physics-engines/slice-1-fake-cite):
+# - If `agentcloseout-physics` is on $PATH and a fake_cite rule pack is
+#   discoverable, route to the Rust verdict (deterministic v1 physics:
+#   citation regex + URL-anywhere allow_pattern).
+# - Else fall back to the original bash regex path below. Both paths
+#   emit a BLOCKED message and exit 2 on block, 0 on pass.
 
 set -euo pipefail
 
@@ -17,6 +24,44 @@ if ! printf '%s' "$INPUT" | jq -e . >/dev/null 2>&1; then
   exit 0
 fi
 
+# Rust path: prefer agentcloseout-physics when available.
+if command -v agentcloseout-physics >/dev/null 2>&1; then
+  # Rule pack: LLM_DARK_PATTERNS_RULES_DIR wins; else the first existing candidate dir.
+  RULES_DIR="${LLM_DARK_PATTERNS_RULES_DIR:-}"
+  if [ -z "$RULES_DIR" ]; then
+    for candidate in \
+      "$(dirname "$0")/../../agent-closeout-bench/rules/closeout" \
+      "${HOME:-}/Documents/agent-closeout-bench/rules/closeout" \
+      "${XDG_CONFIG_HOME:-${HOME:-}/.config}/agentcloseout-physics/rules/closeout"; do
+      if [ -d "$candidate" ]; then RULES_DIR="$candidate"; break; fi
+    done
+  fi
+  # mktemp is part of the condition so its failure falls through instead of tripping set -e.
+  if [ -n "$RULES_DIR" ] && [ -d "$RULES_DIR" ] && [ -f "$RULES_DIR/fake_cite.yaml" ] && TMP_INPUT="$(mktemp)"; then
+    printf '%s' "$INPUT" > "$TMP_INPUT"
+    VERDICT_JSON="$(agentcloseout-physics scan --category fake_cite --rules "$RULES_DIR" --input "$TMP_INPUT" 2>/dev/null || true)"
+    rm -f -- "$TMP_INPUT"
+    # `|| true` on every jq read: under pipefail a jq error on malformed output would
+    # abort the hook; instead DECISION stays empty and the bash fallback below decides.
+    DECISION="$(printf '%s' "$VERDICT_JSON" | jq -r '.decision // empty' 2>/dev/null || true)"
+    if [ "$DECISION" = "block" ]; then
+      RULE="$(printf '%s' "$VERDICT_JSON" | jq -r '.matched_rules[0].rule_id // "fake_cite"' 2>/dev/null || true)"
+      EVIDENCE="$(printf '%s' "$VERDICT_JSON" | jq -r '.redacted_evidence[0] // ""' 2>/dev/null || true)"
+      echo "BLOCKED: citation-formatted reference without verifiable URL in same message." >&2
+      echo "Matched rule: ${RULE:-fake_cite}" >&2
+      if [ -n "$EVIDENCE" ]; then echo "Evidence: $EVIDENCE" >&2; fi
+      echo "" >&2
+      echo "Repair guidance:" >&2
+      echo "- Add a verifiable URL or DOI in the same message as the citation." >&2
+      echo "- If the citation is inside a code block or quote, re-anchor it." >&2
+      echo "- If no source is available, drop the citation and state the claim with explicit uncertainty." >&2
+      exit 2
+    fi
+    if [ "$DECISION" = "pass" ]; then exit 0; fi
+  fi
+fi
+
+# Bash fallback path (original regex implementation, preserved for CI without the Rust binary):
 json_get() {
   local filter="$1"
   printf '%s' "$INPUT" | jq -r "$filter // empty" 2>/dev/null || true