Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions .claude/hooks/no-fake-cite.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@
# URL in the same message. LLMs hallucinate citations at 14-94% rates;
# NeurIPS 2025 and ICLR 2026 papers shipped with hallucinated refs through
# peer review; Q1 2026 legal sanctions $145k for AI-fake citations in court.
#
# Dual-mode (added 2026-05-16, physics-engines/slice-1-fake-cite):
# - If `agentcloseout-physics` is on $PATH and a fake_cite rule pack is
# discoverable, route to the Rust verdict (deterministic v1 physics:
# citation regex + URL-anywhere allow_pattern).
# - Else fall back to the original bash regex path below. Both paths
# emit a BLOCKED message and exit 2 on block, 0 on pass.

# Strict mode: abort on an unhandled command failure (-e), on expansion of an
# unset variable (-u), and treat a pipeline as failed when any stage fails
# (pipefail). Best-effort steps further down must therefore guard their own
# failures explicitly.
set -euo pipefail

Expand All @@ -17,6 +24,44 @@ if ! printf '%s' "$INPUT" | jq -e . >/dev/null 2>&1; then
exit 0
fi

# Rust path: prefer agentcloseout-physics when available.
#
# This path is strictly best-effort. Any problem (binary missing, no rule pack,
# temp file cannot be created, scanner prints nothing or something that is not
# a JSON verdict) falls through to the Bash fallback below instead of aborting
# the hook. Only an explicit "block" or "pass" decision ends the script here.

#######################################
# Print the rules directory to hand to the Rust scanner.
# Globals:   LLM_DARK_PATTERNS_RULES_DIR (read; used verbatim when non-empty),
#            XDG_CONFIG_HOME, HOME (read)
# Outputs:   the chosen directory on stdout (no trailing newline); nothing
#            when no candidate directory exists
# Returns:   0 always
#######################################
_fake_cite_rules_dir() {
  local candidate

  # An explicit override wins; the caller validates that it really exists.
  if [ -n "${LLM_DARK_PATTERNS_RULES_DIR:-}" ]; then
    printf '%s' "$LLM_DARK_PATTERNS_RULES_DIR"
    return 0
  fi

  # NOTE(review): the second candidate is a developer-specific absolute path.
  # It is kept for backward compatibility; prefer LLM_DARK_PATTERNS_RULES_DIR
  # or the XDG location on other machines.
  # ${HOME:-} keeps `set -u` from aborting when neither XDG_CONFIG_HOME nor
  # HOME is set.
  for candidate in \
    "$(dirname "$0")/../../agent-closeout-bench/rules/closeout" \
    "/home/fer/Documents/agent-closeout-bench/rules/closeout" \
    "${XDG_CONFIG_HOME:-${HOME:-}/.config}/agentcloseout-physics/rules/closeout"; do
    if [ -d "$candidate" ]; then
      printf '%s' "$candidate"
      return 0
    fi
  done
  return 0
}

#######################################
# Ask agentcloseout-physics for a fake_cite verdict on $INPUT.
# Globals:   INPUT (read) - the hook payload, written to a temp file for the
#            scanner's --input argument
# Outputs:   on "block": the BLOCKED message and repair guidance on stderr
# Returns:   0 when no usable verdict was obtained (caller continues with the
#            Bash fallback). Does not return on a verdict: exits the script
#            with 2 on "block" and 0 on "pass".
#######################################
_fake_cite_rust_verdict() {
  local rules_dir tmp_input verdict_json decision rule evidence

  command -v agentcloseout-physics >/dev/null 2>&1 || return 0

  rules_dir="$(_fake_cite_rules_dir)"
  if [ -z "$rules_dir" ] || [ ! -d "$rules_dir" ] \
    || [ ! -f "$rules_dir/fake_cite.yaml" ]; then
    return 0
  fi

  # A temp-file failure must not kill the hook under `set -e`; fall back.
  tmp_input="$(mktemp)" || return 0
  if ! printf '%s' "$INPUT" > "$tmp_input"; then
    rm -f -- "$tmp_input"
    return 0
  fi
  # Scanner errors are intentionally swallowed: an empty verdict means
  # "no opinion" and hands control to the Bash fallback.
  verdict_json="$(agentcloseout-physics scan --category fake_cite --rules "$rules_dir" --input "$tmp_input" 2>/dev/null || true)"
  rm -f -- "$tmp_input"
  [ -n "$verdict_json" ] || return 0

  # `|| true`: with errexit + pipefail an unparsable verdict would otherwise
  # abort the script with jq's exit status instead of falling back.
  decision="$(printf '%s' "$verdict_json" | jq -r '.decision // empty' 2>/dev/null || true)"
  case "$decision" in
    pass) exit 0 ;;
    block) ;;
    *) return 0 ;;
  esac

  # The detail fields are optional decoration; never let them change the
  # outcome of an already-decided block.
  rule="$(printf '%s' "$verdict_json" | jq -r '.matched_rules[0].rule_id // "fake_cite"' 2>/dev/null)" || rule="fake_cite"
  evidence="$(printf '%s' "$verdict_json" | jq -r '.redacted_evidence[0] // ""' 2>/dev/null)" || evidence=""

  {
    printf '%s\n' "BLOCKED: citation-formatted reference without verifiable URL in same message."
    printf '%s\n' "Matched rule: $rule"
    if [ -n "$evidence" ]; then
      printf '%s\n' "Evidence: $evidence"
    fi
    printf '%s\n' ""
    printf '%s\n' "Repair guidance:"
    printf '%s\n' "- Add a verifiable URL or DOI in the same message as the citation."
    printf '%s\n' "- If the citation is inside a code block or quote, re-anchor it."
    printf '%s\n' "- If no source is available, drop the citation and state the claim with explicit uncertainty."
  } >&2
  exit 2
}

_fake_cite_rust_verdict

# Bash fallback path (original regex implementation, preserved for CI without the Rust binary):
json_get() {
local filter="$1"
printf '%s' "$INPUT" | jq -r "$filter // empty" 2>/dev/null || true
Expand Down