Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 70 additions & 4 deletions .claude/hooks/no-fake-recall.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,32 @@
# earlier" / "as I mentioned before" / "from my previous response".
# LLMs frequently hallucinate prior conversation content. The fix is for the
# model to either quote the verbatim prior content or use neutral phrasing.
#
# Vocabulary loaded from packs/locale/<lang>.txt section [recall_phrase].
# Inline English fallback preserves pre-pack behavior.

set -euo pipefail

# Resolve the directory that holds this hook, then pull in the optional pack
# helpers expected at ../lib/packs.sh relative to it. Nothing is sourced when
# that file does not exist.
_HOOK_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
if [[ -f "${_HOOK_DIR}/../lib/packs.sh" ]]; then
  # shellcheck source=../lib/packs.sh
  . "${_HOOK_DIR}/../lib/packs.sh"
fi

#######################################
# Print the vocabulary for a locale-pack section, or a built-in default.
# Arguments:
#   $1 - section name handed to load_locale_section
#   $2 - fallback text used when nothing could be loaded
# Outputs:
#   The loader's output if it is non-empty, otherwise the fallback; written
#   to stdout without a trailing newline.
#######################################
_load_or_fallback() {
  local section="$1" fallback="$2" loaded=""
  # The loader is optional: only call it if some library defined it.
  if declare -F load_locale_section >/dev/null 2>&1; then
    # Guard the assignment: a non-zero status from the loader would otherwise
    # abort the whole script under `set -e` whenever this function runs with
    # errexit active, and the fallback below would never be reached.
    loaded="$(load_locale_section "$section" 2>/dev/null)" || true
  fi
  # Empty loader output (missing section, failed loader) selects the fallback.
  printf '%s' "${loaded:-$fallback}"
}

# Pattern text for false-memory recall phrasing: an alternation of branches
# written in grep -E syntax, relying on \b word boundaries. The value comes
# from the [recall_phrase] section via _load_or_fallback; the English pattern
# passed as the second argument is what gets used when that lookup yields
# nothing. The pattern is stored without outer parentheses.
RECALL_RE="$(_load_or_fallback recall_phrase '\b(as|like)[[:space:]]+(we|i|you[[:space:]]+and[[:space:]]+i)[[:space:]]+(discussed|mentioned|talked[[:space:]]+about|covered|noted|established|agreed)\b|\bas[[:space:]]+(i|we)[[:space:]]+(mentioned|said|noted|stated|explained|told[[:space:]]+you|wrote)[[:space:]]+(earlier|before|previously|above|in[[:space:]]+(my|the)[[:space:]]+(last|previous|prior))\b|\bfrom[[:space:]]+(my|our|the)[[:space:]]+(previous|earlier|prior|last)[[:space:]]+(response|message|turn|reply|conversation|exchange)\b|\b(you|i)[[:space:]]+(mentioned|said|told[[:space:]]+(me|you))[[:space:]]+(earlier|before|previously)\b|\bremember[[:space:]]+(when|how|that)[[:space:]]+(we|i|you)[[:space:]]+(discussed|talked|covered)\b|\bbuilding[[:space:]]+on[[:space:]]+what[[:space:]]+(we|i|you)[[:space:]]+(said|discussed|covered|established)\b|\brecap[[:space:]]+(of)?[[:space:]]?(our|my|the)[[:space:]]+(earlier|previous|prior)[[:space:]]+(conversation|discussion|exchange)\b|\bas[[:space:]]+(i|we)[[:space:]]+(established|covered|outlined)[[:space:]]+(earlier|previously|above)\b')"

INPUT="$(cat)"

if ! command -v jq >/dev/null 2>&1; then
Expand All @@ -17,6 +40,43 @@ if ! printf '%s' "$INPUT" | jq -e . >/dev/null 2>&1; then
exit 0
fi

# Rust path: prefer agentcloseout-physics when available.
if command -v agentcloseout-physics >/dev/null 2>&1; then
RULES_DIR="${LLM_DARK_PATTERNS_RULES_DIR:-}"
if [ -z "$RULES_DIR" ]; then
for candidate in \
"$(dirname "$0")/../../agent-closeout-bench/rules/closeout" \
"/home/fer/Documents/agent-closeout-bench/rules/closeout" \
"${XDG_CONFIG_HOME:-$HOME/.config}/agentcloseout-physics/rules/closeout"; do
if [ -d "$candidate" ]; then RULES_DIR="$candidate"; break; fi
done
fi
if [ -n "$RULES_DIR" ] && [ -d "$RULES_DIR" ] && [ -f "$RULES_DIR/fake_recall.yaml" ]; then
TMP_INPUT="$(mktemp)"; printf '%s' "$INPUT" > "$TMP_INPUT"
VERDICT_JSON="$(agentcloseout-physics scan --category fake_recall --rules "$RULES_DIR" --input "$TMP_INPUT" 2>/dev/null || true)"
rm -f "$TMP_INPUT"
if [ -n "$VERDICT_JSON" ]; then
DECISION="$(printf '%s' "$VERDICT_JSON" | jq -r '.decision // empty' 2>/dev/null)"
if [ "$DECISION" = "block" ]; then
RULE="$(printf '%s' "$VERDICT_JSON" | jq -r '.matched_rules[0].rule_id // "fake_recall"' 2>/dev/null)"
EVIDENCE="$(printf '%s' "$VERDICT_JSON" | jq -r '.redacted_evidence[0] // ""' 2>/dev/null)"
echo "BLOCKED: false-memory recall claim without quoted prior content." >&2
echo "Matched rule: $RULE" >&2
[ -n "$EVIDENCE" ] && echo "Evidence: $EVIDENCE" >&2
echo "" >&2
echo "Repair guidance:" >&2
echo "- Quote the prior content verbatim via a markdown blockquote (>) or a 30+ char inline quote." >&2
echo "- Or use neutral phrasing instead of claiming recall." >&2
echo "- Reference: arXiv:2408.04681 (Pataranutaporn et al. 2024)." >&2
exit 2
fi
if [ "$DECISION" = "pass" ]; then
exit 0
fi
fi
fi
fi

json_get() {
local filter="$1"
printf '%s' "$INPUT" | jq -r "$filter // empty" 2>/dev/null || true
Expand Down Expand Up @@ -49,8 +109,9 @@ if [ -z "$message" ]; then
exit 0
fi

# Trigger: false-memory recall vocabulary
RECALL='(\b(as|like)[[:space:]]+(we|i|you[[:space:]]+and[[:space:]]+i)[[:space:]]+(discussed|mentioned|talked[[:space:]]+about|covered|noted|established|agreed)\b|\bas[[:space:]]+(i|we)[[:space:]]+(mentioned|said|noted|stated|explained|told[[:space:]]+you|wrote)[[:space:]]+(earlier|before|previously|above|in[[:space:]]+(my|the)[[:space:]]+(last|previous|prior))\b|\bfrom[[:space:]]+(my|our|the)[[:space:]]+(previous|earlier|prior|last)[[:space:]]+(response|message|turn|reply|conversation|exchange)\b|\b(you|i)[[:space:]]+(mentioned|said|told[[:space:]]+(me|you))[[:space:]]+(earlier|before|previously)\b|\bremember[[:space:]]+(when|how|that)[[:space:]]+(we|i|you)[[:space:]]+(discussed|talked|covered)\b|\bbuilding[[:space:]]+on[[:space:]]+what[[:space:]]+(we|i|you)[[:space:]]+(said|discussed|covered|established)\b|\brecap[[:space:]]+(of)?[[:space:]]?(our|my|the)[[:space:]]+(earlier|previous|prior)[[:space:]]+(conversation|discussion|exchange)\b|\bas[[:space:]]+(i|we)[[:space:]]+(established|covered|outlined)[[:space:]]+(earlier|previously|above)\b)'
# Trigger: false-memory recall vocabulary loaded from packs/locale/<lang>.txt
# section [recall_phrase].
RECALL="(${RECALL_RE})"

if printf '%s\n' "$message" | grep -Eiq "$RECALL"; then
# Allow-clause: the model is quoting verbatim prior content (proves it actually saw it).
Expand All @@ -70,8 +131,13 @@ if printf '%s\n' "$message" | grep -Eiq "$RECALL"; then
verify the recall is real, OR
(b) Use neutral phrasing that doesn't claim recall — 'one approach is X',
'a common pattern is Y' — instead of 'as we discussed, X'.
- Citation: ACM IUI 2025 — generative chatbots induce 3x more false memories
than the control. The fix is verifiable recall, not assumed recall."
- Citations:
Pataranutaporn et al. 2024 (arXiv:2408.04681) — generative chatbots
induce over 3x more immediate false memories than the control condition.
Pataranutaporn et al. 2025 (ACM IUI 2025, doi:10.1145/3708359.3712112) —
follow-up showing subtle in-conversation injection further amplifies the
effect.
The fix is verifiable recall, not assumed recall."
fi

exit 0
37 changes: 37 additions & 0 deletions .claude/hooks/no-fake-stats.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,43 @@ if ! printf '%s' "$INPUT" | jq -e . >/dev/null 2>&1; then
exit 0
fi

# Rust path: prefer agentcloseout-physics when available.
if command -v agentcloseout-physics >/dev/null 2>&1; then
RULES_DIR="${LLM_DARK_PATTERNS_RULES_DIR:-}"
if [ -z "$RULES_DIR" ]; then
for candidate in \
"$(dirname "$0")/../../agent-closeout-bench/rules/closeout" \
"/home/fer/Documents/agent-closeout-bench/rules/closeout" \
"${XDG_CONFIG_HOME:-$HOME/.config}/agentcloseout-physics/rules/closeout"; do
if [ -d "$candidate" ]; then RULES_DIR="$candidate"; break; fi
done
fi
if [ -n "$RULES_DIR" ] && [ -d "$RULES_DIR" ] && [ -f "$RULES_DIR/fake_stats.yaml" ]; then
TMP_INPUT="$(mktemp)"; printf '%s' "$INPUT" > "$TMP_INPUT"
VERDICT_JSON="$(agentcloseout-physics scan --category fake_stats --rules "$RULES_DIR" --input "$TMP_INPUT" 2>/dev/null || true)"
rm -f "$TMP_INPUT"
if [ -n "$VERDICT_JSON" ]; then
DECISION="$(printf '%s' "$VERDICT_JSON" | jq -r '.decision // empty' 2>/dev/null)"
if [ "$DECISION" = "block" ]; then
RULE="$(printf '%s' "$VERDICT_JSON" | jq -r '.matched_rules[0].rule_id // "fake_stats"' 2>/dev/null)"
EVIDENCE="$(printf '%s' "$VERDICT_JSON" | jq -r '.redacted_evidence[0] // ""' 2>/dev/null)"
echo "BLOCKED: fabricated-looking statistic without source or strong hedge." >&2
echo "Matched rule: $RULE" >&2
[ -n "$EVIDENCE" ] && echo "Evidence: $EVIDENCE" >&2
echo "" >&2
echo "Repair guidance:" >&2
echo "- Add a URL, 'according to <ProperNoun>', '(YYYY)', '<Author> et al.', doi:, or arXiv: in the same message." >&2
echo "- Or mark the figure 'unverified' / 'insufficient_data' / 'unknown'." >&2
echo "- Loose hedges like 'approximately' do NOT make a precise decimal honest." >&2
exit 2
fi
if [ "$DECISION" = "pass" ]; then
exit 0
fi
fi
fi
fi

json_get() {
local filter="$1"
printf '%s' "$INPUT" | jq -r "$filter // empty" 2>/dev/null || true
Expand Down
37 changes: 37 additions & 0 deletions .claude/hooks/no-phantom-tool-call.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,43 @@ INPUT="$(cat)"
# Bail out with status 0 (the non-blocking exit used by this hook) when the
# payload cannot be inspected: either jq is not installed, or `jq -e .`
# rejects the contents of $INPUT.
command -v jq >/dev/null 2>&1 || exit 0
printf '%s' "$INPUT" | jq -e . >/dev/null 2>&1 || exit 0

# Rust path: prefer agentcloseout-physics when available.
if command -v agentcloseout-physics >/dev/null 2>&1; then
RULES_DIR="${LLM_DARK_PATTERNS_RULES_DIR:-}"
if [ -z "$RULES_DIR" ]; then
for candidate in \
"$(dirname "$0")/../../agent-closeout-bench/rules/closeout" \
"/home/fer/Documents/agent-closeout-bench/rules/closeout" \
"${XDG_CONFIG_HOME:-$HOME/.config}/agentcloseout-physics/rules/closeout"; do
if [ -d "$candidate" ]; then RULES_DIR="$candidate"; break; fi
done
fi
if [ -n "$RULES_DIR" ] && [ -d "$RULES_DIR" ] && [ -f "$RULES_DIR/phantom_tool_call.yaml" ]; then
TMP_INPUT="$(mktemp)"; printf '%s' "$INPUT" > "$TMP_INPUT"
VERDICT_JSON="$(agentcloseout-physics scan --category phantom_tool_call --rules "$RULES_DIR" --input "$TMP_INPUT" 2>/dev/null || true)"
rm -f "$TMP_INPUT"
if [ -n "$VERDICT_JSON" ]; then
DECISION="$(printf '%s' "$VERDICT_JSON" | jq -r '.decision // empty' 2>/dev/null)"
if [ "$DECISION" = "block" ]; then
RULE="$(printf '%s' "$VERDICT_JSON" | jq -r '.matched_rules[0].rule_id // "phantom_tool_call"' 2>/dev/null)"
EVIDENCE="$(printf '%s' "$VERDICT_JSON" | jq -r '.redacted_evidence[0] // ""' 2>/dev/null)"
echo "BLOCKED: phantom tool call: claim of tool execution without same-message output evidence." >&2
echo "Matched rule: $RULE" >&2
[ -n "$EVIDENCE" ] && echo "Evidence: $EVIDENCE" >&2
echo "" >&2
echo "Repair guidance:" >&2
echo "- Show the tool's actual output (paste the result, fence with triple backticks)." >&2
echo "- Or drop the 'I ran X' framing if you intend to run it next, not already." >&2
echo "- Or close as Status: partial / Verification: not run." >&2
exit 2
fi
if [ "$DECISION" = "pass" ]; then
exit 0
fi
fi
fi
fi

# json_get FILTER
# Apply the jq FILTER to the hook payload held in $INPUT and print the raw
# (-r) result. The appended "// empty" turns a null or false result into no
# output, and a jq failure is swallowed so the call still returns 0.
json_get() {
  local jq_program="$1 // empty"
  printf '%s' "$INPUT" | jq -r "$jq_program" 2>/dev/null || true
}
block() {
echo "BLOCKED: $1" >&2
Expand Down
37 changes: 37 additions & 0 deletions .claude/hooks/no-rollback-claim-without-evidence.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,43 @@ INPUT="$(cat)"
# Bail out with status 0 (the non-blocking exit used by this hook) when the
# payload cannot be inspected: either jq is not installed, or `jq -e .`
# rejects the contents of $INPUT.
command -v jq >/dev/null 2>&1 || exit 0
printf '%s' "$INPUT" | jq -e . >/dev/null 2>&1 || exit 0

# Rust path: prefer agentcloseout-physics when available.
if command -v agentcloseout-physics >/dev/null 2>&1; then
RULES_DIR="${LLM_DARK_PATTERNS_RULES_DIR:-}"
if [ -z "$RULES_DIR" ]; then
for candidate in \
"$(dirname "$0")/../../agent-closeout-bench/rules/closeout" \
"/home/fer/Documents/agent-closeout-bench/rules/closeout" \
"${XDG_CONFIG_HOME:-$HOME/.config}/agentcloseout-physics/rules/closeout"; do
if [ -d "$candidate" ]; then RULES_DIR="$candidate"; break; fi
done
fi
if [ -n "$RULES_DIR" ] && [ -d "$RULES_DIR" ] && [ -f "$RULES_DIR/rollback_claim_without_evidence.yaml" ]; then
TMP_INPUT="$(mktemp)"; printf '%s' "$INPUT" > "$TMP_INPUT"
VERDICT_JSON="$(agentcloseout-physics scan --category rollback_claim_without_evidence --rules "$RULES_DIR" --input "$TMP_INPUT" 2>/dev/null || true)"
rm -f "$TMP_INPUT"
if [ -n "$VERDICT_JSON" ]; then
DECISION="$(printf '%s' "$VERDICT_JSON" | jq -r '.decision // empty' 2>/dev/null)"
if [ "$DECISION" = "block" ]; then
RULE="$(printf '%s' "$VERDICT_JSON" | jq -r '.matched_rules[0].rule_id // "rollback_claim_without_evidence"' 2>/dev/null)"
EVIDENCE="$(printf '%s' "$VERDICT_JSON" | jq -r '.redacted_evidence[0] // ""' 2>/dev/null)"
echo "BLOCKED: rollback claim without command evidence." >&2
echo "Matched rule: $RULE" >&2
[ -n "$EVIDENCE" ] && echo "Evidence: $EVIDENCE" >&2
echo "" >&2
echo "Repair guidance:" >&2
echo "- Show the rollback command in the same message (\`git revert HEAD\`, \`kubectl rollout undo\`, etc.)." >&2
echo "- Or drop the 'rolled back' framing if no rollback command was run." >&2
echo "- Or close as Status: blocked / Next step: operator must run the rollback." >&2
exit 2
fi
if [ "$DECISION" = "pass" ]; then
exit 0
fi
fi
fi
fi

# json_get FILTER
# Apply the jq FILTER to the hook payload held in $INPUT and print the raw
# (-r) result. The appended "// empty" turns a null or false result into no
# output, and a jq failure is swallowed so the call still returns 0.
json_get() {
  local jq_program="$1 // empty"
  printf '%s' "$INPUT" | jq -r "$jq_program" 2>/dev/null || true
}
block() {
echo "BLOCKED: $1" >&2
Expand Down
37 changes: 37 additions & 0 deletions .claude/hooks/no-sandbagging-disguise.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,43 @@ INPUT="$(cat)"
# Bail out with status 0 (the non-blocking exit used by this hook) when the
# payload cannot be inspected: either jq is not installed, or `jq -e .`
# rejects the contents of $INPUT.
command -v jq >/dev/null 2>&1 || exit 0
printf '%s' "$INPUT" | jq -e . >/dev/null 2>&1 || exit 0

# Rust path: prefer agentcloseout-physics when available.
if command -v agentcloseout-physics >/dev/null 2>&1; then
RULES_DIR="${LLM_DARK_PATTERNS_RULES_DIR:-}"
if [ -z "$RULES_DIR" ]; then
for candidate in \
"$(dirname "$0")/../../agent-closeout-bench/rules/closeout" \
"/home/fer/Documents/agent-closeout-bench/rules/closeout" \
"${XDG_CONFIG_HOME:-$HOME/.config}/agentcloseout-physics/rules/closeout"; do
if [ -d "$candidate" ]; then RULES_DIR="$candidate"; break; fi
done
fi
if [ -n "$RULES_DIR" ] && [ -d "$RULES_DIR" ] && [ -f "$RULES_DIR/sandbagging_disguise.yaml" ]; then
TMP_INPUT="$(mktemp)"; printf '%s' "$INPUT" > "$TMP_INPUT"
VERDICT_JSON="$(agentcloseout-physics scan --category sandbagging_disguise --rules "$RULES_DIR" --input "$TMP_INPUT" 2>/dev/null || true)"
rm -f "$TMP_INPUT"
if [ -n "$VERDICT_JSON" ]; then
DECISION="$(printf '%s' "$VERDICT_JSON" | jq -r '.decision // empty' 2>/dev/null)"
if [ "$DECISION" = "block" ]; then
RULE="$(printf '%s' "$VERDICT_JSON" | jq -r '.matched_rules[0].rule_id // "sandbagging_disguise"' 2>/dev/null)"
EVIDENCE="$(printf '%s' "$VERDICT_JSON" | jq -r '.redacted_evidence[0] // ""' 2>/dev/null)"
echo "BLOCKED: sandbagging disguise: 'tried but couldn't' claim without specific blocker, error, or evidence." >&2
echo "Matched rule: $RULE" >&2
[ -n "$EVIDENCE" ] && echo "Evidence: $EVIDENCE" >&2
echo "" >&2
echo "Repair guidance:" >&2
echo "- Cite the specific error / exit_code / blocker that caused the failure." >&2
echo "- Or mark insufficient_data / Status: blocked with the specific missing input." >&2
echo "- Or make a clear handoff: what would unblock you?" >&2
exit 2
fi
if [ "$DECISION" = "pass" ]; then
exit 0
fi
fi
fi
fi

# json_get FILTER
# Apply the jq FILTER to the hook payload held in $INPUT and print the raw
# (-r) result. The appended "// empty" turns a null or false result into no
# output, and a jq failure is swallowed so the call still returns 0.
json_get() {
  local jq_program="$1 // empty"
  printf '%s' "$INPUT" | jq -r "$jq_program" 2>/dev/null || true
}
block() {
echo "BLOCKED: $1" >&2
Expand Down