
Commit bb1f9cb

feat: support for gemini-3-pro and gpt-5.2 (#30)
* feat: support for gemini-3-pro and gpt-5.2
* feat: support for gemini-3-pro and gpt-5.2
* feat: support for gemini-3-pro and gpt-5.2
* feat: support for gemini-3-pro and gpt-5.2
* feat: add local mcp server
* docs
* docs
1 parent 7dd6853 commit bb1f9cb

11 files changed: +5617 −123 lines changed


.githooks/pre-commit

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Pre-commit hook - runs all scripts in scripts/hooks/
+
+set -e
+
+REPO_ROOT="$(git rev-parse --show-toplevel)"
+HOOKS_DIR="$REPO_ROOT/scripts/hooks"
+
+if [ ! -d "$HOOKS_DIR" ]; then
+  exit 0
+fi
+
+echo ""
+echo "=== Running pre-commit hooks ==="
+echo ""
+
+# Run all executable scripts in hooks directory
+for hook in "$HOOKS_DIR"/*.sh; do
+  if [ -x "$hook" ]; then
+    echo "[hook] $(basename "$hook")"
+    (cd "$REPO_ROOT" && "$hook")
+  fi
+done
+
+echo ""
+echo "=== Pre-commit hooks completed ==="
+echo ""

hindsight-api/hindsight_api/engine/search/think_utils.py

Lines changed: 78 additions & 0 deletions
@@ -282,3 +282,81 @@ def singularize_verb(verb):
     except Exception as e:
         logger.warning(f"Failed to extract opinions: {str(e)}")
         return []
+
+
+async def reflect(
+    llm_config,
+    query: str,
+    experience_facts: List[str] = None,
+    world_facts: List[str] = None,
+    opinion_facts: List[str] = None,
+    name: str = "Assistant",
+    disposition: DispositionTraits = None,
+    background: str = "",
+    context: str = None,
+) -> str:
+    """
+    Standalone reflect function for generating answers based on facts.
+
+    This is a static version of the reflect operation that can be called
+    without a MemoryEngine instance, useful for testing.
+
+    Args:
+        llm_config: LLM provider instance
+        query: Question to answer
+        experience_facts: List of experience/agent fact strings
+        world_facts: List of world fact strings
+        opinion_facts: List of opinion fact strings
+        name: Name of the agent/persona
+        disposition: Disposition traits (defaults to neutral)
+        background: Background information
+        context: Additional context for the prompt
+
+    Returns:
+        Generated answer text
+    """
+    # Default disposition if not provided
+    if disposition is None:
+        disposition = DispositionTraits(skepticism=3, literalism=3, empathy=3)
+
+    # Convert string lists to MemoryFact format for formatting
+    def to_memory_facts(facts: List[str], fact_type: str) -> List[MemoryFact]:
+        if not facts:
+            return []
+        return [MemoryFact(id=f"test-{i}", text=f, fact_type=fact_type) for i, f in enumerate(facts)]
+
+    agent_results = to_memory_facts(experience_facts or [], "experience")
+    world_results = to_memory_facts(world_facts or [], "world")
+    opinion_results = to_memory_facts(opinion_facts or [], "opinion")
+
+    # Format facts for prompt
+    agent_facts_text = format_facts_for_prompt(agent_results)
+    world_facts_text = format_facts_for_prompt(world_results)
+    opinion_facts_text = format_facts_for_prompt(opinion_results)
+
+    # Build prompt
+    prompt = build_think_prompt(
+        agent_facts_text=agent_facts_text,
+        world_facts_text=world_facts_text,
+        opinion_facts_text=opinion_facts_text,
+        query=query,
+        name=name,
+        disposition=disposition,
+        background=background,
+        context=context,
+    )
+
+    system_message = get_system_message(disposition)
+
+    # Call LLM
+    answer_text = await llm_config.call(
+        messages=[
+            {"role": "system", "content": system_message},
+            {"role": "user", "content": prompt}
+        ],
+        scope="memory_think",
+        temperature=0.9,
+        max_completion_tokens=1000
+    )
+
+    return answer_text.strip()
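
A minimal sketch (not part of this commit) of driving the new standalone `reflect` helper outside pytest, mirroring the call pattern used in the updated test further down. The `OPENAI_API_KEY` environment variable and the `asyncio.run` wrapper are assumptions for illustration only; provider and model values are taken from the test matrix in this commit.

import asyncio
import os

from hindsight_api.engine.llm_wrapper import LLMProvider
from hindsight_api.engine.search.think_utils import reflect


async def main() -> None:
    # Provider/model mirror entries from the test matrix; the env var name is an assumption.
    llm = LLMProvider(
        provider="openai",
        api_key=os.environ["OPENAI_API_KEY"],  # assumed env var name, not from this diff
        base_url="",
        model="gpt-5.2",
    )
    # reflect() builds the think prompt from plain fact strings and returns the answer text.
    answer = await reflect(
        llm_config=llm,
        query="What was the highlight of my Paris trip?",
        experience_facts=["I saw the Eiffel Tower and it was amazing"],
        world_facts=["The Eiffel Tower is a famous landmark in Paris"],
        name="Traveler",
    )
    print(answer)


if __name__ == "__main__":
    asyncio.run(main())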
Lines changed: 52 additions & 69 deletions
@@ -1,9 +1,12 @@
 """
-Test LLM provider with different models and providers.
+Test LLM provider with different models using actual memory operations.
 """
 import os
+from datetime import datetime
 import pytest
 from hindsight_api.engine.llm_wrapper import LLMProvider
+from hindsight_api.engine.utils import extract_facts
+from hindsight_api.engine.search.think_utils import reflect


 # Model matrix: (provider, model)
@@ -15,13 +18,14 @@
     ("openai", "gpt-5-mini"),
     ("openai", "gpt-5-nano"),
     ("openai", "gpt-5"),
+    ("openai", "gpt-5.2"),
     # Groq models
-    ("groq", "llama-3.3-70b-versatile"),
     ("groq", "openai/gpt-oss-120b"),
     ("groq", "openai/gpt-oss-20b"),
     # Gemini models
     ("gemini", "gemini-2.5-flash"),
     ("gemini", "gemini-2.5-flash-lite"),
+    ("gemini", "gemini-3-pro-preview"),
 ]


@@ -38,10 +42,10 @@ def get_api_key_for_provider(provider: str) -> str | None:

 @pytest.mark.parametrize("provider,model", MODEL_MATRIX)
 @pytest.mark.asyncio
-async def test_llm_provider_call(provider: str, model: str):
+async def test_llm_provider_memory_operations(provider: str, model: str):
     """
-    Test LLM provider can make a basic call with different models.
-    Skips if the required API key is not available.
+    Test LLM provider with actual memory operations: fact extraction and reflect.
+    All models must pass this test.
     """
     api_key = get_api_key_for_provider(provider)
     if not api_key:
@@ -54,74 +58,53 @@ async def test_llm_provider_call(provider: str, model: str):
         model=model,
     )

-    # Test basic call
-    response = await llm.call(
-        messages=[{"role": "user", "content": "Say 'hello' and nothing else."}],
-        max_completion_tokens=50,
-        temperature=0.1,
-    )
-
-    print(f"\n{provider}/{model} response: {response}")
-    assert response is not None, f"{provider}/{model} returned None"
-
-
-@pytest.mark.parametrize("provider,model", MODEL_MATRIX)
-@pytest.mark.asyncio
-async def test_llm_provider_verify_connection(provider: str, model: str):
-    """
-    Test LLM provider verify_connection method with different models.
-    Skips if the required API key is not available.
+    # Test 1: Fact extraction (structured output)
+    test_text = """
+    User: I just got back from my trip to Paris last week. The Eiffel Tower was amazing!
+    Assistant: That sounds wonderful! How long were you there?
+    User: About 5 days. I also visited the Louvre and saw the Mona Lisa.
     """
-    api_key = get_api_key_for_provider(provider)
-    if not api_key:
-        pytest.skip(f"Skipping {provider}/{model}: no API key available")
+    event_date = datetime(2024, 12, 10)

-    llm = LLMProvider(
-        provider=provider,
-        api_key=api_key,
-        base_url="",
-        model=model,
+    facts, chunks = await extract_facts(
+        text=test_text,
+        event_date=event_date,
+        context="Travel conversation",
+        llm_config=llm,
     )

-    # Test verify_connection
-    await llm.verify_connection()
-    print(f"\n{provider}/{model} connection verified")
-
-
-# Models that support large output (65000+ tokens)
-LARGE_OUTPUT_MODELS = [
-    ("openai", "gpt-5-mini"),
-    ("openai", "gpt-5-nano"),
-    ("openai", "gpt-5"),
-    ("gemini", "gemini-2.5-flash"),
-    ("gemini", "gemini-2.5-flash-lite"),
-]
-
-
-@pytest.mark.parametrize("provider,model", LARGE_OUTPUT_MODELS)
-@pytest.mark.asyncio
-async def test_llm_provider_large_output(provider: str, model: str):
-    """
-    Test LLM provider with large max_completion_tokens (65000).
-    Only tests models that support large outputs.
-    Skips if the required API key is not available.
-    """
-    api_key = get_api_key_for_provider(provider)
-    if not api_key:
-        pytest.skip(f"Skipping {provider}/{model}: no API key available")
-
-    llm = LLMProvider(
-        provider=provider,
-        api_key=api_key,
-        base_url="",
-        model=model,
+    print(f"\n{provider}/{model} - Fact extraction:")
+    print(f" Extracted {len(facts)} facts from {len(chunks)} chunks")
+    for fact in facts:
+        print(f" - {fact.fact}")
+
+    assert facts is not None, f"{provider}/{model} fact extraction returned None"
+    assert len(facts) > 0, f"{provider}/{model} should extract at least one fact"
+
+    # Verify facts have required fields
+    for fact in facts:
+        assert fact.fact, f"{provider}/{model} fact missing text"
+        assert fact.fact_type in ["world", "experience", "opinion"], f"{provider}/{model} invalid fact_type: {fact.fact_type}"
+
+    # Test 2: Reflect (actual reflect function)
+    response = await reflect(
+        llm_config=llm,
+        query="What was the highlight of my Paris trip?",
+        experience_facts=[
+            "I visited Paris in December 2024",
+            "I saw the Eiffel Tower and it was amazing",
+            "I visited the Louvre and saw the Mona Lisa",
+            "The trip lasted 5 days",
+        ],
+        world_facts=[
+            "The Eiffel Tower is a famous landmark in Paris",
+            "The Mona Lisa is displayed at the Louvre museum",
+        ],
+        name="Traveler",
    )

-    # Test call with large max_completion_tokens
-    response = await llm.call(
-        messages=[{"role": "user", "content": "Say 'ok'"}],
-        max_completion_tokens=65000,
-    )
+    print(f"\n{provider}/{model} - Reflect response:")
+    print(f" {response[:200]}...")

-    print(f"\n{provider}/{model} large output response: {response}")
-    assert response is not None, f"{provider}/{model} returned None"
+    assert response is not None, f"{provider}/{model} reflect returned None"
+    assert len(response) > 10, f"{provider}/{model} reflect response too short"
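
The parametrized test above skips a model when `get_api_key_for_provider` returns no key; the function's body sits outside these hunks. A minimal sketch of what such a lookup could look like is shown below — the environment-variable names are assumptions based on common provider conventions, not taken from this diff.

import os


def get_api_key_for_provider(provider: str) -> str | None:
    # Hypothetical mapping only; the real implementation lives outside this hunk.
    env_vars = {
        "openai": "OPENAI_API_KEY",
        "groq": "GROQ_API_KEY",
        "gemini": "GEMINI_API_KEY",
    }
    env_var = env_vars.get(provider)
    return os.environ.get(env_var) if env_var else None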

hindsight-docs/docs/developer/models.md

Lines changed: 2 additions & 1 deletion
@@ -26,17 +26,18 @@ The following models have been tested and verified to work correctly with Hindsight.

 | Provider | Model |
 |----------|-------|
+| **OpenAI** | `gpt-5.2` |
 | **OpenAI** | `gpt-5` |
 | **OpenAI** | `gpt-5-mini` |
 | **OpenAI** | `gpt-5-nano` |
 | **OpenAI** | `gpt-4.1-mini` |
 | **OpenAI** | `gpt-4.1-nano` |
 | **OpenAI** | `gpt-4o-mini` |
+| **Gemini** | `gemini-3-pro-preview` |
 | **Gemini** | `gemini-2.5-flash` |
 | **Gemini** | `gemini-2.5-flash-lite` |
 | **Groq** | `openai/gpt-oss-120b` |
 | **Groq** | `openai/gpt-oss-20b` |
-| **Groq** | `llama-3.3-70b-versatile` |

 ### Using Other Models

hindsight-docs/docs/sdks/integrations/local-mcp.md

Lines changed: 5 additions & 32 deletions
@@ -65,13 +65,13 @@ By default, memories are stored in a bank called `mcp`. To use a different bank:

 ## Environment Variables

+All standard [Hindsight configuration variables](/developer/configuration) are supported.
+
+### Local MCP Specific
+
 | Variable | Required | Default | Description |
 |----------|----------|---------|-------------|
-| `HINDSIGHT_API_LLM_API_KEY` | Yes | - | API key for the LLM provider |
-| `HINDSIGHT_API_LLM_PROVIDER` | No | `openai` | LLM provider (`openai`, `groq`, `anthropic`) |
-| `HINDSIGHT_API_LLM_MODEL` | No | `gpt-4o-mini` | Model to use for fact extraction |
-| `HINDSIGHT_API_MCP_LOCAL_BANK_ID` | No | `mcp` | Memory bank ID |
-| `HINDSIGHT_API_LOG_LEVEL` | No | `info` | Log level (`debug`, `info`, `warning`, `error`) |
+| `HINDSIGHT_API_MCP_LOCAL_BANK_ID` | No | `mcp` | Memory bank ID to use |

 ## Available Tools

@@ -125,23 +125,6 @@ Search memories to provide personalized responses.
 }
 ```

-**Response:**
-```json
-{
-  "results": [
-    {
-      "id": "...",
-      "text": "User's favorite color is blue",
-      "fact_type": "world",
-      "context": "preferences",
-      "event_date": null,
-      "score": 0.95
-    }
-  ],
-  "total_tokens": 42
-}
-```
-
 ## How It Works

 The local MCP server:

@@ -152,16 +135,6 @@ The local MCP server:

 Data is persisted in the pg0 data directory (`~/.pg0/hindsight-mcp/`), so your memories survive restarts.

-## Comparison: Local vs Server MCP
-
-| Feature | Local MCP | Server MCP |
-|---------|-----------|------------|
-| Setup | Zero config | Requires running server |
-| Database | Embedded (pg0) | External PostgreSQL |
-| Multi-user | Single user | Multi-tenant |
-| Scalability | Single machine | Horizontally scalable |
-| Use case | Personal/development | Production/teams |
-
 ## Troubleshooting

 ### "HINDSIGHT_API_LLM_API_KEY required"

hindsight-docs/sidebars.ts

Lines changed: 5 additions & 0 deletions
@@ -152,6 +152,11 @@ const sidebars: SidebarsConfig = {
       label: 'Integrations',
       collapsible: false,
       items: [
+        {
+          type: 'doc',
+          id: 'sdks/integrations/local-mcp',
+          label: 'Local MCP Server',
+        },
         {
           type: 'doc',
           id: 'sdks/integrations/litellm',
