Skip to content

Commit d899d18

Browse files
authored
fix: groq llm with free tier doesn't work (#102)
* fix: groq with free tier doesn't work * fix: groq with free tier doesn't work
1 parent 70de23e commit d899d18

File tree

3 files changed

+16
-3
lines changed

3 files changed

+16
-3
lines changed

hindsight-api/hindsight_api/config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
ENV_LLM_BASE_URL = "HINDSIGHT_API_LLM_BASE_URL"
1919
ENV_LLM_MAX_CONCURRENT = "HINDSIGHT_API_LLM_MAX_CONCURRENT"
2020
ENV_LLM_TIMEOUT = "HINDSIGHT_API_LLM_TIMEOUT"
21+
ENV_LLM_GROQ_SERVICE_TIER = "HINDSIGHT_API_LLM_GROQ_SERVICE_TIER"
2122

2223
ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
2324
ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"

hindsight-api/hindsight_api/engine/llm_wrapper.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from ..config import (
2020
DEFAULT_LLM_MAX_CONCURRENT,
2121
DEFAULT_LLM_TIMEOUT,
22+
ENV_LLM_GROQ_SERVICE_TIER,
2223
ENV_LLM_MAX_CONCURRENT,
2324
ENV_LLM_TIMEOUT,
2425
)
@@ -63,6 +64,7 @@ def __init__(
6364
base_url: str,
6465
model: str,
6566
reasoning_effort: str = "low",
67+
groq_service_tier: str | None = None,
6668
):
6769
"""
6870
Initialize LLM provider.
@@ -73,12 +75,15 @@ def __init__(
7375
base_url: Base URL for the API.
7476
model: Model name.
7577
reasoning_effort: Reasoning effort level for supported providers.
78+
groq_service_tier: Groq service tier ("on_demand", "flex", "auto"). Default: None (falls back to the HINDSIGHT_API_LLM_GROQ_SERVICE_TIER environment variable, then "auto").
7679
"""
7780
self.provider = provider.lower()
7881
self.api_key = api_key
7982
self.base_url = base_url
8083
self.model = model
8184
self.reasoning_effort = reasoning_effort
85+
# Explicit argument wins, then the env var, then 'auto' (best performance); free-tier users can override to 'on_demand'
86+
self.groq_service_tier = groq_service_tier or os.getenv(ENV_LLM_GROQ_SERVICE_TIER, "auto")
8287

8388
# Validate provider
8489
valid_providers = ["openai", "groq", "ollama", "gemini", "anthropic", "lmstudio"]
@@ -263,11 +268,15 @@ async def call(
263268
# Provider-specific parameters
264269
if self.provider == "groq":
265270
call_params["seed"] = DEFAULT_LLM_SEED
266-
extra_body = {"service_tier": "auto"}
267-
# Only add reasoning parameters for reasoning models
271+
extra_body: dict[str, Any] = {}
272+
# Add service_tier if configured (requires paid plan for flex/auto)
273+
if self.groq_service_tier:
274+
extra_body["service_tier"] = self.groq_service_tier
275+
# Add reasoning parameters for reasoning models
268276
if is_reasoning_model:
269277
extra_body["include_reasoning"] = False
270-
call_params["extra_body"] = extra_body
278+
if extra_body:
279+
call_params["extra_body"] = extra_body
271280

272281
last_exception = None
273282

hindsight-docs/docs/developer/configuration.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ If not provided, the server uses embedded `pg0` — convenient for development b
3333
| `HINDSIGHT_API_LLM_BASE_URL` | Custom LLM endpoint | Provider default |
3434
| `HINDSIGHT_API_LLM_MAX_CONCURRENT` | Max concurrent LLM requests | `32` |
3535
| `HINDSIGHT_API_LLM_TIMEOUT` | LLM request timeout in seconds | `120` |
36+
| `HINDSIGHT_API_LLM_GROQ_SERVICE_TIER` | Groq service tier: `on_demand`, `flex`, `auto` | `auto` |
3637

3738
**Provider Examples**
3839

@@ -41,6 +42,8 @@ If not provided, the server uses embedded `pg0` — convenient for development b
4142
export HINDSIGHT_API_LLM_PROVIDER=groq
4243
export HINDSIGHT_API_LLM_API_KEY=gsk_xxxxxxxxxxxx
4344
export HINDSIGHT_API_LLM_MODEL=openai/gpt-oss-20b
45+
# For free tier users: override to on_demand if you get service_tier errors
46+
# export HINDSIGHT_API_LLM_GROQ_SERVICE_TIER=on_demand
4447

4548
# OpenAI
4649
export HINDSIGHT_API_LLM_PROVIDER=openai

0 commit comments

Comments
 (0)