1919from ..config import (
2020 DEFAULT_LLM_MAX_CONCURRENT ,
2121 DEFAULT_LLM_TIMEOUT ,
22+ ENV_LLM_GROQ_SERVICE_TIER ,
2223 ENV_LLM_MAX_CONCURRENT ,
2324 ENV_LLM_TIMEOUT ,
2425)
@@ -63,6 +64,7 @@ def __init__(
6364 base_url : str ,
6465 model : str ,
6566 reasoning_effort : str = "low" ,
67+ groq_service_tier : str | None = None ,
6668 ):
6769 """
6870 Initialize LLM provider.
@@ -73,12 +75,15 @@ def __init__(
7375 base_url: Base URL for the API.
7476 model: Model name.
7577 reasoning_effort: Reasoning effort level for supported providers.
78+ groq_service_tier: Groq service tier ("on_demand", "flex", "auto"). Default: None (falls back to the environment variable named by ENV_LLM_GROQ_SERVICE_TIER, then to "auto").
7679 """
7780 self .provider = provider .lower ()
7881 self .api_key = api_key
7982 self .base_url = base_url
8083 self .model = model
8184 self .reasoning_effort = reasoning_effort
85+ # Default to 'auto' for best performance, users can override to 'on_demand' for free tier
86+ self .groq_service_tier = groq_service_tier or os .getenv (ENV_LLM_GROQ_SERVICE_TIER , "auto" )
8287
8388 # Validate provider
8489 valid_providers = ["openai" , "groq" , "ollama" , "gemini" , "anthropic" , "lmstudio" ]
@@ -263,11 +268,15 @@ async def call(
263268 # Provider-specific parameters
264269 if self .provider == "groq" :
265270 call_params ["seed" ] = DEFAULT_LLM_SEED
266- extra_body = {"service_tier" : "auto" }
267- # Only add reasoning parameters for reasoning models
271+ extra_body : dict [str , Any ] = {}
272+ # Add service_tier if configured (requires paid plan for flex/auto)
273+ if self .groq_service_tier :
274+ extra_body ["service_tier" ] = self .groq_service_tier
275+ # Add reasoning parameters for reasoning models
268276 if is_reasoning_model :
269277 extra_body ["include_reasoning" ] = False
270- call_params ["extra_body" ] = extra_body
278+ if extra_body :
279+ call_params ["extra_body" ] = extra_body
271280
272281 last_exception = None
273282
0 commit comments