diff --git a/vllm/entrypoints/renderer.py b/vllm/entrypoints/renderer.py
index f0798afbcf21..fb859d57be9f 100644
--- a/vllm/entrypoints/renderer.py
+++ b/vllm/entrypoints/renderer.py
@@ -383,7 +383,7 @@ def _create_tokens_prompt(
         """Create validated EngineTokensPrompt."""
         if max_length is not None and len(token_ids) > max_length:
             raise ValueError(
-                f"This maximum context length is {max_length} tokens. "
+                f"This model's maximum context length is {max_length} tokens. "
                 f"However, your request has {len(token_ids)} input tokens. "
                 "Please reduce the length of the input messages.")
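
For context, a minimal standalone sketch of the validation this diff touches, showing the corrected error wording when a prompt exceeds the model's context window. This is illustrative only, not vLLM's actual class: the real method lives on a renderer class and returns an EngineTokensPrompt, which is stubbed as a plain dict here.

# Sketch of the length check in _create_tokens_prompt (EngineTokensPrompt
# stubbed as a dict; standalone for illustration, not vLLM's implementation).
from typing import Optional


def _create_tokens_prompt(token_ids: list[int],
                          max_length: Optional[int] = None) -> dict:
    """Create a validated tokens prompt (stub)."""
    if max_length is not None and len(token_ids) > max_length:
        raise ValueError(
            f"This model's maximum context length is {max_length} tokens. "
            f"However, your request has {len(token_ids)} input tokens. "
            "Please reduce the length of the input messages.")
    return {"prompt_token_ids": token_ids}


# A 5-token prompt against a 4-token limit raises with the fixed wording:
# "This model's maximum context length is 4 tokens. However, your request
# has 5 input tokens. ..."
try:
    _create_tokens_prompt([1, 2, 3, 4, 5], max_length=4)
except ValueError as exc:
    print(exc)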