From e5fe085180a2bddebe743226b2d3ac4245d661d1 Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Fri, 7 Nov 2025 15:36:09 +0000 Subject: [PATCH] [Core] Cache `vllm_is_batch_invariant` `vllm_is_batch_invariant` is called many times during inference, which is noticeable in a profile. We should cache it so that we don't need to repeatedly call `os.getenv()`. Signed-off-by: Lukas Geiger --- vllm/model_executor/layers/batch_invariant.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vllm/model_executor/layers/batch_invariant.py b/vllm/model_executor/layers/batch_invariant.py index 65babd10a948..746a543ab827 100644 --- a/vllm/model_executor/layers/batch_invariant.py +++ b/vllm/model_executor/layers/batch_invariant.py @@ -4,6 +4,7 @@ import os from collections import namedtuple from collections.abc import Callable +from functools import cache from typing import Any import torch @@ -857,6 +858,7 @@ def get_batch_invariant_attention_block_size() -> AttentionBlockSize: return AttentionBlockSize(block_m=16, block_n=16) +@cache def vllm_is_batch_invariant(): env_key = "VLLM_BATCH_INVARIANT" is_overridden = False