# Patch summary (text before the first "diff --git" header is preamble,
# not part of any hunk):
#   * fused_moe/utils.py: import functools and decorate disable_inplace()
#     with @functools.cache, so the is_torch_equal_or_newer("2.9") check is
#     evaluated once and the cached bool is returned on later calls.
#   * models/bert.py: change the SPLADESparsePooler.__init__ annotations for
#     cls_token_id / sep_token_id from Optional[int] to int | None; the
#     defaults (101 / 102) are unchanged.
diff --git a/vllm/model_executor/layers/fused_moe/utils.py b/vllm/model_executor/layers/fused_moe/utils.py
index a682f848b0c4..e5957474630c 100644
--- a/vllm/model_executor/layers/fused_moe/utils.py
+++ b/vllm/model_executor/layers/fused_moe/utils.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import functools
 from math import prod
 
 import torch
@@ -325,5 +326,6 @@ def activation_without_mul(activation: str) -> str:
 # Torch custom ops can't deal with outputs aliasing inputs so we need to
 # disable inplace for torch >= 2.9.
 # See https://github.com/vllm-project/vllm/issues/26378
+@functools.cache
 def disable_inplace() -> bool:
     return is_torch_equal_or_newer("2.9")
diff --git a/vllm/model_executor/models/bert.py b/vllm/model_executor/models/bert.py
index df302aee0bf6..6e81eb8dc91b 100644
--- a/vllm/model_executor/models/bert.py
+++ b/vllm/model_executor/models/bert.py
@@ -609,8 +609,8 @@ class SPLADESparsePooler(Pooler):
     def __init__(
         self,
         mlm_head: nn.Module,
-        cls_token_id: Optional[int] = 101,
-        sep_token_id: Optional[int] = 102,
+        cls_token_id: int | None = 101,
+        sep_token_id: int | None = 102,
         pooling: str = "max",
         remove_cls_sep: bool = True,
     ):