From 403bd6355ab4206b35440a1af83e91892604cdc4 Mon Sep 17 00:00:00 2001
From: Bill Nell <bnell@redhat.com>
Date: Sun, 12 Oct 2025 19:12:42 +0000
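Subject: [PATCH 1/2] Cache the result of disable_inplace()

disable_inplace() only wraps is_torch_equal_or_newer("2.9"), so decorate
it with functools.cache and evaluate the version check once rather than
on every call.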

Signed-off-by: Bill Nell <bnell@redhat.com>
---
 vllm/model_executor/layers/fused_moe/utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/vllm/model_executor/layers/fused_moe/utils.py b/vllm/model_executor/layers/fused_moe/utils.py
index a682f848b0c4..e5957474630c 100644
--- a/vllm/model_executor/layers/fused_moe/utils.py
+++ b/vllm/model_executor/layers/fused_moe/utils.py
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import functools
 from math import prod
 
 import torch
@@ -325,5 +326,6 @@ def activation_without_mul(activation: str) -> str:
 # Torch custom ops can't deal with outputs aliasing inputs so we need to
 # disable inplace for torch >= 2.9.
 # See https://github.com/vllm-project/vllm/issues/26378
+@functools.cache
 def disable_inplace() -> bool:
     return is_torch_equal_or_newer("2.9")

From a04af65f8535de36a3c168a4cab9641af4425f10 Mon Sep 17 00:00:00 2001
From: Bill Nell <bnell@redhat.com>
Date: Sun, 12 Oct 2025 21:19:08 +0000
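Subject: [PATCH 2/2] Fix unrelated lint error in bert.py

Spell the cls_token_id and sep_token_id annotations in
SPLADESparsePooler.__init__ as `int | None` instead of `Optional[int]`.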

Signed-off-by: Bill Nell <bnell@redhat.com>
---
 vllm/model_executor/models/bert.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/models/bert.py b/vllm/model_executor/models/bert.py
index df302aee0bf6..6e81eb8dc91b 100644
--- a/vllm/model_executor/models/bert.py
+++ b/vllm/model_executor/models/bert.py
@@ -609,8 +609,8 @@ class SPLADESparsePooler(Pooler):
     def __init__(
         self,
         mlm_head: nn.Module,
-        cls_token_id: Optional[int] = 101,
-        sep_token_id: Optional[int] = 102,
+        cls_token_id: int | None = 101,
+        sep_token_id: int | None = 102,
         pooling: str = "max",
         remove_cls_sep: bool = True,
     ):