diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index 4f540fe965e2..5eeedf693446 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -349,7 +349,8 @@ def check_and_update_config(cls, vllm_config: "VllmConfig") -> None: else: parallel_config.worker_cls = "vllm.worker.worker.Worker" # Aiter rms norm perform best when CUDA Graph capture is enabled. - if use_v1 and use_aiter_rms_norm and not is_eager_execution: + if (use_v1 and use_aiter_rms_norm and not is_eager_execution + and "-rms_norm" not in compilation_config.custom_ops): compilation_config.custom_ops.append("+rms_norm") @classmethod