From 91bc7e8c991e29cec32ec8c7c6bbd963bcba2841 Mon Sep 17 00:00:00 2001 From: Xiake Sun Date: Fri, 14 Nov 2025 00:24:13 +0800 Subject: [PATCH] Fix AITER MHA accuracy issue with correct min_seqlen_q in aiter flash_attn_varlen_func for pure prefill and extend phase Signed-off-by: Xiake Sun --- vllm/v1/attention/backends/rocm_aiter_fa.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/v1/attention/backends/rocm_aiter_fa.py b/vllm/v1/attention/backends/rocm_aiter_fa.py index ad454daa582e..ea611848b0e8 100644 --- a/vllm/v1/attention/backends/rocm_aiter_fa.py +++ b/vllm/v1/attention/backends/rocm_aiter_fa.py @@ -729,7 +729,7 @@ def forward( cu_seqlens_k=attn_metadata.prefill_metadata.query_start_loc, max_seqlen_q=attn_metadata.prefill_metadata.max_query_len, max_seqlen_k=attn_metadata.prefill_metadata.max_seq_len, - min_seqlen_q=attn_metadata.prefill_metadata.min_query_len, + min_seqlen_q=1, dropout_p=0.0, softmax_scale=self.scale, causal=True, @@ -759,7 +759,7 @@ def forward( cu_seqlens_q=attn_metadata.extend_metadata.query_start_loc, max_seqlen_q=attn_metadata.extend_metadata.max_query_len, max_seqlen_k=attn_metadata.extend_metadata.max_seq_len, - min_seqlen_q=attn_metadata.extend_metadata.min_query_len, + min_seqlen_q=1, block_table=attn_metadata.block_table[ num_decodes : num_decodes + num_extends ],