From 05b581d20d60270615179500ac1c3b31db9c11fb Mon Sep 17 00:00:00 2001
From: Michael Goin
Date: Mon, 22 Sep 2025 15:03:50 -0400
Subject: [PATCH] [Bugfix] Remove contiguous output req for context parallel MLA

Signed-off-by: Michael Goin
---
 vllm/attention/ops/common.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/vllm/attention/ops/common.py b/vllm/attention/ops/common.py
index 189b57e8e8b8..6253e1e56b0f 100644
--- a/vllm/attention/ops/common.py
+++ b/vllm/attention/ops/common.py
@@ -134,6 +134,5 @@ def cp_lse_ag_out_rs(cp_attn_out: torch.Tensor,
     cp_attn_lse = cp_attn_lse.contiguous()
     lses = cp_group.all_gather(cp_attn_lse, dim=0).view_as(lses)
     out, _ = correct_attn_out(cp_attn_out, lses, cp_group.rank_in_group, ctx)
-    assert out.is_contiguous()
     out = cp_group.reduce_scatter(out, dim=1)
     return out
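
Note: the removed assert only enforced that the corrected attention output be
contiguous before the reduce_scatter; the change drops that requirement,
presumably because the corrected output can be a non-contiguous view in some
context-parallel configurations and the collective does not need contiguity.
For reference, the rescaling step that correct_attn_out performs follows the
standard log-sum-exp (LSE) correction for combining partial attention outputs
across context-parallel ranks. The sketch below is a minimal, self-contained
illustration of that step, not vLLM's implementation; tensor names and shapes
are assumptions.

    # Minimal sketch of LSE correction for context-parallel attention
    # (illustrative only; not vLLM's correct_attn_out).
    import torch

    def correct_partial_attn_out(
        partial_out: torch.Tensor,  # [num_tokens, num_heads, head_dim], this rank's partial output
        all_lses: torch.Tensor,     # [cp_world_size, num_tokens, num_heads], all-gathered LSEs
        rank: int,
    ) -> torch.Tensor:
        # Global LSE across all ranks: log(sum_i exp(lse_i)).
        global_lse = torch.logsumexp(all_lses, dim=0)   # [num_tokens, num_heads]
        # Rescale this rank's partial output by exp(lse_rank - global_lse) so that
        # summing the rescaled partials over ranks reproduces full attention.
        scale = torch.exp(all_lses[rank] - global_lse)  # [num_tokens, num_heads]
        return partial_out * scale.unsqueeze(-1)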