From 9f4eac690ce1178f27a7a61280cb888322e7eacc Mon Sep 17 00:00:00 2001 From: zjy0516 Date: Sat, 8 Nov 2025 16:31:42 +0800 Subject: [PATCH 1/3] init Signed-off-by: zjy0516 --- vllm/model_executor/models/qwen3_next.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py index b095c79dc954..6601e9f1cb79 100644 --- a/vllm/model_executor/models/qwen3_next.py +++ b/vllm/model_executor/models/qwen3_next.py @@ -1365,8 +1365,8 @@ def fused_gdn_gating_kernel( blk_g = -tl.exp(blk_A_log.to(tl.float32)) * softplus_x tl.store(g + off, blk_g.to(g.dtype.element_ty), mask=mask) # compute beta_output = sigmoid(b) - blk_beta = 1.0 / (1.0 + tl.exp(-blk_b.to(tl.float32))) - tl.store(beta_output + off, blk_beta.to(beta_output.dtype.element_ty), mask=mask) + blk_beta_output = tl.sigmoid(blk_b.to(tl.float32)) + tl.store(beta_output + off, blk_beta_output.to(b.dtype.element_ty), mask=mask) def fused_gdn_gating( From 8265f1714d8d062d69116d4e84ada19a8f3ce5b2 Mon Sep 17 00:00:00 2001 From: zjy0516 Date: Sat, 8 Nov 2025 16:56:06 +0800 Subject: [PATCH 2/3] update Signed-off-by: zjy0516 --- vllm/model_executor/models/qwen3_next.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py index 6601e9f1cb79..9e8e1c2d4188 100644 --- a/vllm/model_executor/models/qwen3_next.py +++ b/vllm/model_executor/models/qwen3_next.py @@ -1387,7 +1387,7 @@ def fused_gdn_gating( seq_len = 1 grid = (batch, seq_len, triton.cdiv(num_heads, 8)) g = torch.empty(1, batch, num_heads, dtype=torch.float32, device=a.device) - beta_output = torch.empty(1, batch, num_heads, dtype=torch.float32, device=b.device) + beta_output = torch.empty(1, batch, num_heads, dtype=b.dtype, device=b.device) fused_gdn_gating_kernel[grid]( g, beta_output, From fa57bb6fe0b556bec6e93f4aec09980d8093fc61 Mon Sep 17 00:00:00 2001 From: zjy0516 Date: Sat, 8 Nov 2025 17:01:27 +0800 Subject: [PATCH 3/3] update Signed-off-by: zjy0516 --- vllm/model_executor/models/qwen3_next.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py index 9e8e1c2d4188..906c94b86a1b 100644 --- a/vllm/model_executor/models/qwen3_next.py +++ b/vllm/model_executor/models/qwen3_next.py @@ -1366,7 +1366,9 @@ def fused_gdn_gating_kernel( tl.store(g + off, blk_g.to(g.dtype.element_ty), mask=mask) # compute beta_output = sigmoid(b) blk_beta_output = tl.sigmoid(blk_b.to(tl.float32)) - tl.store(beta_output + off, blk_beta_output.to(b.dtype.element_ty), mask=mask) + tl.store( + beta_output + off, blk_beta_output.to(beta_output.dtype.element_ty), mask=mask + ) def fused_gdn_gating(