vllm-project · youkaichao · Oct 30, 2025 · Oct 27, 2025 · Oct 30, 2025
diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
@@ -382,6 +382,7 @@ th {
 | `InternLM3ForCausalLM` | InternLM3 | `internlm/internlm3-8b-instruct`, etc. | ✅︎ | ✅︎ |
 | `JAISLMHeadModel` | Jais | `inceptionai/jais-13b`, `inceptionai/jais-13b-chat`, `inceptionai/jais-30b-v3`, `inceptionai/jais-30b-chat-v3`, etc. | | ✅︎ |
 | `JambaForCausalLM` | Jamba | `ai21labs/AI21-Jamba-1.5-Large`, `ai21labs/AI21-Jamba-1.5-Mini`, `ai21labs/Jamba-v0.1`, etc. | ✅︎ | ✅︎ |
+| `KimiLinearForCausalLM` | Kimi-Linear-48B-A3B-Base, Kimi-Linear-48B-A3B-Instruct | `moonshotai/Kimi-Linear-48B-A3B-Base`, `moonshotai/Kimi-Linear-48B-A3B-Instruct` | | ✅︎ |
 | `Lfm2ForCausalLM`  | LFM2  | `LiquidAI/LFM2-1.2B`, `LiquidAI/LFM2-700M`, `LiquidAI/LFM2-350M`, etc. | ✅︎ | ✅︎ |
 | `Lfm2MoeForCausalLM`  | LFM2MoE  | `LiquidAI/LFM2-8B-A1B-preview`, etc. | ✅︎ | ✅︎ |
 | `LlamaForCausalLM` | Llama 3.1, Llama 3, Llama 2, LLaMA, Yi | `meta-llama/Meta-Llama-3.1-405B-Instruct`, `meta-llama/Meta-Llama-3.1-70B`, `meta-llama/Meta-Llama-3-70B-Instruct`, `meta-llama/Llama-2-70b-hf`, `01-ai/Yi-34B`, etc. | ✅︎ | ✅︎ |

@@ -296,6 +296,9 @@ def check_available_online(
             "random": "ai21labs/Jamba-tiny-random",
         },
     ),
+    "KimiLinearForCausalLM": _HfExamplesInfo(
+        "moonshotai/Kimi-Linear-48B-A3B-Instruct", trust_remote_code=True
+    ),
     "Lfm2ForCausalLM": _HfExamplesInfo("LiquidAI/LFM2-1.2B"),
     "Lfm2MoeForCausalLM": _HfExamplesInfo(
         "LiquidAI/LFM2-8B-A1B", min_transformers_version="4.58"

@@ -453,6 +453,7 @@ class CompilationConfig:
         "vllm::linear_attention",
         "vllm::plamo2_mamba_mixer",
         "vllm::gdn_attention",
+        "vllm::kda_attention",
         "vllm::sparse_attn_indexer",
     ]
 

@@ -1257,6 +1257,7 @@ def is_deepseek_mla(self) -> bool:
             "deepseek_v32",
             "deepseek_mtp",
             "kimi_k2",
+            "kimi_linear",
             "longcat_flash",
         ):
             return self.hf_text_config.kv_lora_rank is not None

diff --git a/vllm/model_executor/layers/fla/ops/kda.py b/vllm/model_executor/layers/fla/ops/kda.py
@@ -1304,7 +1304,7 @@ def kda_gate_fwd_kernel(
     tl.store(y_ptr, b_y.to(y.dtype.element_ty), boundary_check=(0, 1))
 
 
-def kda_gate_fwd(
+def fused_kda_gate(
     g: torch.Tensor,
     A: torch.Tensor,
     head_k_dim: int,