|
35 | 35 | RowParallelLinear)
|
36 | 36 | from vllm.model_executor.layers.logits_processor import LogitsProcessor
|
37 | 37 | from vllm.model_executor.layers.mamba.abstract import MambaBase
|
| 38 | +from vllm.model_executor.layers.mamba.mamba2_metadata import update_metadata |
38 | 39 | from vllm.model_executor.layers.mamba.mamba_mixer2 import (
|
39 | 40 | mamba_v2_sharded_weight_loader)
|
40 | 41 | from vllm.model_executor.layers.mamba.mamba_utils import (
|
@@ -414,6 +415,7 @@ def _forward(
|
414 | 415 |
|
415 | 416 | assert isinstance(attn_metadata, dict)
|
416 | 417 | attn_metadata = attn_metadata[self.prefix]
|
| 418 | + conv_metadata = attn_metadata |
417 | 419 | assert isinstance(attn_metadata, GDNAttentionMetadata)
|
418 | 420 | has_initial_state = attn_metadata.has_initial_state
|
419 | 421 | spec_query_start_loc = attn_metadata.spec_query_start_loc
|
@@ -475,17 +477,23 @@ def _forward(
|
475 | 477 |
|
476 | 478 | # 2.2: process the remaining part
|
477 | 479 | if attn_metadata.num_prefills > 0:
|
| 480 | + mixed_qkv_non_spec_T = mixed_qkv_non_spec.transpose(0, 1) |
| 481 | + if conv_metadata.cu_seqlen is None: |
| 482 | + conv_metadata = update_metadata(mixed_qkv_non_spec_T, |
| 483 | + non_spec_query_start_loc, |
| 484 | + conv_metadata) |
478 | 485 | # - "cache_indices" updates the conv_state cache in positions
|
479 | 486 | # pointed to by "mamba_cache_params.state_indices_tensor"
|
480 | 487 | mixed_qkv_non_spec = causal_conv1d_fn(
|
481 |
| - mixed_qkv_non_spec.transpose(0, 1), |
| 488 | + mixed_qkv_non_spec_T, |
482 | 489 | conv_weights,
|
483 | 490 | self.conv1d.bias,
|
484 | 491 | activation=self.activation,
|
485 | 492 | conv_states=conv_state,
|
486 | 493 | has_initial_state=has_initial_state,
|
487 | 494 | cache_indices=non_spec_state_indices_tensor,
|
488 | 495 | query_start_loc=non_spec_query_start_loc,
|
| 496 | + metadata=conv_metadata, |
489 | 497 | ).transpose(0, 1)
|
490 | 498 | elif attn_metadata.num_decodes > 0:
|
491 | 499 | mixed_qkv_non_spec = causal_conv1d_update(
|
|
0 commit comments