From 8510ce89becafb116e064376510126fda2bb5d0b Mon Sep 17 00:00:00 2001
From: Zerohertz
Date: Fri, 12 Sep 2025 20:01:01 +0900
Subject: [PATCH 01/11] docs: docstring & indentation of torchao

WARNING - griffe: vllm/model_executor/layers/quantization/torchao.py:175: No type or annotation for parameter 'torchao_config'
WARNING - griffe: vllm/model_executor/layers/quantization/torchao.py:175: Parameter 'torchao_config' does not appear in the function signature
WARNING - griffe: vllm/model_executor/layers/quantization/torchao.py:176: Failed to get 'name: description' pair from 'that encodes the type of quantization and all relevant arguments.'
WARNING - griffe: vllm/model_executor/layers/quantization/torchao.py:149: Confusing indentation for continuation line 6 in docstring, should be 3 * 2 = 6 spaces, not 4
WARNING - griffe: vllm/model_executor/layers/quantization/torchao.py:147: No type or annotation for parameter '`param`'
WARNING - griffe: vllm/model_executor/layers/quantization/torchao.py:147: Parameter '`param`' does not appear in the function signature
WARNING - griffe: vllm/model_executor/layers/quantization/torchao.py:148: No type or annotation for parameter '`torchao_config`'
WARNING - griffe: vllm/model_executor/layers/quantization/torchao.py:148: Parameter '`torchao_config`' does not appear in the function signature

Signed-off-by: Zerohertz
---
 vllm/model_executor/layers/quantization/torchao.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/torchao.py b/vllm/model_executor/layers/quantization/torchao.py
index 3498d2994c2a..2efb605f203f 100644
--- a/vllm/model_executor/layers/quantization/torchao.py
+++ b/vllm/model_executor/layers/quantization/torchao.py
@@ -144,9 +144,9 @@ def torchao_quantize_param_data(param: torch.Tensor,
     """Quantize a Tensor with torchao quantization specified by torchao_config

     Args:
-        `param`: weight parameter of the linear module
-        `torchao_config`: type of quantization and their arguments we want to
-            use to quantize the Tensor
+        param: weight parameter of the linear module
+        torchao_config: type of quantization and their arguments we want to
+            use to quantize the Tensor
     """
     from torchao.core.config import AOBaseConfig
     from torchao.quantization import quantize_
@@ -172,8 +172,8 @@ class TorchAOLinearMethod(LinearMethodBase):
     """Linear method for torchao.

     Args:
-        torchao_config: The torchao quantization config, a string
-            that encodes the type of quantization and all relevant arguments.
+        quant_config: The torchao quantization config, a string that encodes
+            the type of quantization and all relevant arguments.
""" def __init__(self, quant_config: TorchAOConfig): From 78b8b70915a63dfc2a4281ea1e97bba187e38a5c Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Fri, 12 Sep 2025 20:08:52 +0900 Subject: [PATCH 02/11] docs: typo in docstring WARNING - griffe: vllm/model_executor/layers/quantization/utils/int8_utils.py:426: No type or annotation for parameter 'output_dytpe' WARNING - griffe: vllm/model_executor/layers/quantization/utils/int8_utils.py:426: Parameter 'output_dytpe' does not appear in the function signature Signed-off-by: Zerohertz --- vllm/model_executor/layers/quantization/utils/int8_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/quantization/utils/int8_utils.py b/vllm/model_executor/layers/quantization/utils/int8_utils.py index 6840cabbf1ae..62e458ec3c93 100644 --- a/vllm/model_executor/layers/quantization/utils/int8_utils.py +++ b/vllm/model_executor/layers/quantization/utils/int8_utils.py @@ -423,7 +423,7 @@ def w8a8_block_int8_matmul( Bs: The per-block quantization scale for `B`. block_size: The block size for per-block quantization. It should be 2-dim, e.g., [128, 128]. - output_dytpe: The dtype of the returned tensor. + output_dtype: The dtype of the returned tensor. Returns: torch.Tensor: The result of matmul. From 9fcf581c5b1397e25a06d15a2bcaba6e51ae3d5c Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Fri, 12 Sep 2025 20:10:09 +0900 Subject: [PATCH 03/11] docs: docstring in mrope WARNING - griffe: vllm/model_executor/layers/rotary_embedding/mrope.py:138: No type or annotation for parameter 'query' WARNING - griffe: vllm/model_executor/layers/rotary_embedding/mrope.py:138: Parameter 'query' does not appear in the function signature WARNING - griffe: vllm/model_executor/layers/rotary_embedding/mrope.py:139: No type or annotation for parameter 'key' WARNING - griffe: vllm/model_executor/layers/rotary_embedding/mrope.py:139: Parameter 'key' does not appear in the function signature Signed-off-by: Zerohertz --- vllm/model_executor/layers/rotary_embedding/mrope.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/rotary_embedding/mrope.py b/vllm/model_executor/layers/rotary_embedding/mrope.py index 27ca010cdc3a..786a6e1b3e12 100644 --- a/vllm/model_executor/layers/rotary_embedding/mrope.py +++ b/vllm/model_executor/layers/rotary_embedding/mrope.py @@ -135,8 +135,8 @@ def triton_mrope( """Qwen2VL mrope kernel. 
     Args:
-        query: [num_tokens, num_heads * head_size]
-        key: [num_tokens, num_kv_heads * head_size]
+        q: [num_tokens, num_heads * head_size]
+        k: [num_tokens, num_kv_heads * head_size]
         cos: [3, num_tokens, head_size //2 ]
             (T/H/W positions with multimodal inputs)
         sin: [3, num_tokens, head_size //2 ]

From 9b83fa9cdc059861cde9ec313d9bbaf03bec1f24 Mon Sep 17 00:00:00 2001
From: Zerohertz
Date: Fri, 12 Sep 2025 20:13:15 +0900
Subject: [PATCH 04/11] docs: docstring in tensorizer

WARNING - griffe: vllm/model_executor/model_loader/tensorizer.py:178: No type or annotation for parameter 'tensorizer_uri'
WARNING - griffe: vllm/model_executor/model_loader/tensorizer.py:184: No type or annotation for parameter 'tensorizer_dir'
WARNING - griffe: vllm/model_executor/model_loader/tensorizer.py:189: No type or annotation for parameter 'vllm_tensorized'
WARNING - griffe: vllm/model_executor/model_loader/tensorizer.py:196: No type or annotation for parameter 'verify_hash'
WARNING - griffe: vllm/model_executor/model_loader/tensorizer.py:199: No type or annotation for parameter 'num_readers'
WARNING - griffe: vllm/model_executor/model_loader/tensorizer.py:203: No type or annotation for parameter 'encryption_keyfile'
WARNING - griffe: vllm/model_executor/model_loader/tensorizer.py:207: No type or annotation for parameter 's3_access_key_id'
WARNING - griffe: vllm/model_executor/model_loader/tensorizer.py:209: No type or annotation for parameter 's3_secret_access_key'
WARNING - griffe: vllm/model_executor/model_loader/tensorizer.py:211: No type or annotation for parameter 's3_endpoint'
WARNING - griffe: vllm/model_executor/model_loader/tensorizer.py:213: No type or annotation for parameter 'lora_dir'

Signed-off-by: Zerohertz
---
 .../model_executor/model_loader/tensorizer.py | 89 ++++++++++---------
 1 file changed, 45 insertions(+), 44 deletions(-)

diff --git a/vllm/model_executor/model_loader/tensorizer.py b/vllm/model_executor/model_loader/tensorizer.py
index 3d491be3156b..58296131fadb 100644
--- a/vllm/model_executor/model_loader/tensorizer.py
+++ b/vllm/model_executor/model_loader/tensorizer.py
@@ -171,51 +171,52 @@ class TensorizerConfig(MutableMapping):
     _is_sharded: bool = field(init=False, default=False)
     _fields: ClassVar[tuple[str, ...]]
     _keys: ClassVar[frozenset[str]]
-    """
-    Args for the TensorizerConfig class. These are used to configure the
-    behavior of model serialization and deserialization using Tensorizer.
+    """Configuration class for Tensorizer settings.

-    Args:
-        tensorizer_uri: Path to serialized model tensors. Can be a local file
-            path or a S3 URI. This is a required field unless lora_dir is
-            provided and the config is meant to be used for the
-            `tensorize_lora_adapter` function. Unless a `tensorizer_dir` or
-            `lora_dir` is passed to this object's initializer, this is a required
-            argument.
-        tensorizer_dir: Path to a directory containing serialized model tensors,
-            and all other potential model artifacts to load the model, such as
-            configs and tokenizer files. Can be passed instead of `tensorizer_uri`
-            where the `model.tensors` file will be assumed to be in this
-            directory.
-        vllm_tensorized: If True, indicates that the serialized model is a
-            vLLM model. This is used to determine the behavior of the
-            TensorDeserializer when loading tensors from a serialized model.
-            It is far faster to deserialize a vLLM model as it utilizes
-            tensorizer's optimized GPU loading. Note that this is now
-            deprecated, as serialized vLLM models are now automatically
-            inferred as vLLM models.
-        verify_hash: If True, the hashes of each tensor will be verified against
-            the hashes stored in the metadata. A `HashMismatchError` will be
-            raised if any of the hashes do not match.
-        num_readers: Controls how many threads are allowed to read concurrently
-            from the source file. Default is `None`, which will dynamically set
-            the number of readers based on the number of available
-            resources and model size. This greatly increases performance.
-        encryption_keyfile: File path to a binary file containing a
-            binary key to use for decryption. `None` (the default) means
-            no decryption. See the example script in
-            examples/others/tensorize_vllm_model.py.
-        s3_access_key_id: The access key for the S3 bucket. Can also be set via
-            the S3_ACCESS_KEY_ID environment variable.
-        s3_secret_access_key: The secret access key for the S3 bucket. Can also
-            be set via the S3_SECRET_ACCESS_KEY environment variable.
-        s3_endpoint: The endpoint for the S3 bucket. Can also be set via the
-            S3_ENDPOINT_URL environment variable.
-        lora_dir: Path to a directory containing LoRA adapter artifacts for
-            serialization or deserialization. When serializing LoRA adapters
-            this is the only necessary parameter to pass to this object's
-            initializer.
-    """
+    These settings configure the behavior of model serialization and
+    deserialization using Tensorizer.
+
+    Attributes:
+        tensorizer_uri: Path to serialized model tensors. Can be a local file
+            path or an S3 URI. This is a required field unless lora_dir is
+            provided and the config is meant to be used for the
+            `tensorize_lora_adapter` function. Unless a `tensorizer_dir` or
+            `lora_dir` is passed to this object's initializer, this is
+            a required argument.
+        tensorizer_dir: Path to a directory containing serialized model tensors,
+            and all other potential model artifacts to load the model, such as
+            configs and tokenizer files. Can be passed instead of
+            `tensorizer_uri` where the `model.tensors` file will be assumed
+            to be in this directory.
+        vllm_tensorized: If True, indicates that the serialized model is a
+            vLLM model. This is used to determine the behavior of the
+            TensorDeserializer when loading tensors from a serialized model.
+            It is far faster to deserialize a vLLM model as it utilizes
+            tensorizer's optimized GPU loading. Note that this is now
+            deprecated, as serialized vLLM models are now automatically
+            inferred as vLLM models.
+        verify_hash: If True, the hashes of each tensor will be verified
+            against the hashes stored in the metadata. A `HashMismatchError`
+            will be raised if any of the hashes do not match.
+        num_readers: Controls how many threads are allowed to read concurrently
+            from the source file. Default is `None`, which will dynamically set
+            the number of readers based on the number of available
+            resources and model size. This greatly increases performance.
+        encryption_keyfile: File path to a binary file containing a
+            binary key to use for decryption. `None` (the default) means
+            no decryption. See the example script in
+            examples/others/tensorize_vllm_model.py.
+        s3_access_key_id: The access key for the S3 bucket. Can also be set via
+            the S3_ACCESS_KEY_ID environment variable.
+        s3_secret_access_key: The secret access key for the S3 bucket. Can also
+            be set via the S3_SECRET_ACCESS_KEY environment variable.
+        s3_endpoint: The endpoint for the S3 bucket. Can also be set via the
+            S3_ENDPOINT_URL environment variable.
+        lora_dir: Path to a directory containing LoRA adapter artifacts for
+            serialization or deserialization. When serializing LoRA adapters
+            this is the only necessary parameter to pass to this object's
+            initializer.
+    """

     def __post_init__(self):
         # check if the configuration is for a sharded vLLM model

From 9bbbc2abb8fbde06a97349118cdc3f55d286afce Mon Sep 17 00:00:00 2001
From: Zerohertz
Date: Fri, 12 Sep 2025 20:26:06 +0900
Subject: [PATCH 05/11] docs: docstring in aria

WARNING - griffe: vllm/model_executor/models/aria.py:145: Parameter 'patch_to_query_dict' does not appear in the function signature
WARNING - griffe: vllm/model_executor/models/aria.py:146: Failed to get 'name: description' pair from 'query numbers,'
WARNING - griffe: vllm/model_executor/models/aria.py:149: Parameter 'embed_dim' does not appear in the function signature
WARNING - griffe: vllm/model_executor/models/aria.py:150: Parameter 'num_heads' does not appear in the function signature
WARNING - griffe: vllm/model_executor/models/aria.py:151: Parameter 'kv_dim' does not appear in the function signature
WARNING - griffe: vllm/model_executor/models/aria.py:152: Parameter 'ff_dim' does not appear in the function signature
WARNING - griffe: vllm/model_executor/models/aria.py:153: Parameter 'output_dim' does not appear in the function signature
WARNING - griffe: vllm/model_executor/models/aria.py:154: Parameter 'norm_layer' does not appear in the function signature
WARNING - griffe: vllm/model_executor/models/aria.py:285: Failed to get 'name: description' pair from 'sequence_length, hidden_size).'

Signed-off-by: Zerohertz
---
 vllm/model_executor/models/aria.py | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/vllm/model_executor/models/aria.py b/vllm/model_executor/models/aria.py
index 1c7960fa3e0a..d1a9ca8b1908 100644
--- a/vllm/model_executor/models/aria.py
+++ b/vllm/model_executor/models/aria.py
@@ -143,16 +143,7 @@ class AriaProjector(nn.Module):
     projects ViT's outputs into MoE's inputs.

     Args:
-        patch_to_query_dict (dict): Maps patch numbers to their corresponding
-            query numbers,
-            e.g., {1225: 128, 4900: 256}. This allows for different query sizes
-            based on image resolution.
-        embed_dim (int): Embedding dimension.
-        num_heads (int): Number of attention heads.
-        kv_dim (int): Dimension of key and value.
-        ff_dim (int): Hidden dimension of the feed-forward network.
-        output_dim (int): Output dimension.
-        norm_layer (nn.Module): Normalization layer. Default is nn.LayerNorm.
+        config: AriaConfig containing projector configuration parameters.

     Outputs:
         A tensor with the shape of (batch_size, query_number, output_dim)
@@ -282,8 +273,8 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         Forward pass of the MoE Layer.

         Args:
-            hidden_states (torch.Tensor): Input tensor of shape (batch_size,
-                sequence_length, hidden_size).
+            hidden_states: Input tensor of shape
+                (batch_size, sequence_length, hidden_size).

         Returns:
             torch.Tensor: Output tensor after passing through the MoE layer.
From e3d66df53958733904048687271e57436bda10eb Mon Sep 17 00:00:00 2001
From: Zerohertz
Date: Fri, 12 Sep 2025 20:31:00 +0900
Subject: [PATCH 06/11] docs: docstring in bart

WARNING - griffe: vllm/model_executor/models/bart.py:665: Failed to get 'name: description' pair from 'decoder_input_ids'
WARNING - griffe: vllm/model_executor/models/bart.py:669: Failed to get 'name: description' pair from 'decoder_positions'
WARNING - griffe: vllm/model_executor/models/bart.py:492: Failed to get 'name: description' pair from 'decoder_hidden_states'
WARNING - griffe: vllm/model_executor/models/bart.py:494: Failed to get 'name: description' pair from 'encoder_hidden_states'
WARNING - griffe: vllm/model_executor/models/bart.py:586: Failed to get 'name: description' pair from 'input_ids'
WARNING - griffe: vllm/model_executor/models/bart.py:590: Failed to get 'name: description' pair from 'positions'
WARNING - griffe: vllm/model_executor/models/bart.py:403: Failed to get 'name: description' pair from 'hidden_states'
WARNING - griffe: vllm/model_executor/models/bart.py:850: Failed to get 'name: description' pair from 'input_ids'
WARNING - griffe: vllm/model_executor/models/bart.py:852: Failed to get 'name: description' pair from 'positions'
WARNING - griffe: vllm/model_executor/models/bart.py:854: Failed to get 'name: description' pair from 'encoder_input_ids'
WARNING - griffe: vllm/model_executor/models/bart.py:856: Failed to get 'name: description' pair from 'encoder_positions'
WARNING - griffe: vllm/model_executor/models/bart.py:734: Failed to get 'name: description' pair from 'input_ids'
WARNING - griffe: vllm/model_executor/models/bart.py:738: Failed to get 'name: description' pair from 'positions'
WARNING - griffe: vllm/model_executor/models/bart.py:740: Failed to get 'name: description' pair from 'encoder_input_ids'
WARNING - griffe: vllm/model_executor/models/bart.py:1118: Failed to get 'name: description' pair from 'decoder_input_ids'
WARNING - griffe: vllm/model_executor/models/bart.py:1122: Failed to get 'name: description' pair from 'decoder_positions'
WARNING - griffe: vllm/model_executor/models/bart.py:1037: Failed to get 'name: description' pair from 'input_ids'
WARNING - griffe: vllm/model_executor/models/bart.py:1041: Failed to get 'name: description' pair from 'positions'
WARNING - griffe: vllm/model_executor/models/bart.py:914: Failed to get 'name: description' pair from 'hidden_states'
WARNING - griffe: vllm/model_executor/models/bart.py:1187: Failed to get 'name: description' pair from 'input_ids'
WARNING - griffe: vllm/model_executor/models/bart.py:1191: Failed to get 'name: description' pair from 'positions'
WARNING - griffe: vllm/model_executor/models/bart.py:1193: Failed to get 'name: description' pair from 'encoder_input_ids'

Signed-off-by: Zerohertz
---
 vllm/model_executor/models/bart.py | 104 +++++++++++------------
 1 file changed, 40 insertions(+), 64 deletions(-)

diff --git a/vllm/model_executor/models/bart.py b/vllm/model_executor/models/bart.py
index fd4d820a01e9..242530817c64 100644
--- a/vllm/model_executor/models/bart.py
+++ b/vllm/model_executor/models/bart.py
@@ -401,8 +401,7 @@ def __init__(
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         r"""
         Args:
-            hidden_states
-                torch.Tensor of *encoder* input embeddings.
+            hidden_states: torch.Tensor of *encoder* input embeddings.
         Returns:
             Encoder layer output torch.Tensor
         """
@@ -490,10 +489,8 @@ def forward(
     ) -> torch.Tensor:
         r"""
         Args:
-            decoder_hidden_states
-                torch.Tensor of *decoder* input embeddings.
-            encoder_hidden_states
-                torch.Tensor of *encoder* input embeddings.
+            decoder_hidden_states: torch.Tensor of *decoder* input embeddings.
+            encoder_hidden_states: torch.Tensor of *encoder* input embeddings.
         Returns:
             Decoder layer output torch.Tensor
         """
@@ -584,12 +581,10 @@ def forward(
     ) -> torch.Tensor:
         r"""
         Args:
-            input_ids
-                Indices of *encoder* input sequence tokens in the vocabulary.
-                Padding will be ignored by default should you
-                provide it.
-            positions
-                Positions of *encoder* input sequence tokens.
+            input_ids: Indices of *encoder* input sequence tokens in the
+                vocabulary.
+                Padding will be ignored by default should you provide it.
+            positions: Positions of *encoder* input sequence tokens.
         Returns:
             Decoder output torch.Tensor
         """
@@ -663,14 +658,11 @@ def forward(
     ) -> torch.Tensor:
         r"""
         Args:
-            decoder_input_ids
-                Indices of *decoder* input sequence tokens in the vocabulary.
-                Padding will be ignored by default should you
-                provide it.
-            decoder_positions
-                Positions of *decoder* input sequence tokens.
-            encoder_hidden_states:
-                Tensor of encoder output embeddings
+            decoder_input_ids: Indices of *decoder* input sequence tokens
+                in the vocabulary.
+                Padding will be ignored by default should you provide it.
+            decoder_positions: Positions of *decoder* input sequence tokens.
+            encoder_hidden_states: Tensor of encoder output embeddings.
         Returns:
             Decoder output torch.Tensor
         """
@@ -732,16 +724,13 @@ def forward(self, input_ids: torch.Tensor, positions: torch.Tensor,
                 encoder_positions: torch.Tensor) -> torch.Tensor:
         r"""
         Args:
-            input_ids
-                Indices of *decoder* input sequence tokens in the vocabulary.
-                Padding will be ignored by default should you
-                provide it.
-            positions
-                Positions of *decoder* input sequence tokens.
-            encoder_input_ids
-                Indices of *encoder* input sequence tokens in the vocabulary.
-            encoder_positions:
-                Positions of *encoder* input sequence tokens.
+            input_ids: Indices of *decoder* input sequence tokens
+                in the vocabulary.
+                Padding will be ignored by default should you provide it.
+            positions: Positions of *decoder* input sequence tokens.
+            encoder_input_ids: Indices of *encoder* input sequence tokens
+                in the vocabulary.
+            encoder_positions: Positions of *encoder* input sequence tokens.
         Returns:
             Model output torch.Tensor
         """
@@ -848,14 +837,10 @@ def forward(
     ) -> torch.Tensor:
         r"""
        Args:
-            input_ids
-                torch.Tensor of *decoder* input token ids.
-            positions
-                torch.Tensor of *decoder* position indices.
-            encoder_input_ids
-                torch.Tensor of *encoder* input token ids.
-            encoder_positions
-                torch.Tensor of *encoder* position indices
+            input_ids: torch.Tensor of *decoder* input token ids.
+            positions: torch.Tensor of *decoder* position indices.
+            encoder_input_ids: torch.Tensor of *encoder* input token ids.
+            encoder_positions: torch.Tensor of *encoder* position indices.
         Returns:
             Output torch.Tensor
         """
@@ -912,8 +897,7 @@ class MBartEncoderLayer(BartEncoderLayer):
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         r"""
         Args:
-            hidden_states
-                torch.Tensor of *encoder* input embeddings.
+            hidden_states: torch.Tensor of *encoder* input embeddings.
         Returns:
             Encoder layer output torch.Tensor
         """
@@ -1035,12 +1019,10 @@ def forward(
     ) -> torch.Tensor:
         r"""
         Args:
-            input_ids
-                Indices of *encoder* input sequence tokens in the vocabulary.
-                Padding will be ignored by default should you
-                provide it.
-            positions
-                Positions of *encoder* input sequence tokens.
+            input_ids: Indices of *encoder* input sequence tokens in the
+                vocabulary.
+                Padding will be ignored by default should you provide it.
+            positions: Positions of *encoder* input sequence tokens.
         Returns:
             Decoder output torch.Tensor
         """
@@ -1116,14 +1098,11 @@ def forward(
     ) -> torch.Tensor:
         r"""
         Args:
-            decoder_input_ids
-                Indices of *decoder* input sequence tokens in the vocabulary.
-                Padding will be ignored by default should you
-                provide it.
-            decoder_positions
-                Positions of *decoder* input sequence tokens.
-            encoder_hidden_states:
-                Tensor of encoder output embeddings
+            decoder_input_ids: Indices of *decoder* input sequence tokens
+                in the vocabulary.
+                Padding will be ignored by default should you provide it.
+            decoder_positions: Positions of *decoder* input sequence tokens.
+            encoder_hidden_states: Tensor of encoder output embeddings.
         Returns:
             Decoder output torch.Tensor
         """
@@ -1185,16 +1164,13 @@ def forward(self, input_ids: torch.Tensor, positions: torch.Tensor,
                 encoder_positions: torch.Tensor) -> torch.Tensor:
         r"""
         Args:
-            input_ids
-                Indices of *decoder* input sequence tokens in the vocabulary.
-                Padding will be ignored by default should you
-                provide it.
-            positions
-                Positions of *decoder* input sequence tokens.
-            encoder_input_ids
-                Indices of *encoder* input sequence tokens in the vocabulary.
-            encoder_positions:
-                Positions of *encoder* input sequence tokens.
+            input_ids: Indices of *decoder* input sequence tokens
+                in the vocabulary.
+                Padding will be ignored by default should you provide it.
+            positions: Positions of *decoder* input sequence tokens.
+            encoder_input_ids: Indices of *encoder* input sequence tokens
+                in the vocabulary.
+            encoder_positions: Positions of *encoder* input sequence tokens.
         Returns:
             Model output torch.Tensor
         """

From 310310c11d7644bc900ec52b196293719efd96f3 Mon Sep 17 00:00:00 2001
From: Zerohertz
Date: Fri, 12 Sep 2025 20:41:25 +0900
Subject: [PATCH 07/11] docs: docstring in blip2

WARNING - griffe: vllm/model_executor/models/blip2.py:681: No type or annotation for parameter 'pixel_values'
WARNING - griffe: vllm/model_executor/models/blip2.py:681: Parameter 'pixel_values' does not appear in the function signature

Signed-off-by: Zerohertz
---
 vllm/model_executor/models/blip2.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/vllm/model_executor/models/blip2.py b/vllm/model_executor/models/blip2.py
index ed98a3008c56..c1e7a7d498b1 100644
--- a/vllm/model_executor/models/blip2.py
+++ b/vllm/model_executor/models/blip2.py
@@ -678,7 +678,6 @@ def forward(
         Args:
             input_ids: Flattened (concatenated) input_ids corresponding to a
                 batch.
-            pixel_values: The pixels in each input image.
         Info:
             [Blip2ImageInputs][]

From d82683fc389e82e42dca365d533f5d98813996bb Mon Sep 17 00:00:00 2001
From: Zerohertz
Date: Fri, 12 Sep 2025 20:42:07 +0900
Subject: [PATCH 08/11] docs: docstring in donut

WARNING - griffe: vllm/model_executor/models/donut.py:353: Failed to get 'name: description' pair from 'input_ids'
WARNING - griffe: vllm/model_executor/models/donut.py:355: Failed to get 'name: description' pair from 'positions'
WARNING - griffe: vllm/model_executor/models/donut.py:357: Failed to get 'name: description' pair from 'encoder_input_ids'
WARNING - griffe: vllm/model_executor/models/donut.py:359: Failed to get 'name: description' pair from 'encoder_positions'
WARNING - griffe: vllm/model_executor/models/donut.py:81: Failed to get 'name: description' pair from 'input_ids'
WARNING - griffe: vllm/model_executor/models/donut.py:83: Failed to get 'name: description' pair from 'positions'

Signed-off-by: Zerohertz
---
 vllm/model_executor/models/donut.py | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/vllm/model_executor/models/donut.py b/vllm/model_executor/models/donut.py
index c00db52371b6..23f4c6a4f93f 100644
--- a/vllm/model_executor/models/donut.py
+++ b/vllm/model_executor/models/donut.py
@@ -79,10 +79,8 @@ def forward(
     ) -> torch.Tensor:
         r"""
         Args:
-            input_ids
-                torch.Tensor of *decoder* input token ids.
-            positions
-                torch.Tensor of *decoder* position indices.
+            input_ids: torch.Tensor of *decoder* input token ids.
+            positions: torch.Tensor of *decoder* position indices.
         Returns:
             Output torch.Tensor
         """
@@ -351,14 +349,10 @@ def forward(
     ) -> torch.Tensor:
         r"""
         Args:
-            input_ids
-                torch.Tensor of *decoder* input token ids.
-            positions
-                torch.Tensor of *decoder* position indices.
-            encoder_input_ids
-                torch.Tensor of *encoder* input token ids.
-            encoder_positions
-                torch.Tensor of *encoder* position indices
+            input_ids: torch.Tensor of *decoder* input token ids.
+            positions: torch.Tensor of *decoder* position indices.
+            encoder_input_ids: torch.Tensor of *encoder* input token ids.
+            encoder_positions: torch.Tensor of *encoder* position indices.
         Returns:
             Output torch.Tensor
         """

From 60067aef04700fbd38adeef18e11d0e56328940a Mon Sep 17 00:00:00 2001
From: Zerohertz
Date: Fri, 12 Sep 2025 20:43:28 +0900
Subject: [PATCH 09/11] docs: docstring in florence2

WARNING - griffe: vllm/model_executor/models/florence2.py:1070: Failed to get 'name: description' pair from 'input_ids'
WARNING - griffe: vllm/model_executor/models/florence2.py:1072: Failed to get 'name: description' pair from 'positions'
WARNING - griffe: vllm/model_executor/models/florence2.py:1074: Failed to get 'name: description' pair from 'encoder_input_ids'
WARNING - griffe: vllm/model_executor/models/florence2.py:1076: Failed to get 'name: description' pair from 'encoder_positions'
WARNING - griffe: vllm/model_executor/models/florence2.py:701: Failed to get 'name: description' pair from 'input_ids'
WARNING - griffe: vllm/model_executor/models/florence2.py:703: Failed to get 'name: description' pair from 'positions'
WARNING - griffe: vllm/model_executor/models/florence2.py:705: Failed to get 'name: description' pair from 'encoder_input_ids'
WARNING - griffe: vllm/model_executor/models/florence2.py:707: Failed to get 'name: description' pair from 'encoder_positions'
WARNING - griffe: vllm/model_executor/models/florence2.py:633: Failed to get 'name: description' pair from 'input_ids'
WARNING - griffe: vllm/model_executor/models/florence2.py:637: Failed to get 'name: description' pair from 'positions'
WARNING - griffe: vllm/model_executor/models/florence2.py:639: Failed to get 'name: description' pair from 'encoder_input_ids'

Signed-off-by: Zerohertz
---
 vllm/model_executor/models/florence2.py | 38 +++++++++----------------
 1 file changed, 14 insertions(+), 24 deletions(-)

diff --git a/vllm/model_executor/models/florence2.py b/vllm/model_executor/models/florence2.py
index d0881231fb1e..5e05e0c60f41 100644
--- a/vllm/model_executor/models/florence2.py
+++ b/vllm/model_executor/models/florence2.py
@@ -631,16 +631,14 @@ def forward(
     ) -> torch.Tensor:
         r"""
         Args:
-            input_ids
-                Indices of *decoder* input sequence tokens in the vocabulary.
+            input_ids: Indices of *decoder* input sequence tokens
+                in the vocabulary.

                 Padding will be ignored by default should you provide it.
-            positions
-                Positions of *decoder* input sequence tokens.
-            encoder_input_ids
-                Indices of *encoder* input sequence tokens in the vocabulary.
-            encoder_positions:
-                Positions of *encoder* input sequence tokens.
+            positions: Positions of *decoder* input sequence tokens.
+            encoder_input_ids: Indices of *encoder* input sequence tokens
+                in the vocabulary.
+            encoder_positions: Positions of *encoder* input sequence tokens.
         Returns:
             Model output torch.Tensor
         """
@@ -699,14 +697,10 @@ def forward(
     ) -> torch.Tensor:
         r"""
         Args:
-            input_ids
-                torch.Tensor of *decoder* input token ids.
-            positions
-                torch.Tensor of *decoder* position indices.
-            encoder_input_ids
-                torch.Tensor of *encoder* input token ids.
-            encoder_positions
-                torch.Tensor of *encoder* position indices
+            input_ids: torch.Tensor of *decoder* input token ids.
+            positions: torch.Tensor of *decoder* position indices.
+            encoder_input_ids: torch.Tensor of *encoder* input token ids.
+            encoder_positions: torch.Tensor of *encoder* position indices.
         Returns:
             Output torch.Tensor
         """
@@ -1068,14 +1062,10 @@ def forward(
     ) -> torch.Tensor:
         r"""
         Args:
-            input_ids
-                torch.Tensor of *decoder* input token ids.
-            positions
-                torch.Tensor of *decoder* position indices.
-            encoder_input_ids
-                torch.Tensor of *encoder* input token ids.
-            encoder_positions
-                torch.Tensor of *encoder* position indices
+            input_ids: torch.Tensor of *decoder* input token ids.
+            positions: torch.Tensor of *decoder* position indices.
+            encoder_input_ids: torch.Tensor of *encoder* input token ids.
+            encoder_positions: torch.Tensor of *encoder* position indices.
         Returns:
             Output torch.Tensor
         """

From 03ca43ffe7c1d92b5dac38564a8ef1a0413bc890 Mon Sep 17 00:00:00 2001
From: Zerohertz
Date: Fri, 12 Sep 2025 20:49:18 +0900
Subject: [PATCH 10/11] docs: docstring in glm4_1v

WARNING - griffe: vllm/model_executor/models/glm4_1v.py:1586: No type or annotation for parameter 'pixel_values'
WARNING - griffe: vllm/model_executor/models/glm4_1v.py:1586: Parameter 'pixel_values' does not appear in the function signature
WARNING - griffe: vllm/model_executor/models/glm4_1v.py:1588: No type or annotation for parameter 'image_grid_thw'
WARNING - griffe: vllm/model_executor/models/glm4_1v.py:1588: Parameter 'image_grid_thw' does not appear in the function signature
WARNING - griffe: vllm/model_executor/models/glm4_1v.py:1590: No type or annotation for parameter 'pixel_values_videos'
WARNING - griffe: vllm/model_executor/models/glm4_1v.py:1590: Parameter 'pixel_values_videos' does not appear in the function signature
WARNING - griffe: vllm/model_executor/models/glm4_1v.py:1592: No type or annotation for parameter 'video_grid_thw'
WARNING - griffe: vllm/model_executor/models/glm4_1v.py:1592: Parameter 'video_grid_thw' does not appear in the function signature
WARNING - griffe: vllm/model_executor/models/glm4_1v.py:1594: No type or annotation for parameter 'second_per_grid_ts'
WARNING - griffe: vllm/model_executor/models/glm4_1v.py:1594: Parameter 'second_per_grid_ts' does not appear in the function signature

Signed-off-by: Zerohertz
---
 vllm/model_executor/models/glm4_1v.py | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/vllm/model_executor/models/glm4_1v.py b/vllm/model_executor/models/glm4_1v.py
index 539381b61800..e47216590ce0 100644
--- a/vllm/model_executor/models/glm4_1v.py
+++ b/vllm/model_executor/models/glm4_1v.py
@@ -1583,17 +1583,10 @@ def forward(
                 **NOTE**: If mrope is enabled (default setting for GLM-4V
                 opensource models), the shape will be `(3, seq_len)`,
                 otherwise it will be `(seq_len,).
-            pixel_values: Pixel values to be fed to a model.
-                `None` if no images are passed.
-            image_grid_thw: Tensor `(n_images, 3)` of image 3D grid in LLM.
-                `None` if no images are passed.
-            pixel_values_videos: Pixel values of videos to be fed to a model.
-                `None` if no videos are passed.
-            video_grid_thw: Tensor `(n_videos, 3)` of video 3D grid in LLM.
-                `None` if no videos are passed.
-            second_per_grid_ts: Tensor `(num_videos)` of video time interval (
-                in seconds) for each grid along the temporal dimension in the
-                3D position IDs. `None` if no videos are passed.
+            intermediate_tensors: Optional intermediate tensors for pipeline
+                parallelism.
+            inputs_embeds: Optional pre-computed input embeddings.
+            **kwargs: Additional keyword arguments.
""" if intermediate_tensors is not None: inputs_embeds = None From 957e61b725866423a55e63570c4cdbc69c66ea65 Mon Sep 17 00:00:00 2001 From: Zerohertz Date: Fri, 12 Sep 2025 21:17:57 +0900 Subject: [PATCH 11/11] docs: explicit link on AriaConfig Signed-off-by: Zerohertz --- vllm/model_executor/models/aria.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/models/aria.py b/vllm/model_executor/models/aria.py index d1a9ca8b1908..db262447d7fa 100644 --- a/vllm/model_executor/models/aria.py +++ b/vllm/model_executor/models/aria.py @@ -143,7 +143,8 @@ class AriaProjector(nn.Module): projects ViT's outputs into MoE's inputs. Args: - config: AriaConfig containing projector configuration parameters. + config: [AriaConfig](https://huggingface.co/docs/transformers/main/model_doc/aria#transformers.AriaConfig) + containing projector configuration parameters. Outputs: A tensor with the shape of (batch_size, query_number, output_dim)