.buildkite/nightly-benchmarks/nightly-descriptions.md (1 addition, 1 deletion)

@@ -8,7 +8,7 @@ This benchmark aims to:

 Latest results: [results link](https://blog.vllm.ai/2024/09/05/perf-update.html), scroll to the end.

-Latest reproduction guilde: [github issue link](https://github.com/vllm-project/vllm/issues/8176)
+Latest reproduction guide: [github issue link](https://github.com/vllm-project/vllm/issues/8176)

 ## Setup
vllm/model_executor/layers/quantization/moe_wna16.py (1 addition, 1 deletion)

@@ -190,7 +190,7 @@ def create_weights(self, layer: torch.nn.Module, num_experts: int,
         group_size = self.quant_config.group_size
         group_size_div_factor = 1

-        # make intermediate_size and hidden_size diviable by group_size
+        # make intermediate_size and hidden_size divisible by group_size
         # we reduce the group size to ensure that
         # and we would repeat the loaded_weight later
         while intermediate_size_per_partition % group_size or \
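The comment this hunk fixes describes a shrink-and-repeat trick: if the quantization group size does not divide both weight dimensions, it is reduced until it does, and the loaded weights are later repeated by the same factor. A minimal sketch of that idea, assuming the (unshown) loop body halves `group_size` each iteration:

```python
def reduce_group_size(intermediate_size_per_partition: int,
                      hidden_size: int,
                      group_size: int) -> tuple[int, int]:
    """Shrink group_size until it divides both dimensions.

    Returns the reduced group size and the division factor, which is
    later used to repeat the loaded weights so tensor shapes line up.
    """
    group_size_div_factor = 1
    while (intermediate_size_per_partition % group_size
           or hidden_size % group_size):
        group_size //= 2           # assumed halving step; not shown in the hunk
        group_size_div_factor *= 2
    return group_size, group_size_div_factor

# Example: reduce_group_size(1408, 2048, 256) -> (128, 2), since 1408 is
# not divisible by 256 but both 1408 and 2048 are divisible by 128.
```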
@@ -19,7 +19,7 @@ class MarlinWorkspace:

     def __init__(self, out_features, min_thread_n, max_parallel):
         assert (out_features % min_thread_n == 0), (
-            "out_features = {} is undivisible by min_thread_n = {}".format(
+            "out_features = {} is indivisible by min_thread_n = {}".format(
                 out_features, min_thread_n))

         max_workspace_size = ((out_features // min_thread_n) * max_parallel)
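The assertion guards the workspace-size computation on the last context line; a quick check with made-up numbers shows why the divisibility requirement matters:

```python
# Hypothetical values, only to illustrate the arithmetic.
out_features, min_thread_n, max_parallel = 4096, 64, 16

assert out_features % min_thread_n == 0        # 4096 = 64 * 64, so this holds
max_workspace_size = (out_features // min_thread_n) * max_parallel
print(max_workspace_size)                      # (4096 // 64) * 16 = 1024
```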
vllm/model_executor/layers/sampler.py (1 addition, 1 deletion)

@@ -649,7 +649,7 @@ def _sample_with_torch(
     else:
         sampled_token_ids_tensor = None

-    # Counterintiutively, having two loops here is actually faster.
+    # Counterintuitively, having two loops here is actually faster.
     # The first loop can run without waiting on GPU<->CPU sync.
    for sampling_type in SamplingType:
         sample_indices = categorized_sample_indices[sampling_type]
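The fixed comment points at a general CUDA scheduling pattern: enqueue all device work in a first loop, and only read results back in a second loop, so the one host-side sync happens after everything is in flight. A standalone sketch of that pattern (not vLLM's actual sampler code):

```python
import torch

def two_loop_sample(logit_chunks: list[torch.Tensor]) -> list[list[int]]:
    # Loop 1: enqueue GPU kernels asynchronously; no .cpu()/.item() calls
    # here, so the host never blocks waiting for the device.
    sampled: list[torch.Tensor] = []
    for logits in logit_chunks:
        probs = torch.softmax(logits, dim=-1)
        sampled.append(torch.multinomial(probs, num_samples=1))

    # Loop 2: copy results to the CPU; the host waits on the GPU only now,
    # after all of the kernels above have been launched.
    return [ids.flatten().tolist() for ids in sampled]
```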
vllm/model_executor/models/glm4_1v.py (1 addition, 1 deletion)

@@ -1524,7 +1524,7 @@ def get_multimodal_embeddings(
             return None

         # The result multimodal_embeddings is tuple of tensors, with each
-        # tensor correspoending to a multimodal data item (image or video).
+        # tensor corresponding to a multimodal data item (image or video).
         multimodal_embeddings: tuple[torch.Tensor, ...] = ()

         # NOTE: It is important to iterate over the keys in this dictionary
vllm/model_executor/models/interns1.py (1 addition, 1 deletion)

@@ -738,7 +738,7 @@ def get_multimodal_embeddings(self,
             return []

         # The result multimodal_embeddings is tuple of tensors, with each
-        # tensor correspoending to a multimodal data item (image or video).
+        # tensor corresponding to a multimodal data item (image or video).
         multimodal_embeddings: tuple[torch.Tensor, ...] = ()

         # NOTE: It is important to iterate over the keys in this dictionary
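The two hunks above fix the same comment in two models. A sketch of the pattern it describes, with hypothetical encoder names; the NOTE about iteration order relies on Python dicts preserving insertion order:

```python
from typing import Callable

import torch

def gather_multimodal_embeddings(
    encoders: dict[str, Callable[[torch.Tensor], torch.Tensor]],
    mm_input_by_modality: dict[str, torch.Tensor],
) -> tuple[torch.Tensor, ...]:
    # One tensor per multimodal data item (image or video), accumulated in
    # the dict's insertion order so embeddings match placeholder order.
    multimodal_embeddings: tuple[torch.Tensor, ...] = ()
    for modality, data in mm_input_by_modality.items():
        multimodal_embeddings += (encoders[modality](data),)
    return multimodal_embeddings
```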
vllm/model_executor/models/ultravox.py (1 addition, 1 deletion)

@@ -662,7 +662,7 @@ def pad_and_concat_to_dim3(
     max_len = max(f.shape[-1] for f in features)
     # Ensure all features have dim=3
     features = [f.view(-1, *f.shape[-2:]) for f in features]
-    # Pad and oncatenate:
+    # Pad and concatenate:
     # [[B1, 80, M1], [B2, 80, M2]] -> [B1+B2, 80, max(M1, M2)]
     features = [F.pad(f, (0, max_len - f.shape[-1])) for f in features]
     return torch.cat(features)
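This hunk happens to show the whole helper, so it can be exercised directly; a self-contained check of the shape contract stated in the comment (the test shapes are made up):

```python
import torch
import torch.nn.functional as F

def pad_and_concat_to_dim3(features: list[torch.Tensor]) -> torch.Tensor:
    max_len = max(f.shape[-1] for f in features)
    # Ensure all features have dim=3
    features = [f.view(-1, *f.shape[-2:]) for f in features]
    # Pad the last dim up to max_len, then concatenate along the batch dim:
    # [[B1, 80, M1], [B2, 80, M2]] -> [B1+B2, 80, max(M1, M2)]
    features = [F.pad(f, (0, max_len - f.shape[-1])) for f in features]
    return torch.cat(features)

# Two mel-spectrogram batches with different time lengths.
a, b = torch.randn(2, 80, 100), torch.randn(3, 80, 120)
assert pad_and_concat_to_dim3([a, b]).shape == (5, 80, 120)
```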