diff --git a/.buildkite/nightly-benchmarks/nightly-descriptions.md b/.buildkite/nightly-benchmarks/nightly-descriptions.md
index 37e2980eea97..2ef36089b6af 100644
--- a/.buildkite/nightly-benchmarks/nightly-descriptions.md
+++ b/.buildkite/nightly-benchmarks/nightly-descriptions.md
@@ -8,7 +8,7 @@ This benchmark aims to:
 
 Latest results: [results link](https://blog.vllm.ai/2024/09/05/perf-update.html), scroll to the end.
 
-Latest reproduction guilde: [github issue link](https://github.com/vllm-project/vllm/issues/8176)
+Latest reproduction guide: [github issue link](https://github.com/vllm-project/vllm/issues/8176)
 
 ## Setup
 
diff --git a/vllm/model_executor/layers/quantization/moe_wna16.py b/vllm/model_executor/layers/quantization/moe_wna16.py
index d6d7ec9b1580..c25b3dd6080d 100644
--- a/vllm/model_executor/layers/quantization/moe_wna16.py
+++ b/vllm/model_executor/layers/quantization/moe_wna16.py
@@ -190,7 +190,7 @@ def create_weights(self, layer: torch.nn.Module, num_experts: int,
         group_size = self.quant_config.group_size
         group_size_div_factor = 1
 
-        # make intermediate_size and hidden_size diviable by group_size
+        # make intermediate_size and hidden_size divisible by group_size
         # we reduce the group size to ensure that
         # and we would repeat the loaded_weight later
         while intermediate_size_per_partition % group_size or \
diff --git a/vllm/model_executor/layers/quantization/utils/marlin_utils_test.py b/vllm/model_executor/layers/quantization/utils/marlin_utils_test.py
index b2c228c24253..f5acd03cc662 100644
--- a/vllm/model_executor/layers/quantization/utils/marlin_utils_test.py
+++ b/vllm/model_executor/layers/quantization/utils/marlin_utils_test.py
@@ -19,7 +19,7 @@ class MarlinWorkspace:
 
     def __init__(self, out_features, min_thread_n, max_parallel):
         assert (out_features % min_thread_n == 0), (
-            "out_features = {} is undivisible by min_thread_n = {}".format(
+            "out_features = {} is indivisible by min_thread_n = {}".format(
                 out_features, min_thread_n))
 
         max_workspace_size = ((out_features // min_thread_n) * max_parallel)
diff --git a/vllm/model_executor/layers/sampler.py b/vllm/model_executor/layers/sampler.py
index 829dd82b0bd4..9d93cad2420a 100644
--- a/vllm/model_executor/layers/sampler.py
+++ b/vllm/model_executor/layers/sampler.py
@@ -649,7 +649,7 @@ def _sample_with_torch(
     else:
         sampled_token_ids_tensor = None
 
-    # Counterintiutively, having two loops here is actually faster.
+    # Counterintuitively, having two loops here is actually faster.
     # The first loop can run without waiting on GPU<->CPU sync.
     for sampling_type in SamplingType:
         sample_indices = categorized_sample_indices[sampling_type]
diff --git a/vllm/model_executor/models/glm4_1v.py b/vllm/model_executor/models/glm4_1v.py
index 22386a5e819a..13c1ff888702 100644
--- a/vllm/model_executor/models/glm4_1v.py
+++ b/vllm/model_executor/models/glm4_1v.py
@@ -1524,7 +1524,7 @@ def get_multimodal_embeddings(
             return None
 
         # The result multimodal_embeddings is tuple of tensors, with each
-        # tensor correspoending to a multimodal data item (image or video).
+        # tensor corresponding to a multimodal data item (image or video).
         multimodal_embeddings: tuple[torch.Tensor, ...] = ()
 
         # NOTE: It is important to iterate over the keys in this dictionary
diff --git a/vllm/model_executor/models/interns1.py b/vllm/model_executor/models/interns1.py
index d998b8a0ab4f..b59d1b88cf5c 100644
--- a/vllm/model_executor/models/interns1.py
+++ b/vllm/model_executor/models/interns1.py
@@ -738,7 +738,7 @@ def get_multimodal_embeddings(self,
             return []
 
         # The result multimodal_embeddings is tuple of tensors, with each
-        # tensor correspoending to a multimodal data item (image or video).
+        # tensor corresponding to a multimodal data item (image or video).
         multimodal_embeddings: tuple[torch.Tensor, ...] = ()
 
         # NOTE: It is important to iterate over the keys in this dictionary
diff --git a/vllm/model_executor/models/ultravox.py b/vllm/model_executor/models/ultravox.py
index ad911ebedf89..371ca817d5f9 100644
--- a/vllm/model_executor/models/ultravox.py
+++ b/vllm/model_executor/models/ultravox.py
@@ -662,7 +662,7 @@ def pad_and_concat_to_dim3(
     max_len = max(f.shape[-1] for f in features)
     # Ensure all features have dim=3
    features = [f.view(-1, *f.shape[-2:]) for f in features]
-    # Pad and oncatenate:
+    # Pad and concatenate:
     # [[B1, 80, M1], [B2, 80, M2]] -> [B1+B2, 80, max(M1, M2)]
     features = [F.pad(f, (0, max_len - f.shape[-1])) for f in features]
     return torch.cat(features)
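For quick reference outside the patch, below is a minimal, simplified sketch of the pad-and-concatenate step touched by the ultravox.py hunk above. It is illustrative only: the 80-channel feature dimension and the batch sizes in the usage example are assumptions, and the list-only signature is narrower than the actual vLLM helper.

# Simplified, standalone sketch of the pad-and-concatenate behaviour shown in
# the ultravox.py hunk above (illustration, not the vLLM implementation).
import torch
import torch.nn.functional as F


def pad_and_concat_to_dim3(features: list[torch.Tensor]) -> torch.Tensor:
    max_len = max(f.shape[-1] for f in features)
    # Ensure all features have dim=3 by flattening any leading batch dims.
    features = [f.view(-1, *f.shape[-2:]) for f in features]
    # Pad the last dim up to max_len, then concatenate along the batch dim:
    # [[B1, 80, M1], [B2, 80, M2]] -> [B1+B2, 80, max(M1, M2)]
    features = [F.pad(f, (0, max_len - f.shape[-1])) for f in features]
    return torch.cat(features)


# Usage example with two batches of 80-channel features of different lengths.
a = torch.randn(2, 80, 100)
b = torch.randn(3, 80, 120)
print(pad_and_concat_to_dim3([a, b]).shape)  # torch.Size([5, 80, 120])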