Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
hanzhi713 committed Dec 19, 2023
1 parent 2f49454 commit 16447e5
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion vllm/engine/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class EngineArgs:
quantization: Optional[str] = None
enforce_eager: bool = False
max_context_len_to_capture: int = 8192
- disable_fast_allreduce = False
+ disable_fast_allreduce: bool = False

def __post_init__(self):
if self.tokenizer is None:
Expand Down
2 changes: 1 addition & 1 deletion vllm/worker/model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ def capture_model(self, kv_caches: List[KVCache]) -> None:
context_lens = torch.ones(max_batch_size, dtype=torch.int32).cuda()
block_tables = torch.from_numpy(self.graph_block_tables).cuda()

- if not self.model_config.disable_fast_allreduce:
+ if not self.parallel_config.disable_fast_allreduce:
comm_op.init_fast_ar()
comm_op.begin_capture()
# NOTE: Capturing the largest batch size first may help reduce the
Expand Down

0 comments on commit 16447e5

Please sign in to comment.