From ff9c61a84cc93db896a64144e0c702e4a7881cc4 Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Wed, 14 Feb 2024 16:18:42 -0800 Subject: [PATCH] Defensively copy sampling_params If the SamplingParams object passed to LLMEngine.add_request() is mutated by the caller after add_request() returns, the mutation could affect the async sampling process for that request. Suggested by @Yard1 https://github.com/vllm-project/vllm/pull/2514#discussion_r1490106059 --- vllm/engine/llm_engine.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 86f0925209..29fa952665 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -464,6 +464,9 @@ def add_request( prompt_token_ids[:prefix_pos], lora_request.lora_int_id if lora_request else 0) if prefix_pos is not None else None + # Defensive copy of SamplingParams, which are used by the sampler + sampling_params = copy.deepcopy(sampling_params) + # Create the sequence group. seq_group = SequenceGroup(request_id, [seq], sampling_params, arrival_time, lora_request, prefix)