From 02606d9ce1a0393e751df018a1db7e1cf770fb01 Mon Sep 17 00:00:00 2001 From: Rui Qiao Date: Thu, 31 Jul 2025 14:44:37 -0700 Subject: [PATCH 1/3] [Misc] Getting and passing ray runtime_env to workers Signed-off-by: Rui Qiao --- vllm/config.py | 5 +++++ vllm/engine/arg_utils.py | 11 +++++++++++ vllm/executor/ray_utils.py | 7 +++++-- vllm/utils/__init__.py | 12 +----------- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index f038cdd64c67..47ce20a54e29 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -57,6 +57,7 @@ if TYPE_CHECKING: from _typeshed import DataclassInstance from ray.util.placement_group import PlacementGroup + from ray.runtime_env import RuntimeEnv from transformers.configuration_utils import PretrainedConfig import vllm.model_executor.layers.quantization as me_quant @@ -73,6 +74,7 @@ else: DataclassInstance = Any PlacementGroup = Any + RuntimeEnv = Any PretrainedConfig = Any ExecutorBase = Any QuantizationConfig = Any @@ -1950,6 +1952,9 @@ class ParallelConfig: ray_workers_use_nsight: bool = False """Whether to profile Ray workers with nsight, see https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler.""" + ray_runtime_env: Optional["RuntimeEnv"] = None + """Ray runtime environment to pass to distributed workers.""" + placement_group: Optional["PlacementGroup"] = None """ray distributed model workers placement group.""" diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index aec75f82631a..29ae4e1eb201 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -36,6 +36,7 @@ from vllm.logger import init_logger from vllm.platforms import CpuArchEnum, current_platform from vllm.plugins import load_general_plugins +from vllm.ray.lazy_utils import is_ray_initialized from vllm.reasoning import ReasoningParserManager from vllm.test_utils import MODEL_WEIGHTS_S3_BUCKET, MODELS_ON_S3 from vllm.transformers_utils.utils import check_gguf_file @@ -1060,6 +1061,15 @@ def create_engine_config( calculate_kv_scales=self.calculate_kv_scales, ) + ray_runtime_env = None + if is_ray_initialized(): + # Ray Serve LLM calls `create_engine_config` in the context + # of a Ray task, therefore we check is_ray_initialized() + # as opposed to is_in_ray_actor(). + import ray + ray_runtime_env = ray.get_runtime_context().runtime_env + logger.info(f"Using ray runtime env: {ray_runtime_env}") + # Get the current placement group if Ray is initialized and # we are in a Ray actor. If so, then the placement group will be # passed to spawned processes. @@ -1172,6 +1182,7 @@ def create_engine_config( max_parallel_loading_workers=self.max_parallel_loading_workers, disable_custom_all_reduce=self.disable_custom_all_reduce, ray_workers_use_nsight=self.ray_workers_use_nsight, + ray_runtime_env=ray_runtime_env, placement_group=placement_group, distributed_executor_backend=self.distributed_executor_backend, worker_cls=self.worker_cls, diff --git a/vllm/executor/ray_utils.py b/vllm/executor/ray_utils.py index 033ecc00853b..7abaffa54c08 100644 --- a/vllm/executor/ray_utils.py +++ b/vllm/executor/ray_utils.py @@ -295,9 +295,12 @@ def initialize_ray_cluster( logger.warning( "No existing RAY instance detected. " "A new instance will be launched with current node resources.") - ray.init(address=ray_address, num_gpus=parallel_config.world_size) + ray.init(address=ray_address, + num_gpus=parallel_config.world_size, + runtime_env=parallel_config.ray_runtime_env) else: - ray.init(address=ray_address) + ray.init(address=ray_address, + runtime_env=parallel_config.ray_runtime_env) device_str = current_platform.ray_device_key if not device_str: diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py index 5b9c3b6a50cd..d4687cba9be1 100644 --- a/vllm/utils/__init__.py +++ b/vllm/utils/__init__.py @@ -71,6 +71,7 @@ import vllm.envs as envs from vllm.logger import enable_trace_function_call, init_logger +from vllm.ray.utils import is_in_ray_actor if TYPE_CHECKING: from argparse import Namespace @@ -2864,17 +2865,6 @@ def zmq_socket_ctx( ctx.destroy(linger=linger) -def is_in_ray_actor(): - """Check if we are in a Ray actor.""" - - try: - import ray - return (ray.is_initialized() - and ray.get_runtime_context().get_actor_id() is not None) - except ImportError: - return False - - def _maybe_force_spawn(): """Check if we need to force the use of the `spawn` multiprocessing start method. From ac38dde8cda362f9bc2d902657c7a50dd93b8a06 Mon Sep 17 00:00:00 2001 From: Rui Qiao Date: Thu, 31 Jul 2025 15:03:15 -0700 Subject: [PATCH 2/3] up Signed-off-by: Rui Qiao --- tests/config/test_config_generation.py | 33 ++++++++++++++++++++++++++ vllm/config.py | 2 +- vllm/engine/arg_utils.py | 2 +- vllm/ray/lazy_utils.py | 22 +++++++++++++++++ vllm/utils/__init__.py | 2 +- 5 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 vllm/ray/lazy_utils.py diff --git a/tests/config/test_config_generation.py b/tests/config/test_config_generation.py index 024e81fccc5f..5c24c0d5637e 100644 --- a/tests/config/test_config_generation.py +++ b/tests/config/test_config_generation.py @@ -36,3 +36,36 @@ def create_config(): assert deep_compare(normal_config_dict, empty_config_dict), ( "Configs with normal CUDA_VISIBLE_DEVICES and CUDA_VISIBLE_DEVICES=\"\"" " should be equivalent") + + +def test_ray_runtime_env(monkeypatch: pytest.MonkeyPatch): + # In testing, this method needs to be nested inside as ray does not + # see the test module. + def create_config(): + engine_args = EngineArgs(model="deepseek-ai/DeepSeek-V2-Lite", + trust_remote_code=True) + return engine_args.create_engine_config() + + config = create_config() + parallel_config = config.parallel_config + assert parallel_config.ray_runtime_env is None + + import ray + ray.init() + + runtime_env = { + "env_vars": { + "TEST_ENV_VAR": "test_value", + }, + } + + config_ref = ray.remote(create_config).options( + runtime_env=runtime_env, ).remote() + + config = ray.get(config_ref) + parallel_config = config.parallel_config + assert parallel_config.ray_runtime_env is not None + assert parallel_config.ray_runtime_env.env_vars().get( + "TEST_ENV_VAR") == "test_value" + + ray.shutdown() diff --git a/vllm/config.py b/vllm/config.py index 47ce20a54e29..08bd59be7c12 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -56,8 +56,8 @@ if TYPE_CHECKING: from _typeshed import DataclassInstance - from ray.util.placement_group import PlacementGroup from ray.runtime_env import RuntimeEnv + from ray.util.placement_group import PlacementGroup from transformers.configuration_utils import PretrainedConfig import vllm.model_executor.layers.quantization as me_quant diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 29ae4e1eb201..ba40262e538a 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1068,7 +1068,7 @@ def create_engine_config( # as opposed to is_in_ray_actor(). import ray ray_runtime_env = ray.get_runtime_context().runtime_env - logger.info(f"Using ray runtime env: {ray_runtime_env}") + logger.info("Using ray runtime env: %s", ray_runtime_env) # Get the current placement group if Ray is initialized and # we are in a Ray actor. If so, then the placement group will be diff --git a/vllm/ray/lazy_utils.py b/vllm/ray/lazy_utils.py new file mode 100644 index 000000000000..bb3535579cfd --- /dev/null +++ b/vllm/ray/lazy_utils.py @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + + +def is_ray_initialized(): + """Check if Ray is initialized.""" + try: + import ray + return ray.is_initialized() + except ImportError: + return False + + +def is_in_ray_actor(): + """Check if we are in a Ray actor.""" + + try: + import ray + return (ray.is_initialized() + and ray.get_runtime_context().get_actor_id() is not None) + except ImportError: + return False diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py index d4687cba9be1..afacea1eac68 100644 --- a/vllm/utils/__init__.py +++ b/vllm/utils/__init__.py @@ -71,7 +71,7 @@ import vllm.envs as envs from vllm.logger import enable_trace_function_call, init_logger -from vllm.ray.utils import is_in_ray_actor +from vllm.ray.lazy_utils import is_in_ray_actor if TYPE_CHECKING: from argparse import Namespace From 177c8823dbdd9c2e0e34eeb2b21279cc7230db02 Mon Sep 17 00:00:00 2001 From: Rui Qiao Date: Fri, 1 Aug 2025 15:30:22 +0000 Subject: [PATCH 3/3] minor Signed-off-by: Rui Qiao --- tests/config/test_config_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/config/test_config_generation.py b/tests/config/test_config_generation.py index 5c24c0d5637e..e37b6b95941e 100644 --- a/tests/config/test_config_generation.py +++ b/tests/config/test_config_generation.py @@ -60,7 +60,7 @@ def create_config(): } config_ref = ray.remote(create_config).options( - runtime_env=runtime_env, ).remote() + runtime_env=runtime_env).remote() config = ray.get(config_ref) parallel_config = config.parallel_config