diff --git a/vllm/distributed/device_communicators/pynccl_wrapper.py b/vllm/distributed/device_communicators/pynccl_wrapper.py index c3e99e177e2d..2e9a4e024de4 100644 --- a/vllm/distributed/device_communicators/pynccl_wrapper.py +++ b/vllm/distributed/device_communicators/pynccl_wrapper.py @@ -30,7 +30,9 @@ import torch from torch.distributed import ReduceOp +from vllm import envs from vllm.logger import init_logger +from vllm.platforms import current_platform from vllm.utils import find_nccl_library logger = init_logger(__name__) @@ -275,10 +277,27 @@ def __init__(self, so_file: Optional[str] = None): if so_file not in NCCLLibrary.path_to_dict_mapping: _funcs: dict[str, Any] = {} for func in NCCLLibrary.exported_functions: - f = getattr(self.lib, func.name) - f.restype = func.restype - f.argtypes = func.argtypes - _funcs[func.name] = f + try: + f = getattr(self.lib, func.name) + f.restype = func.restype + f.argtypes = func.argtypes + _funcs[func.name] = f + except AttributeError: + if func.name in [ + "ncclCommWindowRegister", + "ncclCommWindowDeregister" + ]: + if envs.VLLM_USE_NCCL_SYMM_MEM: + logger.warning_once( + "The symbol %s is not found in the NCCL " + "library %s. To enable VLLM_USE_NCCL_SYMM_MEM " + " please update your NCCL version to >= " + "2.27.03.", func.name, so_file) + if current_platform.is_rocm(): + # Having an exception here on ROCm platform is + # not allowed during graph capturing + continue + raise NCCLLibrary.path_to_dict_mapping[so_file] = _funcs self._funcs = NCCLLibrary.path_to_dict_mapping[so_file]