From 4446242af4ae72e133d8c3965bed0384364e5e07 Mon Sep 17 00:00:00 2001
From: Misha Chornyi
Date: Wed, 15 Oct 2025 07:38:43 -0700
Subject: [PATCH 1/3] Adding libtorch_nvshmem.so

---
Note (review): this series was recovered from a copy in which line breaks and
leading whitespace had been collapsed. The line structure below follows the
hunk headers and diffstats; indentation inside the hunks is reconstructed and
should be confirmed against the target tree (or the series applied with
`git apply --ignore-whitespace`). The author address is absent from the
recovered copy and has not been filled in.

 CMakeLists.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3afe90b..fc44f87 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -162,6 +162,11 @@ set(PT_LIBS
     "libjpeg.so.62"
 )
 
+if($ENV{CUDA_VERSION} VERSION_GREATER_EQUAL "13.0.2.006")
+  list(APPEND PT_LIBS "libtorch_nvshmem.so")
+  set(NVSHMEM_LIB "libtorch_nvshmem.so")
+endif()
+
 if (${TRITON_PYTORCH_ENABLE_TORCHVISION})
   set(PT_LIBS
     ${PT_LIBS}
@@ -238,6 +243,7 @@ if (${TRITON_PYTORCH_DOCKER_BUILD})
     COMMAND docker cp pytorch_backend_ptlib:${PY_INSTALL_PATH}/torch/lib/libtorch_cuda_linalg.so libtorch_cuda_linalg.so
     COMMAND docker cp pytorch_backend_ptlib:${PY_INSTALL_PATH}/torch/lib/libtorch_global_deps.so libtorch_global_deps.so
     COMMAND docker cp pytorch_backend_ptlib:${PY_INSTALL_PATH}/torch/lib/libcaffe2_nvrtc.so libcaffe2_nvrtc.so
+    COMMAND test -n "${NVSHMEM_LIB}" && docker cp pytorch_backend_ptlib:${PY_INSTALL_PATH}/torch/lib/libtorch_nvshmem.so libtorch_nvshmem.so
     COMMAND /bin/sh -c "if [ ${TRITON_PYTORCH_ENABLE_TORCHVISION} = 'ON' ]; then if [ ${RHEL_BUILD} = 'ON' ]; then docker cp -a -L pytorch_backend_ptlib:/usr/local/lib64/libtorchvision.so libtorchvision.so; else docker cp -a -L pytorch_backend_ptlib:/usr/local/${LIB_DIR}/libtorchvision.so.1 libtorchvision.so.1; fi; fi"
     COMMAND /bin/sh -c "if [ ${TRITON_PYTORCH_ENABLE_TORCHVISION} = 'ON' ]; then docker cp pytorch_backend_ptlib:/opt/pytorch/vision/torchvision/csrc include/torchvision/torchvision; fi"
     COMMAND /bin/sh -c "if [ ${TRITON_PYTORCH_ENABLE_TORCHTRT} = 'ON' ]; then docker cp pytorch_backend_ptlib:/usr/local/lib/python3.12/dist-packages/torch_tensorrt/lib/libtorchtrt_runtime.so libtorchtrt_runtime.so; fi"

From d3b9a0ba45bcb92e9c82318a3449cbded65e130c Mon Sep 17 00:00:00 2001
From: Misha Chornyi
Date: Wed, 15 Oct 2025 14:13:43 -0700
Subject: [PATCH 2/3] change: CPU only build doesn't have CUDA_VERSION environment variable. Using flag to control library inclusion.

---
Note (review): the generator expression added to PT_LIBS in this patch was
damaged in the recovered copy (only `$,libtorch_nvshmem.so,>` survived). It is
restored here as an $<IF:...> keyed on TRITON_PYTORCH_NVSHMEM, the option this
patch introduces; the exact condition is an assumption and should be confirmed
against the original commit. Hunk indentation is reconstructed as well.

 CMakeLists.txt | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index fc44f87..9046900 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -50,6 +50,7 @@ option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON)
 option(TRITON_ENABLE_NVTX "Include nvtx markers collection in backend." OFF)
 option(TRITON_PYTORCH_ENABLE_TORCHTRT "Enable TorchTRT support" OFF)
 option(TRITON_PYTORCH_ENABLE_TORCHVISION "Enable Torchvision support" ON)
+option(TRITON_PYTORCH_NVSHMEM "Enable NVSHMEM support" ON)
 
 set(TRITON_PYTORCH_DOCKER_IMAGE "" CACHE STRING "Docker image containing the PyTorch build required by backend.")
 set(TRITON_PYTORCH_INCLUDE_PATHS "" CACHE PATH "Paths to Torch includes")
@@ -160,13 +161,9 @@ set(PT_LIBS
     "libtorch_cuda_linalg.so"
     "libtorch_global_deps.so"
     "libjpeg.so.62"
+    $<IF:$<BOOL:${TRITON_PYTORCH_NVSHMEM}>,libtorch_nvshmem.so,>
 )
 
-if($ENV{CUDA_VERSION} VERSION_GREATER_EQUAL "13.0.2.006")
-  list(APPEND PT_LIBS "libtorch_nvshmem.so")
-  set(NVSHMEM_LIB "libtorch_nvshmem.so")
-endif()
-
 if (${TRITON_PYTORCH_ENABLE_TORCHVISION})
   set(PT_LIBS
     ${PT_LIBS}
@@ -243,7 +240,7 @@ if (${TRITON_PYTORCH_DOCKER_BUILD})
     COMMAND docker cp pytorch_backend_ptlib:${PY_INSTALL_PATH}/torch/lib/libtorch_cuda_linalg.so libtorch_cuda_linalg.so
     COMMAND docker cp pytorch_backend_ptlib:${PY_INSTALL_PATH}/torch/lib/libtorch_global_deps.so libtorch_global_deps.so
     COMMAND docker cp pytorch_backend_ptlib:${PY_INSTALL_PATH}/torch/lib/libcaffe2_nvrtc.so libcaffe2_nvrtc.so
-    COMMAND test -n "${NVSHMEM_LIB}" && docker cp pytorch_backend_ptlib:${PY_INSTALL_PATH}/torch/lib/libtorch_nvshmem.so libtorch_nvshmem.so
+    COMMAND /bin/sh -c "if [ ${TRITON_PYTORCH_NVSHMEM} = 'ON' ]; then docker cp pytorch_backend_ptlib:${PY_INSTALL_PATH}/torch/lib/libtorch_nvshmem.so libtorch_nvshmem.so; fi"
     COMMAND /bin/sh -c "if [ ${TRITON_PYTORCH_ENABLE_TORCHVISION} = 'ON' ]; then if [ ${RHEL_BUILD} = 'ON' ]; then docker cp -a -L pytorch_backend_ptlib:/usr/local/lib64/libtorchvision.so libtorchvision.so; else docker cp -a -L pytorch_backend_ptlib:/usr/local/${LIB_DIR}/libtorchvision.so.1 libtorchvision.so.1; fi; fi"
     COMMAND /bin/sh -c "if [ ${TRITON_PYTORCH_ENABLE_TORCHVISION} = 'ON' ]; then docker cp pytorch_backend_ptlib:/opt/pytorch/vision/torchvision/csrc include/torchvision/torchvision; fi"
     COMMAND /bin/sh -c "if [ ${TRITON_PYTORCH_ENABLE_TORCHTRT} = 'ON' ]; then docker cp pytorch_backend_ptlib:/usr/local/lib/python3.12/dist-packages/torch_tensorrt/lib/libtorchtrt_runtime.so libtorchtrt_runtime.so; fi"

From 2e677bb77c2d5ae0e03e96030f5eed801b35e12c Mon Sep 17 00:00:00 2001
From: Misha Chornyi
Date: Wed, 15 Oct 2025 17:55:19 -0700
Subject: [PATCH 3/3] Removing generation expression

---
Note (review): the removed generator-expression line below was damaged in the
recovered copy (only `$,libtorch_nvshmem.so,>` survived); its $<IF:...> form
is a reconstruction whose condition should be confirmed against the original
commit. Hunk indentation is reconstructed as well.

 CMakeLists.txt | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9046900..3ec2d55 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -161,9 +161,15 @@ set(PT_LIBS
     "libtorch_cuda_linalg.so"
     "libtorch_global_deps.so"
     "libjpeg.so.62"
-    $<IF:$<BOOL:${TRITON_PYTORCH_NVSHMEM}>,libtorch_nvshmem.so,>
 )
 
+if (${TRITON_PYTORCH_NVSHMEM})
+  set(PT_LIBS
+    ${PT_LIBS}
+    "libtorch_nvshmem.so"
+  )
+endif() # TRITON_PYTORCH_NVSHMEM
+
 if (${TRITON_PYTORCH_ENABLE_TORCHVISION})
   set(PT_LIBS
     ${PT_LIBS}