From 5386c1582a80804b8f5da594a35675d61c0d12ad Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Wed, 12 Nov 2025 14:35:15 -0800 Subject: [PATCH 1/8] Change target commit. --- tensorrt_llm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorrt_llm b/tensorrt_llm index 31116825..45b36cc0 160000 --- a/tensorrt_llm +++ b/tensorrt_llm @@ -1 +1 @@ -Subproject commit 31116825b39f4e6a6a1e127001f5204b73d1dc32 +Subproject commit 45b36cc0695540206ca8f42021692f309cfc1d28 From 852ea5c42c0909606542a4a50eb3cf7b9df9528d Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Wed, 12 Nov 2025 21:03:22 -0800 Subject: [PATCH 2/8] Update Dockerfile --- dockerfile/Dockerfile.triton.trt_llm_backend | 30 +++++++++----------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/dockerfile/Dockerfile.triton.trt_llm_backend b/dockerfile/Dockerfile.triton.trt_llm_backend index e36da3ec..b9f5758e 100644 --- a/dockerfile/Dockerfile.triton.trt_llm_backend +++ b/dockerfile/Dockerfile.triton.trt_llm_backend @@ -1,25 +1,24 @@ -ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.06-py3-min -ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:25.06-py3 -ARG NVRTC_VER=12.9.86-1 -ARG TRT_VER=10.11.0.33 -ARG NCCL_VER=2.27.5-1+cuda12.9 -ARG RELEASE_URL_TRT_x86=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-${TRT_VER}.Linux.x86_64-gnu.cuda-12.9.tar.gz -ARG RELEASE_URL_TRT_ARM=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-${TRT_VER}.Linux.aarch64-gnu.cuda-12.9.tar.gz +ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.08-py3-min +ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:25.08-py3 +ARG NVRTC_VER=13.0.48-1 +ARG TRT_VER=10.13.2.6 +ARG NCCL_VER=2.27.7-1+cuda13.0 +ARG RELEASE_URL_TRT_x86=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-${TRT_VER}.Linux.x86_64-gnu.cuda-13.0.tar.gz +ARG 
RELEASE_URL_TRT_ARM=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-${TRT_VER}.Linux.aarch64-gnu.cuda-13.0.tar.gz # Versions of packages to copy from pytorch image -ARG TORCH_VER=2.8.0a0+5228986c39.nv25.6 -ARG TORCHVISION_VER=0.22.0a0+95f10a4e -ARG SETUPTOOLS_VER=78.1.1 -ARG PYTORCH_TRITON_VER=3.3.0+git96316ce52.nvinternal -ARG JINJA2_VER=3.1.6 +ARG TORCH_VER=2.8.0a0+34c6371d24.nv25.8 +ARG TORCHVISION_VER=0.23.0a0+428a54c9 +ARG SETUPTOOLS_VER=79.0.1 +ARG PYTORCH_TRITON_VER=3.3.1+gitc8757738 ARG NETWORKX_VER=3.5 ARG SYMPY_VER=1.14.0 ARG PACKAGING_VER=23.2 ARG FLASH_ATTN_VER=2.7.4.post1 ARG TENSORRTLLM_REPO=https://github.com/NVIDIA/TensorRT-LLM.git -ARG TENSORRTLLM_REPO_TAG=release/1.0 -ARG TENSORRTLLM_VER=1.0.0 +ARG TENSORRTLLM_REPO_TAG=v1.2.0rc0 +ARG TENSORRTLLM_VER=1.2.0rc0 FROM ${PYTORCH_IMAGE} AS pytorch_image FROM ${BASE_IMAGE} AS install_dependencies @@ -103,7 +102,6 @@ ARG TORCH_VER ARG TORCHVISION_VER ARG SETUPTOOLS_VER ARG PYTORCH_TRITON_VER -ARG JINJA2_VER ARG NETWORKX_VER ARG SYMPY_VER ARG PACKAGING_VER @@ -121,8 +119,6 @@ COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/setuptools-${S COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/functorch /usr/local/lib/python3.12/dist-packages/functorch COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/triton /usr/local/lib/python3.12/dist-packages/triton -COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/jinja2 /usr/local/lib/python3.12/dist-packages/jinja2 -COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/jinja2-${JINJA2_VER}.dist-info /usr/local/lib/python3.12/dist-packages/jinja2-${JINJA2_VER}.dist-info COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/networkx 
/usr/local/lib/python3.12/dist-packages/networkx COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/sympy /usr/local/lib/python3.12/dist-packages/sympy From 02215e80ec340c9f8960a9cf00dea3388da0a8ad Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Mon, 17 Nov 2025 11:13:57 -0800 Subject: [PATCH 3/8] git: Update submodule. --- tensorrt_llm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorrt_llm b/tensorrt_llm index 45b36cc0..4de31bec 160000 --- a/tensorrt_llm +++ b/tensorrt_llm @@ -1 +1 @@ -Subproject commit 45b36cc0695540206ca8f42021692f309cfc1d28 +Subproject commit 4de31bece27b74c61fc39529511e6885b5040870 From 169887628673cad28cf6e04319c05e1727e14c76 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Mon, 17 Nov 2025 11:15:56 -0800 Subject: [PATCH 4/8] docker: Update base container image --- dockerfile/Dockerfile.triton.trt_llm_backend | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dockerfile/Dockerfile.triton.trt_llm_backend b/dockerfile/Dockerfile.triton.trt_llm_backend index b9f5758e..ed861fba 100644 --- a/dockerfile/Dockerfile.triton.trt_llm_backend +++ b/dockerfile/Dockerfile.triton.trt_llm_backend @@ -1,5 +1,5 @@ -ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.08-py3-min -ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:25.08-py3 +ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.10-py3-min +ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:25.10-py3 ARG NVRTC_VER=13.0.48-1 ARG TRT_VER=10.13.2.6 ARG NCCL_VER=2.27.7-1+cuda13.0 From d318726eee8ad733474f79613028e62da721d7a8 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Mon, 17 Nov 2025 12:52:33 -0800 Subject: [PATCH 5/8] Update dockerfile versions --- dockerfile/Dockerfile.triton.trt_llm_backend | 22 ++++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git 
a/dockerfile/Dockerfile.triton.trt_llm_backend b/dockerfile/Dockerfile.triton.trt_llm_backend index ed861fba..929fd384 100644 --- a/dockerfile/Dockerfile.triton.trt_llm_backend +++ b/dockerfile/Dockerfile.triton.trt_llm_backend @@ -1,24 +1,24 @@ ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.10-py3-min ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:25.10-py3 ARG NVRTC_VER=13.0.48-1 -ARG TRT_VER=10.13.2.6 +ARG TRT_VER=10.13.3.9 ARG NCCL_VER=2.27.7-1+cuda13.0 -ARG RELEASE_URL_TRT_x86=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-${TRT_VER}.Linux.x86_64-gnu.cuda-13.0.tar.gz -ARG RELEASE_URL_TRT_ARM=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.2/tars/TensorRT-${TRT_VER}.Linux.aarch64-gnu.cuda-13.0.tar.gz +ARG RELEASE_URL_TRT_x86=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.3/tars/TensorRT-${TRT_VER}.Linux.x86_64-gnu.cuda-13.0.tar.gz +ARG RELEASE_URL_TRT_ARM=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.13.3/tars/TensorRT-${TRT_VER}.Linux.aarch64-gnu.cuda-13.0.tar.gz # Versions of packages to copy from pytorch image -ARG TORCH_VER=2.8.0a0+34c6371d24.nv25.8 -ARG TORCHVISION_VER=0.23.0a0+428a54c9 -ARG SETUPTOOLS_VER=79.0.1 -ARG PYTORCH_TRITON_VER=3.3.1+gitc8757738 +ARG FLASH_ATTN_VER=2.7.4.post1+25.10 ARG NETWORKX_VER=3.5 +ARG PACKAGING_VER=25.0 +ARG PYTORCH_TRITON_VER=3.4.0+gitc817b9b6 +ARG SETUPTOOLS_VER=79.0.1 ARG SYMPY_VER=1.14.0 -ARG PACKAGING_VER=23.2 -ARG FLASH_ATTN_VER=2.7.4.post1 +ARG TORCH_VER=2.9.0a0+145a3a7bda.nv25.10 +ARG TORCHVISION_VER=0.24.0a0+094e7af5 ARG TENSORRTLLM_REPO=https://github.com/NVIDIA/TensorRT-LLM.git -ARG TENSORRTLLM_REPO_TAG=v1.2.0rc0 -ARG TENSORRTLLM_VER=1.2.0rc0 +ARG TENSORRTLLM_REPO_TAG=v1.2.0rc2 +ARG TENSORRTLLM_VER=1.2.0rc2 FROM ${PYTORCH_IMAGE} AS pytorch_image FROM ${BASE_IMAGE} AS install_dependencies From a2e32bc1ff457794b43fd1e326d17823284ac987 Mon Sep 17 00:00:00 2001 From: Misha Chornyi 
Date: Mon, 17 Nov 2025 15:24:14 -0800 Subject: [PATCH 6/8] Removing line as un-used --- dockerfile/Dockerfile.triton.trt_llm_backend | 6 ------ 1 file changed, 6 deletions(-) diff --git a/dockerfile/Dockerfile.triton.trt_llm_backend b/dockerfile/Dockerfile.triton.trt_llm_backend index 929fd384..2f68fcfe 100644 --- a/dockerfile/Dockerfile.triton.trt_llm_backend +++ b/dockerfile/Dockerfile.triton.trt_llm_backend @@ -25,9 +25,6 @@ FROM ${BASE_IMAGE} AS install_dependencies WORKDIR /workspace -# Might not need to copy cusparseLt in the future once it's included in DLFW cuda container -COPY --from=pytorch_image /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/ - ENV PIP_BREAK_SYSTEM_PACKAGES=1 RUN apt-get update -q=2 \ && apt-get install -y --no-install-recommends \ @@ -129,9 +126,6 @@ COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/flash_attn /us COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/flash_attn_2_cuda.cpython-312-*-linux-gnu.so /usr/local/lib/python3.12/dist-packages/ -# Might not need to copy cusparseLt in the future once it's included in DLFW cuda container -COPY --from=pytorch_image /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/ - ARG NVRTC_VER ARG NCCL_VER ENV CUDA_VER=$CUDA_VERSION \ From 39653de9a72b001dcda4fb2e469204ef8a5f7290 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Mon, 17 Nov 2025 21:58:14 -0800 Subject: [PATCH 7/8] Update submodule commit reference. 
--- tensorrt_llm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorrt_llm b/tensorrt_llm index 4de31bec..93392cef 160000 --- a/tensorrt_llm +++ b/tensorrt_llm @@ -1 +1 @@ -Subproject commit 4de31bece27b74c61fc39529511e6885b5040870 +Subproject commit 93392cefd25a3057383b9736e4bc25806aee7bfa From dd72b6be01f2f5ef9b8b3868aed832beb09611c8 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Mon, 17 Nov 2025 21:59:49 -0800 Subject: [PATCH 8/8] Update Dockerfile --- dockerfile/Dockerfile.triton.trt_llm_backend | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dockerfile/Dockerfile.triton.trt_llm_backend b/dockerfile/Dockerfile.triton.trt_llm_backend index 2f68fcfe..fe402b73 100644 --- a/dockerfile/Dockerfile.triton.trt_llm_backend +++ b/dockerfile/Dockerfile.triton.trt_llm_backend @@ -17,7 +17,7 @@ ARG TORCH_VER=2.9.0a0+145a3a7bda.nv25.10 ARG TORCHVISION_VER=0.24.0a0+094e7af5 ARG TENSORRTLLM_REPO=https://github.com/NVIDIA/TensorRT-LLM.git -ARG TENSORRTLLM_REPO_TAG=v1.2.0rc2 +ARG TENSORRTLLM_REPO_TAG=release/1.0 ARG TENSORRTLLM_VER=1.2.0rc2 FROM ${PYTORCH_IMAGE} AS pytorch_image