Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 20 additions & 90 deletions dockerfile/Dockerfile.triton.trt_llm_backend
Original file line number Diff line number Diff line change
@@ -1,77 +1,30 @@
# Build arguments declared ahead of the first FROM instruction: the two image
# references used by the FROM lines, the NVRTC / TensorRT / NCCL versions, and
# the TensorRT tarball URLs (both URLs interpolate ${TRT_VER}).
# An ARG declared before FROM is only visible to FROM lines; a stage that
# needs one of these values has to declare it again with a bare `ARG NAME`.
# NOTE(review): NVRTC_VER is assigned twice below (12.9.41-1, then 12.9.86-1),
# and PYTORCH_TRITON_VER, SETUPTOOLS_VER, TORCH_VER and TORCHVISION_VER are
# each repeated further down. This looks like removed and added diff lines
# rendered together -- confirm against the real Dockerfile.
ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.06-py3-min
ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:25.06-py3
ARG NVRTC_VER=12.9.41-1
ARG NVRTC_VER=12.9.86-1
ARG TRT_VER=10.11.0.33
ARG NCCL_VER=2.27.5-1+cuda12.9
ARG RELEASE_URL_TRT_x86=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-${TRT_VER}.Linux.x86_64-gnu.cuda-12.9.tar.gz
ARG RELEASE_URL_TRT_ARM=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-${TRT_VER}.Linux.aarch64-gnu.cuda-12.9.tar.gz

# Versions of packages to copy from pytorch image
# Each value is interpolated into a `<package>-<version>.dist-info` path by the
# COPY --from=pytorch_image instructions, so it must match the version that is
# actually installed in ${PYTORCH_IMAGE}.
ARG FLASH_ATTN_VER=2.7.4.post1
ARG TORCH_VER=2.8.0a0+5228986c39.nv25.6
ARG TORCHVISION_VER=0.22.0a0+95f10a4e
ARG SETUPTOOLS_VER=78.1.1
ARG PYTORCH_TRITON_VER=3.3.0+git96316ce52.nvinternal
ARG JINJA2_VER=3.1.6
ARG NETWORKX_VER=3.5
ARG PACKAGING_VER=23.2
ARG PYTORCH_TRITON_VER=3.3.0+git96316ce52.nvinternal
ARG SETUPTOOLS_VER=78.1.1
ARG SYMPY_VER=1.14.0
ARG TORCH_VER=2.8.0a0+5228986c39.nv25.6
ARG TORCHVISION_VER=0.22.0a0+95f10a4e

# Stage "pytorch_image": ${PYTORCH_IMAGE} given a name so that
# `COPY --from=pytorch_image` instructions can pull files out of it.
FROM ${PYTORCH_IMAGE} AS pytorch_image
# Stage "install_dependencies": built on ${BASE_IMAGE}.
FROM ${BASE_IMAGE} AS install_dependencies
# Declared with explicit values inside the stage, so these two do not depend
# on the pre-FROM declarations of the same names.
ARG PACKAGING_VER=23.2
ARG FLASH_ATTN_VER=2.7.4.post1

# TensorRT-LLM source repository, branch and release version.
# TENSORRTLLM_REPO and TENSORRTLLM_REPO_TAG are consumed by the `git clone`
# step; TENSORRTLLM_VER pins `pip install tensorrt_llm==...` in the final stage.
ARG TENSORRTLLM_REPO=https://github.com/NVIDIA/TensorRT-LLM.git
ARG TENSORRTLLM_REPO_TAG=release/1.0
ARG TENSORRTLLM_VER=1.0.0

# Relative paths in the instructions that follow resolve against /workspace.
WORKDIR /workspace

# Optional ccache bootstrap. Neither ARG has a default, so both are empty
# unless they are passed at build time.
ARG CCACHE_REMOTE_STORAGE
ARG CCACHE_URL
# Sets CCACHE_DEBUG=1 in the stage environment (ccache's debug switch).
ENV CCACHE_DEBUG=1

# Runs only when CCACHE_REMOTE_STORAGE is non-empty: download the archive at
# CCACHE_URL, unpack it into /usr/local with the top-level directory stripped,
# delete the archive, then configure ccache for remote-only storage with a log
# file at /tmp/ccache.log and print the resulting configuration.
# NOTE(review): `curl -k` skips TLS certificate verification, and the commands
# are joined with `;`, so a failed download or extract does not by itself
# abort this step -- confirm both are intentional.
RUN if [ -n "${CCACHE_REMOTE_STORAGE}" ] ; then \
curl -k -L ${CCACHE_URL} -o ccache.tar.gz ; \
tar -xzf ccache.tar.gz -C /usr/local --strip-components=1 ; \
rm ccache.tar.gz ; \
ccache --set-config=remote_only=true ; \
ccache --set-config=remote_storage=${CCACHE_REMOTE_STORAGE} ; \
ccache --set-config=log_file=/tmp/ccache.log ; \
ccache -p ; \
fi
# "pytorch_image" is the source of every COPY --from instruction below;
# "install_dependencies" is the stage that receives the files.
FROM ${PYTORCH_IMAGE} AS pytorch_image
FROM ${BASE_IMAGE} AS install_dependencies

# Re-declare the version ARGs inside the stage: values declared before FROM
# are not visible to the COPY instructions unless they are declared again.
ARG TORCH_VER
ARG TORCHVISION_VER
ARG SETUPTOOLS_VER
ARG PYTORCH_TRITON_VER
ARG JINJA2_VER
ARG NETWORKX_VER
ARG SYMPY_VER
ARG PACKAGING_VER
ARG FLASH_ATTN_VER
# Copy PyTorch package from PyTorch image
# Packages are copied as their module directory plus, where one is listed, the
# matching <name>-<version>.dist-info directory. All paths are hard-coded to
# /usr/local/lib/python3.12/dist-packages.
COPY --from=pytorch_image /usr/local/lib/lib* /usr/local/lib/
COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torch /usr/local/lib/python3.12/dist-packages/torch
COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info
COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torchgen /usr/local/lib/python3.12/dist-packages/torchgen
COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torchvision /usr/local/lib/python3.12/dist-packages/torchvision
COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info
COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torchvision.libs /usr/local/lib/python3.12/dist-packages/torchvision.libs
COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/setuptools /usr/local/lib/python3.12/dist-packages/setuptools
COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/setuptools-${SETUPTOOLS_VER}.dist-info /usr/local/lib/python3.12/dist-packages/setuptools-${SETUPTOOLS_VER}.dist-info
COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/functorch /usr/local/lib/python3.12/dist-packages/functorch
COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info
COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/triton /usr/local/lib/python3.12/dist-packages/triton
COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/jinja2 /usr/local/lib/python3.12/dist-packages/jinja2
COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/jinja2-${JINJA2_VER}.dist-info /usr/local/lib/python3.12/dist-packages/jinja2-${JINJA2_VER}.dist-info
COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/networkx /usr/local/lib/python3.12/dist-packages/networkx
COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info
COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/sympy /usr/local/lib/python3.12/dist-packages/sympy
COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info
COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/packaging /usr/local/lib/python3.12/dist-packages/packaging
COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/packaging-${PACKAGING_VER}.dist-info /usr/local/lib/python3.12/dist-packages/packaging-${PACKAGING_VER}.dist-info
COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/flash_attn /usr/local/lib/python3.12/dist-packages/flash_attn
COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
# The wildcard presumably stands in for the CPU architecture part of the
# extension module's file name -- TODO confirm.
COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/flash_attn_2_cuda.cpython-312-*-linux-gnu.so /usr/local/lib/python3.12/dist-packages/
# Relative paths in later instructions of this stage resolve against /workspace.
WORKDIR /workspace

# Might not need to copy cusparseLt in the future once it's included in DLFW cuda container
COPY --from=pytorch_image /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/
Expand Down Expand Up @@ -134,7 +87,7 @@ RUN pip3 install /usr/local/tensorrt/python/tensorrt-*-cp$( python3 -c "import s
# Prepend the TensorRT library directory to LD_LIBRARY_PATH and record the
# TensorRT install root in TRT_ROOT.
ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib:${LD_LIBRARY_PATH}
ENV TRT_ROOT=/usr/local/tensorrt

# New stage derived from install_dependencies.
# NOTE(review): two FROM lines follow each other with different stage names
# (tensorrt_llm_build, tensorrt_llm_code), and COPY --from instructions later
# on reference both names. This looks like the old and new line of a rename
# shown together -- confirm which name the real file uses.
FROM install_dependencies AS tensorrt_llm_build
FROM install_dependencies AS tensorrt_llm_code

WORKDIR /workspace

Expand All @@ -143,27 +96,6 @@ ARG TENSORRTLLM_REPO_TAG
# Enable Git LFS, then shallow-clone (depth 1, single branch) the
# ${TENSORRTLLM_REPO_TAG} branch of ${TENSORRTLLM_REPO}, including its
# submodules, into a `tensorrt_llm` directory under the current WORKDIR.
RUN git-lfs install \
&& git clone --single-branch --recurse-submodules --depth=1 -b ${TENSORRTLLM_REPO_TAG} ${TENSORRTLLM_REPO} tensorrt_llm

# Build tooling for this stage: Python packages via pip (polygraphy, mpi4py and
# cmake are pinned, ninja is not), followed by development libraries and
# OpenMPI via apt.
# NOTE(review): the apt step omits --no-install-recommends and does not remove
# /var/lib/apt/lists afterwards; both stay in this stage's layers.
RUN pip3 install --no-cache-dir polygraphy==0.49.9 mpi4py==3.1.5 cmake==4.0.3 ninja \
&& apt-get update \
&& apt-get install -y \
pkg-config \
libzmq3-dev \
libibverbs-dev \
libnuma-dev \
libucx-dev \
ibverbs-providers \
openmpi-bin \
libopenmpi-dev

# Presumably required because cmake 4.x (installed above) is used to configure
# projects that declare an older minimum CMake version -- TODO confirm.
ENV CMAKE_POLICY_VERSION_MINIMUM=3.5

# Run scripts/build_wheel.py from inside the cloned tensorrt_llm directory.
# Both branches pass --trt_root="${TRT_ROOT}" and --clean; --use_ccache is
# added only when CCACHE_REMOTE_STORAGE is non-empty.
RUN cd tensorrt_llm && \
if [ -n "${CCACHE_REMOTE_STORAGE}" ] ; then \
python3 scripts/build_wheel.py --trt_root="${TRT_ROOT}" --clean --use_ccache ; \
else \
python3 scripts/build_wheel.py --trt_root="${TRT_ROOT}" --clean ; \
fi

# Final stage to build the TRT-LLM container
# Starts from ${BASE_IMAGE} again rather than from an earlier stage, so it
# contains only what later instructions explicitly install or copy in.
FROM ${BASE_IMAGE} AS final_stage

Expand Down Expand Up @@ -250,17 +182,15 @@ ENV TRT_VERSION=$TRT_VER
# Prepend the TensorRT library directory to LD_LIBRARY_PATH and record the
# TensorRT install root in TRT_ROOT for the final stage.
ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib:${LD_LIBRARY_PATH}
ENV TRT_ROOT=/usr/local/tensorrt

# The wheel file is copied to, installed from and deleted in /tmp.
WORKDIR /tmp

# Install TRT-LLM wheel after all the dependencies are installed
# NOTE(review): two different install paths appear back to back below; they
# look like the removed and the added side of one change -- confirm which one
# the real file keeps.
# Path 1: copy the wheel built in the tensorrt_llm_build stage, install it,
# then delete the wheel file in the same RUN.
COPY --from=tensorrt_llm_build /workspace/tensorrt_llm/build/tensorrt_llm*whl .
RUN pip3 install --no-cache-dir tensorrt_llm*.whl \
&& rm -f tensorrt_llm*.whl
# Path 2: install the pinned release tensorrt_llm==${TENSORRTLLM_VER} with pip.
# PYPI_EXTRA_VALUES comes from the BuildKit secret `pypi_extra_values`, which
# is exposed as an environment variable for this RUN; it is expanded unquoted,
# so it may carry several extra pip arguments.
ARG TENSORRTLLM_VER
RUN --mount=type=secret,id=pypi_extra_values,env=PYPI_EXTRA_VALUES \
pip install --no-cache-dir ${PYPI_EXTRA_VALUES} tensorrt_llm==${TENSORRTLLM_VER}

# Copying the Tensorrt LLM scripts and applications
# The relative destinations below (scripts, all_models, client, tools,
# examples) resolve against /app.
WORKDIR /app
# NOTE(review): the same five source directories are copied twice to the same
# destinations, first from stage tensorrt_llm_build and then from stage
# tensorrt_llm_code. This looks like the old and new side of a stage rename
# shown together -- confirm which set the real file keeps.
COPY --from=tensorrt_llm_build /workspace/tensorrt_llm/triton_backend/scripts scripts
COPY --from=tensorrt_llm_build /workspace/tensorrt_llm/triton_backend/all_models all_models
COPY --from=tensorrt_llm_build /workspace/tensorrt_llm/triton_backend/inflight_batcher_llm/client client
COPY --from=tensorrt_llm_build /workspace/tensorrt_llm/triton_backend/tools tools
COPY --from=tensorrt_llm_build /workspace/tensorrt_llm/examples examples
COPY --from=tensorrt_llm_code /workspace/tensorrt_llm/triton_backend/scripts scripts
COPY --from=tensorrt_llm_code /workspace/tensorrt_llm/triton_backend/all_models all_models
COPY --from=tensorrt_llm_code /workspace/tensorrt_llm/triton_backend/inflight_batcher_llm/client client
COPY --from=tensorrt_llm_code /workspace/tensorrt_llm/triton_backend/tools tools
COPY --from=tensorrt_llm_code /workspace/tensorrt_llm/examples examples