# Patch for dockerfile/Dockerfile.triton.trt_llm_backend.
#
# Summary of what the hunks below do:
#   * bump NVRTC_VER from 12.9.41-1 to 12.9.86-1 and add TENSORRTLLM_VER=1.0.0;
#   * reorder the "copy from pytorch image" version ARGs and move the two FROM
#     lines below the TENSORRTLLM_* ARGs;
#   * drop the ccache bootstrap, the per-stage ARG re-declarations and the
#     COPY --from=pytorch_image lines for the Python packages;
#   * rename stage tensorrt_llm_build to tensorrt_llm_code and remove the
#     build_wheel.py step and its build-time packages;
#   * in final_stage, install tensorrt_llm==${TENSORRTLLM_VER} with pip instead
#     of copying and installing a locally built wheel.
#
# NOTE(review): this patch was rebuilt from a copy whose newlines had been
# collapsed. Blank context lines were re-inserted so that every hunk matches
# the line counts in its @@ header, but the leading whitespace of continuation
# lines could not be recovered -- confirm it against the target file, or apply
# with whitespace tolerance (e.g. `git apply --ignore-whitespace`).
# NOTE(review): the added RUN uses the `env=` option of
# `--mount=type=secret`; the file starts at `ARG BASE_IMAGE` with no
# `# syntax=` directive, so confirm the builder's Dockerfile frontend
# supports that option.
diff --git a/dockerfile/Dockerfile.triton.trt_llm_backend b/dockerfile/Dockerfile.triton.trt_llm_backend
index a875222..e36da3e 100644
--- a/dockerfile/Dockerfile.triton.trt_llm_backend
+++ b/dockerfile/Dockerfile.triton.trt_llm_backend
@@ -1,77 +1,30 @@
 ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.06-py3-min
 ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:25.06-py3
-ARG NVRTC_VER=12.9.41-1
+ARG NVRTC_VER=12.9.86-1
 ARG TRT_VER=10.11.0.33
 ARG NCCL_VER=2.27.5-1+cuda12.9
 ARG RELEASE_URL_TRT_x86=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-${TRT_VER}.Linux.x86_64-gnu.cuda-12.9.tar.gz
 ARG RELEASE_URL_TRT_ARM=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-${TRT_VER}.Linux.aarch64-gnu.cuda-12.9.tar.gz
 
 # Versions of packages to copy from pytorch image
-ARG FLASH_ATTN_VER=2.7.4.post1
+ARG TORCH_VER=2.8.0a0+5228986c39.nv25.6
+ARG TORCHVISION_VER=0.22.0a0+95f10a4e
+ARG SETUPTOOLS_VER=78.1.1
+ARG PYTORCH_TRITON_VER=3.3.0+git96316ce52.nvinternal
 ARG JINJA2_VER=3.1.6
 ARG NETWORKX_VER=3.5
-ARG PACKAGING_VER=23.2
-ARG PYTORCH_TRITON_VER=3.3.0+git96316ce52.nvinternal
-ARG SETUPTOOLS_VER=78.1.1
 ARG SYMPY_VER=1.14.0
-ARG TORCH_VER=2.8.0a0+5228986c39.nv25.6
-ARG TORCHVISION_VER=0.22.0a0+95f10a4e
-
-FROM ${PYTORCH_IMAGE} AS pytorch_image
-FROM ${BASE_IMAGE} AS install_dependencies
+ARG PACKAGING_VER=23.2
+ARG FLASH_ATTN_VER=2.7.4.post1
 
 ARG TENSORRTLLM_REPO=https://github.com/NVIDIA/TensorRT-LLM.git
 ARG TENSORRTLLM_REPO_TAG=release/1.0
+ARG TENSORRTLLM_VER=1.0.0
 
-WORKDIR /workspace
-
-ARG CCACHE_REMOTE_STORAGE
-ARG CCACHE_URL
-ENV CCACHE_DEBUG=1
-
-RUN if [ -n "${CCACHE_REMOTE_STORAGE}" ] ; then \
-        curl -k -L ${CCACHE_URL} -o ccache.tar.gz ; \
-        tar -xzf ccache.tar.gz -C /usr/local --strip-components=1 ; \
-        rm ccache.tar.gz ; \
-        ccache --set-config=remote_only=true ; \
-        ccache --set-config=remote_storage=${CCACHE_REMOTE_STORAGE} ; \
-        ccache --set-config=log_file=/tmp/ccache.log ; \
-        ccache -p ; \
-    fi
+FROM ${PYTORCH_IMAGE} AS pytorch_image
+FROM ${BASE_IMAGE} AS install_dependencies
 
-ARG TORCH_VER
-ARG TORCHVISION_VER
-ARG SETUPTOOLS_VER
-ARG PYTORCH_TRITON_VER
-ARG JINJA2_VER
-ARG NETWORKX_VER
-ARG SYMPY_VER
-ARG PACKAGING_VER
-ARG FLASH_ATTN_VER
-# Copy PyTorch package from PyTorch image
-COPY --from=pytorch_image /usr/local/lib/lib* /usr/local/lib/
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torch /usr/local/lib/python3.12/dist-packages/torch
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info /usr/local/lib/python3.12/dist-packages/torch-${TORCH_VER}.dist-info
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torchgen /usr/local/lib/python3.12/dist-packages/torchgen
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torchvision /usr/local/lib/python3.12/dist-packages/torchvision
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info /usr/local/lib/python3.12/dist-packages/torchvision-${TORCHVISION_VER}.dist-info
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/torchvision.libs /usr/local/lib/python3.12/dist-packages/torchvision.libs
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/setuptools /usr/local/lib/python3.12/dist-packages/setuptools
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/setuptools-${SETUPTOOLS_VER}.dist-info /usr/local/lib/python3.12/dist-packages/setuptools-${SETUPTOOLS_VER}.dist-info
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/functorch /usr/local/lib/python3.12/dist-packages/functorch
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info /usr/local/lib/python3.12/dist-packages/pytorch_triton-${PYTORCH_TRITON_VER}.dist-info
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/triton /usr/local/lib/python3.12/dist-packages/triton
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/jinja2 /usr/local/lib/python3.12/dist-packages/jinja2
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/jinja2-${JINJA2_VER}.dist-info /usr/local/lib/python3.12/dist-packages/jinja2-${JINJA2_VER}.dist-info
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/networkx /usr/local/lib/python3.12/dist-packages/networkx
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info /usr/local/lib/python3.12/dist-packages/networkx-${NETWORKX_VER}.dist-info
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/sympy /usr/local/lib/python3.12/dist-packages/sympy
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info /usr/local/lib/python3.12/dist-packages/sympy-${SYMPY_VER}.dist-info
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/packaging /usr/local/lib/python3.12/dist-packages/packaging
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/packaging-${PACKAGING_VER}.dist-info /usr/local/lib/python3.12/dist-packages/packaging-${PACKAGING_VER}.dist-info
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/flash_attn /usr/local/lib/python3.12/dist-packages/flash_attn
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info /usr/local/lib/python3.12/dist-packages/flash_attn-${FLASH_ATTN_VER}.dist-info
-COPY --from=pytorch_image /usr/local/lib/python3.12/dist-packages/flash_attn_2_cuda.cpython-312-*-linux-gnu.so /usr/local/lib/python3.12/dist-packages/
+WORKDIR /workspace
 
 # Might not need to copy cusparseLt in the future once it's included in DLFW cuda container
 COPY --from=pytorch_image /usr/local/cuda/lib64/libcusparseLt* /usr/local/cuda/lib64/
@@ -134,7 +87,7 @@ RUN pip3 install /usr/local/tensorrt/python/tensorrt-*-cp$( python3 -c "import s
 ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib:${LD_LIBRARY_PATH}
 ENV TRT_ROOT=/usr/local/tensorrt
 
-FROM install_dependencies AS tensorrt_llm_build
+FROM install_dependencies AS tensorrt_llm_code
 
 WORKDIR /workspace
 
@@ -143,27 +96,6 @@ ARG TENSORRTLLM_REPO_TAG
 RUN git-lfs install \
     && git clone --single-branch --recurse-submodules --depth=1 -b ${TENSORRTLLM_REPO_TAG} ${TENSORRTLLM_REPO} tensorrt_llm
 
-RUN pip3 install --no-cache-dir polygraphy==0.49.9 mpi4py==3.1.5 cmake==4.0.3 ninja \
-    && apt-get update \
-    && apt-get install -y \
-        pkg-config \
-        libzmq3-dev \
-        libibverbs-dev \
-        libnuma-dev \
-        libucx-dev \
-        ibverbs-providers \
-        openmpi-bin \
-        libopenmpi-dev
-
-ENV CMAKE_POLICY_VERSION_MINIMUM=3.5
-
-RUN cd tensorrt_llm && \
-    if [ -n "${CCACHE_REMOTE_STORAGE}" ] ; then \
-        python3 scripts/build_wheel.py --trt_root="${TRT_ROOT}" --clean --use_ccache ; \
-    else \
-        python3 scripts/build_wheel.py --trt_root="${TRT_ROOT}" --clean ; \
-    fi
-
 # Final stage to build the TRT-LLM container
 FROM ${BASE_IMAGE} AS final_stage
 
@@ -250,17 +182,15 @@ ENV TRT_VERSION=$TRT_VER
 ENV LD_LIBRARY_PATH=/usr/local/tensorrt/lib:${LD_LIBRARY_PATH}
 ENV TRT_ROOT=/usr/local/tensorrt
 
-WORKDIR /tmp
-
 # Install TRT-LLM wheel after all the dependencies are installed
-COPY --from=tensorrt_llm_build /workspace/tensorrt_llm/build/tensorrt_llm*whl .
-RUN pip3 install --no-cache-dir tensorrt_llm*.whl \
-    && rm -f tensorrt_llm*.whl
+ARG TENSORRTLLM_VER
+RUN --mount=type=secret,id=pypi_extra_values,env=PYPI_EXTRA_VALUES \
+    pip install --no-cache-dir ${PYPI_EXTRA_VALUES} tensorrt_llm==${TENSORRTLLM_VER}
 
 # Copying the Tensorrt LLM scripts and applications
 WORKDIR /app
-COPY --from=tensorrt_llm_build /workspace/tensorrt_llm/triton_backend/scripts scripts
-COPY --from=tensorrt_llm_build /workspace/tensorrt_llm/triton_backend/all_models all_models
-COPY --from=tensorrt_llm_build /workspace/tensorrt_llm/triton_backend/inflight_batcher_llm/client client
-COPY --from=tensorrt_llm_build /workspace/tensorrt_llm/triton_backend/tools tools
-COPY --from=tensorrt_llm_build /workspace/tensorrt_llm/examples examples
+COPY --from=tensorrt_llm_code /workspace/tensorrt_llm/triton_backend/scripts scripts
+COPY --from=tensorrt_llm_code /workspace/tensorrt_llm/triton_backend/all_models all_models
+COPY --from=tensorrt_llm_code /workspace/tensorrt_llm/triton_backend/inflight_batcher_llm/client client
+COPY --from=tensorrt_llm_code /workspace/tensorrt_llm/triton_backend/tools tools
+COPY --from=tensorrt_llm_code /workspace/tensorrt_llm/examples examples