Merge branch 'main' into igpu
dtrawins committed Apr 9, 2024
2 parents 8022678 + 157ec72 commit 0a543b8
Showing 40 changed files with 893 additions and 257 deletions.
2 changes: 1 addition & 1 deletion Dockerfile.sdk
@@ -29,7 +29,7 @@
#

# Base image on the minimum Triton container
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.02-py3-min
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.03-py3-min

ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
ARG TRITON_COMMON_REPO_TAG=main
40 changes: 25 additions & 15 deletions Dockerfile.win10.min
@@ -65,18 +65,6 @@ FROM ${BASE_IMAGE} as build_base

SHELL ["cmd", "/S", "/C"]

-ARG CUDNN_VERSION
-ENV CUDNN_VERSION ${CUDNN_VERSION}
-COPY --from=dependency_base /cudnn /cudnn
-RUN setx PATH "c:\cudnn\bin;c:\cudnn\lib\x64;c:\cudnn\include;%PATH%"
-LABEL CUDNN_VERSION="${CUDNN_VERSION}"
-
-ARG TENSORRT_VERSION
-ENV TRT_VERSION ${TENSORRT_VERSION}
-COPY --from=dependency_base /TensorRT /TensorRT
-RUN setx PATH "c:\TensorRT\lib;%PATH%"
-LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"

RUN mkdir c:\tmp
WORKDIR /tmp

@@ -87,23 +75,31 @@ RUN choco install git docker unzip -y
#
# Installing python
#
-ARG PYTHON_VERSION=3.10.11
+ARG PYTHON_VERSION=3.8.10
ARG PYTHON_SOURCE=https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-amd64.exe
ADD ${PYTHON_SOURCE} python-${PYTHON_VERSION}-amd64.exe
RUN python-%PYTHON_VERSION%-amd64.exe /quiet InstallAllUsers=1 PrependPath=1 Include_doc=0 TargetDir="C:\python%PYTHON_VERSION%"
RUN mklink "C:\python%PYTHON_VERSION%\python3.exe" "C:\python%PYTHON_VERSION%\python.exe"
RUN pip install --upgrade wheel setuptools docker
-RUN pip install grpcio-tools
+RUN pip install grpcio-tools psutil

LABEL PYTHON_VERSION=${PYTHON_VERSION}

#
# Installing CMake
#
ARG CMAKE_VERSION=3.27.1
-RUN pip install cmake==%CMAKE_VERSION%
+ARG CMAKE_FILE=cmake-${CMAKE_VERSION}-windows-x86_64
+ARG CMAKE_SOURCE=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_FILE}.zip
+
+ADD ${CMAKE_SOURCE} ${CMAKE_FILE}.zip
+RUN unzip %CMAKE_FILE%.zip
+RUN move %CMAKE_FILE% "c:\CMake"
+RUN setx PATH "c:\CMake\bin;%PATH%"

ENV CMAKE_TOOLCHAIN_FILE /vcpkg/scripts/buildsystems/vcpkg.cmake
ENV VCPKG_TARGET_TRIPLET x64-windows

LABEL CMAKE_VERSION=${CMAKE_VERSION}

# Be aware that pip can interact badly with VS cmd shell so need to pip install before
@@ -190,6 +186,20 @@ RUN copy "%CUDA_INSTALL_ROOT_WP%\extras\visual_studio_integration\MSBuildExtensi

RUN setx PATH "%CUDA_INSTALL_ROOT_WP%\bin;%PATH%"

+ARG CUDNN_VERSION
+ENV CUDNN_VERSION ${CUDNN_VERSION}
+COPY --from=dependency_base /cudnn /cudnn
+RUN copy cudnn\bin\cudnn*.dll "%CUDA_INSTALL_ROOT_WP%\bin\."
+RUN copy cudnn\lib\x64\cudnn*.lib "%CUDA_INSTALL_ROOT_WP%\lib\x64\."
+RUN copy cudnn\include\cudnn*.h "%CUDA_INSTALL_ROOT_WP%\include\."
+LABEL CUDNN_VERSION="${CUDNN_VERSION}"
+
+ARG TENSORRT_VERSION
+ENV TRT_VERSION ${TENSORRT_VERSION}
+COPY --from=dependency_base /TensorRT /TensorRT
+RUN setx PATH "c:\TensorRT\lib;%PATH%"
+LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"

LABEL CUDA_VERSION="${CUDA_VERSION}"
# It is important that the entrypoint initialize VisualStudio
# environment otherwise the build will fail. Also set
21 changes: 17 additions & 4 deletions README.md
@@ -28,12 +28,25 @@

# Triton Inference Server

+📣 **Triton Meetup at the NVIDIA Headquarters on April 30th 3:00 - 6:30 pm**
+
+We are excited to announce that we will be hosting our Triton user meetup at
+the NVIDIA Headquarters on April 30th 3:00 - 6:30 pm. Join us for this
+exclusive event where you will learn about the newest Triton features, get a
+glimpse into the roadmap, and connect with fellow users and the NVIDIA Triton
+engineering and product teams. Seating is limited and registration confirmation
+is required to attend - please register [here](https://lu.ma/tl06fqc1) to join
+the meetup. We can’t wait to welcome you and share what’s next for the Triton
+Inference Server.
+
+---
+
[![License](https://img.shields.io/badge/License-BSD3-lightgrey.svg)](https://opensource.org/licenses/BSD-3-Clause)

> [!WARNING]
> ##### LATEST RELEASE
> You are currently on the `main` branch which tracks under-development progress towards the next release.
-> The current release is version [2.43.0](https://github.com/triton-inference-server/server/releases/latest) and corresponds to the 24.02 container release on NVIDIA GPU Cloud (NGC).
+> The current release is version [2.44.0](https://github.com/triton-inference-server/server/releases/latest) and corresponds to the 24.03 container release on NVIDIA GPU Cloud (NGC).
Triton Inference Server is an open source inference serving software that
streamlines AI inferencing. Triton enables teams to deploy any AI model from
@@ -91,16 +104,16 @@ Inference Server with the

```bash
# Step 1: Create the example model repository
-git clone -b r24.02 https://github.com/triton-inference-server/server.git
+git clone -b r24.03 https://github.com/triton-inference-server/server.git
cd server/docs/examples
./fetch_models.sh

# Step 2: Launch triton from the NGC Triton container
-docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.02-py3 tritonserver --model-repository=/models
+docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.03-py3 tritonserver --model-repository=/models

# Step 3: Sending an Inference Request
# In a separate console, launch the image_client example from the NGC Triton SDK container
-docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:24.02-py3-sdk
+docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:24.03-py3-sdk
/workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg

# Inference should return the following
