Integer multi GPU full benchmarks #20

Workflow file for this run

.github/workflows/integer_multi_gpu_full_benchmark.yml at f937524

	# Run all integer benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
	name: Integer multi GPU full benchmarks

	on:
	  # Allows the workflow to be started manually.
	  workflow_dispatch:
	  schedule:
	    # Weekly benchmarks are triggered every Saturday at 01:00 (GitHub evaluates cron in UTC).
	    - cron: '0 1 * * 6'

	# Workflow-level environment, shared by every job in this file.
	env:
	  CARGO_TERM_COLOR: always
	  # Parsed results file, named after the commit being benchmarked.
	  RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
	  # Link back to this run, embedded in the Slack messages.
	  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
	  RUST_BACKTRACE: "full"
	  RUST_MIN_STACK: "8388608"
	  # Settings for the Slack notification steps (rtCamp/action-slack-notify).
	  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
	  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
	  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
	  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}

	jobs:
	  # Starts the benchmark instance through the Slab runner action and exposes
	  # the runner label it returns, so the benchmark job can target it.
	  setup-instance:
	    name: Setup instance (cuda-integer-full-multi-gpu-benchmarks)
	    runs-on: ubuntu-latest
	    # Manual runs are always allowed; scheduled runs only on the upstream repository.
	    if: github.event_name != 'schedule' ||
	      (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
	    outputs:
	      runner-name: ${{ steps.start-instance.outputs.label }}
	    steps:
	      - name: Start instance
	        id: start-instance
	        uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
	        with:
	          mode: start
	          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
	          slab-url: ${{ secrets.SLAB_BASE_URL }}
	          job-secret: ${{ secrets.JOB_SECRET }}
	          backend: hyperstack
	          profile: multi-h100-nvlink

	  # Runs every (command, op_flavor) combination, one at a time, on the
	  # instance started by setup-instance, then parses and publishes the results.
	  cuda-integer-full-multi-gpu-benchmarks:
	    name: Execute multi GPU integer benchmarks for all operations flavor
	    needs: setup-instance
	    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
	    timeout-minutes: 1440  # 24 hours
	    # NOTE(review): with job-level continue-on-error, a failure here may be
	    # reported as success to dependent jobs; confirm slack-notify still fires.
	    continue-on-error: true
	    strategy:
	      fail-fast: false
	      # All combinations share the single instance, so run them sequentially.
	      max-parallel: 1
	      matrix:
	        command: [integer, integer_multi_bit]
	        op_flavor: [default, unchecked]
	        # explicit include-based build matrix, of known valid options
	        include:
	          - os: ubuntu-22.04
	            cuda: "12.2"
	            gcc: 11
	    env:
	      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
	      CMAKE_VERSION: 3.29.6
	    steps:
	      # Mandatory on hyperstack since a bootable volume is not re-usable yet.
	      - name: Install dependencies
	        run: |
	          sudo apt update
	          sudo apt install -y checkinstall zlib1g-dev libssl-dev
	          wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
	          tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
	          cd cmake-${{ env.CMAKE_VERSION }}
	          ./bootstrap
	          make -j"$(nproc)"
	          sudo make install

	      # Full history (fetch-depth: 0) is needed for `git describe --tags` below.
	      - name: Checkout tfhe-rs repo with tags
	        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
	        with:
	          fetch-depth: 0

	      # Exports BENCH_DATE, COMMIT_DATE and COMMIT_HASH for the "Parse results" step.
	      - name: Get benchmark details
	        run: |
	          {
	            echo "BENCH_DATE=$(date --iso-8601=seconds)";
	            echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
	            echo "COMMIT_HASH=$(git describe --tags --dirty)";
	          } >> "${GITHUB_ENV}"

	      - name: Set up home
	        # "Install rust" step require root user to have a HOME directory which is not set.
	        run: |
	          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

	      - name: Install rust
	        uses: dtolnay/rust-toolchain@21dc36fb71dd22e3317045c0c31a3f4249868b17
	        with:
	          toolchain: nightly

	      # Makes the CUDA toolchain selected by the matrix visible to later steps.
	      - name: Export CUDA variables
	        if: ${{ !cancelled() }}
	        run: |
	          {
	            echo "CUDA_PATH=$CUDA_PATH";
	            echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
	            echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
	          } >> "${GITHUB_ENV}"
	          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"

	      # Specify the correct host compilers
	      - name: Export gcc and g++ variables
	        if: ${{ !cancelled() }}
	        run: |
	          {
	            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
	            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
	            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
	          } >> "${GITHUB_ENV}"

	      # Checked out into ./slab; its hmac_calculator.sh script signs the results below.
	      - name: Checkout Slab repo
	        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
	        with:
	          repository: zama-ai/slab
	          path: slab
	          token: ${{ secrets.FHE_ACTIONS_TOKEN }}

	      - name: Check device is detected
	        if: ${{ !cancelled() }}
	        run: nvidia-smi

	      - name: Run benchmarks with AVX512
	        run: |
	          make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}_gpu

	      - name: Parse results
	        env:
	          # Passed through the environment so the ref name is never expanded
	          # into the script text itself.
	          REF_NAME: ${{ github.ref_name }}
	        run: |
	          python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
	            --database tfhe_rs \
	            --hardware "n3-H100x8-NVLink" \
	            --backend gpu \
	            --project-version "${{ env.COMMIT_HASH }}" \
	            --branch "${REF_NAME}" \
	            --commit-date "${{ env.COMMIT_DATE }}" \
	            --bench-date "${{ env.BENCH_DATE }}" \
	            --walk-subdirs \
	            --name-suffix avx512 \
	            --throughput

	      - name: Upload parsed results artifact
	        uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b
	        with:
	          name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
	          path: ${{ env.RESULTS_FILENAME }}

	      - name: Send data to Slab
	        shell: bash
	        env:
	          # Secrets are handed to the script as environment variables instead
	          # of being expanded inline into the shell source.
	          JOB_SECRET: ${{ secrets.JOB_SECRET }}
	          SLAB_URL: ${{ secrets.SLAB_URL }}
	        # NOTE(review): `curl -k` disables TLS certificate verification;
	        # confirm this is still required for the Slab endpoint.
	        run: |
	          echo "Computing HMac on results file"
	          SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} "${JOB_SECRET}")"
	          echo "Sending results to Slab..."
	          curl -v -k \
	            -H "Content-Type: application/json" \
	            -H "X-Slab-Repository: ${{ github.repository }}" \
	            -H "X-Slab-Command: store_data_v2" \
	            -H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
	            -d @${{ env.RESULTS_FILENAME }} \
	            "${SLAB_URL}"

	  # Posts to Slack when a needed job did not succeed and the run was not cancelled.
	  slack-notify:
	    name: Slack Notification
	    needs: [setup-instance, cuda-integer-full-multi-gpu-benchmarks]
	    runs-on: ubuntu-latest
	    if: ${{ !success() && !cancelled() }}
	    continue-on-error: true
	    steps:
	      - name: Send message
	        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
	        env:
	          SLACK_COLOR: ${{ needs.cuda-integer-full-multi-gpu-benchmarks.result }}
	          SLACK_MESSAGE: "Integer GPU full benchmarks finished with status: ${{ needs.cuda-integer-full-multi-gpu-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"

	  # Stops the instance whatever the benchmark outcome, unless setup-instance
	  # was skipped (in which case no instance was started).
	  teardown-instance:
	    name: Teardown instance (cuda-integer-full-multi-gpu-benchmarks)
	    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
	    needs: [setup-instance, cuda-integer-full-multi-gpu-benchmarks]
	    runs-on: ubuntu-latest
	    steps:
	      - name: Stop instance
	        id: stop-instance
	        uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
	        with:
	          mode: stop
	          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
	          slab-url: ${{ secrets.SLAB_BASE_URL }}
	          job-secret: ${{ secrets.JOB_SECRET }}
	          label: ${{ needs.setup-instance.outputs.runner-name }}

	      # Only reports a failure of the teardown itself.
	      - name: Slack Notification
	        if: ${{ failure() }}
	        continue-on-error: true
	        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
	        env:
	          SLACK_COLOR: ${{ job.status }}
	          SLACK_MESSAGE: "Instance teardown (cuda-integer-full-multi-gpu-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Integer multi GPU full benchmarks #20

Workflow file

Integer multi GPU full benchmarks #20

Jobs

Run details

Workflow file for this run