TFHE Cuda Backend - Full tests multi-GPU

refactor(gpu): Specify launch bounds on kernels #816

Workflow file for this run

.github/workflows/aws_tfhe_multi_gpu_tests.yml at 24cb37d

	# Compile and test tfhe-cuda-backend on a multi-GPU AWS instance.
	# The instance is started and stopped by the setup-instance / teardown-instance jobs below.
	name: TFHE Cuda Backend - Full tests multi-GPU

	# Workflow-wide environment, visible to every job and step.
	env:
	  CARGO_TERM_COLOR: always
	  # Link to this run; embedded in the Slack messages sent by the jobs.
	  ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
	  RUSTFLAGS: "-C target-cpu=native"
	  RUST_BACKTRACE: "full"
	  # 8 MiB (8 * 1024 * 1024 bytes).
	  RUST_MIN_STACK: "8388608"
	  # Settings picked up by the Slack notification steps.
	  SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
	  SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
	  SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
	  SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
	  # Evaluates to 'true' or 'false'; read by the should-run job to decide
	  # whether the changed-files filter applies.
	  IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}

	# Triggers: manual dispatch, or a label being added to a pull request.
	on:
	  # Allows you to run this workflow manually from the Actions tab as an alternative.
	  workflow_dispatch:
	  pull_request:
	    # Only the 'labeled' activity type starts the workflow; the label name
	    # itself is checked in the setup-instance job condition.
	    types: [labeled]

	jobs:
	  # Gate job: exposes `gpu_test`, which tells the following jobs whether
	  # GPU-related files were touched.
	  should-run:
	    runs-on: ubuntu-latest
	    permissions:
	      pull-requests: write
	    outputs:
	      # Always 'true' outside of pull requests; on pull requests it follows
	      # the result of the changed-files filter below.
	      gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
	    steps:
	      - name: Checkout tfhe-rs
	        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
	        with:
	          # Full history, so the changed-files step can diff against earlier commits.
	          fetch-depth: 0

	      - name: Check for file changes
	        id: changed-files
	        uses: tj-actions/changed-files@6b2903bdce6310cfbddd87c418f253cf29b2dec9
	        with:
	          since_last_remote_commit: true
	          # The `gpu` group name is what produces the `gpu_any_changed` output.
	          files_yaml: |
	            gpu:
	              - tfhe/Cargo.toml
	              - tfhe/build.rs
	              - backends/tfhe-cuda-backend/**
	              - tfhe/src/core_crypto/gpu/**
	              - tfhe/src/integer/gpu/**
	              - tfhe/src/shortint/parameters/**
	              - tfhe/src/high_level_api/**
	              - tfhe/src/c_api/**
	              - 'tfhe/docs/**.md'
	              - Makefile
	              - '.github/workflows/aws_tfhe_multi_gpu**'
	              - scripts/**
	              - ci/**

	  # Starts the AWS runner. Runs for every non-pull-request trigger; on pull
	  # requests it requires relevant file changes and, for 'labeled' events,
	  # the 'approved' label.
	  setup-instance:
	    name: Setup instance (cuda-tests-multi-gpu)
	    needs: should-run
	    if: github.event_name != 'pull_request' ||
	      (github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
	      (github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
	    runs-on: ubuntu-latest
	    outputs:
	      # Label of the started runner; used as `runs-on` by the test job and
	      # passed back to the stop action in teardown-instance.
	      runner-name: ${{ steps.start-instance.outputs.label }}
	    steps:
	      - name: Start instance
	        id: start-instance
	        uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
	        with:
	          mode: start
	          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
	          slab-url: ${{ secrets.SLAB_BASE_URL }}
	          job-secret: ${{ secrets.JOB_SECRET }}
	          backend: aws
	          profile: multi-gpu-test

	  # Runs the multi-GPU test targets on the instance started above.
	  cuda-tests-linux:
	    name: CUDA multi-GPU tests
	    needs: [should-run, setup-instance]
	    if: github.event_name != 'pull_request' ||
	      (github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
	    concurrency:
	      group: ${{ github.workflow }}_${{ github.ref }}
	      # Newer runs cancel older ones on every ref except main.
	      cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
	    runs-on: ${{ needs.setup-instance.outputs.runner-name }}
	    strategy:
	      fail-fast: false
	      # explicit include-based build matrix, of known valid options
	      matrix:
	        include:
	          - os: ubuntu-22.04
	            cuda: "12.2"
	            gcc: 9
	    env:
	      CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}

	    steps:
	      - name: Checkout tfhe-rs
	        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332

	      - name: Set up home
	        run: |
	          echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"

	      - name: Install latest stable
	        uses: dtolnay/rust-toolchain@21dc36fb71dd22e3317045c0c31a3f4249868b17
	        with:
	          toolchain: stable

	      - name: Export CUDA variables
	        if: ${{ !cancelled() }}
	        run: |
	          echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
	          echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
	          echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
	          echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"

	      # Specify the correct host compilers
	      # (HOME is exported here as well, duplicating the "Set up home" step.)
	      - name: Export gcc and g++ variables
	        if: ${{ !cancelled() }}
	        run: |
	          {
	            echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
	            echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
	            echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
	            echo "HOME=/home/ubuntu";
	          } >> "${GITHUB_ENV}"

	      # No need to test core_crypto and classic PBS in integer since it's already tested on single GPU.
	      - name: Run multi-bit CUDA integer tests
	        run: |
	          make test_integer_multi_bit_gpu_ci

	      - name: Run user docs tests
	        run: |
	          make test_user_doc_gpu

	      - name: Test C API
	        run: |
	          make test_c_api_gpu

	      - name: Run High Level API Tests
	        run: |
	          make test_high_level_api_gpu

	      # Reports the job status whatever the outcome; a failure to notify
	      # does not fail the job.
	      - name: Slack Notification
	        if: ${{ always() }}
	        continue-on-error: true
	        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
	        env:
	          SLACK_COLOR: ${{ job.status }}
	          SLACK_MESSAGE: "CUDA AWS multi-GPU tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

	  # Stops the instance whenever setup-instance actually ran, even if the
	  # tests failed or were cancelled.
	  teardown-instance:
	    name: Teardown instance (cuda-tests-multi-gpu)
	    if: ${{ always() && needs.setup-instance.result != 'skipped' }}
	    needs: [setup-instance, cuda-tests-linux]
	    runs-on: ubuntu-latest
	    steps:
	      - name: Stop instance
	        id: stop-instance
	        uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
	        with:
	          mode: stop
	          github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
	          slab-url: ${{ secrets.SLAB_BASE_URL }}
	          job-secret: ${{ secrets.JOB_SECRET }}
	          label: ${{ needs.setup-instance.outputs.runner-name }}

	      # Only notifies when stopping the instance failed.
	      - name: Slack Notification
	        if: ${{ failure() }}
	        continue-on-error: true
	        uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
	        env:
	          SLACK_COLOR: ${{ job.status }}
	          SLACK_MESSAGE: "Instance teardown (cuda-tests-multi-gpu) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

refactor(gpu): Specify launch bounds on kernels #816

Workflow file

refactor(gpu): Specify launch bounds on kernels #816

Jobs

Run details

Workflow file for this run