diff --git a/compilers/concrete-compiler/compiler/include/concretelang/Runtime/GPUDFG.hpp b/compilers/concrete-compiler/compiler/include/concretelang/Runtime/GPUDFG.hpp new file mode 100644 index 000000000..db51c860a --- /dev/null +++ b/compilers/concrete-compiler/compiler/include/concretelang/Runtime/GPUDFG.hpp @@ -0,0 +1,28 @@ +// Part of the Concrete Compiler Project, under the BSD3 License with Zama +// Exceptions. See +// https://github.com/zama-ai/concrete/blob/main/LICENSE.txt +// for license information. + +#ifndef CONCRETELANG_GPUDFG_HPP +#define CONCRETELANG_GPUDFG_HPP + +#ifdef CONCRETELANG_CUDA_SUPPORT /* the four backend headers below are only included when CUDA support is enabled at build time */ +#include "bootstrap.h" +#include "device.h" +#include "keyswitch.h" +#include "linear_algebra.h" + +#endif /* CONCRETELANG_CUDA_SUPPORT */ + +namespace mlir { +namespace concretelang { +namespace gpu_dfg { + +bool check_cuda_device_available(); /* true only in a CUDA-enabled build where cudaGetDeviceCount succeeds and reports at least one device; false otherwise */ +bool check_cuda_runtime_enabled(); /* true iff the runtime was built with CONCRETELANG_CUDA_SUPPORT defined */ + +} // namespace gpu_dfg +} // namespace concretelang +} // namespace mlir + +#endif /* CONCRETELANG_GPUDFG_HPP */ diff --git a/compilers/concrete-compiler/compiler/include/concretelang/Support/CompilerEngine.h b/compilers/concrete-compiler/compiler/include/concretelang/Support/CompilerEngine.h index ae56ea941..a9fbfcc97 100644 --- a/compilers/concrete-compiler/compiler/include/concretelang/Support/CompilerEngine.h +++ b/compilers/concrete-compiler/compiler/include/concretelang/Support/CompilerEngine.h @@ -26,8 +26,6 @@ using concretelang::protocol::Message; namespace mlir { namespace concretelang { -bool getEmitGPUOption(); - /// Compilation context that acts as the root owner of LLVM and MLIR /// data structures directly and indirectly referenced by artefacts /// produced by the `CompilerEngine`.
diff --git a/compilers/concrete-compiler/compiler/lib/Bindings/Python/CompilerAPIModule.cpp b/compilers/concrete-compiler/compiler/lib/Bindings/Python/CompilerAPIModule.cpp index 6d226364b..554ac8b58 100644 --- a/compilers/concrete-compiler/compiler/lib/Bindings/Python/CompilerAPIModule.cpp +++ b/compilers/concrete-compiler/compiler/lib/Bindings/Python/CompilerAPIModule.cpp @@ -12,6 +12,7 @@ #include "concretelang/Common/Keysets.h" #include "concretelang/Dialect/FHE/IR/FHEOpsDialect.h.inc" #include "concretelang/Runtime/DFRuntime.hpp" +#include "concretelang/Runtime/GPUDFG.hpp" /* declares the gpu_dfg::check_cuda_* queries used by the wrappers below */ #include "concretelang/ServerLib/ServerLib.h" #include "concretelang/Support/logging.h" #include @@ -462,6 +463,14 @@ void initDataflowParallelization() { mlir::concretelang::dfr::_dfr_set_required(true); } +bool checkGPURuntimeEnabled() { /* forwards to gpu_dfg::check_cuda_runtime_enabled(); bound to Python as "check_gpu_runtime_enabled" */ + return mlir::concretelang::gpu_dfg::check_cuda_runtime_enabled(); +} + +bool checkCudaDeviceAvailable() { /* forwards to gpu_dfg::check_cuda_device_available(); bound to Python as "check_cuda_device_available" */ + return mlir::concretelang::gpu_dfg::check_cuda_device_available(); +} + std::string roundTrip(const char *module) { std::shared_ptr ccx = mlir::concretelang::CompilationContext::createShared(); @@ -673,6 +682,8 @@ void mlir::concretelang::python::populateCompilerAPISubmodule( m.def("terminate_df_parallelization", &terminateDataflowParallelization); m.def("init_df_parallelization", &initDataflowParallelization); + m.def("check_gpu_runtime_enabled", &checkGPURuntimeEnabled); + m.def("check_cuda_device_available", &checkCudaDeviceAvailable); pybind11::enum_(m, "Backend") .value("CPU", mlir::concretelang::Backend::CPU) diff --git a/compilers/concrete-compiler/compiler/lib/Bindings/Python/concrete/compiler/__init__.py b/compilers/concrete-compiler/compiler/lib/Bindings/Python/concrete/compiler/__init__.py index d122706dc..fbd4d631d 100644 --- a/compilers/concrete-compiler/compiler/lib/Bindings/Python/concrete/compiler/__init__.py +++ b/compilers/concrete-compiler/compiler/lib/Bindings/Python/concrete/compiler/__init__.py @@ -8,6 +8,8 @@ from
mlir._mlir_libs._concretelang._compiler import ( terminate_df_parallelization as _terminate_df_parallelization, init_df_parallelization as _init_df_parallelization, + check_gpu_runtime_enabled as _check_gpu_runtime_enabled, + check_cuda_device_available as _check_cuda_device_available, ) from mlir._mlir_libs._concretelang._compiler import round_trip as _round_trip from mlir._mlir_libs._concretelang._compiler import ( @@ -49,6 +51,18 @@ def init_dfr(): _init_df_parallelization() +def check_gpu_enabled() -> bool: + """Check whether the compiler and runtime support GPU offloading. + + GPU offloading is not always available, in particular in non-GPU wheels.""" + return _check_gpu_runtime_enabled() + + +def check_gpu_available() -> bool: + """Check whether a CUDA device is available and online.""" + return _check_cuda_device_available() + + # Cleanly terminate the dataflow runtime if it has been initialized # (does nothing otherwise) atexit.register(_terminate_df_parallelization) diff --git a/compilers/concrete-compiler/compiler/lib/Runtime/CMakeLists.txt b/compilers/concrete-compiler/compiler/lib/Runtime/CMakeLists.txt index 13d477500..2b2b7e429 100644 --- a/compilers/concrete-compiler/compiler/lib/Runtime/CMakeLists.txt +++ b/compilers/concrete-compiler/compiler/lib/Runtime/CMakeLists.txt @@ -6,7 +6,7 @@ if(CONCRETELANG_CUDA_SUPPORT) target_link_libraries(ConcretelangRuntime PRIVATE hwloc) else() add_library(ConcretelangRuntime SHARED context.cpp simulation.cpp wrappers.cpp DFRuntime.cpp key_manager.cpp - StreamEmulator.cpp) + GPUDFG.cpp) endif() add_dependencies(ConcretelangRuntime concrete_cpu concrete_cpu_noise_model concrete-protocol) diff --git a/compilers/concrete-compiler/compiler/lib/Runtime/GPUDFG.cpp b/compilers/concrete-compiler/compiler/lib/Runtime/GPUDFG.cpp index 3cb3c078c..fbd4e8602 100644 --- a/compilers/concrete-compiler/compiler/lib/Runtime/GPUDFG.cpp +++ b/compilers/concrete-compiler/compiler/lib/Runtime/GPUDFG.cpp @@ -3,6 +3,7 @@ //
https://github.com/zama-ai/concrete/blob/main/LICENSE.txt // for license information. +#ifdef CONCRETELANG_CUDA_SUPPORT #include #include #include @@ -18,15 +19,10 @@ #include #include +#include #include #include -#ifdef CONCRETELANG_CUDA_SUPPORT -#include "bootstrap.h" -#include "device.h" -#include "keyswitch.h" -#include "linear_algebra.h" - using RuntimeContext = mlir::concretelang::RuntimeContext; namespace mlir { @@ -1652,3 +1648,30 @@ void *stream_emulator_init() { void stream_emulator_run(void *dfg) {} void stream_emulator_delete(void *dfg) { delete (GPU_DFG *)dfg; } #endif + +namespace mlir { +namespace concretelang { +namespace gpu_dfg { + +bool check_cuda_device_available() { /* reports whether at least one CUDA device can be enumerated; always false in builds without CUDA support */ +#ifdef CONCRETELANG_CUDA_SUPPORT + int num; + if (cudaGetDeviceCount(&num) != cudaSuccess) + return false; + return num > 0; +#else + return false; +#endif +} + +bool check_cuda_runtime_enabled() { /* compile-time answer: true iff built with CONCRETELANG_CUDA_SUPPORT */ +#ifdef CONCRETELANG_CUDA_SUPPORT + return true; +#else + return false; +#endif +} + +} // namespace gpu_dfg +} // namespace concretelang +} // namespace mlir diff --git a/compilers/concrete-compiler/compiler/lib/Runtime/context.cpp b/compilers/concrete-compiler/compiler/lib/Runtime/context.cpp index 9d0df8d0d..8b52a07b8 100644 --- a/compilers/concrete-compiler/compiler/lib/Runtime/context.cpp +++ b/compilers/concrete-compiler/compiler/lib/Runtime/context.cpp @@ -41,14 +41,17 @@ RuntimeContext::RuntimeContext(ServerKeyset serverKeyset) } #ifdef CONCRETELANG_CUDA_SUPPORT - assert(cudaGetDeviceCount(&num_devices) == cudaSuccess); + // Fall back to zero devices when the query fails so that num_devices is + // always well-defined and the per-device tables below stay empty. + if (cudaGetDeviceCount(&num_devices) != cudaSuccess) + num_devices = 0; bsk_gpu.resize(num_devices); ksk_gpu.resize(num_devices); for (int i = 0; i < num_devices; ++i) { bsk_gpu[i].resize(serverKeyset.lweBootstrapKeys.size(), nullptr); ksk_gpu[i].resize(serverKeyset.lweKeyswitchKeys.size(), nullptr); bsk_gpu_mutex.push_back(std::make_unique()); ksk_gpu_mutex.push_back(std::make_unique()); } #endif }
diff --git a/compilers/concrete-compiler/compiler/lib/Support/CompilerEngine.cpp b/compilers/concrete-compiler/compiler/lib/Support/CompilerEngine.cpp index 3260d920f..844f15ed3 100644 --- a/compilers/concrete-compiler/compiler/lib/Support/CompilerEngine.cpp +++ b/compilers/concrete-compiler/compiler/lib/Support/CompilerEngine.cpp @@ -63,14 +63,10 @@ #include "concretelang/Support/LLVMEmitFile.h" #include "concretelang/Support/Pipeline.h" #include "concretelang/Support/Utils.h" +#include namespace mlir { namespace concretelang { -// TODO: should be removed when bufferization is not related to CAPI lowering -// Control whether we should call a cpu of gpu function when lowering -// to CAPI -static bool EMIT_GPU_OPS; -bool getEmitGPUOption() { return EMIT_GPU_OPS; } /// Creates a new compilation context that can be shared across /// compilation engines and results @@ -297,9 +293,6 @@ CompilerEngine::compile(mlir::ModuleOp moduleOp, Target target, mlir::MLIRContext &mlirContext = *this->compilationContext->getMLIRContext(); - // enable/disable usage of gpu functions during bufferization - EMIT_GPU_OPS = options.emitGPUOps; - auto dataflowParallelize = options.autoParallelize || options.dataflowParallelize; auto loopParallelize = options.autoParallelize || options.loopParallelize; @@ -310,6 +303,45 @@ CompilerEngine::compile(mlir::ModuleOp moduleOp, Target target, if (dataflowParallelize) mlir::concretelang::dfr::_dfr_set_required(true); + // Sanity checks for enabling GPU usage: the compiler must have been + // compiled with Cuda support (especially important when building + // python wheels), and at least one device must be available to + // execute on.
+ if (options.emitGPUOps) { + // If this compiler is not compiled using Cuda support, then + // requesting GPU is forbidden - instead of a hard error, issue a + // warning and disable the GPU option. + if (!mlir::concretelang::gpu_dfg::check_cuda_runtime_enabled()) { + // Allow compilation to complete if only code generation is expected. + if (target != Target::LIBRARY) { /* non-library targets: only warn; the GPU-related options are left untouched */ + warnx("This instance of the Concrete compiler does not support GPU " + "acceleration." + " Allowing code generation to proceed, but execution will not be " + "possible."); + } else { /* library target: warn, then clear emitGPUOps, emitSDFGOps and batchTFHEOps */ + warnx("This instance of the Concrete compiler does not support GPU " + "acceleration." + " If you are using Concrete-Python, it means that the module " + "installed is not GPU enabled.\n" + "Continuing without GPU acceleration."); + options.emitGPUOps = false; + options.emitSDFGOps = false; + options.batchTFHEOps = false; + } + } else { + // Ensure that at least one Cuda device is available if GPU option + // is used + if (!mlir::concretelang::gpu_dfg::check_cuda_device_available()) { /* NOTE(review): unlike the branch above, this path does not consult `target`; the three options are cleared for every target when no device is found - confirm this is intended */ + warnx("No Cuda device available on this system (either not present or " + "the driver is not online).\n" + "Continuing without GPU acceleration."); + options.emitGPUOps = false; + options.emitSDFGOps = false; + options.batchTFHEOps = false; + } + } + } + mlir::OwningOpRef mlirModuleRef(moduleOp); res.mlirModuleRef = std::move(mlirModuleRef); mlir::ModuleOp module = res.mlirModuleRef->get();