Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sanity checks when GPU is requested #903

Merged
merged 2 commits into from
Jun 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Part of the Concrete Compiler Project, under the BSD3 License with Zama
// Exceptions. See
// https://github.com/zama-ai/concrete/blob/main/LICENSE.txt
// for license information.

#ifndef CONCRETELANG_GPUDFG_HPP
#define CONCRETELANG_GPUDFG_HPP

#ifdef CONCRETELANG_CUDA_SUPPORT
#include "bootstrap.h"
#include "device.h"
#include "keyswitch.h"
#include "linear_algebra.h"

#endif

namespace mlir {
namespace concretelang {
namespace gpu_dfg {

/// Returns true when at least one CUDA device can be enumerated. Always
/// returns false when built without CONCRETELANG_CUDA_SUPPORT.
bool check_cuda_device_available();
/// Returns true when this runtime was built with CONCRETELANG_CUDA_SUPPORT
/// defined, false otherwise. Does not query any device.
bool check_cuda_runtime_enabled();

} // namespace gpu_dfg
} // namespace concretelang
} // namespace mlir

#endif
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ using concretelang::protocol::Message;
namespace mlir {
namespace concretelang {

bool getEmitGPUOption();

/// Compilation context that acts as the root owner of LLVM and MLIR
/// data structures directly and indirectly referenced by artefacts
/// produced by the `CompilerEngine`.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "concretelang/Common/Keysets.h"
#include "concretelang/Dialect/FHE/IR/FHEOpsDialect.h.inc"
#include "concretelang/Runtime/DFRuntime.hpp"
#include "concretelang/Runtime/GPUDFG.hpp"
#include "concretelang/ServerLib/ServerLib.h"
#include "concretelang/Support/logging.h"
#include <llvm/Support/Debug.h>
Expand Down Expand Up @@ -462,6 +463,14 @@ void initDataflowParallelization() {
mlir::concretelang::dfr::_dfr_set_required(true);
}

bool checkGPURuntimeEnabled() {
return mlir::concretelang::gpu_dfg::check_cuda_runtime_enabled();
}

bool checkCudaDeviceAvailable() {
return mlir::concretelang::gpu_dfg::check_cuda_device_available();
}

std::string roundTrip(const char *module) {
std::shared_ptr<mlir::concretelang::CompilationContext> ccx =
mlir::concretelang::CompilationContext::createShared();
Expand Down Expand Up @@ -673,6 +682,8 @@ void mlir::concretelang::python::populateCompilerAPISubmodule(
m.def("terminate_df_parallelization", &terminateDataflowParallelization);

m.def("init_df_parallelization", &initDataflowParallelization);
m.def("check_gpu_runtime_enabled", &checkGPURuntimeEnabled);
m.def("check_cuda_device_available", &checkCudaDeviceAvailable);

pybind11::enum_<mlir::concretelang::Backend>(m, "Backend")
.value("CPU", mlir::concretelang::Backend::CPU)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from mlir._mlir_libs._concretelang._compiler import (
terminate_df_parallelization as _terminate_df_parallelization,
init_df_parallelization as _init_df_parallelization,
check_gpu_runtime_enabled as _check_gpu_runtime_enabled,
check_cuda_device_available as _check_cuda_device_available,
)
from mlir._mlir_libs._concretelang._compiler import round_trip as _round_trip
from mlir._mlir_libs._concretelang._compiler import (
Expand Down Expand Up @@ -49,6 +51,18 @@ def init_dfr():
_init_df_parallelization()


def check_gpu_enabled() -> bool:
    """Tell whether GPU offloading is supported by this compiler and runtime.

    Support depends on how the package was built: it is not always
    available, in particular in non-GPU wheels.
    """
    gpu_supported = _check_gpu_runtime_enabled()
    return gpu_supported


def check_gpu_available() -> bool:
    """Tell whether a CUDA device is available and online on this system."""
    device_found = _check_cuda_device_available()
    return device_found


# Cleanly terminate the dataflow runtime if it has been initialized
# (does nothing otherwise)
atexit.register(_terminate_df_parallelization)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ if(CONCRETELANG_CUDA_SUPPORT)
target_link_libraries(ConcretelangRuntime PRIVATE hwloc)
else()
add_library(ConcretelangRuntime SHARED context.cpp simulation.cpp wrappers.cpp DFRuntime.cpp key_manager.cpp
StreamEmulator.cpp)
GPUDFG.cpp)
endif()

add_dependencies(ConcretelangRuntime concrete_cpu concrete_cpu_noise_model concrete-protocol)
Expand Down
35 changes: 29 additions & 6 deletions compilers/concrete-compiler/compiler/lib/Runtime/GPUDFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// https://github.com/zama-ai/concrete/blob/main/LICENSE.txt
// for license information.

#ifdef CONCRETELANG_CUDA_SUPPORT
#include <atomic>
#include <cmath>
#include <cstdarg>
Expand All @@ -18,15 +19,10 @@
#include <utility>
#include <vector>

#include <concretelang/Runtime/GPUDFG.hpp>
#include <concretelang/Runtime/stream_emulator_api.h>
#include <concretelang/Runtime/wrappers.h>

#ifdef CONCRETELANG_CUDA_SUPPORT
#include "bootstrap.h"
#include "device.h"
#include "keyswitch.h"
#include "linear_algebra.h"

using RuntimeContext = mlir::concretelang::RuntimeContext;

namespace mlir {
Expand Down Expand Up @@ -1652,3 +1648,30 @@ void *stream_emulator_init() {
// No-op in this implementation: the body is empty and `dfg` is not used.
void stream_emulator_run(void *dfg) {}
void stream_emulator_delete(void *dfg) { delete (GPU_DFG *)dfg; }
#endif

namespace mlir {
namespace concretelang {
namespace gpu_dfg {

/// Reports whether this runtime was built with CUDA support, i.e. whether
/// CONCRETELANG_CUDA_SUPPORT was defined at compile time. No device is
/// queried.
bool check_cuda_runtime_enabled() {
#ifndef CONCRETELANG_CUDA_SUPPORT
  return false;
#else
  return true;
#endif
}

/// Reports whether at least one CUDA device can be enumerated.
///
/// Without CUDA support compiled in this is always false. With CUDA support,
/// a failing device-count query is treated the same as having zero devices.
bool check_cuda_device_available() {
#ifndef CONCRETELANG_CUDA_SUPPORT
  return false;
#else
  int deviceCount = 0;
  const bool queryOk = cudaGetDeviceCount(&deviceCount) == cudaSuccess;
  return queryOk && deviceCount > 0;
#endif
}

} // namespace gpu_dfg
} // namespace concretelang
} // namespace mlir
17 changes: 9 additions & 8 deletions compilers/concrete-compiler/compiler/lib/Runtime/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,15 @@ RuntimeContext::RuntimeContext(ServerKeyset serverKeyset)
}

#ifdef CONCRETELANG_CUDA_SUPPORT
assert(cudaGetDeviceCount(&num_devices) == cudaSuccess);
bsk_gpu.resize(num_devices);
ksk_gpu.resize(num_devices);
for (int i = 0; i < num_devices; ++i) {
bsk_gpu[i].resize(serverKeyset.lweBootstrapKeys.size(), nullptr);
ksk_gpu[i].resize(serverKeyset.lweKeyswitchKeys.size(), nullptr);
bsk_gpu_mutex.push_back(std::make_unique<std::mutex>());
ksk_gpu_mutex.push_back(std::make_unique<std::mutex>());
if (cudaGetDeviceCount(&num_devices) == cudaSuccess) {
bsk_gpu.resize(num_devices);
ksk_gpu.resize(num_devices);
for (int i = 0; i < num_devices; ++i) {
bsk_gpu[i].resize(serverKeyset.lweBootstrapKeys.size(), nullptr);
ksk_gpu[i].resize(serverKeyset.lweKeyswitchKeys.size(), nullptr);
bsk_gpu_mutex.push_back(std::make_unique<std::mutex>());
ksk_gpu_mutex.push_back(std::make_unique<std::mutex>());
}
}
#endif
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,14 +63,10 @@
#include "concretelang/Support/LLVMEmitFile.h"
#include "concretelang/Support/Pipeline.h"
#include "concretelang/Support/Utils.h"
#include <concretelang/Runtime/GPUDFG.hpp>

namespace mlir {
namespace concretelang {
// TODO: should be removed when bufferization is not related to CAPI lowering
// Control whether we should call a cpu or gpu function when lowering
// to CAPI
static bool EMIT_GPU_OPS;
bool getEmitGPUOption() { return EMIT_GPU_OPS; }

/// Creates a new compilation context that can be shared across
/// compilation engines and results
Expand Down Expand Up @@ -297,9 +293,6 @@ CompilerEngine::compile(mlir::ModuleOp moduleOp, Target target,

mlir::MLIRContext &mlirContext = *this->compilationContext->getMLIRContext();

// enable/disable usage of gpu functions during bufferization
EMIT_GPU_OPS = options.emitGPUOps;

auto dataflowParallelize =
options.autoParallelize || options.dataflowParallelize;
auto loopParallelize = options.autoParallelize || options.loopParallelize;
Expand All @@ -310,6 +303,45 @@ CompilerEngine::compile(mlir::ModuleOp moduleOp, Target target,
if (dataflowParallelize)
mlir::concretelang::dfr::_dfr_set_required(true);

// Sanity checks for enabling GPU usage: the compiler must have been
// compiled with Cuda support (especially important when building
// python wheels), and at least one device must be available to
// execute on.
if (options.emitGPUOps) {
// If this compiler is not compiled using Cuda support, then
// requesting GPU is forbidden - instead of a hard error, issue a
// warning and disable the GPU option.
if (!mlir::concretelang::gpu_dfg::check_cuda_runtime_enabled()) {
// Allow compilation to complete if only code generation is expected.
if (target != Target::LIBRARY) {
warnx("This instance of the Concrete compiler does not support GPU "
"acceleration."
" Allowing code generation to proceed, but execution will not be "
"possible.");
} else {
warnx("This instance of the Concrete compiler does not support GPU "
"acceleration."
" If you are using Concrete-Python, it means that the module "
"installed is not GPU enabled.\n"
"Continuing without GPU acceleration.");
options.emitGPUOps = false;
options.emitSDFGOps = false;
options.batchTFHEOps = false;
}
} else {
// Ensure that at least one Cuda device is available if GPU option
// is used
if (!mlir::concretelang::gpu_dfg::check_cuda_device_available()) {
warnx("No Cuda device available on this system (either not present or "
"the driver is not online).\n"
"Continuing without GPU acceleration.");
options.emitGPUOps = false;
options.emitSDFGOps = false;
options.batchTFHEOps = false;
}
}
}

mlir::OwningOpRef<mlir::ModuleOp> mlirModuleRef(moduleOp);
res.mlirModuleRef = std::move(mlirModuleRef);
mlir::ModuleOp module = res.mlirModuleRef->get();
Expand Down
Loading