Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 53 additions & 36 deletions vortex-cuda/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@

#![expect(clippy::unwrap_used)]
#![expect(clippy::expect_used)]
#![expect(clippy::panic)]
#![expect(clippy::use_debug)]

use std::env;
use std::fs::File;
use std::fs;
use std::io;
use std::path::Path;
use std::path::PathBuf;
Expand All @@ -30,11 +31,10 @@ fn main() {
// Output directory for compiled .ptx files - separate by profile.
let kernels_gen = Path::new(&manifest_dir).join("kernels/gen").join(&profile);

std::fs::create_dir_all(&kernels_gen).expect("Failed to create kernels/gen directory");
fs::create_dir_all(&kernels_gen).expect("Failed to create kernels/gen directory");

// Always emit the kernels output directory path as a compile-time env var so any binary
// linking against vortex-cuda can find the PTX files. This must be set regardless
// of CUDA availability since the code using env!() is always compiled.
// Emit the kernels output directory path as a compile-time env var so any binary
// linking against vortex-cuda can find the PTX files.
// At runtime, VORTEX_CUDA_KERNELS_DIR can be set to override this path.
println!(
"cargo:rustc-env=VORTEX_CUDA_KERNELS_DIR={}",
Expand All @@ -43,7 +43,7 @@ fn main() {

println!("cargo:rerun-if-env-changed=PROFILE");

// Regenerate bit_unpack kernels only when the generator changes
// Regenerate bit_unpack kernels only when the generator changes.
println!(
"cargo:rerun-if-changed={}",
Path::new(&manifest_dir)
Expand All @@ -63,55 +63,73 @@ fn main() {
return;
}

// Watch and compile .cu and .cuh files from kernels/src to PTX in kernels/gen
if let Ok(entries) = std::fs::read_dir(&kernels_src) {
for path in entries.flatten().map(|entry| entry.path()) {
let is_generated = path
.file_name()
.and_then(|n| n.to_str())
.is_some_and(|n| n.starts_with("bit_unpack_"));
// Compile .cu files to PTX. We deliberately do NOT register .cu/.cuh files
// with rerun-if-changed so that editing a .cu file does not trigger Rust
// recompilation.
let mut cu_files = Vec::new();
let mut newest_header = std::time::SystemTime::UNIX_EPOCH;

if let Ok(entries) = fs::read_dir(&kernels_src) {
for path in entries.flatten().map(|entry| entry.path()) {
match path.extension().and_then(|e| e.to_str()) {
Some("cuh") | Some("h") => {
// Only watch hand-written .cuh/.h files, not generated ones
// (generated files are rebuilt when cuda_kernel_generator changes)
if !is_generated {
println!("cargo:rerun-if-changed={}", path.display());
if let Ok(mtime) = fs::metadata(&path).and_then(|m| m.modified()) {
newest_header = newest_header.max(mtime);
}
}
Some("cu") => {
// Only watch hand-written .cu files, not generated ones
// (generated files are rebuilt when cuda_kernel_generator changes)
if !is_generated {
println!("cargo:rerun-if-changed={}", path.display());
}
// Compile all .cu files to PTX in gen directory
nvcc_compile_ptx(&kernels_src, &kernels_gen, &path, &profile)
.map_err(|e| {
format!("Failed to compile CUDA kernel {}: {}", path.display(), e)
})
.unwrap();
cu_files.push(path);
}
_ => {}
}
}
}

// Only compile .cu files whose PTX is stale (older than the source or any header).
for cu_path in &cu_files {
let ptx_path = kernels_gen
.join(cu_path.file_name().unwrap())
.with_extension("ptx");

let cu_mtime = fs::metadata(cu_path)
.and_then(|m| m.modified())
.unwrap_or(std::time::SystemTime::UNIX_EPOCH);
let newest_input = cu_mtime.max(newest_header);

let ptx_mtime = fs::metadata(&ptx_path).and_then(|m| m.modified()).ok();
if ptx_mtime.is_some_and(|t| t >= newest_input) {
continue;
}

nvcc_compile_ptx(&kernels_src, &kernels_gen, cu_path, &profile)
.map_err(|e| format!("Failed to compile CUDA kernel {}: {}", cu_path.display(), e))
.unwrap();
}
}

fn generate_unpack<T: FastLanes>(output_dir: &Path, thread_count: usize) -> io::Result<PathBuf> {
fn generate_unpack<T: FastLanes>(output_dir: &Path, thread_count: usize) -> io::Result<()> {
// Generate the lanes header (.cuh) — device functions only, no __global__ kernels.
// This is what dynamic_dispatch.cu includes (via bit_unpack.cuh).
let cuh_path = output_dir.join(format!("bit_unpack_{}_lanes.cuh", T::T));
let mut cuh_file = File::create(&cuh_path)?;
generate_cuda_unpack_lanes::<T>(&mut cuh_file)?;
let mut cuh_buf = Vec::new();
generate_cuda_unpack_lanes::<T>(&mut cuh_buf)?;
write_if_changed(&cuh_path, &cuh_buf);

// Generate the standalone kernels (.cu) — includes the lanes header,
// adds _device template + __global__ wrappers. Compiled to its own PTX.
let cu_path = output_dir.join(format!("bit_unpack_{}.cu", T::T));
let mut cu_file = File::create(&cu_path)?;
generate_cuda_unpack_kernels::<T>(&mut cu_file, thread_count)?;
let mut cu_buf = Vec::new();
generate_cuda_unpack_kernels::<T>(&mut cu_buf, thread_count)?;
write_if_changed(&cu_path, &cu_buf);

Ok(cu_path)
Ok(())
}

fn write_if_changed(path: &Path, content: &[u8]) {
if fs::read(path).is_ok_and(|existing| existing == content) {
return;
}
fs::write(path, content).unwrap_or_else(|e| panic!("Failed to write {}: {e}", path.display()));
}

fn nvcc_compile_ptx(
Expand Down Expand Up @@ -223,14 +241,13 @@ fn generate_patches_bindings(kernels_src: &Path, out_dir: &Path) {
.derive_copy(true)
.derive_debug(true)
.generate()
.expect("Failed to generate dynamic_dispatch bindings");
.expect("Failed to generate patches bindings");

bindings
.write_to_file(out_dir.join("patches.rs"))
.expect("Failed to write patches.rs");
}

/// Check if CUDA is available based on nvcc.
fn is_cuda_available() -> bool {
Command::new("nvcc")
.arg("--version")
Expand Down
Loading