diff --git a/vortex-cuda/build.rs b/vortex-cuda/build.rs index 90e0de1d881..b8673cdda83 100644 --- a/vortex-cuda/build.rs +++ b/vortex-cuda/build.rs @@ -3,11 +3,10 @@ #![expect(clippy::unwrap_used)] #![expect(clippy::expect_used)] -#![expect(clippy::panic)] #![expect(clippy::use_debug)] use std::env; -use std::fs; +use std::fs::File; use std::io; use std::path::Path; use std::path::PathBuf; @@ -31,10 +30,11 @@ fn main() { // Output directory for compiled .ptx files - separate by profile. let kernels_gen = Path::new(&manifest_dir).join("kernels/gen").join(&profile); - fs::create_dir_all(&kernels_gen).expect("Failed to create kernels/gen directory"); + std::fs::create_dir_all(&kernels_gen).expect("Failed to create kernels/gen directory"); - // Emit the kernels output directory path as a compile-time env var so any binary - // linking against vortex-cuda can find the PTX files. + // Always emit the kernels output directory path as a compile-time env var so any binary + // linking against vortex-cuda can find the PTX files. This must be set regardless + // of CUDA availability since the code using env!() is always compiled. // At runtime, VORTEX_CUDA_KERNELS_DIR can be set to override this path. println!( "cargo:rustc-env=VORTEX_CUDA_KERNELS_DIR={}", @@ -43,7 +43,7 @@ fn main() { println!("cargo:rerun-if-env-changed=PROFILE"); - // Regenerate bit_unpack kernels only when the generator changes. + // Regenerate bit_unpack kernels only when the generator changes println!( "cargo:rerun-if-changed={}", Path::new(&manifest_dir) @@ -63,73 +63,55 @@ fn main() { return; } - // Compile .cu files to PTX. We deliberately do NOT register .cu/.cuh files - // with rerun-if-changed so that editing a .cu file does not trigger Rust - // recompilation. 
- let mut cu_files = Vec::new(); - let mut newest_header = std::time::SystemTime::UNIX_EPOCH; - - if let Ok(entries) = fs::read_dir(&kernels_src) { + // Watch and compile .cu and .cuh files from kernels/src to PTX in kernels/gen + if let Ok(entries) = std::fs::read_dir(&kernels_src) { for path in entries.flatten().map(|entry| entry.path()) { + let is_generated = path + .file_name() + .and_then(|n| n.to_str()) + .is_some_and(|n| n.starts_with("bit_unpack_")); + match path.extension().and_then(|e| e.to_str()) { Some("cuh") | Some("h") => { - if let Ok(mtime) = fs::metadata(&path).and_then(|m| m.modified()) { - newest_header = newest_header.max(mtime); + // Only watch hand-written .cuh/.h files, not generated ones + // (generated files are rebuilt when cuda_kernel_generator changes) + if !is_generated { + println!("cargo:rerun-if-changed={}", path.display()); } } Some("cu") => { - cu_files.push(path); + // Only watch hand-written .cu files, not generated ones + // (generated files are rebuilt when cuda_kernel_generator changes) + if !is_generated { + println!("cargo:rerun-if-changed={}", path.display()); + } + // Compile all .cu files to PTX in gen directory + nvcc_compile_ptx(&kernels_src, &kernels_gen, &path, &profile) + .map_err(|e| { + format!("Failed to compile CUDA kernel {}: {}", path.display(), e) + }) + .unwrap(); } _ => {} } } } - - // Only compile .cu files whose PTX is stale (older than the source or any header). 
-    for cu_path in &cu_files {
-        let ptx_path = kernels_gen
-            .join(cu_path.file_name().unwrap())
-            .with_extension("ptx");
-
-        let cu_mtime = fs::metadata(cu_path)
-            .and_then(|m| m.modified())
-            .unwrap_or(std::time::SystemTime::UNIX_EPOCH);
-        let newest_input = cu_mtime.max(newest_header);
-
-        let ptx_mtime = fs::metadata(&ptx_path).and_then(|m| m.modified()).ok();
-        if ptx_mtime.is_some_and(|t| t >= newest_input) {
-            continue;
-        }
-
-        nvcc_compile_ptx(&kernels_src, &kernels_gen, cu_path, &profile)
-            .map_err(|e| format!("Failed to compile CUDA kernel {}: {}", cu_path.display(), e))
-            .unwrap();
-    }
 }
 
-fn generate_unpack<T: CudaType>(output_dir: &Path, thread_count: usize) -> io::Result<()> {
+fn generate_unpack<T: CudaType>(output_dir: &Path, thread_count: usize) -> io::Result<PathBuf> {
     // Generate the lanes header (.cuh) — device functions only, no __global__ kernels.
     // This is what dynamic_dispatch.cu includes (via bit_unpack.cuh).
     let cuh_path = output_dir.join(format!("bit_unpack_{}_lanes.cuh", T::T));
-    let mut cuh_buf = Vec::new();
-    generate_cuda_unpack_lanes::<T>(&mut cuh_buf)?;
-    write_if_changed(&cuh_path, &cuh_buf);
+    let mut cuh_file = File::create(&cuh_path)?;
+    generate_cuda_unpack_lanes::<T>(&mut cuh_file)?;
 
     // Generate the standalone kernels (.cu) — includes the lanes header,
     // adds _device template + __global__ wrappers. Compiled to its own PTX.
     let cu_path = output_dir.join(format!("bit_unpack_{}.cu", T::T));
-    let mut cu_buf = Vec::new();
-    generate_cuda_unpack_kernels::<T>(&mut cu_buf, thread_count)?;
-    write_if_changed(&cu_path, &cu_buf);
+    let mut cu_file = File::create(&cu_path)?;
+    generate_cuda_unpack_kernels::<T>(&mut cu_file, thread_count)?;
 
-    Ok(())
-}
-
-fn write_if_changed(path: &Path, content: &[u8]) {
-    if fs::read(path).is_ok_and(|existing| existing == content) {
-        return;
-    }
-    fs::write(path, content).unwrap_or_else(|e| panic!("Failed to write {}: {e}", path.display()));
+    Ok(cu_path)
 }
 
 fn nvcc_compile_ptx(
@@ -241,13 +223,14 @@ fn generate_patches_bindings(kernels_src: &Path, out_dir: &Path) {
         .derive_copy(true)
         .derive_debug(true)
         .generate()
-        .expect("Failed to generate patches bindings");
+        .expect("Failed to generate dynamic_dispatch bindings");
 
     bindings
         .write_to_file(out_dir.join("patches.rs"))
         .expect("Failed to write patches.rs");
 }
 
+/// Check if CUDA is available by running `nvcc --version`.
 fn is_cuda_available() -> bool {
     Command::new("nvcc")
         .arg("--version")