Skip to content

Commit

Permalink
ARROW-7624: [Rust] Soundness issues via Buffer methods
Browse files Browse the repository at this point in the history
Closes apache#6397 from paddyhoran/from-raw-parts-unsafe and squashes the following commits:

963c177 <Paddy Horan> Fixes calls to unsafe functions
d02fa8b <Paddy Horan> Fixed typo.
cffcc0b <Paddy Horan> Makes `typed_data` unsafe.
c6ad47c <Paddy Horan> Updates `from_raw_parts` to be unsafe

Authored-by: Paddy Horan <paddyhoran@hotmail.com>
Signed-off-by: Paddy Horan <paddyhoran@hotmail.com>
  • Loading branch information
paddyhoran committed Feb 12, 2020
1 parent 9fecae0 commit e6eb6bd
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 29 deletions.
6 changes: 3 additions & 3 deletions rust/arrow/src/array/array.rs
Expand Up @@ -2821,7 +2821,7 @@ mod tests {
#[should_panic(expected = "memory is not aligned")]
fn test_primitive_array_alignment() {
let ptr = memory::allocate_aligned(8);
let buf = Buffer::from_raw_parts(ptr, 8);
let buf = unsafe { Buffer::from_raw_parts(ptr, 8) };
let buf2 = buf.slice(1);
let array_data = ArrayData::builder(DataType::Int32).add_buffer(buf2).build();
Int32Array::from(array_data);
Expand All @@ -2831,7 +2831,7 @@ mod tests {
#[should_panic(expected = "memory is not aligned")]
fn test_list_array_alignment() {
let ptr = memory::allocate_aligned(8);
let buf = Buffer::from_raw_parts(ptr, 8);
let buf = unsafe { Buffer::from_raw_parts(ptr, 8) };
let buf2 = buf.slice(1);

let values: [i32; 8] = [0; 8];
Expand All @@ -2851,7 +2851,7 @@ mod tests {
#[should_panic(expected = "memory is not aligned")]
fn test_binary_array_alignment() {
let ptr = memory::allocate_aligned(8);
let buf = Buffer::from_raw_parts(ptr, 8);
let buf = unsafe { Buffer::from_raw_parts(ptr, 8) };
let buf2 = buf.slice(1);

let values: [u8; 12] = [0; 12];
Expand Down
76 changes: 54 additions & 22 deletions rust/arrow/src/buffer.rs
Expand Up @@ -97,27 +97,52 @@ impl Debug for BufferData {
}

impl Buffer {
/// Creates a buffer from an existing memory region (must already be byte-aligned), and this
/// buffer will free this piece of memory when dropped.
pub fn from_raw_parts(ptr: *const u8, len: usize) -> Self {
/// Creates a buffer from an existing memory region (must already be byte-aligned), this
/// `Buffer` will free this piece of memory when dropped.
///
/// # Arguments
///
/// * `ptr` - Pointer to raw parts
/// * `len` - Length of raw parts in **bytes**
///
/// # Safety
///
/// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
/// bytes.
pub unsafe fn from_raw_parts(ptr: *const u8, len: usize) -> Self {
Buffer::build_with_arguments(ptr, len, true)
}

/// Creates a buffer from an existing memory region (must already be byte-aligned), and this
/// buffers doesn't free this piece of memory when dropped.
pub fn from_unowned(ptr: *const u8, len: usize) -> Self {
/// Creates a buffer from an existing memory region (must already be byte-aligned), this
/// `Buffer` **does not** free this piece of memory when dropped.
///
/// # Arguments
///
/// * `ptr` - Pointer to raw parts
/// * `len` - Length of raw parts in **bytes**
///
/// # Safety
///
/// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
/// bytes.
pub unsafe fn from_unowned(ptr: *const u8, len: usize) -> Self {
Buffer::build_with_arguments(ptr, len, false)
}

/// Creates a buffer from an existing memory region (must already be byte-aligned)
/// Creates a buffer from an existing memory region (must already be byte-aligned).
///
/// # Arguments
///
/// * `ptr` - Pointer to raw parts.
/// * `ptr` - Pointer to raw parts
/// * `len` - Length of raw parts in bytes
/// * `owned` - Whether the raw parts is owned by this buffer. If true, this buffer will free
/// this memory when dropped, otherwise it will skip freeing the raw parts.
fn build_with_arguments(ptr: *const u8, len: usize, owned: bool) -> Self {
/// * `owned` - Whether the raw parts is owned by this `Buffer`. If true, this `Buffer` will
/// free this memory when dropped, otherwise it will skip freeing the raw parts.
///
/// # Safety
///
/// This function is unsafe as there is no guarantee that the given pointer is valid for `len`
/// bytes.
unsafe fn build_with_arguments(ptr: *const u8, len: usize, owned: bool) -> Self {
assert!(
memory::is_aligned(ptr, memory::ALIGNMENT),
"memory not aligned"
Expand Down Expand Up @@ -165,20 +190,27 @@ impl Buffer {
}

/// View buffer as typed slice.
pub fn typed_data<T: ArrowNativeType + num::Num>(&self) -> &[T] {
///
/// # Safety
///
/// `ArrowNativeType` is public so that it can be used as a trait bound for other public
/// components, such as the `ToByteSlice` trait. However, this means that it can be
/// implemented by user defined types, which it is not intended for.
///
/// Also `typed_data::<bool>` is unsafe as `0x00` and `0x01` are the only valid values for
/// `bool` in Rust. However, `bool` arrays in Arrow are bit-packed which breaks this condition.
pub unsafe fn typed_data<T: ArrowNativeType + num::Num>(&self) -> &[T] {
assert_eq!(self.len() % mem::size_of::<T>(), 0);
assert!(memory::is_ptr_aligned::<T>(self.raw_data() as *const T));
unsafe {
from_raw_parts(
mem::transmute::<*const u8, *const T>(self.raw_data()),
self.len() / mem::size_of::<T>(),
)
}
from_raw_parts(
mem::transmute::<*const u8, *const T>(self.raw_data()),
self.len() / mem::size_of::<T>(),
)
}

/// Returns an empty buffer.
pub fn empty() -> Self {
Self::from_raw_parts(::std::ptr::null(), 0)
unsafe { Self::from_raw_parts(::std::ptr::null(), 0) }
}
}

Expand All @@ -202,8 +234,8 @@ impl<T: AsRef<[u8]>> From<T> for Buffer {
let buffer = memory::allocate_aligned(capacity);
unsafe {
memory::memcpy(buffer, slice.as_ptr(), len);
Buffer::from_raw_parts(buffer, len)
}
Buffer::from_raw_parts(buffer, len)
}
}

Expand Down Expand Up @@ -552,7 +584,7 @@ mod tests {

#[test]
fn test_from_raw_parts() {
let buf = Buffer::from_raw_parts(null_mut(), 0);
let buf = unsafe { Buffer::from_raw_parts(null_mut(), 0) };
assert_eq!(0, buf.len());
assert_eq!(0, buf.data().len());
assert!(buf.raw_data().is_null());
Expand Down Expand Up @@ -772,7 +804,7 @@ mod tests {
macro_rules! check_as_typed_data {
($input: expr, $native_t: ty) => {{
let buffer = Buffer::from($input.to_byte_slice());
let slice: &[$native_t] = buffer.typed_data::<$native_t>();
let slice: &[$native_t] = unsafe { buffer.typed_data::<$native_t>() };
assert_eq!($input, slice);
}};
}
Expand Down
16 changes: 12 additions & 4 deletions rust/parquet/src/arrow/array_reader.rs
Expand Up @@ -232,11 +232,15 @@ impl<T: DataType> ArrayReader for PrimitiveArrayReader<T> {
}

fn get_def_levels(&self) -> Option<&[i16]> {
self.def_levels_buffer.as_ref().map(|buf| buf.typed_data())
self.def_levels_buffer
.as_ref()
.map(|buf| unsafe { buf.typed_data() })
}

fn get_rep_levels(&self) -> Option<&[i16]> {
self.rep_levels_buffer.as_ref().map(|buf| buf.typed_data())
self.rep_levels_buffer
.as_ref()
.map(|buf| unsafe { buf.typed_data() })
}
}

Expand Down Expand Up @@ -579,11 +583,15 @@ impl ArrayReader for StructArrayReader {
}

fn get_def_levels(&self) -> Option<&[i16]> {
self.def_level_buffer.as_ref().map(|buf| buf.typed_data())
self.def_level_buffer
.as_ref()
.map(|buf| unsafe { buf.typed_data() })
}

fn get_rep_levels(&self) -> Option<&[i16]> {
self.rep_level_buffer.as_ref().map(|buf| buf.typed_data())
self.rep_level_buffer
.as_ref()
.map(|buf| unsafe { buf.typed_data() })
}
}

Expand Down

0 comments on commit e6eb6bd

Please sign in to comment.