From 94fe016af5de016ca4d0fc7882df940e7dd49d9b Mon Sep 17 00:00:00 2001
From: Wes McKinney
Date: Fri, 1 Apr 2016 13:33:08 -0700
Subject: [PATCH] Fix BufferBuilder edge case where all inputs were length-0

---
 cpp/src/feather/buffer.h            | 13 ++++++++++---
 cpp/src/feather/tests/io-test.cc    | 13 +++++++++++++
 python/feather/tests/test_reader.py |  4 ++++
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/cpp/src/feather/buffer.h b/cpp/src/feather/buffer.h
index 783121a..76af78a 100644
--- a/cpp/src/feather/buffer.h
+++ b/cpp/src/feather/buffer.h
@@ -129,13 +129,20 @@ class BufferBuilder {
       RETURN_NOT_OK(buffer_->Resize(capacity_));
       data_ = buffer_->mutable_data();
     }
-    memcpy(data_ + size_, data, length);
-    size_ += length;
+    if (length > 0) {
+      memcpy(data_ + size_, data, length);
+      size_ += length;
+    }
     return Status::OK();
   }
 
   std::shared_ptr<Buffer> Finish() {
-    auto result = buffer_;
+    std::shared_ptr<Buffer> result;
+    if (data_ == nullptr) {
+      result = std::make_shared<Buffer>(nullptr, 0);
+    } else {
+      result = buffer_;
+    }
     buffer_ = nullptr;
     return result;
   }
diff --git a/cpp/src/feather/tests/io-test.cc b/cpp/src/feather/tests/io-test.cc
index b119a56..2ad747e 100644
--- a/cpp/src/feather/tests/io-test.cc
+++ b/cpp/src/feather/tests/io-test.cc
@@ -73,4 +73,17 @@ TEST(FileOutputStream, NonExistentDirectory) {
   ASSERT_TRUE(s.IsIOError());
 }
 
+TEST(BufferBuilder, EmptyStrings) {
+  BufferBuilder builder;
+
+  builder.Append(nullptr, 0);
+  builder.Append(nullptr, 0);
+  builder.Append(nullptr, 0);
+
+  std::shared_ptr<Buffer> result = builder.Finish();
+
+  ASSERT_EQ(nullptr, result->data());
+  ASSERT_EQ(0, result->size());
+}
+
 } // namespace feather
diff --git a/python/feather/tests/test_reader.py b/python/feather/tests/test_reader.py
index 0d0eb11..7ec585d 100644
--- a/python/feather/tests/test_reader.py
+++ b/python/feather/tests/test_reader.py
@@ -204,6 +204,10 @@ def test_strings(self):
         expected = pd.DataFrame({'strings': values * repeats})
         self._check_pandas_roundtrip(df, expected)
 
+    def test_empty_strings(self):
+        df = pd.DataFrame({'strings': [''] * 10})
+        self._check_pandas_roundtrip(df)
+
     def test_nan_as_null(self):
         # Create a nan that is not numpy.nan
         values = np.array(['foo', np.nan, np.nan * 2, 'bar'] * 10)