diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index a4d529951..7c0943a03 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -9,6 +9,7 @@ Unreleased
 * :doc:`/scripts/csvstat` adds a :code:`--non-nulls` option to only output counts of non-null values.
 * :doc:`/scripts/csvstat` adds a :code:`--max-precision` option to only output the most decimal places.
 * feat: Add a :code:`--null-value` option to commands with the :code:`--blanks` option, to convert additional values to NULL.
+* fix: No longer errors if a NUL byte occurs in an input file.
 * Add Python 3.12 support.
 
 1.2.0 - October 4, 2023
diff --git a/csvkit/cli.py b/csvkit/cli.py
index 2cf1a5ceb..e62a066a9 100644
--- a/csvkit/cli.py
+++ b/csvkit/cli.py
@@ -35,10 +35,7 @@ def __init__(self, init, *args, **kwargs):
         self._lazy_kwargs = kwargs
 
     def __getattr__(self, name):
-        if not self._is_lazy_opened:
-            self.f = self.init(*self._lazy_args, **self._lazy_kwargs)
-            self._is_lazy_opened = True
-
+        self._open()
         return getattr(self.f, name)
 
     def __iter__(self):
@@ -51,12 +48,21 @@ def close(self):
         self._is_lazy_opened = False
 
     def __next__(self):
+        """Return the next line of the wrapped file with NUL characters removed."""
+        self._open()
+        line = next(self.f)
+        # A file opened in binary mode yields bytes, and bytes.replace() rejects
+        # str arguments, so strip NULs with arguments of the line's own type.
+        if isinstance(line, bytes):
+            return line.replace(b'\0', b'')
+        return line.replace('\0', '')
+
+    def _open(self):
+        """Open the underlying file on first use; later calls do nothing."""
         if not self._is_lazy_opened:
             self.f = self.init(*self._lazy_args, **self._lazy_kwargs)
             self._is_lazy_opened = True
 
-        return next(self.f)
-
 
 class CSVKitUtility:
     description = ''
diff --git a/examples/null_byte.csv b/examples/null_byte.csv
new file mode 100644
index 000000000..c6f12e71d
Binary files /dev/null and b/examples/null_byte.csv differ
diff --git a/tests/test_utilities/test_csvcut.py b/tests/test_utilities/test_csvcut.py
index c78e98248..92a2dcf9d 100644
--- a/tests/test_utilities/test_csvcut.py
+++ b/tests/test_utilities/test_csvcut.py
@@ -90,3 +90,7 @@ def test_names_with_skip_lines(self):
             ' 2: b',
             ' 3: c',
         ])
+
+    def test_null_byte(self):
+        # Test that csvcut doesn't error on a null byte.
+        self.get_output(['-C', '', 'examples/null_byte.csv'])