From 4457f6d276779fa1a39f662730c91cdcf5091419 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Tue, 17 Oct 2023 17:22:30 -0400 Subject: [PATCH] fix: No longer error if a NUL byte occurs in an input file, closes #927 This affects the performance of all reads of input files, but the penalty doesn't seem consequential --- CHANGELOG.rst | 1 + csvkit/cli.py | 11 +++++------ examples/null_byte.csv | Bin 0 -> 12 bytes tests/test_utilities/test_csvcut.py | 4 ++++ 4 files changed, 10 insertions(+), 6 deletions(-) create mode 100644 examples/null_byte.csv diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a4d529951..7c0943a03 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,6 +9,7 @@ Unreleased * :doc:`/scripts/csvstat` adds a :code:`--non-nulls` option to only output counts of non-null values. * :doc:`/scripts/csvstat` adds a :code:`--max-precision` option to only output the most decimal places. * feat: Add a :code:`--null-value` option to commands with the :code:`--blanks` option, to convert additional values to NULL. +* fix: No longer errors if a NUL byte occurs in an input file. * Add Python 3.12 support. 1.2.0 - October 4, 2023 diff --git a/csvkit/cli.py b/csvkit/cli.py index 2cf1a5ceb..e62a066a9 100644 --- a/csvkit/cli.py +++ b/csvkit/cli.py @@ -35,10 +35,7 @@ def __init__(self, init, *args, **kwargs): self._lazy_kwargs = kwargs def __getattr__(self, name): - if not self._is_lazy_opened: - self.f = self.init(*self._lazy_args, **self._lazy_kwargs) - self._is_lazy_opened = True - + self._open() return getattr(self.f, name) def __iter__(self): @@ -51,12 +48,14 @@ def close(self): self._is_lazy_opened = False def __next__(self): + self._open() + return next(self.f).replace('\0', '') + + def _open(self): if not self._is_lazy_opened: self.f = self.init(*self._lazy_args, **self._lazy_kwargs) self._is_lazy_opened = True - return next(self.f) - class CSVKitUtility: description = '' diff --git a/examples/null_byte.csv b/examples/null_byte.csv new file mode 100644 index 0000000000000000000000000000000000000000..c6f12e71d5c45b540a0ceefe05a2cc5151de3a21 GIT binary patch literal 12 TcmYe)NzzH?V$d