in2csv: Add a --use-sheet-names option, closes #987

wireservice · Oct 18, 2023 · ca10dc6 · ca10dc6
1 parent 5691b98
commit ca10dc6
Show file tree

Hide file tree

Showing 5 changed files with 58 additions and 7 deletions.
diff --git a/AUTHORS.rst b/AUTHORS.rst
@@ -108,3 +108,4 @@ The following individuals have contributed code to csvkit:
 * Werner Robitza
 * Mark Mayo
 * Kitagawa Yasutaka
+* rachekalmir
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -11,6 +11,7 @@ Unreleased
 * :doc:`/scripts/csvstat` reports a "Most decimal places" statistic (or a :code:`maxprecision` column when :code:`--csv` is set).
 * :doc:`/scripts/csvstat` adds a :code:`--non-nulls` option to only output counts of non-null values.
 * :doc:`/scripts/csvstat` adds a :code:`--max-precision` option to only output the most decimal places.
+* :doc:`/scripts/in2csv` adds a :code:`--use-sheet-names` option to use the sheet names as file names when :code:`--write-sheets` is set.
 * feat: Add a :code:`--null-value` option to commands with the :code:`--blanks` option, to convert additional values to NULL.
 * fix: Reconfigure the encoding of standard input according to the :code:`--encoding` option, which defaults to ``utf-8-sig``. Affected users no longer need to set the ``PYTHONIOENCODING`` environment variable.
 * fix: Prompt the user if additional input is expected (i.e. if no input file or piped data is provided) in :doc:`/scripts/csvjoin`, :doc:`/scripts/csvsql` and :doc:`/scripts/csvstack`.

diff --git a/csvkit/utilities/in2csv.py b/csvkit/utilities/in2csv.py
@@ -46,6 +46,9 @@ def add_arguments(self):
         self.argparser.add_argument(
             '--write-sheets', dest='write_sheets',
             help='The names of the Excel sheets to write to files, or "-" to write all sheets.')
+        self.argparser.add_argument(
+            '--use-sheet-names', dest='use_sheet_names', action='store_true',
+            help='Use the sheet names as file names when --write-sheets is set.')
         self.argparser.add_argument(
             '--encoding-xls', dest='encoding_xls',
             help='Specify the encoding of the input XLS file.')
@@ -177,8 +180,12 @@ def main(self):
                 tables = agate.Table.from_xlsx(self.input_file, sheet=sheets, **kwargs)
 
             base = splitext(self.input_file.name)[0]
-            for i, table in enumerate(tables.values()):
-                with open('%s_%d.csv' % (base, i), 'w') as f:
+            for i, (sheet_name, table) in enumerate(tables.items()):
+                if self.args.use_sheet_names:
+                    filename = '%s_%s.csv' % (base, sheet_name)
+                else:
+                    filename = '%s_%d.csv' % (base, i)
+                with open(filename, 'w') as f:
                     table.to_csv(f, **self.writer_kwargs)
 
         self.input_file.close()

diff --git a/docs/scripts/in2csv.rst b/docs/scripts/in2csv.rst
@@ -22,11 +22,13 @@ The header line is required though the columns may be in any order:
 
    usage: in2csv [-h] [-d DELIMITER] [-t] [-q QUOTECHAR] [-u {0,1,2,3}] [-b]
                  [-p ESCAPECHAR] [-z FIELD_SIZE_LIMIT] [-e ENCODING] [-L LOCALE]
-                 [-S] [--blanks] [--date-format DATE_FORMAT]
-                 [--datetime-format DATETIME_FORMAT] [-H] [-K SKIP_LINES] [-v]
-                 [-l] [--zero] [-V] [-f FILETYPE] [-s SCHEMA] [-k KEY] [-n]
-                 [--sheet SHEET] [--write-sheets WRITE_SHEETS]
-                 [--encoding-xls ENCODING_XLS] [-y SNIFF_LIMIT] [-I]
+                 [-S] [--blanks] [--null-value NULL_VALUES [NULL_VALUES ...]]
+                 [--date-format DATE_FORMAT] [--datetime-format DATETIME_FORMAT]
+                 [-H] [-K SKIP_LINES] [-v] [-l] [--zero] [-V]
+                 [-f {csv,dbf,fixed,geojson,json,ndjson,xls,xlsx}] [-s SCHEMA]
+                 [-k KEY] [-n] [--sheet SHEET] [--write-sheets WRITE_SHEETS]
+                 [--use-sheet-names] [--encoding-xls ENCODING_XLS]
+                 [-y SNIFF_LIMIT] [-I]
                  [FILE]
 
    Convert common, but less awesome, tabular data formats to CSV.
@@ -50,6 +52,8 @@ The header line is required though the columns may be in any order:
      --write-sheets WRITE_SHEETS
                            The names of the Excel sheets to write to files, or
                            "-" to write all sheets.
+     --use-sheet-names     Use the sheet names as file names when --write-sheets
+                           is set.
      --encoding-xls ENCODING_XLS
                            Specify the encoding of the input XLS file.
      -y SNIFF_LIMIT, --snifflimit SNIFF_LIMIT

diff --git a/tests/test_utilities/test_in2csv.py b/tests/test_utilities/test_in2csv.py
@@ -257,3 +257,41 @@ def test_convert_xlsx_with_write_sheets(self):
                 path = 'examples/sheets_%d.csv' % suffix
                 if os.path.exists(path):
                     os.remove(path)
+
+    def test_convert_xls_with_write_sheets_with_names(self):
+        try:
+            self.assertConverted('xls', 'examples/sheets.xls', 'examples/testxls_converted.csv',
+                                 ['--sheet', 'data', '--write-sheets', "ʤ,1", '--use-sheet-names'])
+            with open('examples/sheets_ʤ.csv', 'r') as f:
+                with open('examples/testxls_unicode_converted.csv', 'r') as g:
+                    self.assertEqual(f.read(), g.read())
+            with open('examples/sheets_data.csv', 'r') as f:
+                with open('examples/testxls_converted.csv', 'r') as g:
+                    self.assertEqual(f.read(), g.read())
+            self.assertFalse(os.path.exists('examples/sheets_0.csv'))  # no index-named files
+            self.assertFalse(os.path.exists('examples/sheets_1.csv'))
+            self.assertFalse(os.path.exists('examples/sheets_2.csv'))
+        finally:
+            for suffix in ('ʤ', 'data'):
+                path = 'examples/sheets_%s.csv' % suffix
+                if os.path.exists(path):
+                    os.remove(path)
+
+    def test_convert_xlsx_with_write_sheets_with_names(self):
+        try:
+            self.assertConverted('xlsx', 'examples/sheets.xlsx', 'examples/testxlsx_noinference_converted.csv',
+                                 ['--no-inference', '--sheet', 'data', '--write-sheets', "ʤ,1", '--use-sheet-names'])
+            with open('examples/sheets_ʤ.csv', 'r') as f:
+                with open('examples/testxlsx_unicode_converted.csv', 'r') as g:
+                    self.assertEqual(f.read(), g.read())
+            with open('examples/sheets_data.csv', 'r') as f:
+                with open('examples/testxlsx_noinference_converted.csv', 'r') as g:
+                    self.assertEqual(f.read(), g.read())
+            self.assertFalse(os.path.exists('examples/sheets_0.csv'))  # no index-named files
+            self.assertFalse(os.path.exists('examples/sheets_1.csv'))
+            self.assertFalse(os.path.exists('examples/sheets_2.csv'))
+        finally:
+            for suffix in ('ʤ', 'data'):
+                path = 'examples/sheets_%s.csv' % suffix
+                if os.path.exists(path):
+                    os.remove(path)