Skip to content

Commit

Permalink
in2csv: Add a --use-sheet-names option, closes #987
Browse files Browse the repository at this point in the history
  • Loading branch information
jpmckinney committed Oct 18, 2023
1 parent 5691b98 commit ca10dc6
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 7 deletions.
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,4 @@ The following individuals have contributed code to csvkit:
* Werner Robitza
* Mark Mayo
* Kitagawa Yasutaka
* rachekalmir
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Unreleased
* :doc:`/scripts/csvstat` reports a "Most decimal places" statistic (or a :code:`maxprecision` column when :code:`--csv` is set).
* :doc:`/scripts/csvstat` adds a :code:`--non-nulls` option to only output counts of non-null values.
* :doc:`/scripts/csvstat` adds a :code:`--max-precision` option to only output the most decimal places.
* :doc:`/scripts/in2csv` adds a :code:`--use-sheet-names` option to use the sheet names as file names when :code:`--write-sheets` is set.
* feat: Add a :code:`--null-value` option to commands with the :code:`--blanks` option, to convert additional values to NULL.
* fix: Reconfigure the encoding of standard input according to the :code:`--encoding` option, which defaults to ``utf-8-sig``. Affected users no longer need to set the ``PYTHONIOENCODING`` environment variable.
* fix: Prompt the user if additional input is expected (i.e. if no input file or piped data is provided) in :doc:`/scripts/csvjoin`, :doc:`/scripts/csvsql` and :doc:`/scripts/csvstack`.
Expand Down
11 changes: 9 additions & 2 deletions csvkit/utilities/in2csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ def add_arguments(self):
self.argparser.add_argument(
'--write-sheets', dest='write_sheets',
help='The names of the Excel sheets to write to files, or "-" to write all sheets.')
self.argparser.add_argument(
'--use-sheet-names', dest='use_sheet_names', action='store_true',
help='Use the sheet names as file names when --write-sheets is set.')
self.argparser.add_argument(
'--encoding-xls', dest='encoding_xls',
help='Specify the encoding of the input XLS file.')
Expand Down Expand Up @@ -177,8 +180,12 @@ def main(self):
tables = agate.Table.from_xlsx(self.input_file, sheet=sheets, **kwargs)

base = splitext(self.input_file.name)[0]
for i, table in enumerate(tables.values()):
with open('%s_%d.csv' % (base, i), 'w') as f:
for i, (sheet_name, table) in enumerate(tables.items()):
if self.args.use_sheet_names:
filename = '%s_%s.csv' % (base, sheet_name)
else:
filename = '%s_%d.csv' % (base, i)
with open(filename, 'w') as f:
table.to_csv(f, **self.writer_kwargs)

self.input_file.close()
Expand Down
14 changes: 9 additions & 5 deletions docs/scripts/in2csv.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@ The header line is required though the columns may be in any order:
usage: in2csv [-h] [-d DELIMITER] [-t] [-q QUOTECHAR] [-u {0,1,2,3}] [-b]
[-p ESCAPECHAR] [-z FIELD_SIZE_LIMIT] [-e ENCODING] [-L LOCALE]
[-S] [--blanks] [--date-format DATE_FORMAT]
[--datetime-format DATETIME_FORMAT] [-H] [-K SKIP_LINES] [-v]
[-l] [--zero] [-V] [-f FILETYPE] [-s SCHEMA] [-k KEY] [-n]
[--sheet SHEET] [--write-sheets WRITE_SHEETS]
[--encoding-xls ENCODING_XLS] [-y SNIFF_LIMIT] [-I]
[-S] [--blanks] [--null-value NULL_VALUES [NULL_VALUES ...]]
[--date-format DATE_FORMAT] [--datetime-format DATETIME_FORMAT]
[-H] [-K SKIP_LINES] [-v] [-l] [--zero] [-V]
[-f {csv,dbf,fixed,geojson,json,ndjson,xls,xlsx}] [-s SCHEMA]
[-k KEY] [-n] [--sheet SHEET] [--write-sheets WRITE_SHEETS]
[--use-sheet-names] [--encoding-xls ENCODING_XLS]
[-y SNIFF_LIMIT] [-I]
[FILE]
Convert common, but less awesome, tabular data formats to CSV.
Expand All @@ -50,6 +52,8 @@ The header line is required though the columns may be in any order:
--write-sheets WRITE_SHEETS
The names of the Excel sheets to write to files, or
"-" to write all sheets.
--use-sheet-names Use the sheet names as file names when --write-sheets
is set.
--encoding-xls ENCODING_XLS
Specify the encoding of the input XLS file.
-y SNIFF_LIMIT, --snifflimit SNIFF_LIMIT
Expand Down
38 changes: 38 additions & 0 deletions tests/test_utilities/test_in2csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,3 +257,41 @@ def test_convert_xlsx_with_write_sheets(self):
path = 'examples/sheets_%d.csv' % suffix
if os.path.exists(path):
os.remove(path)

def test_convert_xls_with_write_sheets_with_names(self):
    # With --use-sheet-names, each file written by --write-sheets must be
    # named after its sheet (sheets_<name>.csv) instead of its position
    # (sheets_<index>.csv).
    try:
        self.assertConverted('xls', 'examples/sheets.xls', 'examples/testxls_converted.csv',
                             ['--sheet', 'data', '--write-sheets', "ʤ,1", '--use-sheet-names'])
        with open('examples/sheets_ʤ.csv', 'r') as f:
            with open('examples/testxls_unicode_converted.csv', 'r') as g:
                self.assertEqual(f.read(), g.read())
        with open('examples/sheets_data.csv', 'r') as f:
            with open('examples/testxls_converted.csv', 'r') as g:
                self.assertEqual(f.read(), g.read())
        # No index-based file names may be produced when sheet names are used.
        self.assertFalse(os.path.exists('examples/sheets_0.csv'))
        self.assertFalse(os.path.exists('examples/sheets_1.csv'))
        self.assertFalse(os.path.exists('examples/sheets_2.csv'))
    finally:
        # Clean up the generated files even if an assertion failed.
        for suffix in ('ʤ', 'data'):
            path = 'examples/sheets_%s.csv' % suffix
            if os.path.exists(path):
                os.remove(path)

def test_convert_xlsx_with_write_sheets_with_names(self):
    # With --use-sheet-names, each file written by --write-sheets must be
    # named after its sheet (sheets_<name>.csv) instead of its position
    # (sheets_<index>.csv).
    try:
        self.assertConverted('xlsx', 'examples/sheets.xlsx', 'examples/testxlsx_noinference_converted.csv',
                             ['--no-inference', '--sheet', 'data', '--write-sheets', "ʤ,1", '--use-sheet-names'])
        with open('examples/sheets_ʤ.csv', 'r') as f:
            with open('examples/testxlsx_unicode_converted.csv', 'r') as g:
                self.assertEqual(f.read(), g.read())
        with open('examples/sheets_data.csv', 'r') as f:
            with open('examples/testxlsx_noinference_converted.csv', 'r') as g:
                self.assertEqual(f.read(), g.read())
        # No index-based file names may be produced when sheet names are used.
        self.assertFalse(os.path.exists('examples/sheets_0.csv'))
        self.assertFalse(os.path.exists('examples/sheets_1.csv'))
        self.assertFalse(os.path.exists('examples/sheets_2.csv'))
    finally:
        # Clean up the generated files even if an assertion failed.
        for suffix in ('ʤ', 'data'):
            path = 'examples/sheets_%s.csv' % suffix
            if os.path.exists(path):
                os.remove(path)

0 comments on commit ca10dc6

Please sign in to comment.