From 7cd31a23e3e677427c8d5146f60165b138192453 Mon Sep 17 00:00:00 2001 From: James McKinney Date: Wed, 5 Sep 2012 12:13:49 -0400 Subject: [PATCH 1/5] add global --no-header-row flag --- csvkit/cli.py | 3 +++ docs/scripts/common_arguments.rst | 1 + docs/scripts/csvjson.rst | 2 +- docs/scripts/csvpy.rst | 2 +- docs/scripts/csvsql.rst | 2 +- 5 files changed, 7 insertions(+), 3 deletions(-) diff --git a/csvkit/cli.py b/csvkit/cli.py index 159d5a67d..d5fefcc59 100644 --- a/csvkit/cli.py +++ b/csvkit/cli.py @@ -177,6 +177,9 @@ def _init_common_parser(self): if 'e' not in self.override_flags: self.argparser.add_argument('-e', '--encoding', dest='encoding', default='utf-8', help='Specify the encoding the input CSV file.') + if 'H' not in self.override_flags: + self.argparser.add_argument('-H', '--no-header-row', dest='no_header_row', action='store_true', + help='Specifies that the input CSV file has no header row.') if 'v' not in self.override_flags: self.argparser.add_argument('-v', '--verbose', dest='verbose', action='store_true', help='Print detailed tracebacks when errors occur.') diff --git a/docs/scripts/common_arguments.rst b/docs/scripts/common_arguments.rst index fad49d281..2d91dd5e9 100644 --- a/docs/scripts/common_arguments.rst +++ b/docs/scripts/common_arguments.rst @@ -26,6 +26,7 @@ All utilities which accept CSV as input share a set of common command-line argum -z MAXFIELDSIZE, --maxfieldsize MAXFIELDSIZE Maximum length of a single field in the input CSV file. + -H, --no-header-row Specifies that the input CSV file has no header row. -e ENCODING, --encoding ENCODING -v, --verbose Print detailed tracebacks when errors occur. Specify the encoding the input file. diff --git a/docs/scripts/csvjson.rst b/docs/scripts/csvjson.rst index 3683461a7..3f07e6aad 100644 --- a/docs/scripts/csvjson.rst +++ b/docs/scripts/csvjson.rst @@ -8,7 +8,7 @@ Description Converts a CSV file into JSON or GeoJSON (depending on flags):: usage: csvjson [-h] [-d DELIMITER] [-t] [-q QUOTECHAR] [-u {0,1,2,3}] [-b] - [-p ESCAPECHAR] [-z MAXFIELDSIZE] [-e ENCODING] [-v] [-l] + [-p ESCAPECHAR] [-z MAXFIELDSIZE] [-e ENCODING] [-H] [-v] [-l] [--zero] [-i INDENT] [-k KEY] [--lat LAT] [--lon LON] [--crs CRS] [FILE] diff --git a/docs/scripts/csvpy.rst b/docs/scripts/csvpy.rst index 1da320a96..f994e5964 100644 --- a/docs/scripts/csvpy.rst +++ b/docs/scripts/csvpy.rst @@ -8,7 +8,7 @@ Description Loads a CSV file into a :class:`csvkit.CSVKitReader` object and then drops into a Python shell so the user can inspect the data however they see fit:: usage: csvpy [-h] [-d DELIMITER] [-t] [-q QUOTECHAR] [-u {0,1,2,3}] [-b] - [-p ESCAPECHAR] [-z MAXFIELDSIZE] [-e ENCODING] [-v] + [-p ESCAPECHAR] [-z MAXFIELDSIZE] [-e ENCODING] [-H] [-v] FILE Load a CSV file into a CSVKitReader object and then drops into a Python shell. diff --git a/docs/scripts/csvsql.rst b/docs/scripts/csvsql.rst index f90064a6f..9f071aab0 100644 --- a/docs/scripts/csvsql.rst +++ b/docs/scripts/csvsql.rst @@ -8,7 +8,7 @@ Description Generate SQL statements for a CSV file or create execute those statements directly on a database. In the latter case supports both creating tables and inserting data.:: usage: csvsql [-h] [-d DELIMITER] [-t] [-q QUOTECHAR] [-u {0,1,2,3}] [-b] - [-p ESCAPECHAR] [-z MAXFIELDSIZE] [-e ENCODING] [-v] + [-p ESCAPECHAR] [-z MAXFIELDSIZE] [-e ENCODING] [-H] [-v] [-y SNIFFLIMIT] [-i {access,sybase,sqlite,informix,firebird,mysql,oracle,maxdb,postgresql,mssql}] [--db CONNECTION_STRING] [--insert] From 9cbcd32eaa9f370669e602a1c8daa32d03fea347 Mon Sep 17 00:00:00 2001 From: James McKinney Date: Wed, 5 Sep 2012 12:13:52 -0400 Subject: [PATCH 2/5] csvstack respects --no-header-row --- csvkit/utilities/csvstack.py | 13 +++++++------ examples/no_header_row.csv | 1 + examples/no_header_row2.csv | 1 + tests/test_utilities/test_csvstack.py | 14 ++++++++++++++ 4 files changed, 23 insertions(+), 6 deletions(-) create mode 100644 examples/no_header_row.csv create mode 100644 examples/no_header_row2.csv diff --git a/csvkit/utilities/csvstack.py b/csvkit/utilities/csvstack.py index 109067d70..7fa163a80 100644 --- a/csvkit/utilities/csvstack.py +++ b/csvkit/utilities/csvstack.py @@ -38,13 +38,14 @@ def main(self): for i, f in enumerate(self.args.files): rows = CSVKitReader(f, **self.reader_kwargs) - headers = rows.next() + if not self.args.no_header_row: + headers = rows.next() - if i == 0: - if groups: - headers.insert(0, group_name) - - output.writerow(headers) + if i == 0: + if groups: + headers.insert(0, group_name) + + output.writerow(headers) for row in rows: if groups: diff --git a/examples/no_header_row.csv b/examples/no_header_row.csv new file mode 100644 index 000000000..b0246d596 --- /dev/null +++ b/examples/no_header_row.csv @@ -0,0 +1 @@ +1,2,3 diff --git a/examples/no_header_row2.csv b/examples/no_header_row2.csv new file mode 100644 index 000000000..6400ac846 --- /dev/null +++ b/examples/no_header_row2.csv @@ -0,0 +1 @@ +4,5,6 diff --git a/tests/test_utilities/test_csvstack.py b/tests/test_utilities/test_csvstack.py index 67e30d2ac..49d51e2b5 100644 --- a/tests/test_utilities/test_csvstack.py +++ b/tests/test_utilities/test_csvstack.py @@ -55,3 +55,17 @@ def test_no_grouping(self): self.assertEqual(reader.next()[0], '1') self.assertEqual(reader.next()[0], '1') + def test_no_header_row(self): + # stack two CSV files + args = ['--no-header-row', 'examples/no_header_row.csv', 'examples/no_header_row2.csv'] + output_file = StringIO.StringIO() + utility = CSVStack(args, output_file) + + utility.main() + + # verify the stacked file's contents + input_file = StringIO.StringIO(output_file.getvalue()) + reader = CSVKitReader(input_file) + + self.assertEqual(reader.next()[0], '1') + self.assertEqual(reader.next()[0], '4') From 0805968e18453161672b985483f165ce910781a1 Mon Sep 17 00:00:00 2001 From: James McKinney Date: Wed, 5 Sep 2012 12:53:56 -0400 Subject: [PATCH 3/5] csvlook respects --no-header-row --- csvkit/utilities/csvlook.py | 2 +- examples/no_header_row3.csv | 2 ++ tests/test_utilities/test_csvlook.py | 14 ++++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 examples/no_header_row3.csv diff --git a/csvkit/utilities/csvlook.py b/csvkit/utilities/csvlook.py index 8e49a2c95..3e44b640a 100644 --- a/csvkit/utilities/csvlook.py +++ b/csvkit/utilities/csvlook.py @@ -40,7 +40,7 @@ def main(self): self.output_file.write(('| %s |\n' % ('|'.join(output))).encode('utf-8')) - if i == 0 or i == len(rows) - 1: + if (i == 0 and not self.args.no_header_row) or i == len(rows) - 1: self.output_file.write('%s\n' % divider) def launch_new_instance(): diff --git a/examples/no_header_row3.csv b/examples/no_header_row3.csv new file mode 100644 index 000000000..da813b688 --- /dev/null +++ b/examples/no_header_row3.csv @@ -0,0 +1,2 @@ +1,2,3 +4,5,6 diff --git a/tests/test_utilities/test_csvlook.py b/tests/test_utilities/test_csvlook.py index acd20ddf5..fda3ae647 100644 --- a/tests/test_utilities/test_csvlook.py +++ b/tests/test_utilities/test_csvlook.py @@ -22,3 +22,17 @@ def test_simple(self): self.assertEqual(input_file.next(), '| 1 | 4 | 5 |\n') self.assertEqual(input_file.next(), '|----+---+----|\n') + def test_no_header(self): + args = ['--no-header-row', 'examples/no_header_row3.csv'] + output_file = StringIO.StringIO() + utility = CSVLook(args, output_file) + + utility.main() + + input_file = StringIO.StringIO(output_file.getvalue()) + + self.assertEqual(input_file.next(), '|----+---+----|\n') + self.assertEqual(input_file.next(), '| 1 | 2 | 3 |\n') + self.assertEqual(input_file.next(), '| 4 | 5 | 6 |\n') + self.assertEqual(input_file.next(), '|----+---+----|\n') + From f9dd81d0e568f8f7eb1a9c72b0a265ab587e60b4 Mon Sep 17 00:00:00 2001 From: James McKinney Date: Wed, 5 Sep 2012 12:54:04 -0400 Subject: [PATCH 4/5] csvpy handles --no-header-row --- csvkit/exceptions.py | 6 ++++++ csvkit/utilities/csvpy.py | 6 +++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/csvkit/exceptions.py b/csvkit/exceptions.py index d60e4414e..3a9d6be83 100644 --- a/csvkit/exceptions.py +++ b/csvkit/exceptions.py @@ -85,3 +85,9 @@ def __init__(self, errors): msg = 'Encountered errors converting values in %i columns' % len(errors) super(InvalidValueForTypeListException, self).__init__(msg) +class RequiredHeaderError(CustomException): + """ + Exception raised when an operation requires a CSV file to have a header row. + """ + pass + diff --git a/csvkit/utilities/csvpy.py b/csvkit/utilities/csvpy.py index a840f2f4a..57b00f7fb 100644 --- a/csvkit/utilities/csvpy.py +++ b/csvkit/utilities/csvpy.py @@ -2,6 +2,7 @@ from csvkit import CSVKitReader, CSVKitDictReader from csvkit.cli import CSVFileType, CSVKitUtility +from csvkit.exceptions import RequiredHeaderError class CSVPy(CSVKitUtility): description = 'Load a CSV file into a CSVKitReader object and then drops into a Python shell.' @@ -18,7 +19,10 @@ def main(self): filename = self.args.file.name if self.args.as_dict: - reader_class = CSVKitDictReader + if self.args.no_header_row: + raise RequiredHeaderError, 'You cannot use --no-header-row with the csvpy --dict option.' + else: + reader_class = CSVKitDictReader else: reader_class = CSVKitReader From 3903c62fbc20507fd1f64d1df74f34c813c7bd18 Mon Sep 17 00:00:00 2001 From: James McKinney Date: Wed, 5 Sep 2012 13:09:48 -0400 Subject: [PATCH 5/5] Cannot set both --names and --no-header-row on csvgrep, csvsort, csvcut --- csvkit/cli.py | 5 ++++- tests/test_utilities/test_csvcut.py | 14 ++++++++++++++ tests/test_utilities/test_csvgrep.py | 14 ++++++++++++++ tests/test_utilities/test_csvsort.py | 14 ++++++++++++++ 4 files changed, 46 insertions(+), 1 deletion(-) diff --git a/csvkit/cli.py b/csvkit/cli.py index d5fefcc59..11bb4d6ab 100644 --- a/csvkit/cli.py +++ b/csvkit/cli.py @@ -7,7 +7,7 @@ import sys from csvkit import CSVKitReader -from csvkit.exceptions import ColumnIdentifierError +from csvkit.exceptions import ColumnIdentifierError, RequiredHeaderError def lazy_opener(fn): def wrapped(self, *args, **kwargs): @@ -258,6 +258,9 @@ def print_column_names(self): """ Pretty-prints the names and indices of all columns to a file-like object (usually sys.stdout). """ + if self.args.no_header_row: + raise RequiredHeaderError, 'You cannot use --no-header-row with the -n or --names options.' + f = self.args.file output = self.output_file try: diff --git a/tests/test_utilities/test_csvcut.py b/tests/test_utilities/test_csvcut.py index e4ec12e89..23247e077 100644 --- a/tests/test_utilities/test_csvcut.py +++ b/tests/test_utilities/test_csvcut.py @@ -5,6 +5,7 @@ from csvkit import CSVKitReader from csvkit.utilities.csvcut import CSVCut +from csvkit.exceptions import ColumnIdentifierError, RequiredHeaderError class TestCSVCut(unittest.TestCase): def test_simple(self): @@ -85,3 +86,16 @@ def test_include_and_exclude(self): self.assertEqual(reader.next(), ['a']) self.assertEqual(reader.next(), ['1']) + def test_invalid_column(self): + args = ['-c', '0', 'examples/dummy.csv'] + output_file = StringIO.StringIO() + utility = CSVCut(args, output_file) + + self.assertRaises(ColumnIdentifierError, utility.main) + + def test_invalid_options(self): + args = ['-n', '--no-header-row', 'examples/dummy.csv'] + output_file = StringIO.StringIO() + utility = CSVCut(args, output_file) + + self.assertRaises(RequiredHeaderError, utility.main) diff --git a/tests/test_utilities/test_csvgrep.py b/tests/test_utilities/test_csvgrep.py index 7dff3558b..c7afeb9b8 100644 --- a/tests/test_utilities/test_csvgrep.py +++ b/tests/test_utilities/test_csvgrep.py @@ -5,6 +5,7 @@ from csvkit import CSVKitReader from csvkit.utilities.csvgrep import CSVGrep +from csvkit.exceptions import ColumnIdentifierError, RequiredHeaderError class TestCSVCut(unittest.TestCase): def test_match(self): @@ -71,3 +72,16 @@ def test_string_match(self): self.assertEqual(reader.next(), ['State Name', 'State Abbreviate', 'Code', 'Montgomery GI Bill-Active Duty', 'Montgomery GI Bill- Selective Reserve', 'Dependents\' Educational Assistance', 'Reserve Educational Assistance Program', 'Post-Vietnam Era Veteran\'s Educational Assistance Program', 'TOTAL', '']) self.assertEqual(reader.next(), ['ILLINOIS', 'IL', '17', '15,659', '2,491', '2,025', '1,770', '19', '21,964', '']) + def test_invalid_column(self): + args = ['-c', '0', '-m', '1', 'examples/dummy.csv'] + output_file = StringIO.StringIO() + utility = CSVGrep(args, output_file) + + self.assertRaises(ColumnIdentifierError, utility.main) + + def test_invalid_options(self): + args = ['-n', '--no-header-row', 'examples/dummy.csv'] + output_file = StringIO.StringIO() + utility = CSVGrep(args, output_file) + + self.assertRaises(RequiredHeaderError, utility.main) diff --git a/tests/test_utilities/test_csvsort.py b/tests/test_utilities/test_csvsort.py index 74ad43377..daa86d26f 100644 --- a/tests/test_utilities/test_csvsort.py +++ b/tests/test_utilities/test_csvsort.py @@ -6,6 +6,7 @@ from csvkit import CSVKitReader from csvkit.utilities.csvsort import CSVSort +from csvkit.exceptions import ColumnIdentifierError, RequiredHeaderError class TestCSVSort(unittest.TestCase): def test_sort_string_reverse(self): @@ -38,3 +39,16 @@ def test_sort_date(self): self.assertEqual(test_order, new_order) + def test_invalid_column(self): + args = ['-c', '0', 'examples/dummy.csv'] + output_file = StringIO.StringIO() + utility = CSVSort(args, output_file) + + self.assertRaises(ColumnIdentifierError, utility.main) + + def test_invalid_options(self): + args = ['-n', '--no-header-row', 'examples/dummy.csv'] + output_file = StringIO.StringIO() + utility = CSVSort(args, output_file) + + self.assertRaises(RequiredHeaderError, utility.main)