From 8b0b256003320f0ac6df0d43f481a1b2c55c7707 Mon Sep 17 00:00:00 2001 From: Ryan Pitts Date: Mon, 12 Sep 2011 20:29:41 -0700 Subject: [PATCH 1/3] adding --columns arg to csvstat --- csvkit/table.py | 21 ++++++++++++++++----- csvkit/utilities/csvstat.py | 7 +++++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/csvkit/table.py b/csvkit/table.py index 168aebe83..777284e67 100644 --- a/csvkit/table.py +++ b/csvkit/table.py @@ -6,6 +6,7 @@ from csvkit import CSVKitReader, CSVKitWriter from csvkit import sniffer from csvkit import typeinference +from csvkit.cli import parse_column_identifiers class InvalidType(object): """ @@ -179,7 +180,7 @@ def row(self, i): return row_data @classmethod - def from_csv(cls, f, name='from_csv_table', snifflimit=None, **kwargs): + def from_csv(cls, f, name='from_csv_table', snifflimit=None, column_ids=None, **kwargs): """ Creates a new Table from a file-like object containing CSV data. """ @@ -198,20 +199,30 @@ def from_csv(cls, f, name='from_csv_table', snifflimit=None, **kwargs): reader = CSVKitReader(f, dialect=dialect, **kwargs) headers = reader.next() - - data_columns = [[] for c in headers] + + # Prepare the proper number of containers + if column_ids: + column_ids = parse_column_identifiers(column_ids, headers) + # Spin off list of chosen column names + headers_copy = list(headers) + for i, c in enumerate(column_ids): + headers[i] = headers_copy[c] + data_columns = [[] for c in column_ids] + else: + column_ids = [i for i in range(len(headers))] + data_columns = [[] for c in headers] for row in reader: for i, d in enumerate(row): try: - data_columns[i].append(d.strip()) + data_columns[i].append(row[column_ids[i]].strip()) except IndexError: # Non-rectangular data is truncated break columns = [] - for i, c in enumerate(data_columns): + for i, c in enumerate(data_columns): columns.append(Column(i, headers[i], c)) return Table(columns, name=name) diff --git a/csvkit/utilities/csvstat.py b/csvkit/utilities/csvstat.py index c0c10dddc..0c243fd56 100755 --- a/csvkit/utilities/csvstat.py +++ b/csvkit/utilities/csvstat.py @@ -4,7 +4,7 @@ import sys from csvkit import table -from csvkit.cli import CSVKitUtility +from csvkit.cli import CSVKitUtility from heapq import nlargest from operator import itemgetter @@ -15,9 +15,12 @@ class CSVStat(CSVKitUtility): def add_arguments(self): self.argparser.add_argument('-y', '--snifflimit', dest='snifflimit', type=int, help='Limit CSV dialect sniffing to the specified number of bytes.') + self.argparser.add_argument('-c', '--columns', dest='columns', + help='A comma separated list of column indices or names to be examined. Defaults to all columns.') def main(self): - tab = table.Table.from_csv(self.args.file, snifflimit=self.args.snifflimit, **self.reader_kwargs) + tab = table.Table.from_csv(self.args.file, snifflimit=self.args.snifflimit, column_ids=self.args.columns, **self.reader_kwargs) + #tab = table.Table.from_csv(self.args.file, snifflimit=self.args.snifflimit, **self.reader_kwargs) null_excluder = lambda i: i is not None From 0838de57c9f272e132ec0fb20cbd98cbab412ba7 Mon Sep 17 00:00:00 2001 From: Ryan Pitts Date: Mon, 12 Sep 2011 21:08:57 -0700 Subject: [PATCH 2/3] small doc fixes to match expected output, csvgrep syntax --- docs/tutorial/examining_the_data.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/tutorial/examining_the_data.rst b/docs/tutorial/examining_the_data.rst index e6c216652..c37f7b34c 100644 --- a/docs/tutorial/examining_the_data.rst +++ b/docs/tutorial/examining_the_data.rst @@ -78,7 +78,7 @@ Searching for rows with csvgrep After reviewing the summary statistics you might wonder where your home state falls in the order. To get a simple answer to the question we can use :doc:`/scripts/csvgrep` to search for the state's name amongst the rows. Let's also use csvcut to just look at the columns we care about:: - $ csvcut -c 1,"TOTAL" 2009.csv | csvgrep -c 1 ILLINOIS + $ csvcut -c 1,"TOTAL" 2009.csv | csvgrep -c 1 -m ILLINOIS State Name,TOTAL ILLINOIS,"21,964" @@ -117,7 +117,6 @@ Now we can use :doc:`/scripts/csvsort` to sort the rows by the first column:: 40402,TEXAS 36394,FLORIDA 33986,ARIZONA - 21964,ILLINOIS The -r tells ``csvsort`` to sort in descending order. From 4f26a3f1e3a36d077f971394ad7da05d7cde9b48 Mon Sep 17 00:00:00 2001 From: Ryan Pitts Date: Mon, 12 Sep 2011 21:11:00 -0700 Subject: [PATCH 3/3] removing commented line accidentally left in place --- csvkit/utilities/csvstat.py | 1 - 1 file changed, 1 deletion(-) diff --git a/csvkit/utilities/csvstat.py b/csvkit/utilities/csvstat.py index 0c243fd56..614d847f9 100755 --- a/csvkit/utilities/csvstat.py +++ b/csvkit/utilities/csvstat.py @@ -20,7 +20,6 @@ def add_arguments(self): def main(self): tab = table.Table.from_csv(self.args.file, snifflimit=self.args.snifflimit, column_ids=self.args.columns, **self.reader_kwargs) - #tab = table.Table.from_csv(self.args.file, snifflimit=self.args.snifflimit, **self.reader_kwargs) null_excluder = lambda i: i is not None