Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge branch 'master' of https://github.com/ryanpitts/csvkit
  • Loading branch information
onyxfish committed Sep 23, 2011
2 parents ddacca6 + 4f26a3f commit 12094b9
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 9 deletions.
21 changes: 16 additions & 5 deletions csvkit/table.py
Expand Up @@ -6,6 +6,7 @@
from csvkit import CSVKitReader, CSVKitWriter
from csvkit import sniffer
from csvkit import typeinference
from csvkit.cli import parse_column_identifiers

class InvalidType(object):
"""
Expand Down Expand Up @@ -179,7 +180,7 @@ def row(self, i):
return row_data

@classmethod
def from_csv(cls, f, name='from_csv_table', snifflimit=None, **kwargs):
def from_csv(cls, f, name='from_csv_table', snifflimit=None, column_ids=None, **kwargs):
"""
Creates a new Table from a file-like object containing CSV data.
"""
Expand All @@ -198,20 +199,30 @@ def from_csv(cls, f, name='from_csv_table', snifflimit=None, **kwargs):
reader = CSVKitReader(f, dialect=dialect, **kwargs)

headers = reader.next()

data_columns = [[] for c in headers]

# Prepare the proper number of containers
if column_ids:
column_ids = parse_column_identifiers(column_ids, headers)
# Spin off list of chosen column names
headers_copy = list(headers)
for i, c in enumerate(column_ids):
headers[i] = headers_copy[c]
data_columns = [[] for c in column_ids]
else:
column_ids = [i for i in range(len(headers))]
data_columns = [[] for c in headers]

for row in reader:
for i, d in enumerate(row):
try:
data_columns[i].append(d.strip())
data_columns[i].append(row[column_ids[i]].strip())
except IndexError:
# Non-rectangular data is truncated
break

columns = []

for i, c in enumerate(data_columns):
for i, c in enumerate(data_columns):
columns.append(Column(i, headers[i], c))

return Table(columns, name=name)
Expand Down
6 changes: 4 additions & 2 deletions csvkit/utilities/csvstat.py
Expand Up @@ -3,7 +3,7 @@
import datetime

from csvkit import table
from csvkit.cli import CSVKitUtility
from csvkit.cli import CSVKitUtility
from heapq import nlargest
from operator import itemgetter

Expand All @@ -14,9 +14,11 @@ class CSVStat(CSVKitUtility):
def add_arguments(self):
self.argparser.add_argument('-y', '--snifflimit', dest='snifflimit', type=int,
help='Limit CSV dialect sniffing to the specified number of bytes.')
self.argparser.add_argument('-c', '--columns', dest='columns',
help='A comma separated list of column indices or names to be examined. Defaults to all columns.')

def main(self):
tab = table.Table.from_csv(self.args.file, snifflimit=self.args.snifflimit, **self.reader_kwargs)
tab = table.Table.from_csv(self.args.file, snifflimit=self.args.snifflimit, column_ids=self.args.columns, **self.reader_kwargs)

null_excluder = lambda i: i is not None

Expand Down
3 changes: 1 addition & 2 deletions docs/tutorial/examining_the_data.rst
Expand Up @@ -78,7 +78,7 @@ Searching for rows with csvgrep

After reviewing the summary statistics, you might wonder where your home state falls in the order. To get a quick answer, we can use :doc:`/scripts/csvgrep` to search for the state's name among the rows. Let's also use ``csvcut`` to look at just the columns we care about::

$ csvcut -c 1,"TOTAL" 2009.csv | csvgrep -c 1 ILLINOIS
$ csvcut -c 1,"TOTAL" 2009.csv | csvgrep -c 1 -m ILLINOIS
State Name,TOTAL
ILLINOIS,"21,964"

Expand Down Expand Up @@ -117,7 +117,6 @@ Now we can use :doc:`/scripts/csvsort` to sort the rows by the first column::
40402,TEXAS
36394,FLORIDA
33986,ARIZONA
21964,ILLINOIS

The ``-r`` flag tells ``csvsort`` to sort in descending order.

Expand Down

0 comments on commit 12094b9

Please sign in to comment.