forked from wireservice/csvkit
-
Notifications
You must be signed in to change notification settings - Fork 1
/
csvcut.py
52 lines (37 loc) · 1.9 KB
/
csvcut.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env python
"""
csvcut is originally the work of eminent hackers Joe Germuska and Aaron Bycoffe.
This code is forked from:
https://gist.github.com/561347/9846ebf8d0a69b06681da9255ffe3d3f59ec2c97
Used and modified with permission.
"""
from csvkit import CSVKitReader, CSVKitWriter
from csvkit.cli import CSVKitUtility, parse_column_identifiers
class CSVCut(CSVKitUtility):
    """Command-line utility that copies a chosen subset of CSV columns.

    Options are registered in ``add_arguments`` and the work is done in
    ``main``.
    """
    description = 'Filter and truncate CSV files. Like unix "cut" command, but for tabular data.'

    def add_arguments(self):
        """Register the csvcut-specific options on ``self.argparser``."""
        self.argparser.add_argument('-n', '--names', dest='names_only', action='store_true',
            help='Display column names and indices from the input CSV and exit.')
        self.argparser.add_argument('-c', '--columns', dest='columns',
            help='A comma separated list of column indices or names to be extracted. Defaults to all columns.')
        self.argparser.add_argument('-x', '--delete-empty-rows', dest='delete_empty', action='store_true',
            help='After cutting, delete rows which are completely empty.')

    def main(self):
        """Write the selected columns of the input CSV to the output.

        When ``--names`` is given, only ``print_column_names()`` is called.
        Otherwise the first input row is used as the header, the requested
        columns are resolved with ``parse_column_identifiers``, and every
        following row is written with just those columns.  Rows shorter than
        a requested index get ``None`` in that position.  With
        ``--delete-empty-rows``, rows whose selected cells are all empty
        are skipped.
        """
        if self.args.names_only:
            self.print_column_names()
            return

        rows = CSVKitReader(self.args.file, **self.reader_kwargs)

        # Builtin next() instead of the Python 2-only ``rows.next()`` method;
        # it behaves the same on Python 2.6+ and also works on Python 3.
        try:
            column_names = next(rows)
        except StopIteration:
            # Completely empty input: there is no header row to cut, so
            # write nothing instead of failing with an uncaught StopIteration.
            return

        column_ids = parse_column_identifiers(self.args.columns, column_names, self.args.zero_based)

        output = CSVKitWriter(self.output_file, **self.writer_kwargs)
        output.writerow([column_names[c] for c in column_ids])

        for i, row in enumerate(rows):
            self.input_line_number = i + 1

            # Pad with None when a ragged row is shorter than a requested index.
            out_row = [row[c] if c < len(row) else None for c in column_ids]

            # any() treats both '' and the None padding as empty.  The former
            # ''.join(out_row) raised TypeError as soon as a short row
            # introduced a None.
            if self.args.delete_empty and not any(out_row):
                continue

            output.writerow(out_row)
if __name__ == "__main__":
    # Executed as a script: build the utility and hand control to main().
    CSVCut().main()