forked from wireservice/csvkit
-
Notifications
You must be signed in to change notification settings - Fork 1
/
csvstack.py
57 lines (41 loc) · 2.31 KB
/
csvstack.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/env python
import os
from csvkit import CSVKitReader, CSVKitWriter
from csvkit.cli import CSVFileType, CSVKitUtility
class CSVStack(CSVKitUtility):
description = 'Stack up the rows from multiple CSV files, optionally adding a grouping value.'
override_flags = 'f'
def add_arguments(self):
self.argparser.add_argument('files', metavar='FILES', nargs='+', type=CSVFileType())
self.argparser.add_argument('-g', '--groups', dest='groups',
help='A comma-seperated list of values to add as "grouping factors", one for each CSV being stacked. These will be added to the stacked CSV as a new column. You may specify a name for the grouping column using the -n flag.')
self.argparser.add_argument('-n', '--group-name', dest='group_name',
help='A name for the grouping column, e.g. "year". Only used when also specifying -g.')
self.argparser.add_argument('--filenames', dest='group_by_filenames', action='store_true',
help='Use the filename of each input file as its grouping value. When specified, -g will be ignored.')
def main(self):
if len(self.args.files) < 2:
self.argparser.error('You must specify at least two files to stack.')
if self.args.group_by_filenames:
groups = [os.path.split(f.name)[1] for f in self.args.files]
elif self.args.groups:
groups = self.args.groups.split(',')
if len(groups) != len(self.args.files):
self.argparser.error('The number of grouping values must be equal to the number of CSV files being stacked.')
else:
groups = None
group_name = self.args.group_name if self.args.group_name else 'group'
output = CSVKitWriter(self.output_file, **self.writer_kwargs)
for i, f in enumerate(self.args.files):
rows = CSVKitReader(f, **self.reader_kwargs)
headers = rows.next()
if i == 0:
if groups:
headers.insert(0, group_name)
output.writerow(headers)
for row in rows:
if groups:
row.insert(0, groups[i])
output.writerow(row)
if __name__ == '__main__':
CSVStack().main()