forked from wireservice/csvkit
-
Notifications
You must be signed in to change notification settings - Fork 1
/
xlsx.py
76 lines (54 loc) · 1.87 KB
/
xlsx.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python
from cStringIO import StringIO
import datetime
from openpyxl.reader.excel import load_workbook
from csvkit import CSVKitWriter
from csvkit.typeinference import NULL_TIME
def normalize_datetime(dt):
if dt.microsecond == 0:
return dt
ms = dt.microsecond
if ms < 1000:
return dt.replace(microsecond=0)
elif ms > 999000:
return dt.replace(microsecond=0) + datetime.timedelta(seconds=1)
return dt
def xlsx2csv(f, output=None, **kwargs):
"""
Convert an Excel .xlsx file to csv.
Note: Unlike other convertor's, this one allows output columns to contain mixed data types.
Blank headers are also possible.
"""
streaming = True if output else False
if not streaming:
output = StringIO()
writer = CSVKitWriter(output)
book = load_workbook(f, use_iterators=True, data_only=True)
if 'sheet' in kwargs:
sheet = book.get_sheet_by_name(kwargs['sheet'])
else:
sheet = book.get_active_sheet()
for i, row in enumerate(sheet.iter_rows()):
if i == 0:
writer.writerow([c.internal_value for c in row])
continue
out_row = []
for c in row:
value = c.internal_value
if value.__class__ is datetime.datetime:
if value.time() != NULL_TIME:
value = normalize_datetime(value)
else:
value = value.date()
elif value.__class__ is float:
if value % 1 == 0:
value = int(value)
if value.__class__ in (datetime.datetime, datetime.date, datetime.time):
value = value.isoformat()
out_row.append(value)
writer.writerow(out_row)
if not streaming:
data = output.getvalue()
return data
# Return empty string when streaming
return ''