forked from esitarski/CrossMgr
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Excel.py
115 lines (101 loc) · 3.48 KB
/
Excel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# -*- coding: utf-8 -*-
from __future__ import print_function
import xlrd
import xml.etree.ElementTree
import os
import math
import itertools
import unicodedata
from mmap import mmap, ACCESS_READ
def toAscii( s ):
if not s:
return ''
ret = unicodedata.normalize('NFKD', s).encode('ascii','ignore') if type(s) == unicode else str(s)
if ret.endswith( '.0' ):
ret = ret[:-2]
return ret
#----------------------------------------------------------------------------
class ReadExcelXls( object ):
def __init__(self, filename):
if not os.path.isfile(filename):
raise ValueError, "%s is not a valid filename" % filename
with open(filename,'rb') as f:
self.book = xlrd.open_workbook(
filename=filename,
file_contents=mmap(f.fileno(),0,access=ACCESS_READ),
)
# self.book = xlrd.open_workbook(filename)
def is_nonempty_row(self, sheet, i):
values = sheet.row_values(i)
if isinstance(values[0], basestring) and values[0].startswith('#'):
return False # ignorable comment row
return any( bool(v) for v in values )
def sheet_names( self ):
return self.book.sheet_names()
def _parse_row(self, sheet, row_index, date_as_tuple):
""" Sanitize incoming excel data """
# Data Type Codes:
# EMPTY 0
# TEXT 1 a Unicode string
# NUMBER 2 float
# DATE 3 float
# BOOLEAN 4 int; 1 means TRUE, 0 means FALSE
# ERROR 5
values = []
for type, value in itertools.izip(sheet.row_types(row_index), sheet.row_values(row_index)):
if type == 2:
if value == int(value):
value = int(value)
elif type == 3:
if isinstance(value, float) and value < 1.0:
t = value * (24.0*60.0*60.0)
if int(t + 0.000001) == int(t+1.0):
secs = int(t + 0.000001)
fract = 0.0
else:
fract, secs = math.modf( t )
if fract < 0.000000001:
fract = 0.0
secs = int(secs)
if fract:
value = '%02d:%02d:%02d.%s' % ( secs // (60*60), (secs // 60) % 60, secs % 60, ('%.20f'%fract)[2:])
else:
value = '%02d:%02d:%02d' % (secs // (60*60), (secs // 60) % 60, secs % 60)
else:
try:
datetuple = xlrd.xldate_as_tuple(value, self.book.datemode)
validDate = True
except:
value = 'UnreadableDate'
validDate = False
if validDate:
if date_as_tuple:
value = datetuple
else:
# time only - no date component
if datetuple[0] == 0 and datetuple[1] == 0 and datetuple[2] == 0:
value = "%02d:%02d:%02d" % datetuple[3:]
# date only, no time
elif datetuple[3] == 0 and datetuple[4] == 0 and datetuple[5] == 0:
value = "%04d/%02d/%02d" % datetuple[:3]
else: # full date
value = "%04d/%02d/%02d %02d:%02d:%02d" % datetuple
elif type == 5:
value = xlrd.error_text_from_code[value]
values.append(value)
return values
def iter_list(self, sname, date_as_tuple=False):
sheet = self.book.sheet_by_name(sname) # XLRDError
for i in range(sheet.nrows):
yield self._parse_row(sheet, i, date_as_tuple)
#----------------------------------------------------------------------------
ReadExcelXlsx = ReadExcelXls
#----------------------------------------------------------------------------
def GetExcelReader( filename ):
if filename.endswith( '.xls' ):
return ReadExcelXls( filename )
elif filename.endswith( '.xlsx' ) or filename.endswith( '.xlsm' ):
return ReadExcelXlsx( filename )
else:
raise ValueError, '%s is not a recognized Excel format' % filename
#----------------------------------------------------------------------------