forked from ratal/mdfreader
-
Notifications
You must be signed in to change notification settings - Fork 0
/
mdfreader.py
1363 lines (1240 loc) · 59.2 KB
/
mdfreader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# -*- coding: utf-8 -*-
""" Measured Data Format file reader main module
Platform and python version
----------------------------------------
Works on Unix and Windows, with Python 2.6+ and 3.2+
:Author: `Aymeric Rateau <https://github.com/ratal/mdfreader>`__
Created on Sun Oct 10 12:57:28 2010
Dependencies
-------------------
- Python >2.6, >3.2 <http://www.python.org>
- Numpy >1.6 <http://numpy.scipy.org>
- Sympy to convert channels with formula
- bitarray for not byte aligned data parsing
- Matplotlib >1.0 <http://matplotlib.sourceforge.net>
- NetCDF
- h5py for the HDF5 export
- xlwt for the excel export (not existing for python3)
- openpyxl for the excel 2007 export
- scipy for the Matlab file conversion
- zlib to uncompress data block if needed
Attributes
--------------
PythonVersion : float
Python version currently running, needed for compatibility of both python 2.6+ and 3.2+
mdfreader module
--------------------------
"""
from __future__ import print_function
from io import open
from struct import unpack
from math import ceil
from os.path import dirname, abspath, splitext
from os import remove
from sys import version_info, stderr, path
from datetime import datetime
from argparse import ArgumentParser
from numpy import arange, linspace, interp, all, diff, mean, vstack, hstack, float64, zeros, empty, delete
from numpy import nan, datetime64, array, searchsorted, clip
_root = dirname(abspath(__file__))
path.append(_root)
from mdf3reader import mdf3
from mdf4reader import mdf4
from mdf import _open_MDF, dataField, descriptionField, unitField, masterField, masterTypeField
from mdfinfo3 import info3, _generateDummyMDF3
from mdfinfo4 import info4, _generateDummyMDF4
# Major Python version (2 or 3), used throughout for compatibility switches.
PythonVersion = version_info[0]
def _convertMatlabName(channel):
"""Removes non allowed characters for a Matlab variable name
Parameters
-----------------
channel : string
channel name
Returns
-----------
string
channel name compatible for Matlab
"""
if PythonVersion < 3:
try:
channel = channel.decode('utf-8')
except:
print('channel name can not be decoded : ' + channel, file=stderr)
channelName = channel.replace('[', '_ls_')
channelName = channelName.replace(']', '_rs_')
channelName = channelName.replace('$', '')
channelName = channelName.replace('.', 'p')
channelName = channelName.replace('\\', '_bs_')
channelName = channelName.replace('/', '_fs_')
channelName = channelName.replace('(', '_lp_')
channelName = channelName.replace(')', '_rp_')
channelName = channelName.replace(',', '_c_')
channelName = channelName.replace('@', '_am_')
channelName = channelName.replace(' ', '_')
channelName = channelName.replace(':', '_co_')
channelName = channelName.replace('-', '_hy_')
channelName = channelName.replace('-', '_hy_')
def cleanName(name):
allowedStr = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.'
buf = ''
for c in name:
if c in allowedStr:
buf += c
return buf
channelName = cleanName(channelName)
return channelName
class mdfinfo(dict):
    """Gather information from block headers in a MDF (Measure Data Format) file.

    Structure is nested dicts. Primary key is Block type, then data group,
    channel group and channel number.

    Examples of dicts:
    - mdfinfo['HDBlock'] header block
    - mdfinfo['DGBlock'][dataGroup] Data Group block
    - mdfinfo['CGBlock'][dataGroup][channelGroup] Channel Group block
    - mdfinfo['CNBlock'][dataGroup][channelGroup][channel] Channel block including text blocks for comment and identifier
    - mdfinfo['CCBlock'][dataGroup][channelGroup][channel] Channel conversion information

    Attributes
    ----------
    fileName : str
        file name
    mdfversion : int
        mdf file version number
    filterChannelNames : bool
        flag to filter long channel names including module names separated by a '.'
    fid
        file identifier
    zipfile
        flag to indicate the mdf4 is packaged in a zip

    Methods
    -------
    readinfo(fileName=None, fid=None, minimal=0)
        Reads MDF file and extracts its complete structure
    listChannels(fileName=None)
        Read MDF file blocks and returns a list of contained channels

    Examples
    --------
    >>> import mdfreader
    >>> FILENAME = 'toto.dat'
    >>> yop = mdfreader.mdfinfo(FILENAME)
    or if you are just interested to have only list of channels
    >>> yop = mdfreader.mdfinfo()  # creates new instance of mdfinfo class
    >>> yop.listChannels(FILENAME)  # returns a simple list of channel names
    """

    # NOTE: the class docstring must precede __slots__; a string literal placed
    # after the assignment (as before) is a discarded expression, not a docstring.
    __slots__ = ['fileName', 'fid', 'zipfile', 'mdfversion', 'filterChannelNames']

    def __init__(self, fileName=None, filterChannelNames=False, fid=None, minimal=0):
        """Optionally parse the given file right away.

        Parameters
        ----------
        fileName : str, optional
            file name; when given, readinfo() is called immediately
        filterChannelNames : bool, optional
            flag to filter long channel names including module names separated by a '.'
        fid : file identifier, optional
        minimal : int, optional
            0 loads every metadata, 1 loads DG, CG, CN and CC, 2 loads only DG
        """
        self.fileName = fileName
        self.filterChannelNames = filterChannelNames
        self.mdfversion = 410  # default until the ID block is actually read
        self.fid = fid
        self.zipfile = False
        if fileName is not None:
            self.readinfo(fileName, fid, minimal)

    def readinfo(self, fileName=None, fid=None, minimal=0):
        """Read MDF file and extract its complete structure.

        Parameters
        ----------
        fileName : str, optional
            file name. If not input, uses fileName attribute
        fid : file identifier, optional
        minimal : int
            0 will load every metadata
            1 will load DG, CG, CN and CC
            2 will load only DG
        """
        if self.fileName is None or fileName is not None:
            self.fileName = fileName
        # Open file (possibly uncompressing a zipped mdf4 to a temporary file)
        if self.fid is None or (self.fid is not None and self.fid.closed):
            (self.fid, self.fileName, self.zipfile) = _open_MDF(self.fileName)
        # read Identifier block: version is a little-endian uint16 at offset 28
        self.fid.seek(28)
        self.mdfversion = unpack('<H', self.fid.read(2))[0]
        # info3/info4 are imported at module top; no need to re-import here
        if self.mdfversion < 400:  # up to version 3.x not compatible with version 4.x
            self.update(info3(None, self.fid, self.filterChannelNames))
        else:  # MDF version 4.x
            self.update(info4(None, self.fid, minimal))
        if self.zipfile and fid is None:  # not from mdfreader.read()
            remove(self.fileName)  # remove the temporary uncompressed file

    def listChannels(self, fileName=None):
        """Read MDF file blocks and return a list of contained channels.

        Parameters
        ----------
        fileName : string
            file name

        Returns
        -------
        nameList : list of string
            list of channel names
        """
        if self.fileName is None or fileName is not None:
            self.fileName = fileName
        # Open file
        (self.fid, self.fileName, zipfile) = _open_MDF(self.fileName)
        # read Identifier block: version is a little-endian uint16 at offset 28
        self.fid.seek(28)
        self.mdfversion = unpack('<H', self.fid.read(2))[0]
        if self.mdfversion < 400:  # up to version 3.x not compatible with version 4.x
            nameList = info3().listChannels3(self.fileName, self.fid)
        else:  # MDF version 4.x
            nameList = info4().listChannels4(self.fileName, self.fid)
        if zipfile:  # not from mdfreader.read(); remove temporary uncompressed file
            remove(self.fileName)
        return nameList

    def _generateDummyMDF(self, channelList=None):
        """Parse MDF file structure and create a dummy mdf object structure.

        Parameters
        ----------
        channelList : list of str, optional
            list of channels
        """
        if self.mdfversion < 400:  # up to version 3.x not compatible with version 4.x
            return _generateDummyMDF3(self, channelList)
        else:  # MDF version 4.x
            return _generateDummyMDF4(self, channelList)
class mdf(mdf3, mdf4):
""" mdf class
Attributes
--------------
fileName : str
file name
MDFVersionNumber : int
mdf file version number
masterChannelList : dict
Represents data structure: a key per master channel with corresponding value containing a list of channels
One key or master channel represents then a data group having same sampling interval.
multiProc : bool
Flag to request channel conversion multi processed for performance improvement.
One thread per data group.
file_metadata : dict
file metadata with minimum keys : author, organisation, project, subject, comment, time, date
Methods
------------
read( fileName = None, multiProc = False, channelList=None, convertAfterRead=True, filterChannelNames=False, noDataLoading=False, compression=False)
reads mdf file version 3.x and 4.x
write( fileName=None )
writes simple mdf file
getChannelData( channelName )
returns channel numpy array
convertAllChannel()
converts all channel data according to CCBlock information
getChannelUnit( channelName )
returns channel unit
plot( channels )
Plot channels with Matplotlib
resample( samplingTime = 0.1, masterChannel=None )
Resamples all data groups
exportToCSV( filename = None, sampling = 0.1 )
Exports mdf data into CSV file
exportToNetCDF( filename = None, sampling = None )
Exports mdf data into netcdf file
exportToHDF5( filename = None, sampling = None )
Exports mdf class data structure into hdf5 file
exportToMatlab( filename = None )
Exports mdf class data structure into Matlab file
exportToExcel( filename = None )
Exports mdf data into excel 95 to 2003 file
exportToXlsx( filename=None )
Exports mdf data into excel 2007 and 2010 file
convertToPandas( sampling=None )
converts mdf data structure into pandas dataframe(s)
keepChannels( channelList )
keeps only list of channels and removes the other channels
mergeMdf( mdfClass ):
Merges data of 2 mdf classes
Notes
--------
mdf class is a nested dict
Channel name is the primary dict key of mdf class
At a higher level, each channel includes the following keys :
- 'data' : containing vector of data (numpy)
- 'unit' : unit (string)
- 'master' : master channel of channel (time, crank angle, etc.)
- 'description' : Description of channel
- 'conversion': mdfinfo nested dict for CCBlock.
Exist if channel not converted, used to convert with getChannelData method
Examples
--------------
>>> import mdfreader
>>> yop=mdfreader.mdf('NameOfFile')
>>> yop.keys() # list channels names
# list channels grouped by raster or master channel
>>> yop.masterChannelList
>>> yop.plot('channelName') or yop.plot({'channel1','channel2'})
>>> yop.resample(0.1) or yop.resample(channelName='master3')
>>> yop.exportToCSV(sampling=0.01)
>>> yop.exportToNetCDF()
>>> yop.exportToHDF5()
>>> yop.exportToMatlab()
>>> yop.exportToExcel()
>>> yop.exportToXlsx()
>>> yop.convertToPandas() # converts data groups into pandas dataframes
>>> yop.write() # writes mdf file
# drops all the channels except the one in argument
>>> yop.keepChannels({'channel1','channel2','channel3'})
>>> yop.getChannelData('channelName') # returns channel numpy array
"""
def read(self, fileName=None, multiProc=False, channelList=None,
         convertAfterRead=True, filterChannelNames=False,
         noDataLoading=False, compression=False):
    """Read a mdf file, versions 3.x and 4.x.

    Parameters
    ----------
    fileName : str, optional
        file name
    multiProc : bool
        flag to activate multiprocessing of channel data conversion
    channelList : list of str, optional
        list of channel names to be read. Reading might be much slower
        but saves memory; can be used to read big files
    convertAfterRead : bool, optional
        flag to convert channels right after read, True by default.
        When False, raw data is kept and converted on demand with
        getChannelData(); for float-heavy files this can cut the memory
        footprint by 3 to 4 times
    filterChannelNames : bool, optional
        flag to filter long channel names from its module names separated by '.'
    noDataLoading : bool, optional
        flag to read only file info but no data, for minimum memory use
    compression : bool or str, optional
        compress data in memory using blosc or bcolz, takes cpu time;
        int 1 to 9 selects bcolz, 'blosc' selects blosc

    Notes
    -----
    If convertAfterRead stays True, attribute mdf.multiProc activates channel
    conversion in multiprocessing; gain can be around 30% on big float-heavy
    files.

    Warning
    -------
    multiProc should be avoided when reading several files in a batch; it is
    not thread safe. Better multiprocess whole mdf instances instead (see
    implementation of mdfconverter).
    """
    if fileName is not None or self.fileName is None:
        self.fileName = fileName
    # open the file (handles zipped mdf4 transparently)
    (self.fid, self.fileName, self.zipfile) = _open_MDF(self.fileName)
    # the version number is a little-endian uint16 at offset 28 of the ID block
    self.fid.seek(28)
    self.MDFVersionNumber = unpack('<H', self.fid.read(2))[0]
    older_than_4 = self.MDFVersionNumber < 400  # 3.x readers are not compatible with 4.x
    if noDataLoading:
        # populate a minimum mdf structure only, data read on demand later
        self._noDataLoading = True
        if older_than_4:
            self.info = info3(None, fid=self.fid, minimal=1)
            (self.masterChannelList, mdfdict) = _generateDummyMDF3(self.info, channelList)
        else:
            self.info = info4(None, fid=self.fid, minimal=1)
            (self.masterChannelList, mdfdict) = _generateDummyMDF4(self.info, channelList)
        self.update(mdfdict)
    elif older_than_4:
        self.read3(self.fileName, None, multiProc, channelList,
                   convertAfterRead, filterChannelNames, compression)
    else:
        self.read4(self.fileName, None, multiProc, channelList,
                   convertAfterRead, filterChannelNames, compression)
def write(self, fileName=None):
    """Write a simple mdf file, same format as originally read, default is 4.x.

    Parameters
    ----------
    fileName : str, optional
        Name of file.
        If file name is not input, written file name will be the one read
        with appended '_New' string before extension.

    Notes
    -----
    All channels will be converted, so size might be bigger than original file.
    """
    if fileName is None:
        # splitext returns an immutable tuple; the original code tried to
        # assign into it (splitName[-1] = ...), which raises TypeError
        base, ext = splitext(self.fileName)
        if ext in ('.mfxz', '.MFXZ'):
            ext = '.mfx'  # do not resave in compressed file
        fileName = ''.join([base, '_New', ext])
    # makes sure all channels are converted before writing
    self.convertAllChannel()
    if self.MDFVersionNumber < 400:
        self.write3(fileName=fileName)
    else:
        self.write4(fileName=fileName)
def getChannelData(self, channelName):
    """Return channel data as a numpy array, converting it if not already done.

    Parameters
    ----------
    channelName : str
        channel name

    Returns
    -------
    numpy array
        converted data corresponding to the channel name

    Notes
    -----
    This accessor is the safest way to get channel values: the 'data' dict
    key might still contain raw (unconverted) data.
    """
    # dispatch to the reader implementation matching the file version
    if self.MDFVersionNumber >= 400:
        return self._getChannelData4(channelName)
    return self._getChannelData3(channelName)
def convertAllChannel(self):
    """Convert all channels from raw data to physical values following
    their CCBlock information. Converted data will take more memory.
    """
    # pick the version-specific converter, then run it
    converter = self._convertAllChannel3 if self.MDFVersionNumber < 400 \
        else self._convertAllChannel4
    return converter()
def plot(self, channels):
    """Plot channels against their master channel with Matplotlib.

    Parameters
    ----------
    channels : str or set/list of str
        channel name or collection of channel names

    Notes
    -----
    Channel description and unit will be tentatively displayed with axis
    labels. String channels are skipped; names not present in the mdf are
    reported on stderr.
    """
    try:
        import matplotlib.pyplot as plt
    except:
        raise ImportError('matplotlib not found')
    if isinstance(channels, str):
        # accept a single channel name and normalise it to a collection
        channels = {channels}
    for channelName in channels:
        if channelName in self:
            data = self.getChannelData(channelName)
            if data.dtype.kind not in ['S', 'U']:  # if channel not a string
                self.fig = plt.figure()
                # plot using matplotlib the channel versus master channel
                if len(list(self.masterChannelList.keys())) == 1:  # Resampled signals
                    masterName = list(self.masterChannelList.keys())[0]
                    if not masterName:  # resampled channels, only one time channel most probably called 'master'
                        masterName = 'master'
                    if masterName in list(self.keys()):  # time channel properly defined
                        plt.plot(self.getChannelData(masterName), data)
                        plt.xlabel(masterName + ' [' + self.getChannelUnit(masterName) + ']')
                    else:  # no time channel found
                        plt.plot(data)
                else:  # not resampled
                    master_name = self.getChannelMaster(channelName)
                    if master_name in list(self.keys()):  # master channel is proper channel name
                        plt.plot(self.getChannelData(master_name), data)
                        plt.xlabel(master_name + ' [' + self.getChannelUnit(master_name) + ']')
                    else:
                        plt.plot(data)
                plt.title(self.getChannelDesc(channelName))
                # NOTE(review): comparing the unit to {} suggests getChannelUnit
                # can return an empty dict for missing units — confirm upstream
                if self.getChannelUnit(channelName) == {}:
                    plt.ylabel(channelName)
                else:
                    plt.ylabel(channelName + ' [' + self.getChannelUnit(channelName) + ']')
                plt.grid(True)
                plt.show()
        else:
            print(('Channel ' + channelName + ' not existing'), file=stderr)
def allPlot(self):
    """Plot every channel in the object, one figure each.

    For test purposes only: this can open many, many plot windows and
    overload your computer.
    """
    for channel_name in list(self.keys()):
        try:
            self.plot(channel_name)
        except:
            # report the channel that failed to plot and keep going
            print(channel_name, file=stderr)
def resample(self, samplingTime=None, masterChannel=None):
    """Resample all data groups into one data group having defined
    sampling interval or sharing same master channel.

    Parameters
    ----------
    samplingTime : float, optional
        resampling interval, None by default. If None, will merge all
        datagroups into a unique datagroup having the highest number of
        points from all datagroups
    **or**
    masterChannel : str, optional
        master channel name to be used for all channels

    Notes
    -----
    1. resampling is relatively safe for mdf3 as it contains only time
    series. However, mdf4 can contain also distance, angle, etc. It might
    make no sense to apply one resampling to several data groups that do
    not share same kind of master channel (like time resampling to distance
    or angle data groups). If several kinds of data groups are used, you
    should better use pandas to resample.
    2. resampling will convert all your channels so be careful for big
    files and memory consumption
    """
    def interpolate(new_x, xp, fp):
        # Resample fp (channel samples taken at master points xp) onto new_x.
        # Fix: the previous version tested the master's dtype instead of the
        # channel's, searched the channel VALUES inside the new master axis
        # and returned master values - non-float channels were resampled to
        # garbage. Float channels are linearly interpolated; other kinds
        # (ints, etc.) use the last known sample (zero-order hold).
        if fp.dtype.kind == 'f':
            return interp(new_x, xp, fp)
        idx = searchsorted(xp, new_x, side='right')
        idx -= 1
        idx = clip(idx, 0, len(fp) - 1)
        return fp[idx]
    if self:  # mdf contains data
        # all channels must be converted before resampling
        self.convertAllChannel()
        masterData = None
        if masterChannel is None:  # create master channel if not proposed
            minTime = []
            maxTime = []
            length = []
            masterChannelName = 'master'
            for master in list(self.masterChannelList.keys()):
                if master is not None and master != '' and \
                        master in self and self.masterChannelList[master]:
                    masterData = self.getChannelData(master)
                    # consider only groups having minimum size
                    if master in self and len(masterData) > 5:
                        minTime.append(masterData[0])
                        maxTime.append(masterData[-1])
                        length.append(len(masterData))
            if minTime:  # at least 1 datagroup has a master channel to be resampled
                if samplingTime is None:
                    # keep the highest point count found among the groups
                    masterData = linspace(min(minTime), max(maxTime), num=max(length))
                else:
                    masterData = arange(min(minTime), max(maxTime), samplingTime)
            # the new master channel is added once below (it was previously
            # also added here, duplicating the add_channel call)
        else:
            masterChannelName = masterChannel  # master channel defined in argument
            if masterChannel not in list(self.masterChannelList.keys()):
                print('master channel name not in existing', file=stderr)
                raise ValueError('Master Channel not existing')
    # resample all channels to one sampling time vector
        if len(list(self.masterChannelList.keys())) > 1:  # not yet resampled
            # create master channel if not proposed
            # NOTE(review): 'master' below is the last master seen in the
            # discovery loop; it is unbound if masterChannel was passed in
            # (pre-existing behaviour, kept as-is)
            if masterChannel is None and masterData is not None:
                self.add_channel(0, masterChannelName,
                                 masterData,
                                 masterChannelName,
                                 master_type=self.getChannelMasterType(master),
                                 unit=self.getChannelUnit(master),
                                 description=self.getChannelDesc(master),
                                 conversion=None)
            # Interpolate channels
            timevect = []
            if masterChannelName in self:
                masterData = self.getChannelData(masterChannelName)
            if masterData is None:  # no master channel, cannot resample
                return None
            for Name in list(self.keys()):
                try:
                    if Name not in list(self.masterChannelList.keys()):  # not a master channel
                        timevect = self.getChannelData(self.getChannelMaster(Name))
                        if not self.getChannelData(Name).dtype.kind in ('S', 'U', 'V'):
                            # if channel not array of string
                            self.setChannelData(Name, interpolate(masterData, timevect, self.getChannelData(Name)))
                            self.setChannelMaster(Name, masterChannelName)
                            self.setChannelMasterType(Name, self.getChannelMasterType(master))
                            self.remove_channel_conversion(Name)
                        else:  # can not interpolate strings, remove channel containing string
                            self.remove_channel(Name)
                except:
                    # diagnose the most common failure causes on stderr
                    if timevect is not None and len(timevect) != len(self.getChannelData(Name)):
                        print('{} and master channel {} do not have same length'.
                              format(Name, self.getChannelMaster(Name)), file=stderr)
                    elif not all(diff(timevect) > 0):
                        print('{} has non regularly increasing master channel {}'.
                              format(Name, self.getChannelMaster(Name)), file=stderr)
            # remove old master channels, replaced by the new unique one
            for ind in list(self.masterChannelList.keys()):
                if ind != masterChannelName and ind in self:
                    self.remove_channel(ind)
            self.masterChannelList = {}  # empty dict
            self.masterChannelList[masterChannelName] = list(self.keys())
        elif len(list(self.masterChannelList.keys())) == 1 and samplingTime is not None:
            # resamples only 1 datagroup
            masterData = self.getChannelData(list(self.masterChannelList.keys())[0])
            masterData = arange(masterData[0], masterData[-1], samplingTime)
            for Name in list(self.keys()):
                timevect = self.getChannelData(self.getChannelMaster(Name))
                self.setChannelData(Name, interpolate(masterData, timevect, self.getChannelData(Name)))
                self.setChannelMaster(Name, masterChannelName)
                self.setChannelMasterType(Name, self.getChannelMasterType(master))
                self.remove_channel_conversion(Name)
        elif samplingTime is None:
            print('Already resampled', file=stderr)
    else:
        print('no data to be resampled', file=stderr)
def cut(self, begin=None, end=None):
    """Cut all channels between begin and end values of their master channel.

    Parameters
    ----------
    begin : float, optional
        beginning value in master channel from which to start cutting in all channels
    end : float, optional
        ending value in master channel at which to stop cutting in all channels

    Raises
    ------
    ValueError
        if neither begin nor end is given

    Notes
    -----
    Use this method if whole data in mdf are using same physical or type of
    master channel (for instance time).
    """
    if begin is None and end is None:
        # ValueError (an Exception subclass, so backward compatible for
        # callers catching Exception) better signals a bad argument
        raise ValueError('Please input at least one beginning or ending value to cut data')
    for master in self.masterChannelList:  # for each channel group
        # find the index range to keep in this group
        masterData = self.getChannelData(master)
        if masterData is not None and len(masterData) > 0:  # not empty data
            startIndex = searchsorted(masterData, begin, side='left') if begin is not None else 0
            endIndex = searchsorted(masterData, end, side='right') if end is not None else len(masterData)
            if startIndex == endIndex:
                # selection is empty: clear every channel of the group
                for channel in self.masterChannelList[master]:
                    self.setChannelData(channel, array([]))
            else:
                for channel in self.masterChannelList[master]:
                    data = self.getChannelData(channel)
                    self.setChannelData(channel, data[startIndex: endIndex])
def exportToCSV(self, filename=None, sampling=None):
    """Export mdf data into a CSV file.

    Parameters
    ----------
    filename : str, optional
        file name. If no name defined, it will use original mdf name and path
    sampling : float, optional
        sampling interval. None by default

    Notes
    -----
    Data saved in CSV will be automatically resampled, as it is difficult to
    save in this format data not sharing the same master channel.
    Warning: this can be slow for big data, CSV is a text format after all.
    """
    if not self:  # no data in mdf
        print('no data to be exported', file=stderr)
        return
    import csv
    self.resample(sampling)
    if filename is None:
        filename = splitext(self.fileName)[0]
        filename = filename + '.csv'
    if self.MDFVersionNumber >= 400:
        encoding = 'utf8'  # mdf4 encoding is unicode
    else:
        encoding = 'latin-1'  # mdf3 encoding is latin-1
    # only scalar, non-string channels can be written as CSV columns;
    # compute the list once instead of re-scanning the keys several times
    def _exportable(name):
        data = self.getChannelData(name)
        return data.dtype.kind not in ('S', 'U', 'V') and data.ndim <= 1
    exportableNames = [name for name in list(self.keys()) if _exportable(name)]
    # writes header
    if PythonVersion < 3:
        f = open(filename, "wb")
        writer = csv.writer(f, dialect=csv.excel)
        names = []
        units = []
        for name in exportableNames:
            unit = self.getChannelUnit(name)
            # encode to the target encoding, falling back to the raw value.
            # (the former `name is bytes` / `unit is bytes` identity checks
            # compared an instance to the type and were always False; that
            # dead code has been removed)
            try:
                names.append(name.encode(encoding, 'replace'))
            except Exception:
                names.append(name)
            try:
                units.append(unit.encode(encoding, 'replace'))
            except Exception:
                units.append(unit)
        writer.writerow(names)  # writes channel names
        writer.writerow(units)  # writes units
    else:
        f = open(filename, "wt", encoding=encoding)
        writer = csv.writer(f, dialect=csv.excel)
        writer.writerow(exportableNames)  # writes channel names
        writer.writerow([self.getChannelUnit(name)
                         for name in exportableNames])  # writes units
    # concatenate all channels column-wise
    temp = [self.getChannelData(name).transpose() for name in exportableNames]
    if temp:
        buf = vstack(temp)
        buf = buf.transpose()
        # write all rows
        r, c = buf.shape
        writer.writerows([list(buf[i, :]) for i in range(r)])
    f.close()
def exportToNetCDF(self, filename=None, sampling=None):
    """Export mdf data into a netcdf file.

    Parameters
    ----------
    filename : str, optional
        file name. If no name defined, it will use original mdf name and path
    sampling : float, optional
        sampling interval.

    Dependency
    ----------
    scipy
    """
    try:
        from scipy.io import netcdf
    except ImportError:
        raise ImportError('scipy.io module not found')

    def cleanName(name):
        # keep only the characters netcdf accepts in names and attributes
        allowedStr = ' ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+_.@'
        buf = ''
        for c in name:
            if c in allowedStr:
                buf += c
        return buf

    def setAttribute(obj, name, value):
        # netcdf does not allow empty strings...
        if value is None or len(value) == 0:
            return
        if isinstance(value, dict) and 'name' in value:
            # metadata entries may come as dicts; use their 'name' field
            # (was `value is dict`, an identity check that was always False)
            value = value['name']
        if PythonVersion >= 3 and isinstance(value, bytes):
            # decode so cleanName can iterate characters
            # (was `value.encode(...)`, which bytes does not support in py3)
            value = value.decode('utf-8', 'ignore')
        setattr(obj, name, cleanName(value))

    if sampling is not None:
        self.resample(sampling)
    if filename is None:
        filename = splitext(self.fileName)[0]
        filename = filename + '.nc'
    f = netcdf.netcdf_file(filename, 'w')
    setAttribute(f, 'Date', self.file_metadata['date'])
    setAttribute(f, 'Time', self.file_metadata['time'])
    setAttribute(f, 'Author', self.file_metadata['author'])
    setAttribute(f, 'Organization', self.file_metadata['organisation'])
    setAttribute(f, 'ProjectName', self.file_metadata['project'])
    setAttribute(f, 'Subject', self.file_metadata['subject'])
    setAttribute(f, 'Comment', self.file_metadata['comment'])
    # Create dimensions having name of all time channels
    for master in list(self.masterChannelList.keys()):
        f.createDimension(master, len(self.getChannelData(self.masterChannelList[master][0])))
    # Create variables definition, dimension and attributes
    var = {}
    for name in list(self.keys()):
        data = self.getChannelData(name)
        # map numpy dtypes onto netcdf type codes
        if data.dtype == 'float64':
            dataType = 'd'
        elif data.dtype == 'float32':
            dataType = 'f'
        elif data.dtype in ['int8', 'int16', 'uint8', 'uint16']:
            dataType = 'h'
        elif data.dtype in ['int32', 'uint32']:
            dataType = 'i'
        elif data.dtype.kind in ['S', 'U']:
            dataType = 'c'
        else:
            dataType = None
            print(('Can not process numpy type ' + str(data.dtype)
                   + ' of channel ' + name), file=stderr)
        if dataType is not None:
            # create variable
            CleanedName = cleanName(name)
            if len(list(self.masterChannelList.keys())) == 1:  # mdf resampled
                var[name] = f.createVariable(CleanedName, dataType, (list(self.masterChannelList.keys())[0], ))
            else:  # not resampled
                var[name] = f.createVariable(CleanedName, dataType, (self.getChannelMaster(name), ))
            # Create attributes
            setAttribute(var[name], 'title', CleanedName)
            setAttribute(var[name], 'units', self.getChannelUnit(name))
            setAttribute(var[name], 'Description', self.getChannelDesc(name))
            if name in set(self.masterChannelList.keys()):
                setAttribute(var[name], 'Type', 'Master Channel')
                setAttribute(var[name], 'datatype', 'master')
            else:
                setAttribute(var[name], 'Type', 'Data Channel')
    # put data in variables: assign into the variable's slice so the data is
    # actually written to the file (plain `var[name] = data` only replaced
    # the dict entry and nothing ever reached the netcdf file). Skip names
    # whose dtype could not be mapped above.
    for name in list(self.keys()):
        if name in var:
            var[name][:] = self.getChannelData(name)
    f.close()
def exportToHDF5(self, filename=None, sampling=None):
    """Export mdf class data structure into an HDF5 file.

    Parameters
    ----------------
    filename : str, optional
        file name. If no name defined, it will use original mdf name and path
    sampling : float, optional
        sampling interval to resample data before export.

    Dependency
    ------------------
    h5py

    Notes
    --------
    The maximum attributes will be stored.
    Data structure will be similar to how it is in the masterChannelList
    attribute: one HDF5 group per master channel (unless resampled).
    """
    try:
        import h5py
    except ImportError:
        raise ImportError('h5py not found')
    import os  # stdlib; kept out of the h5py try so a failure is not mislabelled

    def setAttribute(obj, name, value):
        # Best-effort attribute writer: skips None/empty values and
        # silently ignores values h5py cannot store as attributes.
        if value is not None and len(value) > 0:
            try:
                # metadata entries may be dicts carrying the text under 'name'
                # (fixed: original tested `value is dict`, which is always False)
                if isinstance(value, dict) and 'name' in value:
                    value = value['name']
                obj.attrs[name] = value
            except Exception:
                pass  # deliberate best-effort: unsupported attribute types are dropped

    if sampling is not None:
        self.resample(sampling)
    if filename is None:
        filename = splitext(self.fileName)[0]
        filename = filename + '.hdf'
    f = h5py.File(filename, 'w')  # create hdf5 file
    # create group in root associated to file
    filegroup = f.create_group(os.path.basename(filename))
    setAttribute(filegroup, 'Author', self.file_metadata['author'])
    setAttribute(filegroup, 'Date', self.file_metadata['date'])
    setAttribute(filegroup, 'Time', self.file_metadata['time'])
    setAttribute(filegroup, 'Organization', self.file_metadata['organisation'])
    setAttribute(filegroup, 'ProjectName', self.file_metadata['project'])
    setAttribute(filegroup, 'Subject', self.file_metadata['subject'])
    setAttribute(filegroup, 'Comment', self.file_metadata['comment'])
    # maps getChannelMasterType() codes to readable labels
    masterTypeDict = {0: 'None', 1: 'Time', 2: 'Angle',
                      3: 'Distance', 4: 'Index', None: 'None'}
    if len(self.masterChannelList) > 1:
        # several master-channel groups, i.e. data not resampled:
        # mirror the grouping with one HDF5 group per master channel
        groups = {}   # group name -> group index
        ngroups = 0
        grp = {}      # group index -> h5py group object
        for channel in list(self.keys()):
            channelData = self.getChannelData(channel)
            masterName = self.getChannelMaster(channel)
            if masterField in self[channel] and masterName not in groups:
                # create new data group
                ngroups += 1
                if masterName != '' \
                        and masterName is not None:
                    group_name = masterName
                else:
                    # channel has no named master; synthesize a group name
                    group_name = masterField + str(ngroups)
                groups[group_name] = ngroups
                grp[ngroups] = filegroup.create_group(group_name)
                setAttribute(grp[ngroups], masterField, masterName)
                setAttribute(grp[ngroups], masterTypeField,
                             masterTypeDict[self.getChannelMasterType(channel)])
            elif masterField in self[channel] and masterName in groups:
                group_name = masterName
            if channelData.dtype.kind not in ('U', 'O'):  # not supported type
                dset = grp[groups[group_name]].create_dataset(channel, data=channelData)
                setAttribute(dset, unitField, self.getChannelUnit(channel))
                if descriptionField in self[channel]:
                    setAttribute(dset, descriptionField, self.getChannelDesc(channel))
    else:  # resampled, or only one master for all channels: no sub-groups
        masterName = list(self.masterChannelList.keys())[0]
        setAttribute(filegroup, masterField, masterName)
        setAttribute(filegroup, masterTypeField,
                     masterTypeDict[self.getChannelMasterType(masterName)])
        for channel in list(self.keys()):
            channelData = self.getChannelData(channel)
            if channelData.dtype.kind not in ('U', 'O'):  # not supported type
                dset = filegroup.create_dataset(channel, data=channelData)
                setAttribute(dset, unitField, self.getChannelUnit(channel))
                if descriptionField in self[channel]:
                    setAttribute(dset, descriptionField, self.getChannelDesc(channel))
    f.close()
def exportToMatlab(self, filename=None):
"""Export mdf data into Matlab file format 5, tentatively compressed
Parameters
----------------
filename : str, optional
file name. If no name defined, it will use original mdf name and path
Dependency
------------------
scipy
Notes
--------
This method will dump all data into Matlab file but you will loose below information:
- unit and descriptions of channel
- data structure, what is corresponding master channel to a channel.
Channels might have then different lengths
"""
# export class data struture into .mat file
try:
from scipy.io import savemat
except:
raise ImportError('scipy module not found')
if filename is None:
filename = splitext(self.fileName)[0]
filename = filename + '.mat'
# convert self into simple dict without and metadata
temp = {}
for channel in list(self.keys()):
data = self.getChannelData(channel)
if data.dtype.kind not in ('S', 'U', 'V'): # does not like special characters chains, skip