Skip to content
This repository

Skip UTF BOM from first data event on UTF-8 decoded stream #55

Merged
merged 1 commit into from over 1 year ago

2 participants

Douglas Christopher Wilson Worms David
Douglas Christopher Wilson

This is a conservative implementation for issue #36. I don't always agree with tools stripping the UTF BOM from a file, but it seems that doing so may be useful for the CSV parser. This will strip the BOM from a stream only when the stream's encoding is set to UTF-8 and the BOM is the first character in the first data event (which should be the beginning of the file 99% of the time).

Worms David wdavidw merged commit 40dfd12 into from
Worms David wdavidw closed this
Worms David
Owner

This is how I was planning to do it. Looks perfect to me.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Showing 1 unique commit by 1 author.

Oct 18, 2012
Douglas Christopher Wilson Skip UTF BOM from first data event on UTF-8 decoded stream 2127daa
This page is out of date. Refresh to see the latest.
10  lib/from.js
@@ -193,13 +193,19 @@ module.exports = function(csv) {
193 193
   */
194 194
 
195 195
   from.stream = function(stream, options) {
  196
+    var first;
196 197
     this.options(options);
  198
+    first = true;
197 199
     stream.on('data', function(data) {
  200
+      var string, strip;
198 201
       if (csv.writable) {
199  
-        if (false === csv.write(data.toString())) {
200  
-          return stream.pause();
  202
+        strip = first && typeof data === 'string' && stream.encoding === 'utf8' && 0xFEFF === data.charCodeAt(0);
  203
+        string = strip ? data.substring(1) : data.toString();
  204
+        if (false === csv.write(string)) {
  205
+          stream.pause();
201 206
         }
202 207
       }
  208
+      return first = false;
203 209
     });
204 210
     stream.on('error', function(e) {
205 211
       return csv.error(e);
6  src/from.coffee
@@ -170,10 +170,14 @@ module.exports = (csv) ->
170 170
   ###
171 171
   from.stream = (stream, options) ->
172 172
     @options options
  173
+    first = true
173 174
     stream.on 'data', (data) ->
174 175
       if csv.writable
175  
-        if false is csv.write data.toString()
  176
+        strip = first and typeof data is 'string' and stream.encoding is 'utf8' and 0xFEFF is data.charCodeAt 0
  177
+        string = if strip then data.substring 1 else data.toString()
  178
+        if false is csv.write string
176 179
           stream.pause()
  180
+      first = false
177 181
     stream.on 'error', (e) ->
178 182
       csv.error e
179 183
     stream.on 'end', ->
11  test/from.coffee
@@ -45,6 +45,17 @@ describe 'from', ->
45 45
       .on 'end', ->
46 46
         next()
47 47
 
  48
+  describe 'path', ->
  49
+
  50
+    it 'should strip UTF-8 BOM', (next) ->
  51
+      csv()
  52
+      .from.path("#{__dirname}/from/file_bom.csv")
  53
+      .on 'record', (record) ->
  54
+        record.length.should.eql 5
  55
+        record.should.eql ['1','2','3','4','5']
  56
+      .on 'end', ->
  57
+        next()
  58
+
48 59
   describe 'stream', ->
49 60
 
50 61
     it 'should be able to pause', (next) ->
1  test/from/file_bom.csv
... ...
@@ -0,0 +1 @@
  1
+"1","2","3","4","5"
Commit_comment_tip

Tip: You can add notes to lines in a file. Hover to the left of a line to make a note

Something went wrong with that request. Please try again.