Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

Skip UTF BOM from first data event on UTF-8 decoded stream #55

Merged
merged 1 commit into from

2 participants

Douglas Christopher Wilson Worms David
Douglas Christopher Wilson

This is a conservative implementation for issue #36. I don't always agree with tools stripping the UTF BOM from a file, but it seems it may be useful for the CSV parser. This will strip the BOM from a stream only when the stream's encoding is set to UTF-8 and the BOM is the first character in the first data event (which should be the beginning of the file 99% of the time).

Worms David wdavidw merged commit 40dfd12 into from
Worms David
Owner

This is how I was planning to do it. Looks perfect to me.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
This page is out of date. Refresh to see the latest.
10 lib/from.js
View
@@ -193,13 +193,19 @@ module.exports = function(csv) {
*/
from.stream = function(stream, options) {
+ var first;
this.options(options);
+ first = true;
stream.on('data', function(data) {
+ var string, strip;
if (csv.writable) {
- if (false === csv.write(data.toString())) {
- return stream.pause();
+ strip = first && typeof data === 'string' && stream.encoding === 'utf8' && 0xFEFF === data.charCodeAt(0);
+ string = strip ? data.substring(1) : data.toString();
+ if (false === csv.write(string)) {
+ stream.pause();
}
}
+ return first = false;
});
stream.on('error', function(e) {
return csv.error(e);
6 src/from.coffee
View
@@ -170,10 +170,14 @@ module.exports = (csv) ->
###
from.stream = (stream, options) ->
@options options
+ first = true
stream.on 'data', (data) ->
if csv.writable
- if false is csv.write data.toString()
+ strip = first and typeof data is 'string' and stream.encoding is 'utf8' and 0xFEFF is data.charCodeAt 0
+ string = if strip then data.substring 1 else data.toString()
+ if false is csv.write string
stream.pause()
+ first = false
stream.on 'error', (e) ->
csv.error e
stream.on 'end', ->
11 test/from.coffee
View
@@ -45,6 +45,17 @@ describe 'from', ->
.on 'end', ->
next()
+ describe 'path', ->
+
+ it 'should strip UTF-8 BOM', (next) ->
+ csv()
+ .from.path("#{__dirname}/from/file_bom.csv")
+ .on 'record', (record) ->
+ record.length.should.eql 5
+ record.should.eql ['1','2','3','4','5']
+ .on 'end', ->
+ next()
+
describe 'stream', ->
it 'should be able to pause', (next) ->
1  test/from/file_bom.csv
View
@@ -0,0 +1 @@
+"1","2","3","4","5"
Something went wrong with that request. Please try again.