Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP

Loading…

Skip UTF BOM from first data event on UTF-8 decoded stream #55

Merged
wdavidw merged 1 commit into wdavidw:master

2 participants

@dougwilson

This is a conservative implementation for issue #36. I don't always agree with things stripping the UTF BOM on a file, but it seems it may be useful for the CSV parser. This will strip the BOM from a stream only when the stream's encoding is set to UTF-8 and the BOM is the first character in the first data event (which should be the beginning of the file 99% of the time).

@wdavidw wdavidw merged commit 40dfd12 into wdavidw:master
@wdavidw
Owner

This is how I was planning to do it. Looks perfect to me.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
This page is out of date. Refresh to see the latest.
View
10 lib/from.js
@@ -193,13 +193,19 @@ module.exports = function(csv) {
*/
from.stream = function(stream, options) {
+ var first;
this.options(options);
+ first = true;
stream.on('data', function(data) {
+ var string, strip;
if (csv.writable) {
- if (false === csv.write(data.toString())) {
- return stream.pause();
+ strip = first && typeof data === 'string' && stream.encoding === 'utf8' && 0xFEFF === data.charCodeAt(0);
+ string = strip ? data.substring(1) : data.toString();
+ if (false === csv.write(string)) {
+ stream.pause();
}
}
+ return first = false;
});
stream.on('error', function(e) {
return csv.error(e);
View
6 src/from.coffee
@@ -170,10 +170,14 @@ module.exports = (csv) ->
###
from.stream = (stream, options) ->
@options options
+ first = true
stream.on 'data', (data) ->
if csv.writable
- if false is csv.write data.toString()
+ strip = first and typeof data is 'string' and stream.encoding is 'utf8' and 0xFEFF is data.charCodeAt 0
+ string = if strip then data.substring 1 else data.toString()
+ if false is csv.write string
stream.pause()
+ first = false
stream.on 'error', (e) ->
csv.error e
stream.on 'end', ->
View
11 test/from.coffee
@@ -45,6 +45,17 @@ describe 'from', ->
.on 'end', ->
next()
+ describe 'path', ->
+
+ it 'should strip UTF-8 BOM', (next) ->
+ csv()
+ .from.path("#{__dirname}/from/file_bom.csv")
+ .on 'record', (record) ->
+ record.length.should.eql 5
+ record.should.eql ['1','2','3','4','5']
+ .on 'end', ->
+ next()
+
describe 'stream', ->
it 'should be able to pause', (next) ->
View
1  test/from/file_bom.csv
@@ -0,0 +1 @@
+"1","2","3","4","5"
Something went wrong with that request. Please try again.