-
Notifications
You must be signed in to change notification settings - Fork 265
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Origin commit, functionalities almost complete, still missing data obje…
…cts as an alternative to arrays, and some more source/destination handling
- Loading branch information
0 parents
commit c253672
Showing
38 changed files
with
845 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
.* | ||
*.tmp | ||
lib-cov | ||
!.gitignore |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,254 @@ | ||
|
||
// Module CSV - Copyright David Worms <open@adaltas.com> (MIT Licensed) | ||
|
||
var EventEmitter = require('events').EventEmitter, | ||
fs = require('fs'); | ||
|
||
// Utility: shallow-copy the enumerable keys of obj2 onto obj1.
// Returns obj1 itself (or a fresh object when obj1 is falsy); on key
// conflicts the value from obj2 wins.
var merge = function(obj1,obj2){
	var target = obj1 ? obj1 : {};
	for(var key in obj2) target[key] = obj2[key];
	return target;
};
|
||
module.exports = function(){ | ||
// Shared parser/serializer state, closed over by parse()/flush()/finish()
// below. One record at a time is accumulated here.
var state = {
	count: 0,          // number of records flushed so far
	field: '',         // characters accumulated for the field being parsed
	line: [],          // fields accumulated for the record being parsed
	lastC: '',         // previous character seen (used to collapse \r\n)
	quoted: false,     // true while inside a quoted field
	commented: false,  // read by parse() but never set anywhere visible -- TODO confirm intent
	buffer: null,      // write-side output buffer, allocated by toStream()
	bufferPosition: 0  // next free byte offset in buffer
}
|
||
// Defined Class

// Event-emitting CSV reader/writer. A single instance is created further
// down and returned by the module factory.
var CSV = function(){
	// Set options
	this.readOptions = {
		flags: 'r',                  // fs.createReadStream open flag
		encoding: 'utf8',
		bufferSize: 8 * 1024 * 1024, // read chunk size; also write-buffer fallback
		separator: ',',              // field delimiter
		escape: '"',                 // char that, doubled with quote/escape, escapes it
		quote: '"'
	};
	// null means "not set": bufferSize falls back to readOptions.bufferSize,
	// lineBreaks defaults to whatever terminator the input itself used.
	this.writeOptions = {
		bufferSize: null,
		lineBreaks: null
	};
}
// Legacy prototype-chain inheritance from EventEmitter (pre-util.inherits idiom).
CSV.prototype.__proto__ = EventEmitter.prototype;
|
||
// Reading API | ||
|
||
CSV.prototype.from = function(data,options){
	// Parse in-memory data (string or buffer). `options` override the read
	// options, mirroring fromStream(); previously the parameter was accepted
	// but silently ignored.
	if(options){
		merge(this.readOptions,options);
	}
	var self = this;
	// Defer parsing one tick so callers can attach 'data'/'end' listeners
	// on the returned instance before any event fires.
	process.nextTick(function(){
		parse(data);
		finish();
	});
	return this;
}
CSV.prototype.fromStream = function(readStream, options){
	// Wire a readable stream into the parser. Emits 'data' per record,
	// 'end' when the stream ends, 'error' on stream errors.
	// Bug fix: `!readStream instanceof EventEmitter` parses as
	// `(!readStream) instanceof EventEmitter`, which is always false, so the
	// guard never fired; parenthesize to actually reject invalid streams.
	if( !(readStream instanceof EventEmitter) ) throw new Error('Invalid stream');
	if(options){
		merge(this.readOptions,options);
	}
	var self = this;
	readStream.on('data', function(data) { parse(data) });
	readStream.on('error', function(error) { self.emit('error', error) });
	readStream.on('end', function() {
		finish();
	});
	this.readStream = readStream;
	return this;
}
CSV.prototype.fromPath = function(path, options){
	// Apply caller overrides, open the file with the resulting read options,
	// then delegate all event wiring to fromStream().
	merge(this.readOptions, options);
	var fileStream = fs.createReadStream(path, this.readOptions);
	fileStream.setEncoding(this.readOptions.encoding);
	return this.fromStream(fileStream, null);
}
|
||
// Writting API | ||
|
||
CSV.prototype.toStream = function(writeStream, options){
	// Direct serialized records into a writable stream, buffered through
	// state.buffer. Emits 'end' (with the record count) when the stream closes.
	// Bug fix: `!writeStream instanceof EventEmitter` parses as
	// `(!writeStream) instanceof EventEmitter` -- always false -- so invalid
	// streams were never rejected; parenthesize the negation.
	if( !(writeStream instanceof EventEmitter) ) throw new Error('Invalid stream');
	var self = this;
	merge(this.writeOptions,options);
	// Translate the symbolic lineBreaks option into a literal terminator;
	// 'auto' (null) means "reuse whatever terminator the input used",
	// resolved lazily by parse().
	switch(this.writeOptions.lineBreaks){
		case 'auto':
			this.writeOptions.lineBreaks = null;
			break;
		case 'unix':
			this.writeOptions.lineBreaks = "\n";
			break;
		case 'mac':
			this.writeOptions.lineBreaks = "\r";
			break;
		case 'windows':
			this.writeOptions.lineBreaks = "\r\n";
			break;
		case 'unicode':
			this.writeOptions.lineBreaks = "\u2028";
			break;
	}
	// 'end' is only emitted once the destination has actually closed.
	writeStream.on('close', function(){
		self.emit('end',state.count);
	})
	this.writeStream = writeStream;
	// NOTE(review): `new Buffer` is deprecated in modern Node (Buffer.alloc);
	// kept because the package targets very old engines (see package.json).
	state.buffer = new Buffer(this.writeOptions.bufferSize||this.readOptions.bufferSize);
	state.bufferPosition = 0;
	return this;
}
CSV.prototype.toPath = function(path, options){
	// Open a write stream on `path` and delegate to toStream().
	// Bug fix: the original called merge(options, defaults), which copied the
	// defaults ON TOP of the caller's options -- a user-supplied `flags: 'a'`
	// or custom encoding was always clobbered -- and when `options` was
	// omitted the defaults were built but then discarded. Merge in the other
	// direction, starting from a fresh defaults object.
	var streamOptions = merge({
		flags: 'w',
		encoding: 'utf8'
	}, options);
	var stream = fs.createWriteStream(path, streamOptions);
	return this.toStream(stream, streamOptions);
}
|
||
// Transform API | ||
|
||
// Register a user callback invoked by flush() once per parsed record with
// (line, index). Its return value replaces the record; returning null drops
// the record entirely. Chainable.
CSV.prototype.transform = function(callback){
	this.transformer = callback;
	return this;
}
|
||
var csv = new CSV(); | ||
|
||
function parse(chars){
	// Incremental, character-at-a-time CSV tokenizer. Called once per stream
	// chunk; the partially-built field/record survives between calls in the
	// shared `state` object.
	// NOTE(review): lookaheads (escape/quote doubling, \r\n pairing, closing-
	// quote validation) only inspect the current chunk, so a pair split
	// across a chunk boundary is mishandled -- confirm with multi-chunk input.
	chars = ''+chars; // coerce Buffer chunks to string
	for (var i = 0, l = chars.length; i < l; i++) {
		var c = chars.charAt(i);
		switch (c) {
			case csv.readOptions.escape:
			case csv.readOptions.quote:
				if( state.commented ) break;
				var isEscape = false;
				if (c === csv.readOptions.escape) {
					// An escape char followed by an escape or quote char keeps
					// that next char as literal field content ("" -> ").
					var nextChar = chars.charAt(i + 1);
					if (nextChar === csv.readOptions.escape || nextChar === csv.readOptions.quote) {
						i++;
						isEscape = true;
						c = chars.charAt(i);
						state.field += c;
					}
				}
				if (!isEscape && (c === csv.readOptions.quote)) {
					if (state.field && !state.quoted) {
						// Quote in the middle of an unquoted field:
						// treat quote as a regular character
						state.field += c;
						break;
					}
					if (state.quoted) {
						// Make sure a closing quote is followed by a separator
						var nextChar = chars.charAt(i + 1);
						if (nextChar && nextChar != '\r' && nextChar != '\n' && nextChar !== csv.readOptions.separator) {
							throw new Error('Invalid closing quote; found "' + nextChar + '" instead of separator "' + csv.readOptions.separator + '"');
						}
						state.quoted = false;
					} else if (state.field === '') {
						// An opening quote is only recognized at field start.
						state.quoted = true;
					}
				}
				break;
			case csv.readOptions.separator:
				if( state.commented ) break;
				if( state.quoted ) {
					// Separators inside a quoted field are literal content.
					state.field += c;
				}else{
					state.line.push(state.field);
					state.field = '';
				}
				break;
			case '\n':
				// Second half of a \r\n pair: the \r already flushed the record.
				// NOTE(review): readOptions defines no `quoted` key, so
				// `!csv.readOptions.quoted` is always true and the condition
				// reduces to the lastC check; `state.quoted` was probably
				// intended -- confirm before changing.
				if( !csv.readOptions.quoted && state.lastC === '\r' ){
					break;
				}
				// fall through: a bare \n terminates the record like \r does
			case '\r':
				// First line break seen decides the output terminator when the
				// caller left writeOptions.lineBreaks on 'auto' (null).
				if( csv.writeOptions.lineBreaks === null ){
					csv.writeOptions.lineBreaks = c + ( c === '\r' && chars.charAt(i+1) === '\n' ? '\n' : '' );
				}
				// NOTE(review): the record is flushed even while state.quoted is
				// true, so line breaks inside quoted fields are not preserved --
				// confirm whether multi-line fields are meant to be supported.
				state.line.push(state.field);
				state.field = '';
				flush();
				break;
			default:
				if (state.commented) break;
				state.field += c;
		}
		state.lastC = c;
	}
}
|
||
function flush(){
	// Emit one completed record: run it through the user transform, fire the
	// 'data' event, and -- when a destination is attached -- serialize it into
	// the output buffer.
	var line = csv.transformer?csv.transformer(state.line,state.count):state.line;
	if(line !== null){
		csv.emit('data',line,state.count);
	}
	state.count++;
	if(line !== null){
		if(typeof line === 'object'){
			if(line instanceof Array){
				// Serialize the array back to CSV text. writeOptions carries no
				// separator/quote/escape defaults, so the || fallbacks below
				// normally resolve to the readOptions values.
				var newLine = '';
				line.forEach(function(field,i){
					var containsSeparator = field.indexOf(csv.writeOptions.separator||csv.readOptions.separator)>=0;
					var containsQuote = field.indexOf(csv.writeOptions.quote||csv.readOptions.quote)>=0;
					if(containsQuote){
						// Bug fix: String.replace with a string pattern only
						// substitutes the FIRST occurrence, so fields holding
						// several quotes were escaped incompletely; split/join
						// escapes every occurrence.
						var quoteChar = csv.writeOptions.quote||csv.readOptions.quote;
						var escapeChar = csv.writeOptions.escape||csv.readOptions.escape;
						field = field.split(quoteChar).join(escapeChar+quoteChar);
					}
					if(containsQuote||containsSeparator){
						field = (csv.writeOptions.quote||csv.readOptions.quote)+field+(csv.writeOptions.quote||csv.readOptions.quote);
					}
					newLine += field;
					if(i!==line.length-1){
						newLine += (csv.writeOptions.separator||csv.readOptions.separator);
					}
				});
				line = newLine+csv.writeOptions.lineBreaks;
			}else{
				// Non-array objects are not serialized yet (commit message:
				// "still miss data objects as an alternative to arrays").
			}
		}
		if(state.buffer){
			// Flush the buffer to the stream whenever the next record would
			// overflow it, then append the serialized record.
			if(state.bufferPosition+Buffer.byteLength(line,'utf8')>csv.readOptions.bufferSize){
				csv.writeStream.write(state.buffer.slice(0, state.bufferPosition));
				state.buffer = new Buffer(csv.readOptions.bufferSize);
				state.bufferPosition = 0;
			}
			state.bufferPosition += state.buffer.write(line,state.bufferPosition,'utf8');
		}
	}
	// Reset per-record state for the next line.
	state.line = [];
	state.lastC = '';
}
function finish(){
	// End of input: flush any partially-accumulated record, then close down
	// the destination (or emit 'end' directly when there is no write stream).
	if (state.quoted) {
		csv.emit('error', new Error('Quoted field not terminated'));
		return;
	}
	// dump open record
	if (state.field) {
		state.line.push(state.field);
		state.field = '';
	}
	if (state.line.length > 0) {
		flush();
	}
	if (csv.writeStream) {
		// Drain whatever is still buffered, then end the stream; 'end' is
		// emitted from the stream's close handler wired up in toStream().
		csv.writeStream.write(state.buffer.slice(0, state.bufferPosition));
		csv.writeStream.end();
	} else {
		csv.emit('end', state.count);
	}
}
return csv; | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{ "name": "csv" | ||
, "version": "0.0.1" | ||
, "description": "CSV parser with simple api, full of options and tested against large datasets." | ||
, "author": "David Worms <david@adaltas.com>" | ||
, "main": "./lib/csv" | ||
, "engines": { "node": ">= 0.1.90" } | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
|
||
<pre> | ||
_ _ _ _____ _______ __ | ||
| \ | | | | / ____|/ ____\ \ / / | ||
| \| | ___ __| | ___ | | | (___ \ \ / / | ||
| . ` |/ _ \ / _` |/ _ \ | | \___ \ \ \/ / | ||
| |\ | (_) | (_| | __/ | |____ ____) | \ / | ||
|_| \_|\___/ \__,_|\___| \_____|_____/ \/ | ||
</pre> | ||
|
||
This project provides CSV parsing and has been tested and used on large source files (over 2 GB).
|
||
- Line breaks discovery: when reading from a file, it is not required to provide line breaks, when writing to a file it will default to the same style. | ||
|
||
Quick example
------------- | ||
|
||
Using the library is a 3 steps process where all steps are optional: | ||
|
||
1. Create a source | ||
2. Create a destination | ||
3. Transform the data | ||
|
||
    var csv = require('csv');
    csv()
        .fromPath('/tmp/csv.in')
        .toPath('/tmp/csv.out')
        .transform(function(data){
            data.unshift(data.pop());
            return data;
        });
|
||
Creating a source | ||
----------------- | ||
|
||
Options are: | ||
|
||
The following method are available: | ||
|
||
- *fromPath* | ||
Takes a file path as the first argument and optionally an object of options as the second argument.
|
||
- *fromStream* | ||
Takes a readable stream as the first argument and optionally an object of options as the second argument.
|
||
- *fromData* | ||
Takes a string, a buffer, an array or an object as the first argument and optionally some options as the second argument.
|
||
Creating a destination | ||
---------------------- | ||
|
||
Options are: | ||
|
||
- *encoding* Default to 'utf8' | ||
- *lineBreaks* Default to 'auto', special values are 'auto', 'unix', 'mac', 'windows', 'unicode'. | ||
- *flag* Default to 'w', 'w' to create or overwrite an file, 'a' to append to a file. Apply when using the `toPath` method. | ||
- *bufferSize* Internal buffer holding data before being flush into a stream. Apply when destination is a stream. | ||
|
||
The following method are available: | ||
|
||
- *toPath* | ||
Takes a file path as the first argument and optionally an object of options as the second argument.
|
||
- *toStream* | ||
Takes a writable stream as the first argument and optionally an object of options as the second argument.
|
||
Transforming data | ||
----------------- | ||
|
||
You may provide a callback to the `transform` method. The contract is quite simple: you receive an array of fields for each record and return the transformed record. The return value may be an array, an associative array, a string or null. If null, the record will simply be skipped.
|
||
Events | ||
------ | ||
|
||
By extending the Node `EventEmitter` class, the library provides a few useful events:
|
||
- *data* (function(data, index){}) | ||
Emitted when a new row is parsed, after the `transform` callback, with the data being the value returned by `transform`. Note however that the event won't be fired if `transform` returns `null`, since the record is skipped.
The callback provides two arguments:
`data` is the CSV line being processed (by default as an array) | ||
`index` is the index number of the line starting at zero | ||
|
||
- *end* | ||
In case you're redirecting the output to a file using the `toPath` method, the event will be called once the writing process is complete and the file closed.
|
||
- *error* | ||
Thrown whenever an error is captured. | ||
|
||
Running the tests | ||
----------------- | ||
|
||
Tests are executed with expresso. To install it, simply run `npm install expresso`.
|
||
To run the tests | ||
expresso -I lib test/* | ||
|
||
To develop with the tests watching at your changes | ||
expresso -w -I lib test/* | ||
|
||
To instrument the tests | ||
expresso -I lib --cov test/* | ||
|
||
Related projects | ||
---------------- | ||
|
||
* Pavel Kolesnikov "ya-csv": http://github.com/koles/ya-csv
* Chris Williams "node-csv": http://github.com/voodootikigod/node-csv | ||
|
Oops, something went wrong.