Permalink
Browse files

Merge pull request #7 from ssoper/master

Node 0.8
  • Loading branch information...
2 parents ac2e136 + fa0ecb3 commit 01c7ff651fba5eefbc1695e90dca4e12a1244f78 @voodootikigod committed May 22, 2013
Showing with 261 additions and 296 deletions.
  1. +6 −0 Makefile
  2. +14 −4 README.md
  3. +196 −2 index.js
  4. +0 −202 lib/csv.js
  5. +26 −0 package.json
  6. +7 −16 test/each_test.js
  7. +12 −23 test/parse_test.js
  8. +0 −49 test/test_helper.js
View
@@ -0,0 +1,6 @@
+TESTS = $(shell find test -name "*test.js")
+
+test:
+ node $(TESTS)
+
+.PHONY: test
View
@@ -6,18 +6,28 @@ A CSV parser for node.js that will parse provided text data or stream parse a fi
API
---
-`var csv = require('./lib/csv');`
+ var csv = require('node-csv');
Imports the CSV parser into your code for use under the variable named 'csv'. Use this to access the functions within the CSV parser.
-`csv.each('file.csv')`
+ lines = 0;
+ csv.each('file.csv').on('data', function(data) {
+ lines++;
+ }).on('end', function() {
+ console.log(lines + ' lines parsed');
+ })
+
Opens and stream parses a CSV file provided by the first argument value. The second argument can be an _options_ object with the following attributes:
* _headers_: Expects true or false. If true, the first row of the provided file is used as headers and is not emitted as data. All other rows in the CSV are then converted from arrays to objects whose attributes are named by the headers row.
* _strDelimiter_: String delimiter that is used to separate data values within the file. Default is ",", but can be any value.
* _readAmount_: Number of bytes to read on each cycle. The smaller the value, the faster the 'data' event is emitted, but it will be more processor and memory intensive. Default is recommended.
-`csv.parse("a,b,c,e,f\nk,l,m,s,d", function(data) { sys.puts(data); })`
+`csv.parse("a,b,c,e,f\nk,l,m,s,d", function(data) { console.log(data); })`
+
+Parses a string CSV value and emits 'data' events. Second argument must either be a callback function that will be passed the parsed rows OR an options object with the same attributes as above. If the options object is provided, the third argument must be the callback function.
-Parses a string CSV value and emits 'data' events. Second argument must either be a callback function that will be passed the parsed rows OR an options object with the same attributes as above. If the options object is provided, the third argument must be the callback function.
+Contributors
+---
+- [Sean Soper](https://github.com/ssoper) ([@ssoper](https://twitter.com/ssoper))
View
198 index.js
@@ -1,2 +1,196 @@
-// For your convenience
-process.mixin(exports, require('./lib/csv'));
+// CSV parser for node.js that handles standard CSV parsing and
+// delivers each parsed row through a 'data' event.
+// The exported functions are each(filename, options), which stream
+// parses a file, and parse(str, options, callback), which parses a
+// string. The options object can have any of the following:
+//
+// strDelimiter: The string to use for delimiting data elements.
+// headers: If the first line of the file represents headers.
+// Setting this will convert the translated array into
+// an object with the headers as attributes and the
+// values assigned.
+// readAmount: Number of bytes to read before parsing and processing.
+//
+//
+//
+//
+var fs = require("fs"),
+ util = require("util"),
+ events = require("events");
+
+
// CSVToArray parsing function, adapted from
// http://www.bennadel.com/blog/1504-Ask-Ben-Parsing-CSV-Strings-With-Javascript-Exec-Regular-Expression-Command.htm
//
// Parses a delimited string into an array of rows, each row being an
// array of field strings.
//
//   strData:      The delimited text to parse.
//   strDelimiter: The field delimiter. Defaults to ",". It is matched
//                 literally and case-sensitively, and may be longer
//                 than one character.
//
// Returns an array of arrays. Quoted fields may contain delimiters and
// line breaks; a doubled quote ("") inside a quoted field is unescaped
// to a single quote. An empty input yields [[""]].
function CSVToArray( strData, strDelimiter ){
  // Check to see if the delimiter is defined. If not,
  // then default to comma.
  strDelimiter = (strDelimiter || ",");

  // Escape every regular expression metacharacter so the delimiter is
  // always matched literally (e.g. "|" or "." or a letter such as "t").
  var strEscapedDelimiter = strDelimiter.replace( /[.*+?^${}()|[\]\\]/g, "\\$&" );

  // Create a regular expression to parse the CSV values.
  var objPattern = new RegExp(
    (
      // Delimiters.
      "(" + strEscapedDelimiter + "|\\r?\\n|\\r|^)" +

      // Quoted fields.
      "(?:\"([^\"]*(?:\"\"[^\"]*)*)\"|" +

      // Standard fields: any run of characters that contains no
      // quote, no line break and no occurrence of the delimiter.
      "((?:(?!" + strEscapedDelimiter + ")[^\"\\r\\n])*))"
    ),
    "g"
  );

  // Create an array to hold our data. Give the array
  // a default empty first row.
  var arrData = [[]];

  // Holds the capture groups of the current match.
  var arrMatches = null;

  // Keep looping over the regular expression matches
  // until we can no longer find a match.
  while ((arrMatches = objPattern.exec( strData )) !== null){
    // Get the delimiter that was found (empty at the start of input).
    var strMatchedDelimiter = arrMatches[ 1 ];

    if (strMatchedDelimiter.length){
      if (strMatchedDelimiter !== strDelimiter){
        // Not the field delimiter, so it is a row delimiter:
        // start a new row.
        arrData.push( [] );
      } else if (arrMatches.index === 0){
        // The input starts with a field delimiter, which means
        // the first field is empty. Record it so that columns
        // are not shifted to the left.
        arrData[ 0 ].push( "" );
      }
    }

    // Work out which kind of value was captured. The quoted group
    // is compared against undefined (not tested for truthiness) so
    // that an empty quoted field ("") is kept as an empty string.
    var strMatchedValue;
    if (arrMatches[ 2 ] !== undefined){
      // Quoted value: unescape any doubled quotes.
      strMatchedValue = arrMatches[ 2 ].replace( /""/g, "\"" );
    } else {
      // Non-quoted value.
      strMatchedValue = arrMatches[ 3 ];
    }

    // Add the value to the current (last) row.
    arrData[ arrData.length - 1 ].push( strMatchedValue );

    // A zero-length match does not move lastIndex, so exec() would
    // return the same match forever. Step past it manually.
    if (arrMatches[ 0 ].length === 0){
      objPattern.lastIndex++;
    }
  }

  // Return the parsed data.
  return( arrData );
}
+
+exports.each = function (filename, options) {
+ options = (options || {});
+ var strDelimiter = (options.strDelimiter || ",");
+ var headers = null;
+ var position = 0;
+ var readAmount = (options.readAmount || (16384));
+ var buffer = "";
+ var fd = null;
+ var stream = new events.EventEmitter();
+
+ var emit_row = function(row) {
+ var data = CSVToArray(row)[0];
+ if (options["headers"]) {
+ if (headers == null) {
+ headers = data;
+ } else {
+ var obj = {};
+ data.forEach(function(d,i) { obj[headers[i]] = d; });
+ data = obj;
+ }
+ }
+ stream.emit("data", data);
+ }
+
+ var readMore = function() {
+ fs.read(fd, readAmount, position, "utf8", function(err, data, bytesRead) {
+ if (err) {
+ console.log("E1");
+ stream.emit("error",e);
+ fs.close(fd);
+ } else {
+ position += bytesRead;
+ buffer += data;
+ var parts = buffer.split("\n");
+ var pl = parts.length;
+ if (pl > 1) {
+ for( var i = 0; i < (pl - 1); i++) {
+ emit_row(parts[i]);
+ }
+ buffer = parts[pl-1];
+ }
+ if (bytesRead == readAmount) {
+ readMore();
+ } else {
+ if (buffer.length > 0) {
+ emit_row(buffer);
+ }
+ fs.close(fd);
+ stream.emit("end")
+ }
+ }
+ });
+ };
+
+ fs.open(filename, 'r', function (err, _fd) {
+ if (err) {
+ console.log("Could not open the file: "+filename);
+ } else if (_fd) {
+ fd = _fd;
+ readMore();
+ }
+ });
+
+ return stream;
+};
+
+exports.parse = function(str, options, data_listener) {
+
+ if (typeof options == "function") {
+ data_listener = options;
+ options = {};
+ }
+ if (typeof data_listener != "function") {
+ throw new Exception("Data listener must be provided");
+ }
+ options = (options || {});
+ var stream = new events.EventEmitter();
+ stream.addListener("data", data_listener);
+ var parts = str.split("\n");
+ var pl = parts.length;
+ var headers = null;
+ for( var i = 0; i < pl; i++) {
+ var data = CSVToArray(parts[i])[0];
+ if (options["headers"]) {
+ if (headers == null) {
+ headers = data;
+ } else {
+ var obj = {};
+ data.forEach(function(d,i) { obj[headers[i]] = d; });
+ data = obj;
+ }
+ }
+ stream.emit('data', data);
+ }
+};
Oops, something went wrong.

0 comments on commit 01c7ff6

Please sign in to comment.