Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Merge pull request #7 from ssoper/master

Node 0.8
  • Loading branch information...
commit 01c7ff651fba5eefbc1695e90dca4e12a1244f78 2 parents ac2e136 + fa0ecb3
@voodootikigod authored
View
6 Makefile
@@ -0,0 +1,6 @@
+TESTS = $(shell find test -name "*test.js")
+
+test:
+ node $(TESTS)
+
+.PHONY: test
View
18 README.md
@@ -6,11 +6,17 @@ A CSV parser for node.js that will parse provided text data or stream parse a fi
API
---
-`var csv = require('./lib/csv');`
+ var csv = require('node-csv');
Imports the CSV parser into your code for use under the variable named 'csv'. Use this to access the functions within the CSV parser.
-`csv.each('file.csv')`
+ lines = 0;
+ csv.each('file.csv').on('data', function(data) {
+ lines++;
+ }).on('end', function() {
+ console.log(lines + ' lines parsed');
+ })
+
Opens and stream parses a CSV file provided by the first argument value. The second argument can be an _options_ object with the following attributes:
@@ -18,6 +24,10 @@ Opens and stream parses a CSV file provided by the first argument value. The sec
* _strDelimiter_: String delimiter that is used to separate data values within the file. Default is ",", but can be any value.
* _readAmount_: Number of bytes to read on each cycle. The smaller the value, the faster the 'data' event is emitted, but it will be more processor and memory intensive. Default is recommended.
-`csv.parse("a,b,c,e,f\nk,l,m,s,d", function(data) { sys.puts(data); })`
+`csv.parse("a,b,c,e,f\nk,l,m,s,d", function(data) { console.log(data); })`
+
+Parses a string CSV value and emits 'data' events. Second argument must either be a callback function that will be passed the parsed rows OR an options object with the same attributes as above. If the options object is provided, the third argument must be the callback function.
-Parses a string CSV value and emits 'data' events. Second argument must either be a callback function that will be passed the parsed rows OR an options object with the same attributes as above. If the options object is provided, the third argument must be the callback function.
+Contributors
+---
+- [Sean Soper](https://github.com/ssoper) ([@ssoper](https://twitter.com/ssoper))
View
198 index.js
@@ -1,2 +1,196 @@
-// For your convenience
-process.mixin(exports, require('./lib/csv'));
+// CSV parser for node.js that handles all standard CSV parsing and
+// returns the data elements in a single 'data' event emitter.
+// The exported functions are each(filename, options), which stream-parses
+// a file, and parse(str, options, callback), which parses a string.
+// filename is the file to process and options can have any
+// of the following:
+//
+// strDelimiter: The string to use for delimiting data elements.
+// headers: If the first line of the file represents headers.
+// Setting this will convert the translated array into
+// an object with the headers as attributes and the
+// values assigned.
+// readAmount: Number of bytes to read before parsing and processing.
+//
+//
+//
+//
+var fs = require("fs"),
+ util = require("util"),
+ events = require("events");
+
+
+// CSVToArray Parsing function from http://www.bennadel.com/blog/1504-Ask-Ben-Parsing-CSV-Strings-With-Javascript-Exec-Regular-Expression-Command.htm
+// This will parse a delimited string into an array of
+// arrays. The default delimiter is the comma, but this
+// can be overridden in the second argument.
+function CSVToArray( strData, strDelimiter ){
+ // Check to see if the delimiter is defined. If not,
+ // then default to comma.
+ strDelimiter = (strDelimiter || ",");
+
+ // Create a regular expression to parse the CSV values.
+ var objPattern = new RegExp(
+ (
+ // Delimiters.
+ "(\\" + strDelimiter + "|\\r?\\n|\\r|^)" +
+
+ // Quoted fields.
+ "(?:\"([^\"]*(?:\"\"[^\"]*)*)\"|" +
+
+ // Standard fields.
+ "([^\"\\" + strDelimiter + "\\r\\n]*))"
+ ),
+ "gi"
+ );
+
+
+ // Create an array to hold our data. Give the array
+ // a default empty first row.
+ var arrData = [[]];
+ // Create an array to hold our individual pattern
+ // matching groups.
+ var arrMatches = null;
+ // Keep looping over the regular expression matches
+ // until we can no longer find a match.
+ while (arrMatches = objPattern.exec( strData )){
+ // Get the delimiter that was found.
+ var strMatchedDelimiter = arrMatches[ 1 ];
+ // Check to see if the given delimiter has a length
+ // (is not the start of string) and if it matches
+ // field delimiter. If it does not, then we know
+ // that this delimiter is a row delimiter.
+ if (
+ strMatchedDelimiter.length &&
+ (strMatchedDelimiter != strDelimiter)
+ ){
+
+ // Since we have reached a new row of data,
+ // add an empty row to our data array.
+ arrData.push( [] );
+ }
+
+
+ // Now that we have our delimiter out of the way,
+ // let's check to see which kind of value we
+ // captured (quoted or unquoted).
+ if (arrMatches[ 2 ]){
+
+ // We found a quoted value. When we capture
+ // this value, unescape any double quotes.
+ var strMatchedValue = arrMatches[ 2 ].replace(
+ new RegExp( "\"\"", "g" ),
+ "\""
+ );
+
+ } else {
+
+ // We found a non-quoted value.
+ var strMatchedValue = arrMatches[ 3 ];
+
+ }
+
+
+ // Now that we have our value string, let's add
+ // it to the data array.
+ arrData[ arrData.length - 1 ].push( strMatchedValue );
+ }
+
+ // Return the parsed data.
+ return( arrData );
+}
+
+exports.each = function (filename, options) {
+ options = (options || {});
+ var strDelimiter = (options.strDelimiter || ",");
+ var headers = null;
+ var position = 0;
+ var readAmount = (options.readAmount || (16384));
+ var buffer = "";
+ var fd = null;
+ var stream = new events.EventEmitter();
+
+ var emit_row = function(row) {
+ var data = CSVToArray(row)[0];
+ if (options["headers"]) {
+ if (headers == null) {
+ headers = data;
+ } else {
+ var obj = {};
+ data.forEach(function(d,i) { obj[headers[i]] = d; });
+ data = obj;
+ }
+ }
+ stream.emit("data", data);
+ }
+
+ var readMore = function() {
+ fs.read(fd, readAmount, position, "utf8", function(err, data, bytesRead) {
+ if (err) {
+ console.log("E1");
+ stream.emit("error",e);
+ fs.close(fd);
+ } else {
+ position += bytesRead;
+ buffer += data;
+ var parts = buffer.split("\n");
+ var pl = parts.length;
+ if (pl > 1) {
+ for( var i = 0; i < (pl - 1); i++) {
+ emit_row(parts[i]);
+ }
+ buffer = parts[pl-1];
+ }
+ if (bytesRead == readAmount) {
+ readMore();
+ } else {
+ if (buffer.length > 0) {
+ emit_row(buffer);
+ }
+ fs.close(fd);
+ stream.emit("end")
+ }
+ }
+ });
+ };
+
+ fs.open(filename, 'r', function (err, _fd) {
+ if (err) {
+ console.log("Could not open the file: "+filename);
+ } else if (_fd) {
+ fd = _fd;
+ readMore();
+ }
+ });
+
+ return stream;
+};
+
+exports.parse = function(str, options, data_listener) {
+
+ if (typeof options == "function") {
+ data_listener = options;
+ options = {};
+ }
+ if (typeof data_listener != "function") {
+ throw new Exception("Data listener must be provided");
+ }
+ options = (options || {});
+ var stream = new events.EventEmitter();
+ stream.addListener("data", data_listener);
+ var parts = str.split("\n");
+ var pl = parts.length;
+ var headers = null;
+ for( var i = 0; i < pl; i++) {
+ var data = CSVToArray(parts[i])[0];
+ if (options["headers"]) {
+ if (headers == null) {
+ headers = data;
+ } else {
+ var obj = {};
+ data.forEach(function(d,i) { obj[headers[i]] = d; });
+ data = obj;
+ }
+ }
+ stream.emit('data', data);
+ }
+};
View
202 lib/csv.js
@@ -1,202 +0,0 @@
-// CSV parser for node.js that handles all standard CSV parsing and
-// returns the data elements in a single 'data' event emitter.
-// Currently the only exported function is each(filename, option),
-// where filename is the file to process and options can have any
-// of the following:
-//
-// strDelimiter: The string to use for delimiting data elements.
-// headers: If the first line of the file represents headers.
-// Setting this will convert the translated array into
-// an object with the headers as attributes and the
-// values assigned.
-// readAmount: Number of bytes to read before parsing and processing.
-//
-//
-//
-//
-var fs = require("fs"),
- sys = require("sys"),
- events = require("events");
-
-
-// CSVToArray Parsing function from http://www.bennadel.com/blog/1504-Ask-Ben-Parsing-CSV-Strings-With-Javascript-Exec-Regular-Expression-Command.htm
-// This will parse a delimited string into an array of
-// arrays. The default delimiter is the comma, but this
-// can be overridden in the second argument.
-function CSVToArray( strData, strDelimiter ){
- // Check to see if the delimiter is defined. If not,
- // then default to comma.
- strDelimiter = (strDelimiter || ",");
-
- // Create a regular expression to parse the CSV values.
- var objPattern = new RegExp(
- (
- // Delimiters.
- "(\\" + strDelimiter + "|\\r?\\n|\\r|^)" +
-
- // Quoted fields.
- "(?:\"([^\"]*(?:\"\"[^\"]*)*)\"|" +
-
- // Standard fields.
- "([^\"\\" + strDelimiter + "\\r\\n]*))"
- ),
- "gi"
- );
-
-
- // Create an array to hold our data. Give the array
- // a default empty first row.
- var arrData = [[]];
- // Create an array to hold our individual pattern
- // matching groups.
- var arrMatches = null;
- // Keep looping over the regular expression matches
- // until we can no longer find a match.
- while (arrMatches = objPattern.exec( strData )){
- // Get the delimiter that was found.
- var strMatchedDelimiter = arrMatches[ 1 ];
- // Check to see if the given delimiter has a length
- // (is not the start of string) and if it matches
- // field delimiter. If it does not, then we know
- // that this delimiter is a row delimiter.
- if (
- strMatchedDelimiter.length &&
- (strMatchedDelimiter != strDelimiter)
- ){
-
- // Since we have reached a new row of data,
- // add an empty row to our data array.
- arrData.push( [] );
- }
-
-
- // Now that we have our delimiter out of the way,
- // let's check to see which kind of value we
- // captured (quoted or unquoted).
- if (arrMatches[ 2 ]){
-
- // We found a quoted value. When we capture
- // this value, unescape any double quotes.
- var strMatchedValue = arrMatches[ 2 ].replace(
- new RegExp( "\"\"", "g" ),
- "\""
- );
-
- } else {
-
- // We found a non-quoted value.
- var strMatchedValue = arrMatches[ 3 ];
-
- }
-
-
- // Now that we have our value string, let's add
- // it to the data array.
- arrData[ arrData.length - 1 ].push( strMatchedValue );
- }
-
- // Return the parsed data.
- return( arrData );
-}
-
-
-
-
-
-
-
-exports.each = function (filename, options) {
- options = (options || {});
- var strDelimiter = (options.strDelimiter || ",");
- var headers = null;
- var position = 0;
- var readAmount = (options.readAmount || (16384));
- var buffer = "";
- var fd = null;
- var stream = new events.EventEmitter();
-
- var emit_row = function(row) {
- var data = CSVToArray(row)[0];
- if (options["headers"]) {
- if (headers == null) {
- headers = data;
- } else {
- var obj = {};
- data.forEach(function(d,i) { obj[headers[i]] = d; });
- data = obj;
- }
- }
- stream.emit("data", data);
- }
-
- var readMore = function() {
- fs.read(fd, readAmount, position, "utf8", function(err, data, bytesRead) {
- if (err) {
- sys.puts("E1");
- stream.emit("error",e);
- fs.close(fd);
- } else {
- position += bytesRead;
- buffer += data;
- var parts = buffer.split("\n");
- var pl = parts.length;
- if (pl > 1) {
- for( var i = 0; i < (pl - 1); i++) {
- emit_row(parts[i]);
- }
- buffer = parts[pl-1];
- }
- if (bytesRead == readAmount) {
- readMore();
- } else {
- if (buffer.length > 0) {
- emit_row(buffer);
- }
- fs.close(fd);
- stream.emit("end")
- }
- }
- });
- };
-
- fs.open(filename, process.O_RDONLY, 0666, function (err, _fd) {
- if (err) {
- sys.puts("Could not open the file: "+filename);
- } else if (_fd) {
- fd = _fd;
- readMore();
- }
- });
-
- return stream;
-};
-
-exports.parse = function(str, options, data_listener) {
-
- if (typeof options == "function") {
- data_listener = options;
- options = {};
- }
- if (typeof data_listener != "function") {
- throw new Exception("Data listener must be provided");
- }
- options = (options || {});
- var stream = new events.EventEmitter();
- stream.addListener("data", data_listener);
- var parts = str.split("\n");
- var pl = parts.length;
- var headers = null;
- for( var i = 0; i < pl; i++) {
- var data = CSVToArray(parts[i])[0];
- if (options["headers"]) {
- if (headers == null) {
- headers = data;
- } else {
- var obj = {};
- data.forEach(function(d,i) { obj[headers[i]] = d; });
- data = obj;
- }
- }
- stream.emit('data', data);
- }
-};
View
26 package.json
@@ -0,0 +1,26 @@
+{
+ "name": "node-csv",
+ "version": "0.5.0",
+ "description": "Efficient Evented CSV Parsing in node.js",
+ "keywords": [
+ "csv",
+ "parse"
+ ],
+ "author": {
+ "name": "Chris Williams",
+ "email": "chris@iterativedesigns.com"
+ },
+ "contributors": [
+ {
+ "name": "Sean Soper",
+ "email": "sean.soper@gmail.com"
+ }
+ ],
+ "dependencies": {},
+ "main": "index",
+ "devDependencies": {},
+ "optionalDependencies": {},
+ "engines": {
+ "node": ">= 0.8"
+ }
+}
View
23 test/each_test.js
@@ -1,18 +1,9 @@
-var helper = require('./test_helper'),
- sys = require("sys"),
- csv = require("../lib/csv");
+var assert = require('assert'),
+ csv = require("../index");
-
-helper.testCase("Each Tests", {
- testParseMultiLine: function(test) {
- var count = 0;
- csv.each("basic_sample.csv").addListener("data", function(data) {
- count += 1;
- }).addListener("complete", function() {
- test.assertTrue(count == 1800);
- });
-
-
- }
+var count = 0;
+csv.each(__dirname + '/basic_sample.csv').on("data", function(data) {
+ count += 1;
+}).on("complete", function() {
+ assert.equal(count, 1800);
});
-
View
35 test/parse_test.js
@@ -1,27 +1,16 @@
-var helper = require('./test_helper'),
- sys = require("sys"),
- csv = require("../lib/csv");
+var assert = require('assert'),
+ csv = require("../index");
-
-helper.testCase("Parse Tests", {
- testParseSingleLine: function(test) {
- var parser = csv.parse("a,test,of,csv,parsing,for,me", function(data) {
- test.assertArrayEquals(["a", "test", "of", "csv", "parsing", "for", "me"], data);
- });
- },
+csv.parse("a,test,of,csv,parsing,for,me", function(data) {
+ assert.equal(["a", "test", "of", "csv", "parsing", "for", "me"], data);
+});
- testParseMultiLine: function(test) {
- var count = 0;
- var parser = csv.parse("a,test,of,csv,parsing,for,me\na,test,of,csv,parsing,for,me", function(data) {
- count += 1
- });
- test.assertTrue(count == 2);
- },
-
- testParseComplexCSV: function(test) {
- var parser = csv.parse("a,\"test of the power\",234,3.1444", function(data) {
- test.assertArrayEquals(["a", "test of the power", 234, 3.1444], data);
- });
- }
+var count = 0;
+csv.parse("a,test,of,csv,parsing,for,me\na,test,of,csv,parsing,for,me", function(data) {
+ count++;
+ count == 2 && assert.equal(count, 2);
});
+csv.parse("a,\"test of the power\",234,3.1444", function(data) {
+ assert.equal(["a", "test of the power", 234, 3.1444], data);
+});
View
49 test/test_helper.js
@@ -1,49 +0,0 @@
-var test = require('mjsunit'),
- sys = require('sys');
-
-exports.testCase = function(caseName, tests) {
- var testCount = 0, passes = 0, fails = 0;
-
- function wrapAssertions(name) {
- var assertions = {};
-
- [
- 'assertEquals',
- 'assertArrayEquals',
- 'assertTrue',
- 'assertFalse',
- 'assertNaN',
- 'assertThrows',
- 'AssertInstanceOf',
- 'assertDoesNotThrow',
- 'assertUnreachable'
- ].forEach(function(assert) {
- assertions[assert] = function() {
- testCount++;
- try {
- test[assert].apply(this, arguments);
- passes++;
- } catch(e) {
- sys.puts(name + ': ' + e);
- fails++;
- }
- }
- });
-
- return assertions;
- }
-
-
- for (var name in tests) {
- if (name.match(/^test/)) {
- tests[name](wrapAssertions(name));
- }
- }
-
- process.addListener('exit', function() {
- var passFail = (testCount == passes) ? ' \033[0;32mGOOD!\033[1;37m' : ' \033[0;31mBAD!\033[1;37m';
- sys.puts(caseName + " - Assertions: " + testCount + " Passed: " + passes + " Failed: " + fails + passFail);
- });
-}
-
-
Please sign in to comment.
Something went wrong with that request. Please try again.