diff --git a/src/bosh-request-parser.js b/src/bosh-request-parser.js index 8bebfbf..e925a20 100644 --- a/src/bosh-request-parser.js +++ b/src/bosh-request-parser.js @@ -23,34 +23,55 @@ * */ -var ltx = require('ltx'); -var util = require('util'); -var dutil = require('./dutil.js'); -var expat = require('node-expat'); +var ltx = require('ltx'); +var util = require('util'); +var dutil = require('./dutil.js'); +var expat = require('node-expat'); +var assert = require('assert').ok; +var path = require('path'); + +var filename = "[" + path.basename(path.normalize(__filename)) + "]"; +var log = require('./log.js').getLogger(filename); function BoshRequestParser() { this._parser = new expat.Parser('UTF-8'); - this._parser.parse(""); - - this._started = false; - this.parsedBody = null; - - this._parser.on("text", this._handle_text.bind(this)); - this._parser.on("endElement", this._handle_end_element.bind(this)); - this._parser.on("entityDecl", this._handle_entity_decl.bind(this)); - this._parser.on("startElement", this._handle_start_element.bind(this)); + this.init_state_(); } dutil.copy(BoshRequestParser.prototype, { + /* Initialize the internal state (variables) of the parser */ + init_state_: function() { + this._parser.removeAllListeners(); + this._parser.parse(""); + + this._started = false; + this.parsedBody = null; + if (this.hasOwnProperty('stanza')) { + delete this.stanza; + } + + // Always attach handlers after starting the tag. + this._parser.on("text", this._handle_text.bind(this)); + this._parser.on("endElement", this._handle_end_element.bind(this)); + this._parser.on("entityDecl", this._handle_entity_decl.bind(this)); + this._parser.on("startElement", this._handle_start_element.bind(this)); + }, + + /* Reset the underlying expat parser and internal state. Do NOT + * call this method after calling end() on the parser. + */ + reset: function() { + log.debug("Reseting parser state"); + this._parser.reset(); + this.init_state_(); + }, + _handle_start_element: function(name, attrs) { if (!this._started) { - if (name.substr(0, 5) === "DUMMY") { - this._started = true; - } else { - this.end(); - return; - } - } + // The first node MUST be . + assert(name === 'DUMMY'); + this._started = true; + } var stanza = new ltx.Element(name, attrs); if (this.stanza) { @@ -63,14 +84,20 @@ dutil.copy(BoshRequestParser.prototype, { _handle_end_element: function(name, attrs) { if (this.stanza) { if (this.stanza.parent) { + // Expat has already verified that the closing tag + // matches the corresponding opening tag, so we need + // not check that again. this.stanza = this.stanza.parent; } else { this.parsedBody = this.stanza; delete this.stanza; } } else { - // This happens at times. - this.end(); + // The user tried to close the top level tag. We + // set this.parsedBody to null to indicate that we + // encountered a parsing error. We don't do anything else + // since the caller will reset() the parser. + this.parsedBody = null; } }, @@ -82,20 +109,32 @@ dutil.copy(BoshRequestParser.prototype, { }, _handle_entity_decl: function() { - this.end(); + // this.end(); + // We ignore all entity declarations. + this.reset(); }, + /* parse() may be passed incomplete stanzas, but finally a check + * is made to see if parsedBody is non-null. If it is, we reset + * the parser. + */ parse: function(data) { + this.parsedBody = null; if (this._parser && !this._parser.parse(data)) { - this.end(); + // this.end(); + this.reset(); return false; } else if (!this._parser) { + // end() was called on this parser already. return false; } return true; }, + /* Ends parsing and destroys the underlying parser. Do NOT call + * any other method on this object after calling end(). + */ end: function() { if (this._parser) { this._parser.stop(); diff --git a/src/http-server.js b/src/http-server.js index 39fddee..472d28d 100644 --- a/src/http-server.js +++ b/src/http-server.js @@ -58,27 +58,42 @@ function HTTPServer(port, host, stat_func, bosh_request_handler, http_error_hand // the request. // var i; - var randInt = String(Math.floor(Math.random() * 1000000)) - bosh_request_parser.parse(''); + bosh_request_parser.parse(''); for (i = 0; i < buffers.length; i++) { // log.trace("Request fragment: %s", buffers[i]); valid_request = bosh_request_parser.parse(buffers[i]); - if (!valid_request) return null; + if (!valid_request) { + bosh_request_parser.reset(); + return null; + } } - valid_request = bosh_request_parser.parse(''); + valid_request = bosh_request_parser.parse(''); - if (valid_request && bosh_request_parser.parsedBody - && bosh_request_parser.parsedBody.getChild('body')) { - var bodyTag = bosh_request_parser.parsedBody.getChild('body'); - bodyTag.parent = null; - return bodyTag; + if (valid_request && bosh_request_parser.parsedBody) { + if (bosh_request_parser.parsedBody.getChild('body')) { + var bodyTag = bosh_request_parser.parsedBody.getChild('body'); + bodyTag.parent = null; + return bodyTag; + } else { + // We don't reset the parser if we got a valid + // bodyTag, but didn't get a child element in + // the wrapper tag since the parser state + // isn't corrupted. + return null; + } } else { - bosh_request_parser = new BoshRequestParser(); + // We reset the parser state if we either got a 'false' + // return from the parse() method or if the bodyTag is + // absent because the bodyTag could be absent due to an + // unclosed tag (which might occur due to malicious + // input). Reseting the parser state clears out the + // currently being processed stanza. + bosh_request_parser.reset(); return null; } } - + // All request handlers return 'false' on successful handling // of the request and 'undefined' if they did NOT handle the // request. This is according to the EventPipe listeners API