From 10d86171c267084b43b4176ef56b495e12e274b8 Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Thu, 23 Apr 2009 23:51:47 -0700 Subject: [PATCH] adding lots of documentation. closes #14 --- lib/nokogiri/html/sax/parser.rb | 26 ++++++++++++- lib/nokogiri/xml/sax/document.rb | 66 ++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/lib/nokogiri/html/sax/parser.rb b/lib/nokogiri/html/sax/parser.rb index 2eb038c37a..7249d5665e 100644 --- a/lib/nokogiri/html/sax/parser.rb +++ b/lib/nokogiri/html/sax/parser.rb @@ -1,7 +1,31 @@ module Nokogiri module HTML + ### + # Nokogiri lets you write a SAX parser to process HTML while still getting + # HTML error-correction features. + # + # See Nokogiri::HTML::SAX::Parser for a basic example of using a + # SAX parser with HTML. + # + # For more information on SAX parsers, see Nokogiri::XML::SAX module SAX - class Parser < XML::SAX::Parser + ### + # This class lets you perform SAX style parsing on HTML with HTML + # error correction. + # + # Here is a basic usage example: + # + # class MyDoc < Nokogiri::XML::SAX::Document + # def start_element name, attributes = [] + # puts "found a #{name}" + # end + # end + # + # parser = Nokogiri::HTML::SAX::Parser.new(MyDoc.new) + # parser.parse(File.open(ARGV[0], 'rb')) + # + # For more information on SAX parsers, see Nokogiri::XML::SAX + class Parser < Nokogiri::XML::SAX::Parser ### # Parse html stored in +data+ using +encoding+ def parse_memory data, encoding = 'UTF-8' diff --git a/lib/nokogiri/xml/sax/document.rb b/lib/nokogiri/xml/sax/document.rb index 2619985e4d..a0d4626024 100644 --- a/lib/nokogiri/xml/sax/document.rb +++ b/lib/nokogiri/xml/sax/document.rb @@ -1,6 +1,72 @@ module Nokogiri module XML + ### + # SAX Parsers are event driven parsers. Nokogiri provides two different + # event based parsers when dealing with XML. If you want to do SAX style + # parsing using HTML, check out Nokogiri::HTML::SAX. 
+ # + # The basic way a SAX style parser works is by creating a parser, + # telling the parser about the events we're interested in, then giving + # the parser some XML to process. The parser will notify you when + # it encounters events you said you would like to know about. + # + # To register for events, you simply subclass Nokogiri::XML::SAX::Document, + # and implement the methods for which you would like notification. + # + # For example, if I want to be notified when a document ends, and when an + # element starts, I would write a class like this: + # + # class MyDocument < Nokogiri::XML::SAX::Document + # def end_document + # puts "the document has ended" + # end + # + # def start_element name, attributes = [] + # puts "#{name} started" + # end + # end + # + # Then I would instantiate a SAX parser with this document, and feed the + # parser some XML: + # + # # Create a new parser + # parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new) + # + # # Feed the parser some XML + # parser.parse(File.open(ARGV[0], 'rb')) + # + # Now my document handler will be called when each node starts, and when + # the document ends. To see what kinds of events are available, take + # a look at Nokogiri::XML::SAX::Document. + # + # Two SAX parsers for XML are available: a parser that reads from a string + # or IO object as it feels necessary, and a parser that lets you spoon + # feed it XML. If you want to let Nokogiri deal with reading your XML, + # use the Nokogiri::XML::SAX::Parser. If you want to have fine-grained + # control over the XML input, use the Nokogiri::XML::SAX::PushParser. module SAX + ### + # This class is used for registering types of events you are interested + # in handling. All of the methods on this class are available as + # possible events while parsing an XML document. To register for any + # particular event, just subclass this class and implement the methods + # you are interested in knowing about. 
+ # + # To only be notified about start and end element events, write a class + # like this: + # + # class MyDocument < Nokogiri::XML::SAX::Document + # def start_element name, attrs = [] + # puts "#{name} started!" + # end + # + # def end_element name + # puts "#{name} ended" + # end + # end + # + # You can use this event handler for any SAX style parser included with + # Nokogiri. See Nokogiri::XML::SAX, and Nokogiri::HTML::SAX. class Document ### # Called when document starts parsing