Tidy code and fix saving manifest

valobox · Apr 3, 2013 · e66d03d · e66d03d
1 parent 17aebf3
commit e66d03d
Show file tree

Hide file tree

Showing 24 changed files with 226 additions and 63 deletions.
diff --git a/README.md b/README.md
@@ -9,7 +9,7 @@ Library to access and modify the contents of an Epub
 
 Initialize with the path to an epub file. Note that any setters will edit the epub itself, so work on a copy if you don't want to modify the original.
 
-    epub = Epub::File.new("9781449315306.epub")
+    epub = Epub::Document.new("9781449315306.epub")
 
 
 ### Structure
@@ -202,12 +202,12 @@ Calling `compress!` will minify all the *css* and *html* items in the epub and c
 
 If you want to extract an epub, for instance to serve the content up via a web interface, you can do the following:
 
-    Epub::File.extract('example.epub', '/some/directory/path')
+    Epub::Document.extract('example.epub', '/some/directory/path')
 
 
-You can also pass a block which will re-zip the epub when the block exits. The block gets passed a <#Epub::File> instance as an argument
+You can also pass a block which will re-zip the epub when the block exits. The block gets passed a <#Epub::Document> instance as an argument
 
-    Epub::File.extract('example.epub') do |epub| 
+    Epub::Document.extract('example.epub') do |epub| 
         # Do some epub processing here...
     end
 

diff --git a/lib/epub.rb b/lib/epub.rb
@@ -31,7 +31,7 @@
 
 # Epub
 require 'epub/base'
-require 'epub/file'
+require 'epub/document'
 require 'epub/guide'
 require 'epub/manifest'
 require 'epub/metadata'

diff --git a/lib/epub/file.rb → lib/epub/document.rb b/lib/epub/file.rb → lib/epub/document.rb
@@ -1,9 +1,10 @@
 module Epub
-  class File < Base
+  class Document < Base
 
     # @private
     attr_accessor :file, :path, :opf_xml
 
+
     # @param [String] path to an epub file, path can be either:
     #   * Directory of an extracted Epub
     #   * Epub file
@@ -22,11 +23,11 @@ def initialize(path)
     # @overload extract(filepath)
     #   Unzips an Epub and Rezips it after the block exits
     #   @param [String] path to the Epub
-    #   @yield [Epub::File, epub_filepath] 
+    #   @yield [Epub::Document, epub_filepath] 
     # @overload extract(filepath, extract_path)
     #   @param [String] path to the Epub
     #   @param [String] directory path to extract to
-    def self.extract(filepath, extract_path=nil)
+    def self.extract(filepath, extract_path = nil)
       if block_given?
         Dir.mktmpdir do |outdir|
           ZipFile.unzip(filepath, outdir)
@@ -41,6 +42,21 @@ def self.extract(filepath, extract_path=nil)
     end
 
 
+    def standardize!
+      begin
+        # Standardize the urls
+        toc.standardize!
+        guide.standardize!
+        manifest.standardize!
+
+      rescue => ex
+        log "failed to standardize\n #{ex.to_s}"
+        raise ex
+      end
+
+    end
+
+
     # Flattens the directory structure, for example this:
     #
     #  /
@@ -87,10 +103,8 @@ def normalize!
       begin
         create_base_directories!
 
-        # Standardize the urls
-        toc.standardize!
-        guide.standardize!
-        manifest.standardize!
+        # Ensure all files are properly formatted
+        standardize!
 
         # normalize the files
         toc.normalize!
@@ -126,51 +140,54 @@ def manifest
       @manifest ||= Manifest.new self
     end
 
+
     # Epub metadata accessor
     # @return [Epub::Metadata]
     def metadata
       @metadata ||= Metadata.new self
     end
 
+
     # Epub guide accessor
     # @return [Epub::Guide]
     def guide
       @guide ||= Guide.new self
     end
 
+
     # Epub spine accessor
     # @return [Epub::Spine]
     def spine
       @spine ||= Spine.new self
     end
 
+
     # Epub toc accessor
     # @return [Epub::Toc]
     def toc
       spine.toc
     end
 
+
     # Save a partial opf
     def save_opf!(doc_partial, xpath)
       log "saving updated opf"
 
-      doc = opf_xml
-
       # Find where we're inserting into
-      node = doc.xpath(xpath, 'xmlns' => 'http://www.idpf.org/2007/opf').first
+      node = opf_xml.xpath(xpath, 'xmlns' => 'http://www.idpf.org/2007/opf').first
 
       if node
         # Because of <https://github.com/tenderlove/nokogiri/issues/391> we
         # create the new doc before we insert, else we get a default namespace
         # prefix
         doc_partial = Nokogiri::XML(doc_partial.to_s)
         node.replace(doc_partial.root)
-        
-        data = doc.to_s
+
+        data = opf_xml.to_s
 
         file.write(opf_path, data)
 
-        @opf_xml = doc
+        opf_xml
       end
 
     end
@@ -213,6 +230,7 @@ def to_s
       ret
     end
 
+
     # Add a line to the log file
     # @return boolean of write success
     def log(str, level = :log)
@@ -242,16 +260,19 @@ def log_path
         "log.txt"
       end
 
+
       def log_present?
         @file.exists?(log_path)
       end
 
+
       def initialize_log
         unless log_present?
           @file.write(log_path, "")
         end
       end
 
+
       def build_file
         case type
         when :zip
@@ -326,6 +347,7 @@ def report_error(str)
         @errors << str
       end
 
+
       def report
         {
           processing_time: @start_time - Time.now,

diff --git a/lib/epub/guide.rb b/lib/epub/guide.rb
@@ -2,7 +2,7 @@ module Epub
   class Guide < Base
     include PathManipulation
 
-    # @param [Epub::File]
+    # @param [Epub::Document]
     def initialize(epub)
       @epub = epub
     end
@@ -17,7 +17,7 @@ def standardize!
 
     # Normalizes the guide by flattening the file paths
     # 
-    # @see Epub::File#normalize!
+    # @see Epub::Document#normalize!
     def normalize!
       normalize
       save

diff --git a/lib/epub/item.rb b/lib/epub/item.rb
@@ -28,26 +28,31 @@ def read_xml
       @epub.file.read_xml(abs_filepath)
     end
 
+
     # TODO: Should be overridden by image to read binary data
     def read
       @epub.file.read(abs_filepath)
     end
 
+
     def write(data)
       @epub.file.write(abs_filepath, data)
     end
 
+
     # Extract file to the _path_ specified
     # @param [String] path
     def extract(path)
       @epub.file.extract(abs_filepath, path)
     end
 
+
     # Boolean of if the file this item represents exists
     def exists?
       ::File.exists?(abs_filepath)
     end
 
+
     # Saves the item back to the epub
     def save
       write(to_s)
@@ -63,32 +68,38 @@ def filename
       unescape_path ::File.basename(filepath)
     end
 
+
     def filename_without_ext
       ext = ::File.extname(filepath)
       ::File.basename(filepath, ext)
     end
 
+
     # Path relative to the Epubs opf file
     def filepath
       unescape_path url
     end
 
+
     # Path absolute to the Epubs base directory, this will be different
-    # depending on the Epub type @see Epub::File.type
+    # depending on the Epub type @see Epub::Document.type
     # * *zip:* Root of the zip filesystem
     # * *directory:* Root relative to base epub directory
     def abs_filepath
       unescape_path abs_url
     end
 
+
     def url
       escape_url @epub.manifest.path_from_id(@id)
     end
 
+
     def abs_url
       escape_url @epub.manifest.abs_path_from_id(@id)
     end
 
+
     # Get an item based on the path from this item
     # TODO: Might need to escape URL
     def get_item(path_to_file)
@@ -102,13 +113,14 @@ def get_item(path_to_file)
       # Get the absolute path to the file from the epub item
       abs_path_to_file = abs_path_to_file(path_to_file)
 
-      begin
-        @epub.manifest.item_for_path( abs_path_to_file )
-      rescue
+      if item_in_manifest = @epub.manifest.item_for_path( abs_path_to_file )
+        item_in_manifest
+      else
         @epub.manifest.add( abs_path_to_file )
       end
     end
 
+
     # returns the full path to an item after it is hashed
     # /html/chapters/1.html #=> /html/a42901.html
     # @options
@@ -120,50 +132,59 @@ def normalized_hashed_path(options = {})
       unescape_path normalized_hashed_url options
     end
 
+
     def normalized_hashed_url(options = {})
       escape_url relative_path(abs_normalized_hashed_path, options[:relative_to])
     end
 
+
     # Standardizes the contents of the item, _overridden by subclasses_
     def standardize!; end
 
+
     # Flattens the epub structure, _overridden by subclasses_
-    # @see Epub::File#normalize!
+    # @see Epub::Document#normalize!
     def normalize!; end
 
+
     # Compress item data, _overridden by subclasses_
     def compress!; end
 
+
     def create_manifest_entry(href)
       @epub.manifest.add( abs_path_to_file(href) )
     end
 
+
     def log(*args)
       @epub.log(args)
     end
 
+
     # create an absolute path to a file #=> 'OEBPS/HTML/file.html' + '../CSS/style.css' = 'OEBPS/CSS/style.css' 
     def abs_path_to_file(path_to_file)
       clean_path(base_dirname, path_to_file)
     end
 
-
     private
 
       def base_dirname
         Pathname.new(filepath).dirname.to_s
       end
 
+
       # The hashed filename
       # /html/chapters/1.html #=> a42901-1.html
       def hashed_filename
         "#{hash_path(abs_filepath)}-#{filename_without_ext}#{file_ext}"
       end
 
+
       def file_ext
         @file_ext_overide || ::File.extname(abs_filepath)
       end
 
+
       def abs_normalized_hashed_path
         ::File.join(@normalized_dir, hashed_filename)
       end