Skip to content
Newer
Older
100755 91 lines (84 sloc) 2.26 KB
2a94b75 @zetaben Correct loading path
authored Nov 29, 2011
#!/usr/bin/env ruby
# html2fb: download an HTML page, parse it with Html2Feedbooks and either
# open a local preview in the web browser (default) or send the result to
# Feedbooks (-s).
#
# Usage: html2fb [options] URL
# When no valid URL is given on the command line, one is asked for on STDIN.
require 'rubygems'

require 'optparse'
require 'open-uri'
require 'tmpdir'
require 'launchy'
require 'digest/md5'
require 'Html2Feedbooks'

include HTML2FB

# Returns true when +text+ is nil or the empty string.
def blank_entry?(text)
  text.nil? || text == ''
end

# Loads the Marshal-serialized cache hash stored at +path+.
#
# Returns an empty hash when the file cannot be read or decoded, or does not
# contain a Hash, so that a stale or corrupt cache only costs a re-parse.
#
# NOTE(review): Marshal.load can instantiate arbitrary objects and the cache
# file lives in the shared temp directory; confirm this is acceptable or move
# the cache to a user-private location.
def read_cache(path)
  loaded = File.open(path, 'rb') { |io| Marshal.load(io) }
  loaded.is_a?(Hash) ? loaded : {}
rescue StandardError => e
  STDERR.puts "Ignoring unreadable cache file #{path}: #{e.message}"
  {}
end

# Tells whether +cache+ was built from the same +content+ and from the same
# values for every key of +conf+ (extra keys in the cached conf are ignored).
# An empty or incomplete cache never matches.
def cache_matches?(cache, content, conf)
  cached_content = cache[:content]
  cached_conf = cache[:conf]
  return false unless cached_content.is_a?(String) && cached_conf.is_a?(Hash)
  return false if cache[:doc].nil?
  return false unless Digest::MD5.hexdigest(content) == Digest::MD5.hexdigest(cached_content)

  conf.all? { |key, value| cached_conf[key] == value }
end

options = {
  :conf => "conf.yaml",
  :preview => true,
  :conv => true,
  :cache => false
}

# optparse short switches are a single character, so the historical "-nc"
# spelling cannot be declared directly; rewrite it to its long form instead.
ARGV.map! { |arg| arg == '-nc' ? '--no-conv' : arg }

OptionParser.new do |opts|
  opts.banner = "Usage: html2fb [options] URL"

  opts.on("-c", "--conf FILE", String, "Configuration file") do |file|
    options[:conf] = file
  end
  opts.on("-s", "--send", "Send to feedbooks") do
    options[:preview] = false
  end
  opts.on("--[no-]conv", "Charset conversion (default: on, -nc is the same as --no-conv)") do |enabled|
    options[:conv] = enabled
  end
  opts.on("-C", "--cache", "Reuse the cached parse when content and configuration are unchanged") do
    options[:cache] = true
  end
end.parse!

cache_path = File.join(Dir.tmpdir, '.cache')

# Keep asking until Downloader accepts the entry; whatever valid_url? returns
# is what gets downloaded.
entry = ARGV[0]
url = nil
loop do
  begin
    url = Downloader.valid_url?(entry)
    break
  rescue StandardError => e
    # Stay quiet when nothing was typed yet: the prompt below is enough.
    unless blank_entry?(entry)
      STDERR.puts 'Invalid URL'
      STDERR.puts e
    end
  end
  print "URL : "
  line = STDIN.gets
  abort 'No URL given' if line.nil? # STDIN closed: nothing more to ask
  entry = line.strip
end

conf = Conf.new(options[:conf], options[:conv])
# The 'fb' entry is left out of the cache comparison.
abridged_conf = conf.to_h.reject { |key, _value| key == 'fb' }
content = Downloader.download(url)

cache = {}
cache = read_cache(cache_path) if options[:cache] && File.exist?(cache_path)

if cache_matches?(cache, content, abridged_conf)
  puts "Using cache file"
  doc = cache[:doc]
else
  doc = Parser.new(conf).parse(content)
end

# The cache is refreshed on every run, whether or not -C was given.
File.open(cache_path, 'wb') do |io|
  Marshal.dump({:url => url, :conf => abridged_conf, :content => content, :doc => doc}, io)
end
puts "Writing cache File "

puts doc.toc.to_yaml
if options[:preview]
  page = File.join(Dir.tmpdir, Digest::MD5.hexdigest(url.to_s)) + '.html'
  File.open(page, 'w') { |out| out.write doc.to_html }
  puts "A preview of the parsed file should be opening in your webbrowser now"
  puts "If nothing open you can open the file located at : #{page}"
  puts "When happy with the parsed output rerun with -s option to send to Feedbooks.com"
  Launchy.open(page)
else
  doc.to_feedbooks(conf)
end
Something went wrong with that request. Please try again.