Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request #79 from xijo/handle_force_encoding
Handle force_encoding issue, according to #73
  • Loading branch information
xijo committed Oct 2, 2019
2 parents d18fd18 + a93ee2b commit d7078c4
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 8 deletions.
16 changes: 9 additions & 7 deletions lib/reverse_markdown.rb
Expand Up @@ -33,15 +33,17 @@
module ReverseMarkdown

def self.convert(input, options = {})
root = case input
when String then Nokogiri::HTML(input).root
when Nokogiri::XML::Document then input.root
when Nokogiri::XML::Node then input
end
config.with(options) do
input = cleaner.force_encoding(input.to_s)

root or return ''
root = case input
when String then Nokogiri::HTML(input).root
when Nokogiri::XML::Document then input.root
when Nokogiri::XML::Node then input
end

root or return ''

config.with(options) do
result = ReverseMarkdown::Converters.lookup(root.name).convert(root)
cleaner.tidy(result)
end
Expand Down
5 changes: 5 additions & 0 deletions lib/reverse_markdown/cleaner.rb
Expand Up @@ -59,6 +59,11 @@ def clean_punctuation_characters(string)
string.gsub(/(\*\*|~~|__)\s([\.!\?'"])/, "\\1".strip + "\\2")
end

# Makes +string+ valid UTF-8 when the +force_encoding+ option is enabled.
#
# With the option disabled the input is returned untouched (same object).
# With it enabled, a copy of the input is tagged as UTF-8 and every invalid
# byte sequence is removed. Well-formed multibyte characters are kept;
# transcoding from 'binary' instead would drop every non-ASCII byte,
# including valid characters such as "é".
#
# @param string [String] raw input text
# @return [String] the original string, or a scrubbed UTF-8 copy of it
def force_encoding(string)
  return string unless ReverseMarkdown.config.force_encoding

  # dup first: the caller's string may be frozen and must not be re-tagged.
  string.dup.force_encoding(Encoding::UTF_8).scrub('')
end

private

def preserve_border_whitespaces(string, options = {}, &block)
Expand Down
7 changes: 6 additions & 1 deletion lib/reverse_markdown/config.rb
@@ -1,10 +1,11 @@
module ReverseMarkdown
class Config
attr_accessor :unknown_tags, :github_flavored, :tag_border
attr_accessor :unknown_tags, :github_flavored, :tag_border, :force_encoding

def initialize
@unknown_tags = :pass_through
@github_flavored = false
@force_encoding = false
@em_delimiter = '_'.freeze
@strong_delimiter = '**'.freeze
@inline_options = {}
Expand All @@ -29,5 +30,9 @@ def github_flavored
# Effective +tag_border+ setting: the value stored under :tag_border in
# @inline_options when that value is truthy, otherwise @tag_border.
def tag_border
  inline_value = @inline_options[:tag_border]
  return inline_value if inline_value

  @tag_border
end

# Effective +force_encoding+ setting: the value stored under
# :force_encoding in @inline_options when that value is truthy, otherwise
# @force_encoding.
def force_encoding
  inline_value = @inline_options[:force_encoding]
  return inline_value if inline_value

  @force_encoding
end
end
end
10 changes: 10 additions & 0 deletions spec/lib/reverse_markdown_spec.rb
Expand Up @@ -33,5 +33,15 @@
end
expect(ReverseMarkdown.config.github_flavored).to eq true
end

# Specs for the +force_encoding+ option; tagged jruby: :exclude.
describe 'force_encoding option', jruby: :exclude do
  it 'raises invalid byte sequence in UTF-8 exception' do
    # Without the option, the stray "\255" byte is expected to raise.
    conversion = -> { ReverseMarkdown.convert("hi \255") }

    expect { conversion.call }.to raise_error(ArgumentError)
  end

  it 'handles invalid byte sequence if option is set' do
    markdown = ReverseMarkdown.convert("hi \255", force_encoding: true)

    expect(markdown).to eq("hi\n\n")
  end
end
end
end
8 changes: 8 additions & 0 deletions spec/spec_helper.rb
Expand Up @@ -14,6 +14,14 @@
# Sets ReverseMarkdown's @config instance variable back to nil after each
# example (presumably so per-spec configuration does not leak — confirm).
config.after(:each) { ReverseMarkdown.instance_variable_set(:@config, nil) }

# Wraps every example tagged jruby: :exclude. On JRuby the example is not
# called and its :skip metadata is set instead; on any other engine the
# example is called.
config.around(jruby: :exclude) do |example|
  on_jruby = RUBY_ENGINE == 'jruby'

  if !on_jruby
    example.call
  else
    example.metadata[:skip] = true
  end
end
end

def node_for(html)
Expand Down

0 comments on commit d7078c4

Please sign in to comment.