Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Being serious about UTF-8 compatibility #28

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 27 additions & 8 deletions ext/rinku/rinku.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@
#include <ruby/encoding.h>
#else
#define rb_enc_copy(dst, src)
#define rb_enc_str_new(dst, src, enc) rb_str_new(dst, src)
#define rb_enc_asciicompat(str1) 1
#define rb_enc_get(str) 1
#define rb_enc_set_index(str, idx)
#define rb_enc_get_index(str) 1
#endif

#include "autolink.h"
Expand Down Expand Up @@ -74,7 +79,7 @@ static const char *g_hrefs[] = {
};

static void
autolink__print(struct buf *ob, const struct buf *link, void *payload)
autolink__print(struct buf *ob, const struct buf *link, void *payload, int enc_index)
{
bufput(ob, link->data, link->size);
}
Expand Down Expand Up @@ -191,7 +196,8 @@ rinku_autolink(
unsigned int flags,
const char *link_attr,
const char **skip_tags,
void (*link_text_cb)(struct buf *ob, const struct buf *link, void *payload),
void (*link_text_cb)(struct buf *ob, const struct buf *link, void *payload, int enc_index),
int enc_index,
void *payload)
{
size_t i, end;
Expand Down Expand Up @@ -267,7 +273,7 @@ rinku_autolink(
BUFPUTSL(ob, "\">");
}

link_text_cb(ob, link, payload);
link_text_cb(ob, link, payload, enc_index);
BUFPUTSL(ob, "</a>");

link_count++;
Expand All @@ -286,11 +292,22 @@ rinku_autolink(
/**
* Ruby code
*/

static void
autolink_callback(struct buf *link_text, const struct buf *link, void *block)
check_utf8(VALUE str)
{
	/*
	 * Accept `str` only when its encoding is ASCII-compatible; otherwise
	 * raise ArgumentError. Despite the name, the test performed here is
	 * ASCII-compatibility of the string's encoding, not strict UTF-8.
	 */
	if (rb_enc_asciicompat(rb_enc_get(str)))
		return;

	rb_raise(rb_eArgError,
		"Invalid encoding");
}

static void
autolink_callback(struct buf *link_text, const struct buf *link, void *block, int enc_index)
{
VALUE rb_link, rb_link_text;
rb_link = rb_str_new(link->data, link->size);
rb_enc_set_index(rb_link, enc_index);
rb_link_text = rb_funcall((VALUE)block, rb_intern("call"), 1, rb_link);
Check_Type(rb_link_text, T_STRING);
bufput(link_text, RSTRING_PTR(rb_link_text), RSTRING_LEN(rb_link_text));
Expand Down Expand Up @@ -346,8 +363,8 @@ const char **rinku_load_tags(VALUE rb_skip)
* HTML, Rinku is smart enough to skip the links that are already enclosed in `<a>`
* tags.
*
* - `mode` is a symbol, either `:all`, `:urls` or `:email_addresses`,
* which specifies which kind of links will be auto-linked.
* - `mode` is a symbol, either `:all`, `:urls` or `:email_addresses`,
* which specifies which kind of links will be auto-linked.
*
* - `link_attr` is a string containing the link attributes for each link that
* will be generated. These attributes are not sanitized and will be included as-is
Expand Down Expand Up @@ -392,9 +409,10 @@ rb_rinku_autolink(int argc, VALUE *argv, VALUE self)
ID mode_sym;

rb_scan_args(argc, argv, "14&", &rb_text, &rb_mode,
&rb_html, &rb_skip, &rb_flags, &rb_block);
&rb_html, &rb_skip, &rb_flags, &rb_block);

Check_Type(rb_text, T_STRING);
check_utf8(rb_text);

if (!NIL_P(rb_mode)) {
Check_Type(rb_mode, T_SYMBOL);
Expand Down Expand Up @@ -434,6 +452,7 @@ rb_rinku_autolink(int argc, VALUE *argv, VALUE self)
rb_raise(rb_eTypeError,
"Invalid linking mode (possible values are :all, :urls, :email_addresses)");


count = rinku_autolink(
output_buf,
RSTRING_PTR(rb_text),
Expand All @@ -443,6 +462,7 @@ rb_rinku_autolink(int argc, VALUE *argv, VALUE self)
link_attr,
skip_tags,
RTEST(rb_block) ? &autolink_callback : NULL,
rb_enc_get_index(rb_text),
(void*)rb_block);

if (count == 0)
Expand All @@ -465,4 +485,3 @@ void RUBY_EXPORT Init_rinku()
rb_define_method(rb_mRinku, "auto_link", rb_rinku_autolink, -1);
rb_define_const(rb_mRinku, "AUTOLINK_SHORT_DOMAINS", INT2FIX(SD_AUTOLINK_SHORT_DOMAINS));
}

34 changes: 32 additions & 2 deletions test/autolink_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_global_skip_tags
Rinku.skip_tags = nil
assert_not_equal Rinku.auto_link(url), url
end

def test_auto_link_with_single_trailing_punctuation_and_space
url = "http://www.youtube.com"
url_result = generate_result(url)
Expand Down Expand Up @@ -138,7 +138,7 @@ def test_auto_link_at_eol
url2 = "http://www.ruby-doc.org/core/Bar.html"

assert_equal %(<p><a href="#{url1}">#{url1}</a><br /><a href="#{url2}">#{url2}</a><br /></p>), Rinku.auto_link("<p>#{url1}<br />#{url2}<br /></p>")
end
end

def test_block
link = Rinku.auto_link("Find ur favorite pokeman @ http://www.pokemon.com") do |url|
Expand Down Expand Up @@ -285,6 +285,36 @@ def test_copies_source_encoding
ret = Rinku.auto_link str
assert_equal str.encoding, ret.encoding
end

def test_block_encoding
  # A UTF-8 source string must be handed to the block as UTF-8, and the
  # auto-linked result must be UTF-8 as well.
  url = "http://example.com/х"
  assert_equal "UTF-8", url.encoding.to_s

  link = Rinku.auto_link(url) do |u|
    assert_equal "UTF-8", u.encoding.to_s
    u
  end

  # Expected value goes first so a failure message reads correctly.
  assert_equal "UTF-8", link.encoding.to_s

  # Repeat with a string re-encoded as binary: both the string yielded to
  # the block and the returned link must carry the source's encoding.
  url = "http://www.bash.org"
  url.encode! 'binary'

  link = Rinku.auto_link(url) do |u|
    assert_equal url.encoding.to_s, u.encoding.to_s
    u
  end

  assert_equal url.encoding, link.encoding
end

def test_bad_encoding
  # A UTF-16 encoded input is expected to be rejected with ArgumentError.
  utf16_url = "http://example.com/ümlaut".encode("UTF-16")
  assert_raise(ArgumentError) { Rinku.auto_link(utf16_url) }
end
end

def generate_result(link_text, href = nil)
Expand Down