Skip to content

Commit

Permalink
String#concat for String (does not handle integers)
Browse files Browse the repository at this point in the history
  • Loading branch information
vincentisambart committed Feb 6, 2010
1 parent 4bf4f35 commit 5d26382
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 12 deletions.
2 changes: 1 addition & 1 deletion TODO.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Necessary before merging the code into MacRuby:
- String#concat
- String#encode
- String#inspect
- add NSData to the types of parameters that can be given to String#replace? (Laurent thinks it's a bad bad idea)
- maybe add NSData to the types of parameters that can be given to String#replace (Laurent thinks it's a bad idea)
- use Ruby 1.9's encoding tables for the Japanese encodings (EUC-JP, Shift_JIS, CP932, ISO-2022-JP)
- you should not be able to call Encoding#dup/clone

Expand Down
82 changes: 71 additions & 11 deletions new_string.c
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ str_compatible_encoding(string_t *str1, string_t *str2)
if (str1->length_in_bytes == 0) {
return str2->encoding;
}
if (str1->encoding->ascii_compatible != str2->encoding->ascii_compatible) {
if (!str1->encoding->ascii_compatible || !str2->encoding->ascii_compatible) {
return NULL;
}
if (str_is_ruby_ascii_only(str1) && str_is_ruby_ascii_only(str2)) {
Expand All @@ -657,6 +657,17 @@ str_compatible_encoding(string_t *str1, string_t *str2)
return NULL;
}

// Strict variant of str_compatible_encoding(): returns the encoding that
// str_compatible_encoding() reports for the two strings, and raises
// rb_eEncCompatError (naming both encodings) when it reports none.
static encoding_t *
str_must_have_compatible_encoding(string_t *str1, string_t *str2)
{
    encoding_t *common_encoding = str_compatible_encoding(str1, str2);
    if (common_encoding != NULL) {
        return common_encoding;
    }

    // no encoding can represent both strings
    rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
            str1->encoding->public_name, str2->encoding->public_name);
    return NULL;
}


static string_t *
str_alloc(void)
Expand Down Expand Up @@ -1308,13 +1319,9 @@ str_get_character_at(string_t *self, long index, bool ucs2_mode)
}

static string_t *
str_plus(string_t *str1, string_t *str2)
str_plus_string(string_t *str1, string_t *str2)
{
encoding_t *new_encoding = str_compatible_encoding(str1, str2);
if (new_encoding == NULL) {
rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
str1->encoding->public_name, str2->encoding->public_name);
}
encoding_t *new_encoding = str_must_have_compatible_encoding(str1, str2);

string_t *new_str = str_alloc();
new_str->encoding = new_encoding;
Expand Down Expand Up @@ -1350,8 +1357,49 @@ str_plus(string_t *str1, string_t *str2)
return new_str;
}

// Appends the content of str to self, in place.
//
// - Appending an empty str leaves self untouched.
// - Appending to an empty self is handed over to str_replace().
// - Otherwise the two strings must have compatible encodings:
//   str_must_have_compatible_encoding() raises rb_eEncCompatError if they
//   do not, and self adopts the encoding it returns (the same way
//   str_plus_string() stores it on its result).
//
// Note that the internal storage format (uchars or binary) of either string
// may be converted so that both use the same one before the bytes are copied.
static void
str_concat_string(string_t *self, string_t *str)
{
    if (str->length_in_bytes == 0) {
        return;
    }
    if (self->length_in_bytes == 0) {
        str_replace(self, (VALUE)str);
        return;
    }

    encoding_t *new_encoding = str_must_have_compatible_encoding(self, str);

    if (str_is_stored_in_uchars(self) != str_is_stored_in_uchars(str)) {
        // we try not to change the internal format of self if it's possible
        // (because self is generally bigger than the string concatenated)
        if (str_is_stored_in_uchars(self)) {
            if (!str_try_making_data_uchars(str)) {
                str_make_data_binary(self);
            }
        }
        else {
            str_make_data_binary(str);
        }
    }

    // the result must carry the encoding both strings agreed on, not
    // necessarily the one self had before (done after the storage
    // conversion above so that conversion still sees self's old encoding)
    self->encoding = new_encoding;

    long new_length_in_bytes = self->length_in_bytes + str->length_in_bytes;
    // TODO: we should maybe merge flags
    // (if both are ASCII-only, the concatenation is ASCII-only,
    // though I'm not sure all the tests required are worth doing)
    str_unset_facultative_flags(self);
    if (self->capacity_in_bytes < new_length_in_bytes) {
        // not enough room: move the existing content to a bigger buffer
        uint8_t *bytes = xmalloc(new_length_in_bytes);
        memcpy(bytes, self->data.bytes, self->length_in_bytes);
        GC_WB(&self->data.bytes, bytes);
        self->capacity_in_bytes = new_length_in_bytes;
    }
    memcpy(self->data.bytes + self->length_in_bytes, str->data.bytes, str->length_in_bytes);
    self->length_in_bytes = new_length_in_bytes;
}

static bool
str_is_equal_to_str(string_t *self, string_t *str)
str_is_equal_to_string(string_t *self, string_t *str)
{
if (self == str) {
return true;
Expand Down Expand Up @@ -1570,7 +1618,17 @@ mr_str_plus(VALUE self, SEL sel, VALUE str)
if (OBJC_CLASS(str) != rb_cMRString) {
abort(); // TODO
}
return (VALUE)str_plus(STR(self), STR(str));
return (VALUE)str_plus_string(STR(self), STR(str));
}

// Ruby-level entry point for String#<< and String#concat: appends str to
// self in place and returns self.
// Only MRString arguments are handled for now; anything else aborts.
static VALUE
mr_str_concat(VALUE self, SEL sel, VALUE str)
{
    if (OBJC_CLASS(str) == rb_cMRString) {
        str_concat_string(STR(self), STR(str));
        return self;
    }

    abort(); // TODO (should also accept integers)
}

static VALUE
Expand All @@ -1579,7 +1637,7 @@ mr_str_equal(VALUE self, SEL sel, VALUE str)
if (OBJC_CLASS(str) != rb_cMRString) {
abort(); // TODO
}
return str_is_equal_to_str(STR(self), STR(str)) ? Qtrue : Qfalse;
return str_is_equal_to_string(STR(self), STR(str)) ? Qtrue : Qfalse;
}

static VALUE
Expand All @@ -1588,7 +1646,7 @@ mr_str_not_equal(VALUE self, SEL sel, VALUE str)
if (OBJC_CLASS(str) != rb_cMRString) {
abort(); // TODO
}
return str_is_equal_to_str(STR(self), STR(str)) ? Qfalse : Qtrue;
return str_is_equal_to_string(STR(self), STR(str)) ? Qfalse : Qtrue;
}

static VALUE
Expand Down Expand Up @@ -1621,6 +1679,8 @@ Init_MRString(void)
rb_objc_define_method(rb_cMRString, "ascii_only?", mr_str_is_ascii_only, 0);
rb_objc_define_method(rb_cMRString, "[]", mr_str_aref, -1);
rb_objc_define_method(rb_cMRString, "+", mr_str_plus, 1);
rb_objc_define_method(rb_cMRString, "<<", mr_str_concat, 1);
rb_objc_define_method(rb_cMRString, "concat", mr_str_concat, 1);
rb_objc_define_method(rb_cMRString, "==", mr_str_equal, 1);
rb_objc_define_method(rb_cMRString, "!=", mr_str_not_equal, 1);

Expand Down
20 changes: 20 additions & 0 deletions test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -345,11 +345,31 @@ def assert_exception_raised(exception)
assert_equal S.new('a'), S.new('a') + S.new
assert_equal S.new, S.new + S.new

s = empty_utf8.dup
s << empty_utf16le
assert_equal E::UTF_8, s.encoding

s = empty_utf8.dup
s << bonjour_utf16le
assert_equal E::UTF_16LE, s.encoding

s = S.new('a')
old_s = s.dup
s << S.new('')
assert_equal old_s, s
s << S.new('b')
assert_equal S.new('ab'), s
old_s = s.dup
s.concat(S.new('c'))
assert_equal S.new('abc'), s
assert_not_equal old_s, s

assert_equal empty_utf8, empty_utf16le
assert_equal bonjour_utf8, bonjour_ascii
assert_not_equal bonjour_utf16le, bonjour_ascii

assert_exception_raised(Encoding::CompatibilityError) { ohayou_utf8 + ohayou_utf16le }
assert_exception_raised(Encoding::CompatibilityError) { ohayou_utf8 << ohayou_utf16le }

if $tests_failed_count == 0
puts "everything's fine"
Expand Down

0 comments on commit 5d26382

Please sign in to comment.