Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ext/strscan/extconf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# Feature probes: each successful check makes mkmf define a HAVE_* macro
# that the C sources can test with #ifdef.
have_func("onig_region_memsize(NULL)")
have_func("rb_reg_onig_match", "ruby/re.h")
have_func("rb_deprecate_constant")
# strscan.c checks HAVE_RB_INT_PARSE_CSTR in StringScanner#integer_at to
# parse a capture in place instead of going through a temporary String.
have_func("rb_int_parse_cstr", "ruby.h") # RUBY_VERSION >= 2.5
have_func("rb_gc_location", "ruby.h") # RUBY_VERSION >= 2.7
have_const("RUBY_TYPED_EMBEDDABLE", "ruby.h") # RUBY_VERSION >= 3.3
create_makefile 'strscan'
Expand Down
6 changes: 6 additions & 0 deletions ext/strscan/lib/strscan/strscan.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# frozen_string_literal: true

class StringScanner
unless method_defined?(:integer_at) # For JRuby
  # Pure-Ruby fallback used when the extension does not already provide
  # StringScanner#integer_at.
  #
  # Looks up the capture selected by +specifier+ with #[] and converts it
  # with String#to_i, forwarding any extra arguments (such as the base).
  # Returns nil when #[] returns nil.
  def integer_at(specifier, *to_i_args)
    captured = self[specifier]
    return nil if captured.nil?

    captured.to_i(*to_i_args)
  end
end

# :markup: markdown
#
# call-seq:
Expand Down
130 changes: 110 additions & 20 deletions ext/strscan/strscan.c
Original file line number Diff line number Diff line change
Expand Up @@ -1689,6 +1689,38 @@ name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name
rb_long2int(name_end - name), name);
}

/*
 * Map a capture-group specifier onto an index into p->regs.
 *
 * A Symbol is first converted to its String name; a String is looked up
 * through name_to_backref_number() (which is expected to raise IndexError
 * for a name that is not a named group).  Any other value is converted
 * with NUM2LONG().  A negative index counts back from p->regs.num_regs.
 *
 * Returns the register index, or -1 when the scanner has no current
 * match, when the index falls outside the registers, or when the selected
 * group did not take part in the match.
 */
static long
resolve_capture_index(struct strscanner *p, VALUE specifier)
{
    long index;

    if (! MATCHED_P(p)) return -1;

    switch (TYPE(specifier)) {
      case T_SYMBOL:
        specifier = rb_sym2str(specifier);
        /* fall through */
      case T_STRING: {
        const char *name_ptr;
        long name_len;

        RSTRING_GETMEM(specifier, name_ptr, name_len);
        index = name_to_backref_number(&(p->regs), p->regex,
                                       name_ptr, name_ptr + name_len,
                                       rb_enc_get(specifier));
        break;
      }
      default:
        index = NUM2LONG(specifier);
        break;
    }

    /* Negative indices are relative to the end of the register list. */
    if (index < 0) index += p->regs.num_regs;

    if (index < 0 || index >= p->regs.num_regs || p->regs.beg[index] == -1) {
        return -1;
    }
    return index;
}

/*
*
* :markup: markdown
Expand Down Expand Up @@ -1763,36 +1795,93 @@ name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name
static VALUE
strscan_aref(VALUE self, VALUE idx)
{
    struct strscanner *p;
    long i;

    GET_SCANNER(self, p);

    /*
     * resolve_capture_index() performs the whole lookup: it checks that
     * there is a current match, resolves Symbol/String names, applies
     * negative-index wrap-around and range checks, and rejects groups
     * that did not participate.  It returns -1 for every "no value" case,
     * so no separate resolution is needed here.
     */
    i = resolve_capture_index(p, idx);
    if (i < 0) return Qnil;

    /* Return the matched text of register i as a new String. */
    return extract_range(p,
                         adjust_register_position(p, p->regs.beg[i]),
                         adjust_register_position(p, p->regs.end[i]));
}

/*
 * :markup: markdown
 *
 * call-seq:
 *   integer_at(specifier, base=10) -> integer or nil
 *
 * Returns the captured substring at the given `specifier` as an Integer,
 * following the behavior of `String#to_i(base)`.
 *
 * `specifier` can be an Integer (positive, negative, or zero), a Symbol,
 * or a String for named capture groups.
 *
 * Returns `nil` if:
 * - No match has been performed or the last match failed
 * - The `specifier` is an Integer and is out of range
 * - The group at `specifier` did not participate in the match
 *
 * Returns `0` if the captured substring does not start with a valid
 * number in the given `base` (including an empty capture), as
 * `String#to_i` does.
 *
 * Raises IndexError if `specifier` is a Symbol or String that does not
 * correspond to a named capture group, consistent with
 * `StringScanner#[]`.
 *
 * Raises ArgumentError if the group matched and `base` is not a radix
 * accepted by `String#to_i`.
 *
 * This is semantically equivalent to `self[specifier]&.to_i(base)`
 * but avoids the allocation of a temporary String when possible.
 *
 * ```rb
 * scanner = StringScanner.new("2024-06-15")
 * scanner.scan(/(\d{4})-(\d{2})-(\d{2})/)
 * scanner.integer_at(1) # => 2024
 * scanner.integer_at(1, 16) # => 8228
 * ```
 */
static VALUE
strscan_integer_at(int argc, VALUE *argv, VALUE self)
{
    struct strscanner *p;
    long i;
    long beg, end, len;
    const char *ptr;
    VALUE rb_specifier;
    VALUE rb_base;
    int base = 10;

    GET_SCANNER(self, p);
    rb_scan_args(argc, argv, "11", &rb_specifier, &rb_base);
    if (argc > 1)
        base = NUM2INT(rb_base);
    i = resolve_capture_index(p, rb_specifier);
    if (i < 0)
        return Qnil;

    /*
     * String#to_i rejects a negative base itself, before it calls the
     * integer parser.  Do the same here so that both stay equivalent.
     * The check comes after the nil return because `&.` would not call
     * to_i at all for a missing capture.
     */
    if (base < 0)
        rb_raise(rb_eArgError, "invalid radix %d", base);

    beg = adjust_register_position(p, p->regs.beg[i]);
    end = adjust_register_position(p, p->regs.end[i]);
    len = end - beg;
    ptr = S_PBEG(p) + beg;
#ifdef HAVE_RB_INT_PARSE_CSTR
    /*
     * Parse in place only for a non-empty capture.  ptr points into the
     * scanned string, so for an empty capture it addresses whatever
     * follows the capture rather than a terminator; that case goes
     * through the String-based path below instead.
     */
    if (len > 0) {
        /*
         * Ruby 2.5 or later export the rb_int_parse_cstr() symbol but
         * prototype definition isn't provided. Ruby 4.1 or later
         * provide prototype definition.
         */
# ifndef RB_INT_PARSE_DEFAULT
        VALUE rb_int_parse_cstr(const char *str, ssize_t len, char **endp,
                                size_t *ndigits, int base, int flags);
#  define RB_INT_PARSE_DEFAULT 0x07
# endif
        char *endp;
        VALUE parsed;

        parsed = rb_int_parse_cstr(ptr, len, &endp, NULL, base,
                                   RB_INT_PARSE_DEFAULT);
        /*
         * The parser reports "no digits found" as nil; String#to_i maps
         * that to 0.  nil from this method must only mean "no capture".
         */
        if (NIL_P(parsed))
            parsed = INT2FIX(0);
        return parsed;
    }
#endif
    return rb_str_to_inum(rb_str_new(ptr, len), base, 0);
}

/*
* :markup: markdown
* :include: strscan/link_refs.txt
Expand Down Expand Up @@ -2353,6 +2442,7 @@ Init_strscan(void)
rb_define_method(StringScanner, "matched", strscan_matched, 0);
rb_define_method(StringScanner, "matched_size", strscan_matched_size, 0);
rb_define_method(StringScanner, "[]", strscan_aref, 1);
rb_define_method(StringScanner, "integer_at", strscan_integer_at, -1);
rb_define_method(StringScanner, "pre_match", strscan_pre_match, 0);
rb_define_method(StringScanner, "post_match", strscan_post_match, 0);
rb_define_method(StringScanner, "size", strscan_size, 0);
Expand Down
6 changes: 2 additions & 4 deletions gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -3650,14 +3650,11 @@ rb_gc_copy_attributes(VALUE dest, VALUE obj)
rb_gc_impl_copy_attributes(rb_gc_get_objspace(), dest, obj);
}

#if USE_MODULAR_GC
int
rb_gc_modular_gc_loaded_p(void)
{
#if USE_MODULAR_GC
return rb_gc_functions.modular_gc_loaded_p;
#else
return false;
#endif
}

const char *
Expand All @@ -3673,6 +3670,7 @@ rb_gc_active_gc_name(void)

return gc_name;
}
#endif

struct rb_gc_object_metadata_entry *
rb_gc_object_metadata(VALUE obj)
Expand Down
2 changes: 2 additions & 0 deletions internal/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,10 @@ void rb_gc_update_values(long n, VALUE *values);
void rb_gc_mark_set_no_pin(st_table *);
void rb_gc_update_set_refs(st_table *);

/* Modular-GC query functions; declared only when USE_MODULAR_GC is enabled. */
#if USE_MODULAR_GC
const char *rb_gc_active_gc_name(void);
int rb_gc_modular_gc_loaded_p(void);
#endif

RUBY_SYMBOL_EXPORT_END

Expand Down
40 changes: 40 additions & 0 deletions spec/bundler/support/rubygems_ext.rb
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,46 @@ def install_test_deps

require_relative "helpers"
Helpers.install_dev_bundler

install_vendored_compact_index
end

# Vendor `rubygems/rubygems.org#lib/compact_index/` under `tmp/compact_index/`
# so the artifice can serve compact-index responses without a runtime gem
# dependency. Pinned to a reviewed commit; override with COMPACT_INDEX_REF
# to refresh against another ref (the existing vendor copy is discarded).
#
# Each file is downloaded to a side file and renamed into place, so a file
# only appears under its final name once it has been written completely.
def install_vendored_compact_index
  target_root = Path.tmp_root.join("compact_index")
  require "fileutils"
  FileUtils.mkdir_p(Path.tmp_root)

  files = %w[
    lib/compact_index.rb
    lib/compact_index/dependency.rb
    lib/compact_index/gem.rb
    lib/compact_index/gem_version.rb
    lib/compact_index/versions_file.rb
  ]

  # Serialize installs so parallel test setups don't race on the same
  # vendor tree, and only skip the download when every file is present so
  # an interrupted run can't leave a partial copy behind.
  File.open(Path.tmp_root.join("compact_index.lock"), File::CREAT | File::RDWR) do |lock|
    lock.flock(File::LOCK_EX)

    FileUtils.rm_rf(target_root) if ENV["COMPACT_INDEX_REF"]

    next if files.all? {|path| File.exist?(target_root.join(path)) }

    require "open-uri"
    ref = ENV["COMPACT_INDEX_REF"] || "7c68a7b39761c61a66f9299f85b889ec39afc02c"
    files.each do |path|
      url = "https://raw.githubusercontent.com/rubygems/rubygems.org/#{ref}/#{path}"
      target = target_root.join(path)
      FileUtils.mkdir_p(File.dirname(target))

      # Write next to the destination and rename afterwards: the presence
      # check above treats an existing file as complete, so a truncated
      # file must never be visible under the final name.
      partial = "#{target}.part"
      File.write(partial, URI.parse(url).open(&:read))
      File.rename(partial, target)
    end
  end
end

def check_source_control_changes(success_message:, error_message:)
Expand Down
53 changes: 53 additions & 0 deletions test/strscan/test_stringscanner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,59 @@ def test_AREF
end
end

# Asserts that StringScanner#integer_at returns the same value as the
# reference expression `s[specifier]&.to_i(*to_i_args)`.
def assert_integer_at(s, specifier, *to_i_args)
  expected = s[specifier]&.to_i(*to_i_args)
  actual = s.integer_at(specifier, *to_i_args)
  assert_equal(expected, actual)
end

# Integer specifiers: every in-range index, both positive and negative,
# plus one out-of-range index on each side.
def test_integer_at
  scanner = create_string_scanner("before 20260514 after")
  scanner.skip_until(" ")
  assert_equal("20260514", scanner.scan(/(\d{4})(\d{2})(\d{2})/))

  # 0..3   => 20260514, 2026, 5, 14;  4 => nil
  # -1..-4 => 14, 5, 2026, 20260514; -5 => nil
  [0, 1, 2, 3, 4, -1, -2, -3, -4, -5].each do |specifier|
    assert_integer_at(scanner, specifier)
  end
end

# Named capture groups addressed by String.
def test_integer_at_name_string
  scanner = create_string_scanner("before 20260514 after")
  scanner.skip_until(" ")
  assert_equal("20260514", scanner.scan(/(?<y>\d{4})(?<m>\d{2})(?<d>\d{2})/))

  ["y", "m", "d"].each do |group_name|
    assert_integer_at(scanner, group_name)
  end
end

# Named capture groups addressed by Symbol.
def test_integer_at_name_symbol
  scanner = create_string_scanner("before 20260514 after")
  scanner.skip_until(" ")
  assert_equal("20260514", scanner.scan(/(?<y>\d{4})(?<m>\d{2})(?<d>\d{2})/))

  [:y, :m, :d].each do |group_name|
    assert_integer_at(scanner, group_name)
  end
end

# An explicit base is forwarded: the whole match "111" is read as binary.
def test_integer_at_base
  scanner = create_string_scanner("before 111 after")
  scanner.skip_until(" ")
  matched = scanner.scan(/\d+/)
  assert_equal("111", matched)

  binary_base = 2
  assert_integer_at(scanner, 0, binary_base)
end

# Base 0 lets the "0x" prefix select the radix; underscores are allowed
# between digits.
def test_integer_at_base_auto
  scanner = create_string_scanner("before 0xa_f after")
  scanner.skip_until(" ")
  matched = scanner.scan(/0x[\h_]+/)
  assert_equal("0xa_f", matched)

  auto_detect_base = 0
  assert_integer_at(scanner, 0, auto_detect_base) # 0xaf
end

def test_pre_match
s = create_string_scanner('a b c d e')
s.scan(/\w/)
Expand Down