Skip to content

Commit

Permalink
Refactor parser compile functions
Browse files Browse the repository at this point in the history
Refactor the parser compile functions to reduce their dependence
on Ruby functions.
This commit includes the following changes:

1. Refactor `gets`, `input` and `gets_` of `parser_params`

The parser needs two different pieces of data to get the next line: a function (`gets`) and input data (`input`).
However, `gets_` is used for both a function (`call`) and input data (`ptr`).
`call` is used for managing the general callback function when `rb_ruby_parser_compile_generic` is used.
`ptr` is used for managing the current position in the String when `parser_compile_string` is used.
This commit changes the parser to use only `gets` and `input`, and then removes `gets_`.

2. Move parser_compile functions and `gets` functions from parse.y to ruby_parser.c

This change reduces the parser's dependence on Ruby functions.

3. Change ruby_parser and ripper to take care of `VALUE input` GC mark

Move the responsibility for calling `rb_gc_mark` on `VALUE input` from the parser to ruby_parser and ripper.
From the parser's point of view, `input` is an arbitrary data pointer.

4. Introduce rb_parser_compile_array function

The caller of `rb_parser_compile_generic` needs to take care of GC because ruby_parser doesn’t know
the details of `lex_gets` and `input`.
Introduce `rb_parser_compile_array` to reduce the complexity of ast.c.
  • Loading branch information
yui-knk committed Apr 22, 2024
1 parent f87c216 commit 65339ab
Show file tree
Hide file tree
Showing 8 changed files with 243 additions and 122 deletions.
15 changes: 1 addition & 14 deletions ast.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,19 +128,6 @@ rb_ast_parse_file(VALUE path, VALUE keep_script_lines, VALUE error_tolerant, VAL
return ast_parse_done(ast);
}

/*
 * Fetch element `index` of `array` as one line of source.
 *
 * A nil element is returned unchanged.  Any other element is passed
 * through StringValue and must have an ASCII-compatible encoding;
 * otherwise ArgumentError ("invalid source encoding") is raised.
 */
static VALUE
lex_array(VALUE array, int index)
{
    VALUE line = rb_ary_entry(array, index);

    /* Nothing to validate for a nil entry. */
    if (NIL_P(line)) return line;

    StringValue(line);
    if (!rb_enc_asciicompat(rb_enc_get(line))) {
        rb_raise(rb_eArgError, "invalid source encoding");
    }
    return line;
}

static VALUE
rb_ast_parse_array(VALUE array, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens)
{
Expand All @@ -151,7 +138,7 @@ rb_ast_parse_array(VALUE array, VALUE keep_script_lines, VALUE error_tolerant, V
if (RTEST(keep_script_lines)) rb_parser_set_script_lines(vparser);
if (RTEST(error_tolerant)) rb_parser_error_tolerant(vparser);
if (RTEST(keep_tokens)) rb_parser_keep_tokens(vparser);
ast = rb_parser_compile_generic(vparser, lex_array, Qnil, array, 1);
ast = rb_parser_compile_array(vparser, Qnil, array, 1);
return ast_parse_done(ast);
}

Expand Down
4 changes: 4 additions & 0 deletions common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -3316,6 +3316,7 @@ compile.$(OBJEXT): $(top_srcdir)/internal/imemo.h
compile.$(OBJEXT): $(top_srcdir)/internal/io.h
compile.$(OBJEXT): $(top_srcdir)/internal/numeric.h
compile.$(OBJEXT): $(top_srcdir)/internal/object.h
compile.$(OBJEXT): $(top_srcdir)/internal/parse.h
compile.$(OBJEXT): $(top_srcdir)/internal/rational.h
compile.$(OBJEXT): $(top_srcdir)/internal/re.h
compile.$(OBJEXT): $(top_srcdir)/internal/ruby_parser.h
Expand Down Expand Up @@ -7494,6 +7495,7 @@ goruby.$(OBJEXT): $(top_srcdir)/internal/fixnum.h
goruby.$(OBJEXT): $(top_srcdir)/internal/gc.h
goruby.$(OBJEXT): $(top_srcdir)/internal/imemo.h
goruby.$(OBJEXT): $(top_srcdir)/internal/numeric.h
goruby.$(OBJEXT): $(top_srcdir)/internal/parse.h
goruby.$(OBJEXT): $(top_srcdir)/internal/rational.h
goruby.$(OBJEXT): $(top_srcdir)/internal/ruby_parser.h
goruby.$(OBJEXT): $(top_srcdir)/internal/sanitizers.h
Expand Down Expand Up @@ -10361,6 +10363,7 @@ miniinit.$(OBJEXT): $(top_srcdir)/internal/fixnum.h
miniinit.$(OBJEXT): $(top_srcdir)/internal/gc.h
miniinit.$(OBJEXT): $(top_srcdir)/internal/imemo.h
miniinit.$(OBJEXT): $(top_srcdir)/internal/numeric.h
miniinit.$(OBJEXT): $(top_srcdir)/internal/parse.h
miniinit.$(OBJEXT): $(top_srcdir)/internal/rational.h
miniinit.$(OBJEXT): $(top_srcdir)/internal/ruby_parser.h
miniinit.$(OBJEXT): $(top_srcdir)/internal/sanitizers.h
Expand Down Expand Up @@ -10820,6 +10823,7 @@ node_dump.$(OBJEXT): $(top_srcdir)/internal/gc.h
node_dump.$(OBJEXT): $(top_srcdir)/internal/hash.h
node_dump.$(OBJEXT): $(top_srcdir)/internal/imemo.h
node_dump.$(OBJEXT): $(top_srcdir)/internal/numeric.h
node_dump.$(OBJEXT): $(top_srcdir)/internal/parse.h
node_dump.$(OBJEXT): $(top_srcdir)/internal/rational.h
node_dump.$(OBJEXT): $(top_srcdir)/internal/ruby_parser.h
node_dump.$(OBJEXT): $(top_srcdir)/internal/sanitizers.h
Expand Down
59 changes: 52 additions & 7 deletions ext/ripper/ripper_init.c.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,40 @@

ID id_warn, id_warning, id_gets, id_assoc;

/*
 * Kind of source a ripper instance reads from.  ripper_initialize picks
 * the value from the `src` argument, and ripper_parser_mark2 uses it to
 * decide which member of `struct ripper`'s `data` union to GC-mark.
 */
enum lex_type {
/* String source, read through a struct lex_pointer_string */
lex_type_str,
/* T_FILE source, read with rb_io_gets */
lex_type_io,
/* any other object that responds to `gets` */
lex_type_generic,
};

/*
 * Ripper instance data: the parser itself plus the lexer input, which
 * ripper (not the parser) marks for GC in ripper_parser_mark2.
 */
struct ripper {
rb_parser_t *p;
/* selects the active member of `data` */
enum lex_type type;
union {
/* active when type is lex_type_str: source string and read position */
struct lex_pointer_string ptr_str;
/* active when type is lex_type_io or lex_type_generic: the source object */
VALUE val;
} data;
};

static void
ripper_parser_mark2(void *ptr)
{
struct ripper *r = (struct ripper*)ptr;
if (r->p) ripper_parser_mark(r->p);
if (r->p) {
ripper_parser_mark(r->p);

switch (r->type) {
case lex_type_str:
rb_gc_mark(r->data.ptr_str.str);
break;
case lex_type_io:
rb_gc_mark(r->data.val);
break;
case lex_type_generic:
rb_gc_mark(r->data.val);
break;
}
}
}

static void
Expand Down Expand Up @@ -54,8 +79,9 @@ static const rb_data_type_t parser_data_type = {
};

static VALUE
ripper_lex_get_generic(struct parser_params *p, VALUE src)
ripper_lex_get_generic(struct parser_params *p, rb_parser_input_data input, int line_count)
{
VALUE src = (VALUE)input;
VALUE line = rb_funcallv_public(src, id_gets, 0, 0);
if (!NIL_P(line) && !RB_TYPE_P(line, T_STRING)) {
rb_raise(rb_eTypeError,
Expand All @@ -79,11 +105,18 @@ ripper_compile_error(struct parser_params *p, const char *fmt, ...)
}

static VALUE
ripper_lex_io_get(struct parser_params *p, VALUE src)
ripper_lex_io_get(struct parser_params *p, rb_parser_input_data input, int line_count)
{
VALUE src = (VALUE)input;
return rb_io_gets(src);
}

/*
 * `gets` callback for String sources.  `input` is the
 * struct lex_pointer_string that ripper_initialize registered; the
 * call is forwarded to rb_parser_lex_get_str.  `p` and `line_count`
 * are unused here.
 */
static VALUE
ripper_lex_get_str(struct parser_params *p, rb_parser_input_data input, int line_count)
{
    struct lex_pointer_string *ptr_str = (struct lex_pointer_string *)input;

    return rb_parser_lex_get_str(ptr_str);
}

static VALUE
ripper_s_allocate(VALUE klass)
{
Expand Down Expand Up @@ -294,26 +327,38 @@ parser_dedent_string(VALUE self, VALUE input, VALUE width)
static VALUE
ripper_initialize(int argc, VALUE *argv, VALUE self)
{
struct ripper *r;
struct parser_params *p;
VALUE src, fname, lineno;
VALUE (*gets)(struct parser_params*,VALUE);
VALUE input, sourcefile_string;
rb_parser_lex_gets_func *gets;
VALUE sourcefile_string;
const char *sourcefile;
int sourceline;
rb_parser_input_data input;

p = ripper_parser_params(self, false);
TypedData_Get_Struct(self, struct ripper, &parser_data_type, r);
rb_scan_args(argc, argv, "12", &src, &fname, &lineno);
if (RB_TYPE_P(src, T_FILE)) {
gets = ripper_lex_io_get;
r->type = lex_type_io;
r->data.val = src;
input = (rb_parser_input_data)src;
}
else if (rb_respond_to(src, id_gets)) {
gets = ripper_lex_get_generic;
r->type = lex_type_generic;
r->data.val = src;
input = (rb_parser_input_data)src;
}
else {
StringValue(src);
gets = rb_ruby_ripper_lex_get_str;
gets = ripper_lex_get_str;
r->type = lex_type_str;
r->data.ptr_str.str = src;
r->data.ptr_str.ptr = 0;
input = (rb_parser_input_data)&r->data.ptr_str;
}
input = src;
if (NIL_P(fname)) {
fname = STR_NEW2("(ripper)");
OBJ_FREEZE(fname);
Expand Down
8 changes: 3 additions & 5 deletions internal/parse.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,9 @@ void rb_ruby_parser_set_options(rb_parser_t *p, int print, int loop, int chomp,
rb_parser_t *rb_ruby_parser_set_context(rb_parser_t *p, const struct rb_iseq_struct *base, int main);
void rb_ruby_parser_set_script_lines(rb_parser_t *p);
void rb_ruby_parser_error_tolerant(rb_parser_t *p);
rb_ast_t* rb_ruby_parser_compile_file_path(rb_parser_t *p, VALUE fname, VALUE file, int start);
void rb_ruby_parser_keep_tokens(rb_parser_t *p);
rb_ast_t* rb_ruby_parser_compile_generic(rb_parser_t *p, VALUE (*lex_gets)(VALUE, int), VALUE fname, VALUE input, int start);
rb_ast_t* rb_ruby_parser_compile_string_path(rb_parser_t *p, VALUE f, VALUE s, int line);
typedef VALUE (rb_parser_lex_gets_func)(struct parser_params*, rb_parser_input_data, int);
rb_ast_t *rb_parser_compile(rb_parser_t *p, rb_parser_lex_gets_func *gets, VALUE fname, rb_parser_input_data input, int line);

RUBY_SYMBOL_EXPORT_BEGIN

Expand Down Expand Up @@ -91,15 +90,14 @@ VALUE rb_ruby_parser_debug_output(rb_parser_t *p);
void rb_ruby_parser_set_debug_output(rb_parser_t *p, VALUE output);
VALUE rb_ruby_parser_parsing_thread(rb_parser_t *p);
void rb_ruby_parser_set_parsing_thread(rb_parser_t *p, VALUE parsing_thread);
void rb_ruby_parser_ripper_initialize(rb_parser_t *p, VALUE (*gets)(struct parser_params*,VALUE), VALUE input, VALUE sourcefile_string, const char *sourcefile, int sourceline);
void rb_ruby_parser_ripper_initialize(rb_parser_t *p, rb_parser_lex_gets_func *gets, rb_parser_input_data input, VALUE sourcefile_string, const char *sourcefile, int sourceline);
VALUE rb_ruby_parser_result(rb_parser_t *p);
rb_encoding *rb_ruby_parser_enc(rb_parser_t *p);
VALUE rb_ruby_parser_ruby_sourcefile_string(rb_parser_t *p);
int rb_ruby_parser_ruby_sourceline(rb_parser_t *p);
int rb_ruby_parser_lex_state(rb_parser_t *p);
void rb_ruby_ripper_parse0(rb_parser_t *p);
int rb_ruby_ripper_dedent_string(rb_parser_t *p, VALUE string, int width);
VALUE rb_ruby_ripper_lex_get_str(rb_parser_t *p, VALUE s);
int rb_ruby_ripper_initialized_p(rb_parser_t *p);
void rb_ruby_ripper_parser_initialize(rb_parser_t *p);
long rb_ruby_ripper_column(rb_parser_t *p);
Expand Down
10 changes: 9 additions & 1 deletion internal/ruby_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,16 @@
#include "internal/bignum.h"
#include "internal/compilers.h"
#include "internal/complex.h"
#include "internal/parse.h"
#include "internal/rational.h"
#include "rubyparser.h"
#include "vm.h"

/*
 * Input state for reading source lines out of a String; consumed by
 * rb_parser_lex_get_str.
 */
struct lex_pointer_string {
/* the source String; a VALUE, so the owner must keep it GC-marked */
VALUE str;
/* current read position in `str`, set to 0 by ripper_initialize.
 * NOTE(review): presumably a byte offset, as in the removed lex_get_str;
 * confirm against rb_parser_lex_get_str. */
long ptr;
};

RUBY_SYMBOL_EXPORT_BEGIN
#ifdef UNIVERSAL_PARSER
const rb_parser_config_t *rb_ruby_parser_config(void);
Expand All @@ -19,6 +25,7 @@ VALUE rb_parser_new(void);
rb_ast_t *rb_parser_compile_string_path(VALUE vparser, VALUE fname, VALUE src, int line);
VALUE rb_str_new_parser_string(rb_parser_string_t *str);
VALUE rb_str_new_mutable_parser_string(rb_parser_string_t *str);
VALUE rb_parser_lex_get_str(struct lex_pointer_string *ptr_str);

VALUE rb_node_str_string_val(const NODE *);
VALUE rb_node_sym_string_val(const NODE *);
Expand Down Expand Up @@ -48,7 +55,8 @@ void rb_parser_keep_tokens(VALUE vparser);

rb_ast_t *rb_parser_compile_string(VALUE, const char*, VALUE, int);
rb_ast_t *rb_parser_compile_file_path(VALUE vparser, VALUE fname, VALUE input, int line);
rb_ast_t *rb_parser_compile_generic(VALUE vparser, VALUE (*lex_gets)(VALUE, int), VALUE fname, VALUE input, int line);
rb_ast_t *rb_parser_compile_generic(VALUE vparser, rb_parser_lex_gets_func *lex_gets, VALUE fname, VALUE input, int line);
rb_ast_t *rb_parser_compile_array(VALUE vparser, VALUE fname, VALUE array, int start);

enum lex_state_bits {
EXPR_BEG_bit, /* ignore newline, +/- is a sign. */
Expand Down
93 changes: 7 additions & 86 deletions parse.y
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ syntax_error_new(void)
static NODE *reg_named_capture_assign(struct parser_params* p, VALUE regexp, const YYLTYPE *loc);

#define compile_callback rb_suppress_tracing
VALUE rb_io_gets_internal(VALUE io);
#endif /* !UNIVERSAL_PARSER */

static int rb_parser_string_hash_cmp(rb_parser_string_t *str1, rb_parser_string_t *str2);
Expand Down Expand Up @@ -490,19 +489,15 @@ struct parser_params {

struct {
rb_strterm_t *strterm;
VALUE (*gets)(struct parser_params*,VALUE);
VALUE input;
VALUE (*gets)(struct parser_params*,rb_parser_input_data,int);
rb_parser_input_data input;
parser_string_buffer_t string_buffer;
rb_parser_string_t *lastline;
rb_parser_string_t *nextline;
const char *pbeg;
const char *pcur;
const char *pend;
const char *ptok;
union {
long ptr;
VALUE (*call)(VALUE, int);
} gets_;
enum lex_state_e state;
/* track the nest level of any parens "()[]{}" */
int paren_nest;
Expand Down Expand Up @@ -7806,31 +7801,11 @@ must_be_ascii_compatible(struct parser_params *p, VALUE s)
return enc;
}

static VALUE
lex_get_str(struct parser_params *p, VALUE s)
{
char *beg, *end, *start;
long len;

beg = RSTRING_PTR(s);
len = RSTRING_LEN(s);
start = beg;
if (p->lex.gets_.ptr) {
if (len == p->lex.gets_.ptr) return Qnil;
beg += p->lex.gets_.ptr;
len -= p->lex.gets_.ptr;
}
end = memchr(beg, '\n', len);
if (end) len = ++end - beg;
p->lex.gets_.ptr += len;
return rb_str_subseq(s, beg - start, len);
}

static rb_parser_string_t *
lex_getline(struct parser_params *p)
{
rb_parser_string_t *str;
VALUE line = (*p->lex.gets)(p, p->lex.input);
VALUE line = (*p->lex.gets)(p, p->lex.input, p->line_count);
if (NIL_P(line)) return 0;
must_be_ascii_compatible(p, line);
p->line_count++;
Expand All @@ -7840,61 +7815,14 @@ lex_getline(struct parser_params *p)
}

#ifndef RIPPER
static rb_ast_t*
parser_compile_string(rb_parser_t *p, VALUE fname, VALUE s, int line)
{
p->lex.gets = lex_get_str;
p->lex.gets_.ptr = 0;
p->lex.input = rb_str_new_frozen(s);
p->lex.pbeg = p->lex.pcur = p->lex.pend = 0;

return yycompile(p, fname, line);
}

rb_ast_t*
rb_ruby_parser_compile_string_path(rb_parser_t *p, VALUE f, VALUE s, int line)
{
must_be_ascii_compatible(p, s);
return parser_compile_string(p, f, s, line);
}

rb_ast_t*
rb_ruby_parser_compile_string(rb_parser_t *p, const char *f, VALUE s, int line)
{
return rb_ruby_parser_compile_string_path(p, rb_filesystem_str_new_cstr(f), s, line);
}

static VALUE
lex_io_gets(struct parser_params *p, VALUE io)
rb_parser_compile(rb_parser_t *p, rb_parser_lex_gets_func *gets, VALUE fname, rb_parser_input_data input, int line)
{
return rb_io_gets_internal(io);
}

rb_ast_t*
rb_ruby_parser_compile_file_path(rb_parser_t *p, VALUE fname, VALUE file, int start)
{
p->lex.gets = lex_io_gets;
p->lex.input = file;
p->lex.pbeg = p->lex.pcur = p->lex.pend = 0;

return yycompile(p, fname, start);
}

static VALUE
lex_generic_gets(struct parser_params *p, VALUE input)
{
return (*p->lex.gets_.call)(input, p->line_count);
}

rb_ast_t*
rb_ruby_parser_compile_generic(rb_parser_t *p, VALUE (*lex_gets)(VALUE, int), VALUE fname, VALUE input, int start)
{
p->lex.gets = lex_generic_gets;
p->lex.gets_.call = lex_gets;
p->lex.gets = gets;
p->lex.input = input;
p->lex.pbeg = p->lex.pcur = p->lex.pend = 0;

return yycompile(p, fname, start);
return yycompile(p, fname, line);
}
#endif /* !RIPPER */

Expand Down Expand Up @@ -15883,7 +15811,6 @@ rb_ruby_parser_mark(void *ptr)
{
struct parser_params *p = (struct parser_params*)ptr;

rb_gc_mark(p->lex.input);
rb_gc_mark(p->ruby_sourcefile_string);
rb_gc_mark((VALUE)p->ast);
#ifndef RIPPER
Expand Down Expand Up @@ -16093,7 +16020,7 @@ rb_ruby_parser_set_parsing_thread(rb_parser_t *p, VALUE parsing_thread)
}

void
rb_ruby_parser_ripper_initialize(rb_parser_t *p, VALUE (*gets)(struct parser_params*,VALUE), VALUE input, VALUE sourcefile_string, const char *sourcefile, int sourceline)
rb_ruby_parser_ripper_initialize(rb_parser_t *p, rb_parser_lex_gets_func *gets, rb_parser_input_data input, VALUE sourcefile_string, const char *sourcefile, int sourceline)
{
p->lex.gets = gets;
p->lex.input = input;
Expand Down Expand Up @@ -16165,12 +16092,6 @@ rb_ruby_ripper_dedent_string(rb_parser_t *p, VALUE string, int width)
return i;
}

VALUE
rb_ruby_ripper_lex_get_str(rb_parser_t *p, VALUE s)
{
return lex_get_str(p, s);
}

int
rb_ruby_ripper_initialized_p(rb_parser_t *p)
{
Expand Down
Loading

0 comments on commit 65339ab

Please sign in to comment.