From 87aa5d3a26ba32fb34a69136c6a1c1ce767f6b7e Mon Sep 17 00:00:00 2001 From: yui-knk Date: Fri, 26 Apr 2024 18:16:36 +0900 Subject: [PATCH] Use `rb_parser_string_t *` as `ruby_sourcefile_string` This reduces dependency on VALUE. --- internal/parse.h | 2 +- parse.y | 112 ++++++++++++++++++++++++++++++++++----------- ruby_parser.c | 21 ++++++++- rubyparser.h | 1 + universal_parser.c | 1 + 5 files changed, 109 insertions(+), 28 deletions(-) diff --git a/internal/parse.h b/internal/parse.h index f06020c73f698f..5d689930179d67 100644 --- a/internal/parse.h +++ b/internal/parse.h @@ -55,7 +55,7 @@ void rb_ruby_parser_set_script_lines(rb_parser_t *p); void rb_ruby_parser_error_tolerant(rb_parser_t *p); void rb_ruby_parser_keep_tokens(rb_parser_t *p); typedef VALUE (rb_parser_lex_gets_func)(struct parser_params*, rb_parser_input_data, int); -rb_ast_t *rb_parser_compile(rb_parser_t *p, rb_parser_lex_gets_func *gets, VALUE fname, rb_parser_input_data input, int line); +rb_ast_t *rb_parser_compile(rb_parser_t *p, rb_parser_lex_gets_func *gets, const char *fname_ptr, long fname_len, rb_encoding *fname_enc, rb_parser_input_data input, int line); RUBY_SYMBOL_EXPORT_BEGIN diff --git a/parse.y b/parse.y index cf217a313ff285..d86c915b3ff25f 100644 --- a/parse.y +++ b/parse.y @@ -84,10 +84,7 @@ static NODE *reg_named_capture_assign(struct parser_params* p, VALUE regexp, con #endif /* !UNIVERSAL_PARSER */ static int rb_parser_string_hash_cmp(rb_parser_string_t *str1, rb_parser_string_t *str2); - -#ifndef RIPPER static rb_parser_string_t *rb_parser_string_deep_copy(struct parser_params *p, const rb_parser_string_t *original); -#endif static int node_integer_cmp(rb_node_integer_t *n1, rb_node_integer_t *n2) @@ -520,7 +517,7 @@ struct parser_params { int line_count; int ruby_sourceline; /* current line no. */ const char *ruby_sourcefile; /* current source file */ - VALUE ruby_sourcefile_string; + rb_parser_string_t *ruby_sourcefile_string; rb_encoding *enc; token_info *token_info; st_table *case_labels; @@ -1161,7 +1158,7 @@ static rb_node_aryptn_t *rb_node_aryptn_new(struct parser_params *p, NODE *pre_a static rb_node_hshptn_t *rb_node_hshptn_new(struct parser_params *p, NODE *nd_pconst, NODE *nd_pkwargs, NODE *nd_pkwrestarg, const YYLTYPE *loc); static rb_node_fndptn_t *rb_node_fndptn_new(struct parser_params *p, NODE *pre_rest_arg, NODE *args, NODE *post_rest_arg, const YYLTYPE *loc); static rb_node_line_t *rb_node_line_new(struct parser_params *p, const YYLTYPE *loc); -static rb_node_file_t *rb_node_file_new(struct parser_params *p, VALUE str, const YYLTYPE *loc); +static rb_node_file_t *rb_node_file_new(struct parser_params *p, rb_parser_string_t *str, const YYLTYPE *loc); static rb_node_error_t *rb_node_error_new(struct parser_params *p, const YYLTYPE *loc); #define NEW_SCOPE(a,b,loc) (NODE *)rb_node_scope_new(p,a,b,loc) @@ -2103,6 +2100,41 @@ rb_parser_encoding_string_new(rb_parser_t *p, const char *ptr, long len, rb_enco } #ifndef RIPPER +static bool +zero_filled(const char *s, int n) +{ + for (; n > 0; --n) { + if (*s++) return false; + } + return true; +} + +static bool +str_null_char(rb_parser_t *p, const char *s, long len, const int minlen, rb_encoding *enc) +{ + const char *e = s + len; + + for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) { + if (zero_filled(s, minlen)) return true; + } + return false; +} + +static bool +cstr_null_check(rb_parser_t *p, const char *s, long len, rb_encoding *enc, int *w) +{ + const int minlen = rb_enc_mbminlen(enc); + + if (minlen > 1) { + *w = 1; + return str_null_char(p, s, len, minlen, enc); + } + else { + *w = 0; + return (!s || memchr(s, 0, len)); + } +} + rb_parser_string_t * rb_str_to_parser_string(rb_parser_t *p, VALUE str) { @@ -7695,7 +7727,7 @@ yycompile0(VALUE arg) struct parser_params *p = (struct parser_params *)arg; int cov = FALSE; - if (!compile_for_eval && !NIL_P(p->ruby_sourcefile_string) && !e_option_supplied(p)) { + if (!compile_for_eval && p->ruby_sourcefile_string && !e_option_supplied(p)) { cov = TRUE; } @@ -7749,17 +7781,39 @@ yycompile0(VALUE arg) return TRUE; } +static void +set_arg_error(struct parser_params *p, const char *err) +{ + VALUE excargs[3]; + + excargs[0] = rb_eArgError; + excargs[1] = rb_str_new_cstr(err); + excargs[2] = rb_make_backtrace(); + rb_set_errinfo(rb_make_exception(3, excargs)); +} + static rb_ast_t * -yycompile(struct parser_params *p, VALUE fname, int line) +yycompile(struct parser_params *p, const char *fname_ptr, long fname_len, rb_encoding *fname_enc, int line) { rb_ast_t *ast; - if (NIL_P(fname)) { - p->ruby_sourcefile_string = Qnil; + if (!fname_ptr) { + p->ruby_sourcefile_string = NULL; p->ruby_sourcefile = "(none)"; } else { - p->ruby_sourcefile_string = rb_str_to_interned_str(fname); - p->ruby_sourcefile = StringValueCStr(fname); + int w; + if (cstr_null_check(p, fname_ptr, fname_len, fname_enc, &w)) { + if (w) { + set_arg_error(p, "string contains null char"); + } + else { + set_arg_error(p, "string contains null byte"); + } + return rb_ast_new(); + } + + p->ruby_sourcefile_string = rb_parser_encoding_string_new(p, fname_ptr, fname_len, fname_enc); + p->ruby_sourcefile = fname_ptr; } p->ruby_sourceline = line - 1; @@ -7802,13 +7856,14 @@ lex_getline(struct parser_params *p) #ifndef RIPPER rb_ast_t* -rb_parser_compile(rb_parser_t *p, rb_parser_lex_gets_func *gets, VALUE fname, rb_parser_input_data input, int line) +rb_parser_compile(rb_parser_t *p, rb_parser_lex_gets_func *gets, + const char *fname_ptr, long fname_len, rb_encoding *fname_enc, rb_parser_input_data input, int line) { p->lex.gets = gets; p->lex.input = input; p->lex.pbeg = p->lex.pcur = p->lex.pend = 0; - return yycompile(p, fname, line); + return yycompile(p, fname_ptr, fname_len, fname_enc, line); } #endif /* !RIPPER */ @@ -9603,7 +9658,7 @@ parser_set_encode(struct parser_params *p, const char *name) error: excargs[0] = rb_eArgError; excargs[2] = rb_make_backtrace(); - rb_ary_unshift(excargs[2], rb_sprintf("%"PRIsVALUE":%d", p->ruby_sourcefile_string, p->ruby_sourceline)); + rb_ary_unshift(excargs[2], rb_sprintf("%"PRIsVALUE":%d", rb_str_new_mutable_parser_string(p->ruby_sourcefile_string), p->ruby_sourceline)); VALUE exc = rb_make_exception(3, excargs); ruby_show_error_line(p, exc, &(YYLTYPE)RUBY_INIT_YYLLOC(), p->ruby_sourceline, p->lex.lastline); rb_exc_raise(exc); @@ -12580,10 +12635,10 @@ rb_node_line_new(struct parser_params *p, const YYLTYPE *loc) } static rb_node_file_t * -rb_node_file_new(struct parser_params *p, VALUE str, const YYLTYPE *loc) +rb_node_file_new(struct parser_params *p, rb_parser_string_t *str, const YYLTYPE *loc) { rb_node_file_t *n = NODE_NEWNODE(NODE_FILE, rb_node_file_t, loc); - n->path = rb_str_to_parser_string(p, str); + n->path = str; return n; } @@ -12832,7 +12887,6 @@ string_literal_head(struct parser_params *p, enum node_type htype, NODE *head) return lit; } -#ifndef RIPPER static rb_parser_string_t * rb_parser_string_deep_copy(struct parser_params *p, const rb_parser_string_t *orig) { @@ -12843,7 +12897,6 @@ rb_parser_string_deep_copy(struct parser_params *p, const rb_parser_string_t *or copy->enc = orig->enc; return copy; } -#endif /* concat two string literals */ static NODE * @@ -13177,9 +13230,13 @@ gettable(struct parser_params *p, ID id, const YYLTYPE *loc) return NEW_FALSE(loc); case keyword__FILE__: { - VALUE file = p->ruby_sourcefile_string; - if (NIL_P(file)) - file = rb_str_new(0, 0); + rb_parser_string_t *file; + if (p->ruby_sourcefile_string) { + file = rb_parser_string_deep_copy(p, p->ruby_sourcefile_string); + } + else { + file = STRING_NEW0(); + } node = NEW_FILE(file, loc); } return node; @@ -15762,7 +15819,7 @@ parser_initialize(struct parser_params *p) { /* note: we rely on TypedData_Make_Struct to set most fields to 0 */ p->command_start = TRUE; - p->ruby_sourcefile_string = Qnil; + p->ruby_sourcefile_string = NULL; p->lex.lpar_beg = -1; /* make lambda_beginning_p() == FALSE at first */ string_buffer_init(p); p->node_id = 0; @@ -15797,7 +15854,6 @@ rb_ruby_parser_mark(void *ptr) { struct parser_params *p = (struct parser_params*)ptr; - rb_gc_mark(p->ruby_sourcefile_string); #ifndef RIPPER rb_gc_mark(p->error_buffer); #else @@ -15828,6 +15884,10 @@ rb_ruby_parser_free(void *ptr) ruby_sized_xfree(p->tokenbuf, p->toksiz); } + if (p->ruby_sourcefile_string) { + rb_parser_string_free(p, p->ruby_sourcefile_string); + } + for (local = p->lvtbl; local; local = prev) { prev = local->prev; local_free(p, local); @@ -16010,7 +16070,7 @@ rb_ruby_parser_ripper_initialize(rb_parser_t *p, rb_parser_lex_gets_func *gets, p->lex.gets = gets; p->lex.input = input; p->eofp = 0; - p->ruby_sourcefile_string = sourcefile_string; + p->ruby_sourcefile_string = rb_str_to_parser_string(p, sourcefile_string); p->ruby_sourcefile = sourcefile; p->ruby_sourceline = sourceline; } @@ -16030,7 +16090,7 @@ rb_ruby_parser_enc(rb_parser_t *p) VALUE rb_ruby_parser_ruby_sourcefile_string(rb_parser_t *p) { - return p->ruby_sourcefile_string; + return rb_str_new_parser_string(p->ruby_sourcefile_string); } int @@ -16167,7 +16227,7 @@ parser_compile_error(struct parser_params *p, const rb_code_location_t *loc, con va_start(ap, fmt); p->error_buffer = rb_syntax_error_append(p->error_buffer, - p->ruby_sourcefile_string, + p->ruby_sourcefile_string ? rb_str_new_parser_string(p->ruby_sourcefile_string) : Qnil, lineno, column, p->enc, fmt, ap); va_end(ap); diff --git a/ruby_parser.c b/ruby_parser.c index d8b7580e002591..9cf2e8c774ea1d 100644 --- a/ruby_parser.c +++ b/ruby_parser.c @@ -166,6 +166,12 @@ enc_mbcput(unsigned int c, void *buf, void *enc) return rb_enc_mbcput(c, buf, (rb_encoding *)enc); } +static int +enc_mbclen(const char *p, const char *e, void *enc) +{ + return rb_enc_mbclen(p, e, (rb_encoding *)enc); +} + static void * enc_from_index(int idx) { @@ -411,6 +417,7 @@ static const rb_parser_config_t rb_global_parser_config = { .ascii8bit_encoding = ascii8bit_encoding, .enc_codelen = enc_codelen, .enc_mbcput = enc_mbcput, + .enc_mbclen = enc_mbclen, .enc_find_index = rb_enc_find_index, .enc_from_index = enc_from_index, .enc_isspace = enc_isspace, @@ -654,7 +661,19 @@ static void parser_aset_script_lines_for(VALUE path, rb_parser_ary_t *lines); static rb_ast_t* parser_compile(rb_parser_t *p, rb_parser_lex_gets_func *gets, VALUE fname, rb_parser_input_data input, int line) { - rb_ast_t *ast = rb_parser_compile(p, gets, fname, input, line); + rb_ast_t *ast; + const char *ptr = 0; + long len = 0; + rb_encoding *enc = 0; + + if (!NIL_P(fname)) { + StringValueCStr(fname); + ptr = RSTRING_PTR(fname); + len = RSTRING_LEN(fname); + enc = rb_enc_get(fname); + } + + ast = rb_parser_compile(p, gets, ptr, len, enc, input, line); parser_aset_script_lines_for(fname, ast->body.script_lines); return ast; } diff --git a/rubyparser.h b/rubyparser.h index 7e85febd990f1d..cb90b847b66769 100644 --- a/rubyparser.h +++ b/rubyparser.h @@ -1338,6 +1338,7 @@ typedef struct rb_parser_config_struct { rb_encoding *(*ascii8bit_encoding)(void); int (*enc_codelen)(int c, rb_encoding *enc); int (*enc_mbcput)(unsigned int c, void *buf, rb_encoding *enc); + int (*enc_mbclen)(const char *p, const char *e, rb_encoding *enc); int (*enc_find_index)(const char *name); rb_encoding *(*enc_from_index)(int idx); int (*enc_isspace)(OnigCodePoint c, rb_encoding *enc); diff --git a/universal_parser.c b/universal_parser.c index fa30fcd2215cc6..05445587ba0bc9 100644 --- a/universal_parser.c +++ b/universal_parser.c @@ -168,6 +168,7 @@ #define rb_ascii8bit_encoding p->config->ascii8bit_encoding #define rb_enc_codelen p->config->enc_codelen #define rb_enc_mbcput p->config->enc_mbcput +#define rb_enc_mbclen p->config->enc_mbclen #define rb_enc_find_index p->config->enc_find_index #define rb_enc_from_index p->config->enc_from_index #define rb_enc_isspace p->config->enc_isspace