Skip to content

Commit c2bfb4e

Browse files
committed
add new instruction attribute called leaf
An instruction is leaf if it has no rb_funcall inside. In order to check this property, we introduce a stack canary, which is a random number collected at runtime. The stack top is always filled with this number and checked for stack smashing operations when VM_CHECK_MODE is set. [rubyGH-1947] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@64677 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
1 parent ecda2d2 commit c2bfb4e

File tree

10 files changed

+279
-1
lines changed

10 files changed

+279
-1
lines changed

inits.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ rb_call_inits(void)
6161
CALL(Complex);
6262
CALL(version);
6363
CALL(vm_trace);
64+
CALL(vm_stack_canary);
6465
CALL(ast);
6566
}
6667
#undef CALL

insns.def

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@
4545
4646
* handles_sp: If it is true, VM deals with sp in the insn.
4747
48+
* leaf: indicates that the instruction is "leaf" i.e. it does
49+
not introduce a new stack frame on top of it. Default true.
50+
4851
- Attributes can access operands, but not stack (push/pop) variables.
4952
5053
- An instruction's body is a pure C block, copied verbatim into
@@ -203,6 +206,8 @@ getinstancevariable
203206
(ID id, IC ic)
204207
()
205208
(VALUE val)
209+
/* "instance variable not initialized" warning can be hooked. */
210+
// attr bool leaf = false; /* has rb_warning() */
206211
{
207212
val = vm_getinstancevariable(GET_SELF(), id, ic);
208213
}
@@ -223,6 +228,8 @@ getclassvariable
223228
(ID id)
224229
()
225230
(VALUE val)
231+
/* "class variable access from toplevel" warning can be hooked. */
232+
// attr bool leaf = false; /* has rb_warning() */
226233
{
227234
val = rb_cvar_get(vm_get_cvar_base(rb_vm_get_cref(GET_EP()), GET_CFP()), id);
228235
}
@@ -233,6 +240,8 @@ setclassvariable
233240
(ID id)
234241
(VALUE val)
235242
()
243+
/* "class variable access from toplevel" warning can be hooked. */
244+
// attr bool leaf = false; /* has rb_warning() */
236245
{
237246
vm_ensure_not_refinement_module(GET_SELF());
238247
rb_cvar_set(vm_get_cvar_base(rb_vm_get_cref(GET_EP()), GET_CFP()), id, val);
@@ -247,6 +256,8 @@ getconstant
247256
(ID id)
248257
(VALUE klass)
249258
(VALUE val)
259+
/* getconstant can kick autoload */
260+
// attr bool leaf = false; /* has rb_autoload_load() */
250261
{
251262
val = vm_get_ev_const(ec, klass, id, 0);
252263
}
@@ -258,6 +269,11 @@ setconstant
258269
(ID id)
259270
(VALUE val, VALUE cbase)
260271
()
272+
/* Assigning an object to a constant is basically a leaf operation.
273+
* The problem is, assigning a Module instance to a constant _names_
274+
* that module. Naming involves string manipulations, which are
275+
* method calls. */
276+
// attr bool leaf = false; /* has StringValue() */
261277
{
262278
vm_check_if_namespace(cbase);
263279
vm_ensure_not_refinement_module(GET_SELF());
@@ -270,6 +286,7 @@ getglobal
270286
(GENTRY entry)
271287
()
272288
(VALUE val)
289+
// attr bool leaf = leafness_of_getglobal(entry);
273290
{
274291
val = GET_GLOBAL((VALUE)entry);
275292
}
@@ -280,6 +297,7 @@ setglobal
280297
(GENTRY entry)
281298
(VALUE val)
282299
()
300+
// attr bool leaf = leafness_of_setglobal(entry);
283301
{
284302
SET_GLOBAL((VALUE)entry, val);
285303
}
@@ -339,6 +357,7 @@ putiseq
339357
(ISEQ iseq)
340358
()
341359
(VALUE ret)
360+
// attr bool leaf = true; /* yes it is */
342361
{
343362
ret = (VALUE)iseq;
344363
}
@@ -392,6 +411,9 @@ toregexp
392411
(rb_num_t opt, rb_num_t cnt)
393412
(...)
394413
(VALUE val)
414+
/* This instruction has StringValue(), which is a method call. But it
415+
* seems that path is never covered. */
416+
// attr bool leaf = true; /* yes it is */
395417
// attr rb_snum_t sp_inc = 1 - cnt;
396418
{
397419
const VALUE ary = rb_ary_tmp_new_from_values(0, cnt, STACK_ADDR_FROM_TOP(cnt));
@@ -444,6 +466,7 @@ expandarray
444466
(rb_num_t num, rb_num_t flag)
445467
(..., VALUE ary)
446468
(...)
469+
// attr bool leaf = false; /* has rb_check_array_type() */
447470
// attr rb_snum_t sp_inc = num - 1 + (flag & 1 ? 1 : 0);
448471
{
449472
vm_expandarray(GET_SP(), ary, num, (int)flag);
@@ -455,6 +478,7 @@ concatarray
455478
()
456479
(VALUE ary1, VALUE ary2)
457480
(VALUE ary)
481+
// attr bool leaf = false; /* has rb_check_array_type() */
458482
{
459483
ary = vm_concat_array(ary1, ary2);
460484
}
@@ -465,6 +489,7 @@ splatarray
465489
(VALUE flag)
466490
(VALUE ary)
467491
(VALUE obj)
492+
// attr bool leaf = false; /* has rb_check_array_type() */
468493
{
469494
obj = vm_splat_array(flag, ary);
470495
}
@@ -475,6 +500,7 @@ newhash
475500
(rb_num_t num)
476501
(...)
477502
(VALUE val)
503+
// attr bool leaf = false; /* has rb_hash_key_str() */
478504
// attr rb_snum_t sp_inc = 1 - num;
479505
{
480506
RUBY_DTRACE_CREATE_HOOK(HASH, num);
@@ -492,6 +518,8 @@ newrange
492518
(rb_num_t flag)
493519
(VALUE low, VALUE high)
494520
(VALUE val)
521+
/* rb_range_new() exercises "bad value for range" check. */
522+
// attr bool leaf = false; /* see also: range.c:range_init() */
495523
{
496524
val = rb_range_new(low, high, (int)flag);
497525
}
@@ -618,6 +646,7 @@ defined
618646
(rb_num_t op_type, VALUE obj, VALUE needstr)
619647
(VALUE v)
620648
(VALUE val)
649+
// attr bool leaf = leafness_of_defined(op_type);
621650
{
622651
val = vm_defined(ec, GET_CFP(), op_type, obj, needstr, v);
623652
}
@@ -634,6 +663,7 @@ checkmatch
634663
(rb_num_t flag)
635664
(VALUE target, VALUE pattern)
636665
(VALUE result)
666+
// attr bool leaf = leafness_of_checkmatch(flag);
637667
{
638668
result = vm_check_match(ec, target, pattern, flag);
639669
}
@@ -726,6 +756,7 @@ opt_str_freeze
726756
(VALUE str)
727757
()
728758
(VALUE val)
759+
// attr bool leaf = BASIC_OP_UNREDEFINED_P(BOP_FREEZE, STRING_REDEFINED_OP_FLAG);
729760
{
730761
val = vm_opt_str_freeze(str, BOP_FREEZE, idFreeze);
731762
}
@@ -735,6 +766,7 @@ opt_str_uminus
735766
(VALUE str)
736767
()
737768
(VALUE val)
769+
// attr bool leaf = BASIC_OP_UNREDEFINED_P(BOP_UMINUS, STRING_REDEFINED_OP_FLAG);
738770
{
739771
val = vm_opt_str_freeze(str, BOP_UMINUS, idUMinus);
740772
}
@@ -744,6 +776,11 @@ opt_newarray_max
744776
(rb_num_t num)
745777
(...)
746778
(VALUE val)
779+
/* This instruction typically has no funcalls. But it compares array
780+
* contents with each other by nature. That part could call methods when
781+
* necessary. No way to detect such method calls beforehand. We
782+
* cannot but mark it as not leaf. */
783+
// attr bool leaf = false; /* has rb_funcall() */
747784
// attr rb_snum_t sp_inc = 1 - num;
748785
{
749786
val = vm_opt_newarray_max(num, STACK_ADDR_FROM_TOP(num));
@@ -754,6 +791,8 @@ opt_newarray_min
754791
(rb_num_t num)
755792
(...)
756793
(VALUE val)
794+
/* Same discussion as opt_newarray_max. */
795+
// attr bool leaf = false; /* has rb_funcall() */
757796
// attr rb_snum_t sp_inc = 1 - num;
758797
{
759798
val = vm_opt_newarray_min(num, STACK_ADDR_FROM_TOP(num));
@@ -765,6 +804,7 @@ opt_send_without_block
765804
(CALL_INFO ci, CALL_CACHE cc)
766805
(...)
767806
(VALUE val)
807+
// attr bool leaf = false; /* Of course it isn't. */
768808
// attr bool handles_sp = true;
769809
// attr rb_snum_t sp_inc = -ci->orig_argc;
770810
{
@@ -797,6 +837,7 @@ invokeblock
797837
(CALL_INFO ci)
798838
(...)
799839
(VALUE val)
840+
// attr bool leaf = false; /* Of course it isn't. */
800841
// attr bool handles_sp = true;
801842
// attr rb_snum_t sp_inc = 1 - ci->orig_argc;
802843
{
@@ -824,6 +865,10 @@ leave
824865
()
825866
(VALUE val)
826867
(VALUE val)
868+
/* This is super surprising but when leaving from a frame, we check
869+
* for interrupts. If any, that should be executed on top of the
870+
* current execution context. This is a method call. */
871+
// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */
827872
// attr bool handles_sp = true;
828873
{
829874
if (OPT_CHECKED_RUN) {
@@ -858,6 +903,8 @@ throw
858903
(rb_num_t throw_state)
859904
(VALUE throwobj)
860905
(VALUE val)
906+
/* Same discussion as leave. */
907+
// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */
861908
{
862909
RUBY_VM_CHECK_INTS(ec);
863910
val = vm_throw(ec, GET_CFP(), throw_state, throwobj);
@@ -875,6 +922,8 @@ jump
875922
(OFFSET dst)
876923
()
877924
()
925+
/* Same discussion as leave. */
926+
// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */
878927
{
879928
RUBY_VM_CHECK_INTS(ec);
880929
JUMP(dst);
@@ -886,6 +935,8 @@ branchif
886935
(OFFSET dst)
887936
(VALUE val)
888937
()
938+
/* Same discussion as jump. */
939+
// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */
889940
{
890941
if (RTEST(val)) {
891942
RUBY_VM_CHECK_INTS(ec);
@@ -899,6 +950,8 @@ branchunless
899950
(OFFSET dst)
900951
(VALUE val)
901952
()
953+
/* Same discussion as jump. */
954+
// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */
902955
{
903956
if (!RTEST(val)) {
904957
RUBY_VM_CHECK_INTS(ec);
@@ -912,6 +965,8 @@ branchnil
912965
(OFFSET dst)
913966
(VALUE val)
914967
()
968+
/* Same discussion as jump. */
969+
// attr bool leaf = false; /* has rb_threadptr_execute_interrupts() */
915970
{
916971
if (NIL_P(val)) {
917972
RUBY_VM_CHECK_INTS(ec);
@@ -965,6 +1020,10 @@ opt_case_dispatch
9651020
(CDHASH hash, OFFSET else_offset)
9661021
(..., VALUE key)
9671022
()
1023+
/* Case dispatch involves hash lookup, which inevitably compares
1024+
* several objects with each other. The same discussion as for
1025+
* opt_newarray_max also goes here. */
1026+
// attr bool leaf = false; /* has rb_any_cmp() */
9681027
// attr rb_snum_t sp_inc = -1;
9691028
{
9701029
OFFSET dst = vm_case_dispatch(hash, else_offset, key);
@@ -982,6 +1041,9 @@ opt_plus
9821041
(CALL_INFO ci, CALL_CACHE cc)
9831042
(VALUE recv, VALUE obj)
9841043
(VALUE val)
1044+
/* Array + anything can be handled inside of opt_plus, and that
1045+
* anything is converted into array using #to_ary. */
1046+
// attr bool leaf = false; /* has rb_to_array_type() */
9851047
{
9861048
val = vm_opt_plus(recv, obj);
9871049

@@ -1067,6 +1129,10 @@ opt_eq
10671129
(CALL_INFO ci, CALL_CACHE cc)
10681130
(VALUE recv, VALUE obj)
10691131
(VALUE val)
1132+
/* This instruction can compare a string with non-string. This
1133+
* (somewhat) coerces the non-string into a string, via a method
1134+
* call. */
1135+
// attr bool leaf = false; /* has rb_str_equal() */
10701136
{
10711137
val = opt_eq_func(recv, obj, ci, cc);
10721138

@@ -1084,6 +1150,8 @@ opt_neq
10841150
(CALL_INFO ci_eq, CALL_CACHE cc_eq, CALL_INFO ci, CALL_CACHE cc)
10851151
(VALUE recv, VALUE obj)
10861152
(VALUE val)
1153+
/* Same discussion as opt_eq. */
1154+
// attr bool leaf = false; /* has rb_str_equal() */
10871155
{
10881156
val = vm_opt_neq(ci, cc, ci_eq, cc_eq, recv, obj);
10891157

@@ -1186,6 +1254,11 @@ opt_aref
11861254
(CALL_INFO ci, CALL_CACHE cc)
11871255
(VALUE recv, VALUE obj)
11881256
(VALUE val)
1257+
/* This is complicated. In case of hash, vm_opt_aref() resorts to
1258+
* rb_hash_aref(). If `recv` has no `obj`, this function then yields
1259+
* default_proc. This is a method call. So opt_aref is
1260+
* (surprisingly) not leaf. */
1261+
// attr bool leaf = false; /* has rb_funcall() */ /* calls #yield */
11891262
{
11901263
val = vm_opt_aref(recv, obj);
11911264

@@ -1203,6 +1276,9 @@ opt_aset
12031276
(CALL_INFO ci, CALL_CACHE cc)
12041277
(VALUE recv, VALUE obj, VALUE set)
12051278
(VALUE val)
1279+
/* This is a different story from opt_aref: vm_opt_aset() may resort
1280+
* to rb_hash_aset(), which should call #hash for `obj`. */
1281+
// attr bool leaf = false; /* has rb_funcall() */ /* calls #hash */
12061282
{
12071283
val = vm_opt_aset(recv, obj, set);
12081284

@@ -1220,6 +1296,8 @@ opt_aset_with
12201296
(VALUE key, CALL_INFO ci, CALL_CACHE cc)
12211297
(VALUE recv, VALUE val)
12221298
(VALUE val)
1299+
/* Same discussion as opt_aset. */
1300+
// attr bool leaf = false; /* has rb_funcall() */ /* calls #hash */
12231301
{
12241302
VALUE tmp = vm_opt_aset_with(recv, key, val);
12251303

@@ -1242,6 +1320,8 @@ opt_aref_with
12421320
(VALUE key, CALL_INFO ci, CALL_CACHE cc)
12431321
(VALUE recv)
12441322
(VALUE val)
1323+
/* Same discussion as opt_aref. */
1324+
// attr bool leaf = false; /* has rb_funcall() */ /* calls #yield */
12451325
{
12461326
val = vm_opt_aref_with(recv, key);
12471327

@@ -1345,6 +1425,7 @@ opt_regexpmatch1
13451425
(VALUE recv)
13461426
(VALUE obj)
13471427
(VALUE val)
1428+
// attr bool leaf = BASIC_OP_UNREDEFINED_P(BOP_MATCH, REGEXP_REDEFINED_OP_FLAG);
13481429
{
13491430
val = vm_opt_regexpmatch1(recv, obj);
13501431
}
@@ -1372,6 +1453,7 @@ opt_call_c_function
13721453
(rb_insn_func_t funcptr)
13731454
()
13741455
()
1456+
// attr bool leaf = false; /* anything can happen inside */
13751457
// attr bool handles_sp = true;
13761458
{
13771459
reg_cfp = (funcptr)(ec, reg_cfp);

internal.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1901,6 +1901,9 @@ VALUE rb_ivar_lookup(VALUE obj, ID id, VALUE undef);
19011901
void rb_autoload_str(VALUE mod, ID id, VALUE file);
19021902
void rb_deprecate_constant(VALUE mod, const char *name);
19031903
NORETURN(VALUE rb_mod_const_missing(VALUE,VALUE));
1904+
rb_gvar_getter_t *rb_gvar_getter_function_of(const struct rb_global_entry *);
1905+
rb_gvar_setter_t *rb_gvar_setter_function_of(const struct rb_global_entry *);
1906+
bool rb_gvar_is_traced(const struct rb_global_entry *);
19041907

19051908
/* vm_insnhelper.h */
19061909
rb_serial_t rb_next_class_serial(void);
@@ -1946,6 +1949,7 @@ VALUE rb_lambda_call(VALUE obj, ID mid, int argc, const VALUE *argv,
19461949
/* vm_insnhelper.c */
19471950
VALUE rb_equal_opt(VALUE obj1, VALUE obj2);
19481951
VALUE rb_eql_opt(VALUE obj1, VALUE obj2);
1952+
void Init_vm_stack_canary(void);
19491953

19501954
/* vm_method.c */
19511955
void Init_eval_method(void);
@@ -2099,6 +2103,9 @@ VALUE rb_imemo_new_debug(enum imemo_type type, VALUE v1, VALUE v2, VALUE v3, VAL
20992103
VALUE rb_imemo_new(enum imemo_type type, VALUE v1, VALUE v2, VALUE v3, VALUE v0);
21002104
#endif
21012105

2106+
/* random.c */
2107+
int fill_random_bytes(void *, size_t, int);
2108+
21022109
RUBY_SYMBOL_EXPORT_END
21032110

21042111
#define RUBY_DTRACE_CREATE_HOOK(name, arg) \

random.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -573,7 +573,7 @@ fill_random_bytes_syscall(void *seed, size_t size, int need_secure)
573573
# define fill_random_bytes_syscall(seed, size, need_secure) -1
574574
#endif
575575

576-
static int
576+
int
577577
fill_random_bytes(void *seed, size_t size, int need_secure)
578578
{
579579
int ret = fill_random_bytes_syscall(seed, size, need_secure);

0 commit comments

Comments
 (0)