-
Notifications
You must be signed in to change notification settings - Fork 1.2k
/
Copy pathInterpreterThunkEmitter.cpp
959 lines (843 loc) · 41.4 KB
/
InterpreterThunkEmitter.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
//-------------------------------------------------------------------------------------------------------
// Copyright (C) Microsoft. All rights reserved.
// Copyright (c) ChakraCore Project Contributors. All rights reserved.
// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
//-------------------------------------------------------------------------------------------------------
#include "Backend.h"
#if ENABLE_NATIVE_CODEGEN
namespace {
// Per-architecture machine-code templates for the dynamic interpreter thunk, plus the byte offsets inside
// those templates that InterpreterThunkEmitter::EncodeInterpreterThunk patches after the template has been
// copied into an executable buffer. Each architecture section defines:
//   - InterpreterThunk : the shared header placed at the start of a thunk block,
//   - Call             : the small per-function stub that calls the static thunk and branches to the epilog,
//   - Epilog           : the shared function epilog,
//   - *Offset constants: positions of the zero-filled operand bytes in InterpreterThunk that get patched.
//
// The definitions in this anonymous namespace must be constexpr to allow OACR to conclude that certain operations
// in InterpreterThunkEmitter::EncodeInterpreterThunk are safe. Because constexpr requires that the declaration
// and the definition appear at the same place (i.e., no forward declarations), this means that we either have
// to move all 5 definitions of InterpreterThunk into the header file, or we have to make InterpreterThunkSize
// public. The latter option seems the less objectionable, so that's what I've done here.
#ifdef _M_X64
#ifdef _WIN32
// Offsets of the patched operand bytes within the Windows x64 template below.
constexpr BYTE FunctionInfoOffset = 23;
constexpr BYTE FunctionProxyOffset = 27;
constexpr BYTE DynamicThunkAddressOffset = 31;
constexpr BYTE CallBlockStartAddrOffset = 41;
constexpr BYTE ThunkSizeOffset = 55;
constexpr BYTE ErrorOffset = 64;
constexpr BYTE ThunkAddressOffset = 81;
// PrologSize and StackAllocSize are handed to PrologEncoder::EncodeSmallProlog when unwind data is generated.
constexpr BYTE PrologSize = 80;
constexpr BYTE StackAllocSize = 0x28;
//
// Home the arguments onto the stack and pass a pointer to the base of the stack location to the inner thunk
//
// Calling convention requires that caller should allocate at least 0x20 bytes and the stack be 16 byte aligned.
// Hence, we allocate 0x28 bytes of stack space for the callee to use. The callee uses 8 bytes to push the first
// argument and the rest 0x20 ensures alignment is correct.
//
constexpr BYTE InterpreterThunk[InterpreterThunkEmitter::InterpreterThunkSize] = {
0x48, 0x89, 0x54, 0x24, 0x10, // mov qword ptr [rsp+10h],rdx
0x48, 0x89, 0x4C, 0x24, 0x08, // mov qword ptr [rsp+8],rcx
0x4C, 0x89, 0x44, 0x24, 0x18, // mov qword ptr [rsp+18h],r8
0x4C, 0x89, 0x4C, 0x24, 0x20, // mov qword ptr [rsp+20h],r9
0x48, 0x8B, 0x41, 0x00, // mov rax, qword ptr [rcx+FunctionInfoOffset]
0x48, 0x8B, 0x48, 0x00, // mov rcx, qword ptr [rax+FunctionProxyOffset]
0x48, 0x8B, 0x51, 0x00, // mov rdx, qword ptr [rcx+DynamicThunkAddressOffset]
// Range Check for Valid call target
0x48, 0x83, 0xE2, 0xF8, // and rdx, 0xFFFFFFFFFFFFFFF8h ;Force 8 byte alignment
0x48, 0x8b, 0xca, // mov rcx, rdx
0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov rax, CallBlockStartAddress
0x48, 0x2b, 0xc8, // sub rcx, rax
0x48, 0x81, 0xf9, 0x00, 0x00, 0x00, 0x00, // cmp rcx, ThunkSize
0x76, 0x09, // jbe $safe
0x48, 0xc7, 0xc1, 0x00, 0x00, 0x00, 0x00, // mov rcx, errorcode
0xcd, 0x29, // int 29h
// $safe:
0x48, 0x8D, 0x4C, 0x24, 0x08, // lea rcx, [rsp+8] ;Load the address to stack
0x48, 0x83, 0xEC, StackAllocSize, // sub rsp,28h
0x48, 0xB8, 0x00, 0x00, 0x00 ,0x00, 0x00, 0x00, 0x00, 0x00, // mov rax, <thunk>
0xFF, 0xE2, // jmp rdx
0xCC, 0xCC, 0xCC, 0xCC, 0xCC // int 3 ;for alignment to size of 8 we are adding this
};
// Shared epilog: undoes the stack allocation made by the header, then returns.
constexpr BYTE Epilog[] = {
0x48, 0x83, 0xC4, StackAllocSize, // add rsp,28h
0xC3 // ret
};
#else // Sys V AMD64
// Offsets of the patched operand bytes within the System V x64 template below.
constexpr BYTE FunctionInfoOffset = 7;
constexpr BYTE FunctionProxyOffset = 11;
constexpr BYTE DynamicThunkAddressOffset = 15;
constexpr BYTE CallBlockStartAddrOffset = 25;
constexpr BYTE ThunkSizeOffset = 39;
constexpr BYTE ErrorOffset = 48;
constexpr BYTE ThunkAddressOffset = 61;
constexpr BYTE PrologSize = 60;
constexpr BYTE StackAllocSize = 0x0;
constexpr BYTE InterpreterThunk[InterpreterThunkEmitter::InterpreterThunkSize] = {
0x55, // push rbp // Prolog - setup the stack frame
0x48, 0x89, 0xe5, // mov rbp, rsp
0x48, 0x8b, 0x47, 0x00, // mov rax, qword ptr [rdi + FunctionInfoOffset]
0x48, 0x8B, 0x48, 0x00, // mov rcx, qword ptr [rax+FunctionProxyOffset]
0x48, 0x8B, 0x51, 0x00, // mov rdx, qword ptr [rcx+DynamicThunkAddressOffset]
// Range Check for Valid call target
0x48, 0x83, 0xE2, 0xF8, // and rdx, 0xfffffffffffffff8 // Force 8 byte alignment
0x48, 0x89, 0xd1, // mov rcx, rdx
0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov rax, CallBlockStartAddress
0x48, 0x29, 0xc1, // sub rcx, rax
0x48, 0x81, 0xf9, 0x00, 0x00, 0x00, 0x00, // cmp rcx, ThunkSize
0x76, 0x09, // jbe safe
0x48, 0xc7, 0xc1, 0x00, 0x00, 0x00, 0x00, // mov rcx, errorcode
0xcd, 0x29, // int 29h <-- xplat TODO: just to exit
// safe:
0x48, 0x8d, 0x7c, 0x24, 0x10, // lea rdi, [rsp+0x10]
0x48, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mov rax, <thunk> // stack already 16-byte aligned
0xff, 0xe2, // jmp rdx
0xcc // int 3 // for alignment to size of 8
};
// Shared epilog: restores the frame pointer pushed by the header, then returns.
constexpr BYTE Epilog[] = {
0x5d, // pop rbp
0xc3 // ret
};
#endif
#elif defined(_M_ARM)
// Offsets of the patched instruction words / operand bytes within the ARM (Thumb-2) template below.
constexpr BYTE ThunkAddressOffset = 8;
constexpr BYTE FunctionInfoOffset = 18;
constexpr BYTE FunctionProxyOffset = 22;
constexpr BYTE DynamicThunkAddressOffset = 26;
constexpr BYTE CallBlockStartAddressInstrOffset = 42;
constexpr BYTE CallThunkSizeInstrOffset = 54;
constexpr BYTE ErrorOffset = 64;
constexpr BYTE InterpreterThunk[InterpreterThunkEmitter::InterpreterThunkSize] = {
0x0F, 0xB4, // push {r0-r3}
0x2D, 0xE9, 0x00, 0x48, // push {r11,lr}
0xEB, 0x46, // mov r11,sp
0x00, 0x00, 0x00, 0x00, // movw r1,ThunkAddress
0x00, 0x00, 0x00, 0x00, // movt r1,ThunkAddress
0xD0, 0xF8, 0x00, 0x20, // ldr.w r2,[r0,#0x00]
0xD2, 0xF8, 0x00, 0x00, // ldr.w r0,[r2,#0x00]
0xD0, 0xF8, 0x00, 0x30, // ldr.w r3,[r0,#0x00]
0x4F, 0xF6, 0xF9, 0x70, // mov r0,#0xFFF9
0xCF, 0xF6, 0xFF, 0x70, // movt r0,#0xFFFF
0x03, 0xEA, 0x00, 0x03, // and r3,r3,r0
0x18, 0x46, // mov r0, r3
0x00, 0x00, 0x00, 0x00, // movw r12, CallBlockStartAddress
0x00, 0x00, 0x00, 0x00, // movt r12, CallBlockStartAddress
0xA0, 0xEB, 0x0C, 0x00, // sub r0, r12
0x00, 0x00, 0x00, 0x00, // mov r12, ThunkSize
0x60, 0x45, // cmp r0, r12
0x02, 0xD9, // bls $safe
0x4F, 0xF0, 0x00, 0x00, // mov r0, errorcode
0xFB, 0xDE, // Equivalent to int 0x29
//$safe:
0x02, 0xA8, // add r0,sp,#8
0x18, 0x47 // bx r3
};
// JmpOffset: byte offset within Call of the branch instruction whose target FillBuffer encodes.
constexpr BYTE JmpOffset = 2;
constexpr BYTE Call[] = {
0x88, 0x47, // blx r1
0x00, 0x00, 0x00, 0x00, // b.w epilog
0xFE, 0xDE, // int 3 ;Required for alignment
};
constexpr BYTE Epilog[] = {
0x5D, 0xF8, 0x04, 0xBB, // pop {r11}
0x5D, 0xF8, 0x14, 0xFB // ldr pc,[sp],#0x14
};
#elif defined(_M_ARM64)
// Offsets of the patched instruction words within the ARM64 template below. The Windows template has four
// extra 4-byte register-homing instructions in its prologue, hence the offsets differ by 16.
#ifdef _WIN32
constexpr BYTE FunctionInfoOffset = 24;
constexpr BYTE FunctionProxyOffset = 28;
constexpr BYTE DynamicThunkAddressOffset = 32;
constexpr BYTE ThunkAddressOffset = 36;
#else
constexpr BYTE FunctionInfoOffset = 8;
constexpr BYTE FunctionProxyOffset = 12;
constexpr BYTE DynamicThunkAddressOffset = 16;
constexpr BYTE ThunkAddressOffset = 20;
#endif
//TODO: saravind :Implement Range Check for ARM64
constexpr BYTE InterpreterThunk[InterpreterThunkEmitter::InterpreterThunkSize] = {
#ifdef _WIN32
0xFD, 0x7B, 0xBB, 0xA9, //stp fp, lr, [sp, #-80]! ;Prologue
0xFD, 0x03, 0x00, 0x91, //mov fp, sp ;update frame pointer to the stack pointer
0xE0, 0x07, 0x01, 0xA9, //stp x0, x1, [sp, #16] ;Prologue again; save all registers
0xE2, 0x0F, 0x02, 0xA9, //stp x2, x3, [sp, #32]
0xE4, 0x17, 0x03, 0xA9, //stp x4, x5, [sp, #48]
0xE6, 0x1F, 0x04, 0xA9, //stp x6, x7, [sp, #64]
#else
0xFD, 0x7B, 0xBF, 0xA9, //stp fp, lr, [sp, #-16]! ;Prologue
0xFD, 0x03, 0x00, 0x91, //mov fp, sp ;update frame pointer to the stack pointer
#endif
0x02, 0x00, 0x40, 0xF9, //ldr x2, [x0, #0x00] ;offset will be replaced with Offset of FunctionInfo
0x40, 0x00, 0x40, 0xF9, //ldr x0, [x2, #0x00] ;offset will be replaced with Offset of FunctionProxy
0x03, 0x00, 0x40, 0xF9, //ldr x3, [x0, #0x00] ;offset will be replaced with offset of DynamicInterpreterThunk
//Following 4 MOV Instrs are to move the 64-bit address of the InterpreterThunk address into register x1.
0x00, 0x00, 0x00, 0x00, //movz x1, #0x00 ;This is overwritten with the actual thunk address(16 - 0 bits) move
0x00, 0x00, 0x00, 0x00, //movk x1, #0x00, lsl #16 ;This is overwritten with the actual thunk address(32 - 16 bits) move
0x00, 0x00, 0x00, 0x00, //movk x1, #0x00, lsl #32 ;This is overwritten with the actual thunk address(48 - 32 bits) move
0x00, 0x00, 0x00, 0x00, //movk x1, #0x00, lsl #48 ;This is overwritten with the actual thunk address(64 - 48 bits) move
0xE0, 0x43, 0x00, 0x91, //add x0, sp, #16
0x60, 0x00, 0x1F, 0xD6, //br x3
0xCC, 0xCC, 0xCC, 0xCC //int 3 for 8byte alignment
};
// JmpOffset: byte offset within Call of the branch instruction whose target FillBuffer encodes.
constexpr BYTE JmpOffset = 4;
constexpr BYTE Call[] = {
0x20, 0x00, 0x3f, 0xd6, // blr x1
0x00, 0x00, 0x00, 0x00 // b epilog
};
constexpr BYTE Epilog[] = {
#ifdef _WIN32
0xfd, 0x7b, 0xc5, 0xa8, // ldp fp, lr, [sp], #80
#else
0xfd, 0x7b, 0xc1, 0xa8, // ldp fp, lr, [sp], #16
#endif
0xc0, 0x03, 0x5f, 0xd6 // ret
};
#else // x86
// Offsets of the patched operand bytes within the x86 template below.
constexpr BYTE FunctionInfoOffset = 8;
constexpr BYTE FunctionProxyOffset = 11;
constexpr BYTE DynamicThunkAddressOffset = 14;
constexpr BYTE CallBlockStartAddrOffset = 21;
constexpr BYTE ThunkSizeOffset = 26;
constexpr BYTE ErrorOffset = 33;
constexpr BYTE ThunkAddressOffset = 44;
constexpr BYTE InterpreterThunk[InterpreterThunkEmitter::InterpreterThunkSize] = {
0x55, // push ebp ;Prolog - setup the stack frame
0x8B, 0xEC, // mov ebp,esp
0x8B, 0x45, 0x08, // mov eax, dword ptr [ebp+8]
0x8B, 0x40, 0x00, // mov eax, dword ptr [eax+FunctionInfoOffset]
0x8B, 0x40, 0x00, // mov eax, dword ptr [eax+FunctionProxyOffset]
0x8B, 0x48, 0x00, // mov ecx, dword ptr [eax+DynamicThunkAddressOffset]
// Range Check for Valid call target
0x83, 0xE1, 0xF8, // and ecx, 0FFFFFFF8h
0x8b, 0xc1, // mov eax, ecx
0x2d, 0x00, 0x00, 0x00, 0x00, // sub eax, CallBlockStartAddress
0x3d, 0x00, 0x00, 0x00, 0x00, // cmp eax, ThunkSize
0x76, 0x07, // jbe SHORT $safe
0xb9, 0x00, 0x00, 0x00, 0x00, // mov ecx, errorcode
0xCD, 0x29, // int 29h
//$safe
0x8D, 0x45, 0x08, // lea eax, ebp+8
0x50, // push eax
0xB8, 0x00, 0x00, 0x00, 0x00, // mov eax, <thunk>
0xFF, 0xE1, // jmp ecx
0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC // int 3 for 8byte alignment
};
constexpr BYTE Epilog[] = {
0x5D, // pop ebp
0xC3 // ret
};
#endif
#if defined(_M_X64) || defined(_M_IX86)
// Call stub shared by x86 and x64. JmpOffset is the byte offset within Call of the 32-bit relative
// displacement of the jmp, which FillBuffer fills in with the distance to the epilog.
constexpr BYTE JmpOffset = 3;
constexpr BYTE Call[] = {
0xFF, 0xD0, // call rax
0xE9, 0x00, 0x00, 0x00, 0x00, // jmp [offset]
0xCC, // int 3 ;for alignment to size of 8 we are adding this
};
#endif
// Size in bytes of the shared header; the per-function call stubs start this many bytes into a thunk block.
constexpr BYTE HeaderSize = sizeof(InterpreterThunk);
} // anonymous namespace
// Size in bytes of one per-function call stub (the architecture-specific Call template).
const BYTE InterpreterThunkEmitter::ThunkSize = sizeof(Call);
// Constructs an emitter for the given script context. No thunk block is created here: the emitter starts
// with a null thunkBuffer and a thunkCount of zero. The emit buffer manager is created without a script
// context and targets the current process.
InterpreterThunkEmitter::InterpreterThunkEmitter(
    Js::ScriptContext* context,
    ArenaAllocator* allocator,
    CustomHeap::InProcCodePageAllocators * codePageAllocators,
    bool isAsmInterpreterThunk)
    : emitBufferManager(allocator, codePageAllocators, /*scriptContext*/ nullptr, nullptr, _u("Interpreter thunk buffer"), GetCurrentProcess()),
      scriptContext(context),
      allocator(allocator),
      thunkCount(0),
      thunkBuffer(nullptr),
      isAsmInterpreterThunk(isAsmInterpreterThunk)
{
}
// Exposes the list of thunk blocks owned by this emitter (pointer to the member, never null).
SListBase<ThunkBlock>* InterpreterThunkEmitter::GetThunkBlocksList()
{
    return &this->thunkBlocks;
}
//
// Hands out the next dynamic interpreter thunk. Thunks are created a block at a time and issued one by one.
//
// When the current block is exhausted, a free-listed thunk is used if one is available (delegated to
// AllocateFromFreeList); otherwise a new block is created. If no new block can be created, the address of
// the static interpreter thunk is returned and *ppDynamicInterpreterThunk is not written.
//
// On the normal path, *ppDynamicInterpreterThunk receives the address of this function's call stub inside
// the block, and the return value is the start of the block (thumb-tagged on ARM).
//
BYTE* InterpreterThunkEmitter::GetNextThunk(PVOID* ppDynamicInterpreterThunk)
{
    Assert(ppDynamicInterpreterThunk);
    Assert(*ppDynamicInterpreterThunk == nullptr);

    if (thunkCount == 0)
    {
        // Nothing left in the current block: recycle before allocating.
        if (!this->freeListedThunkBlocks.Empty())
        {
            return AllocateFromFreeList(ppDynamicInterpreterThunk);
        }

        const bool haveNewBlock = NewThunkBlock();
        if (!haveNewBlock)
        {
            // Fall back to the static thunk.
#ifdef ASMJS_PLAT
            if (this->isAsmInterpreterThunk)
            {
                return (BYTE*)&Js::InterpreterStackFrame::StaticInterpreterAsmThunk;
            }
            return (BYTE*)&Js::InterpreterStackFrame::StaticInterpreterThunk;
#else
            Assert(!this->isAsmInterpreterThunk);
            return (BYTE*)&Js::InterpreterStackFrame::StaticInterpreterThunk;
#endif
        }
    }

    Assert(this->thunkBuffer != nullptr);
    BYTE* blockEntry = this->thunkBuffer;
#if _M_ARM
    blockEntry = (BYTE*)((DWORD)blockEntry | 0x01);
#endif

    // Stubs are issued from the highest index downwards.
    --thunkCount;
    *ppDynamicInterpreterThunk = blockEntry + HeaderSize + (thunkCount * ThunkSize);
#if _M_ARM
    AssertMsg(((uintptr_t)(*ppDynamicInterpreterThunk) & 0x6) == 0, "Not 8 byte aligned?");
#else
    AssertMsg(((uintptr_t)(*ppDynamicInterpreterThunk) & 0x7) == 0, "Not 8 byte aligned?");
#endif
    return blockEntry;
}
//
// Maps a per-function dynamic thunk address back to the entry point of the thunk block that contains it.
// The entry point is the thunk address rounded down to a multiple of BlockSize (thumb-tagged on ARM).
//
void* InterpreterThunkEmitter::ConvertToEntryPoint(PVOID dynamicInterpreterThunk)
{
    Assert(dynamicInterpreterThunk != nullptr);

    // Clear the low bits to land on the BlockSize-aligned start of the block.
    const size_t blockMask = ~((size_t)(BlockSize) - 1);
    void* blockStart = (void*)((size_t)dynamicInterpreterThunk & blockMask);
#if _M_ARM
    blockStart = (BYTE*)((DWORD)blockStart | 0x01);
#endif
    return blockStart;
}
bool InterpreterThunkEmitter::NewThunkBlock()
{
if (this->scriptContext->GetConfig()->IsNoDynamicThunks())
{
return false;
}
#ifdef ENABLE_OOP_NATIVE_CODEGEN
if (CONFIG_FLAG(ForceStaticInterpreterThunk))
{
return false;
}
if (JITManager::GetJITManager()->IsOOPJITEnabled())
{
return NewOOPJITThunkBlock();
}
#endif
Assert(this->thunkCount == 0);
BYTE* buffer;
EmitBufferAllocation<VirtualAllocWrapper, PreReservedVirtualAllocWrapper> * allocation = emitBufferManager.AllocateBuffer(BlockSize, &buffer);
if (allocation == nullptr)
{
Js::Throw::OutOfMemory();
}
if (!emitBufferManager.ProtectBufferWithExecuteReadWriteForInterpreter(allocation))
{
Js::Throw::OutOfMemory();
}
#if PDATA_ENABLED
PRUNTIME_FUNCTION pdataStart = nullptr;
intptr_t epilogEnd = 0;
#endif
DWORD count = this->thunkCount;
FillBuffer(
this->scriptContext->GetThreadContext(),
this->isAsmInterpreterThunk,
(intptr_t)buffer,
BlockSize,
buffer,
#if PDATA_ENABLED
&pdataStart,
&epilogEnd,
#endif
&count
);
if (!emitBufferManager.CommitBufferForInterpreter(allocation, buffer, BlockSize))
{
Js::Throw::OutOfMemory();
}
// Call to set VALID flag for CFG check
BYTE* callTarget = buffer;
#ifdef _M_ARM
// We want to allow the actual callable value, so thumb-tag the address
callTarget = (BYTE*)((uintptr_t)buffer | 0x1);
#endif
ThreadContext::GetContextForCurrentThread()->SetValidCallTargetForCFG(callTarget);
// Update object state only at the end when everything has succeeded - and no exceptions can be thrown.
auto block = this->thunkBlocks.PrependNode(allocator, buffer, count);
#if PDATA_ENABLED
void* pdataTable;
PDataManager::RegisterPdata((PRUNTIME_FUNCTION)pdataStart, (ULONG_PTR)buffer, (ULONG_PTR)epilogEnd, &pdataTable);
block->SetPdata(pdataTable);
#else
Unused(block);
#endif
this->thunkBuffer = buffer;
this->thunkCount = count;
return true;
}
#ifdef ENABLE_OOP_NATIVE_CODEGEN
bool InterpreterThunkEmitter::NewOOPJITThunkBlock()
{
PSCRIPTCONTEXT_HANDLE remoteScriptContext = this->scriptContext->GetRemoteScriptAddr();
if (!JITManager::GetJITManager()->IsConnected())
{
return false;
}
InterpreterThunkInputIDL thunkInput;
thunkInput.asmJsThunk = this->isAsmInterpreterThunk;
InterpreterThunkOutputIDL thunkOutput;
HRESULT hr = JITManager::GetJITManager()->NewInterpreterThunkBlock(remoteScriptContext, &thunkInput, &thunkOutput);
if (!JITManager::HandleServerCallResult(hr, RemoteCallType::ThunkCreation))
{
return false;
}
BYTE* buffer = (BYTE*)thunkOutput.mappedBaseAddr;
if (!CONFIG_FLAG(OOPCFGRegistration))
{
BYTE* callTarget = buffer;
#ifdef _M_ARM
// Need to register the thumb-tagged call target for CFG
callTarget = (BYTE*)((uintptr_t)callTarget | 0x1);
#endif
this->scriptContext->GetThreadContext()->SetValidCallTargetForCFG(callTarget);
}
// Update object state only at the end when everything has succeeded - and no exceptions can be thrown.
auto block = this->thunkBlocks.PrependNode(allocator, buffer, thunkOutput.thunkCount);
#if PDATA_ENABLED
void* pdataTable;
PDataManager::RegisterPdata((PRUNTIME_FUNCTION)thunkOutput.pdataTableStart, (ULONG_PTR)thunkOutput.mappedBaseAddr, (ULONG_PTR)thunkOutput.epilogEndAddr, &pdataTable);
block->SetPdata(pdataTable);
#else
Unused(block);
#endif
this->thunkBuffer = (BYTE*)thunkOutput.mappedBaseAddr;
this->thunkCount = thunkOutput.thunkCount;
return true;
}
#endif
/* static */
// Emits one complete thunk block into `buffer`. The resulting layout, from low to high address, is:
//   [InterpreterThunk header][Call stub x N][debug-break fill][Epilog, aligned][PDATA, aligned]
//
// threadContext   - used to shift the static interpreter thunk address (via ShiftAddr).
// asmJsThunk      - selects InterpreterAsmThunk instead of InterpreterThunk when ASMJS_PLAT is defined.
// finalAddr       - address the block will have where it executes; all addresses baked into the emitted
//                   code and the values returned for PDATA are computed relative to this, while the bytes
//                   themselves are written through `buffer`.
// bufferSize      - NOTE(review): not referenced in the body; BlockSize is used for all size computations.
// buffer          - destination memory that receives the emitted bytes.
// pdataTableStart - (PDATA_ENABLED only) receives the final address of the PDATA area.
// epilogEndAddr   - (PDATA_ENABLED only) receives finalEpilogStart; see the note at the assignment.
// thunkCount      - receives the number of Call stubs emitted.
void InterpreterThunkEmitter::FillBuffer(
_In_ ThreadContextInfo * threadContext,
_In_ bool asmJsThunk,
_In_ intptr_t finalAddr,
_In_ size_t bufferSize,
_Out_writes_bytes_all_(BlockSize) BYTE* buffer,
#if PDATA_ENABLED
_Out_ PRUNTIME_FUNCTION * pdataTableStart,
_Out_ intptr_t * epilogEndAddr,
#endif
_Out_ DWORD * thunkCount
)
{
// Determine how much room the unwind data needs on this architecture.
#ifdef _M_X64
PrologEncoder prologEncoder;
prologEncoder.EncodeSmallProlog(PrologSize, StackAllocSize);
DWORD pdataSize = prologEncoder.SizeOfPData();
#elif defined(_M_ARM32_OR_ARM64)
DWORD pdataSize = sizeof(RUNTIME_FUNCTION);
#else
DWORD pdataSize = 0;
#endif
DWORD bytesRemaining = BlockSize;
DWORD bytesWritten = 0;
DWORD thunks = 0;
DWORD epilogSize = sizeof(Epilog);
const BYTE *epilog = Epilog;
const BYTE *header = InterpreterThunk;
intptr_t interpreterThunk;
// the static interpreter thunk invoked by the dynamic emitted thunk
#ifdef ASMJS_PLAT
if (asmJsThunk)
{
interpreterThunk = ShiftAddr(threadContext, &Js::InterpreterStackFrame::InterpreterAsmThunk);
}
else
#endif
{
interpreterThunk = ShiftAddr(threadContext, &Js::InterpreterStackFrame::InterpreterThunk);
}
BYTE * currentBuffer = buffer;
// Ensure there is space for PDATA at the end
BYTE* pdataStart = currentBuffer + (BlockSize - Math::Align(pdataSize, EMIT_BUFFER_ALIGNMENT));
BYTE* epilogStart = pdataStart - Math::Align(epilogSize, EMIT_BUFFER_ALIGNMENT);
// The same PDATA / epilog positions, expressed relative to the final address of the block
intptr_t finalPdataStart = finalAddr + (BlockSize - Math::Align(pdataSize, EMIT_BUFFER_ALIGNMENT));
intptr_t finalEpilogStart = finalPdataStart - Math::Align(epilogSize, EMIT_BUFFER_ALIGNMENT);
// Copy the thunk buffer and modify it.
js_memcpy_s(currentBuffer, bytesRemaining, header, HeaderSize);
EncodeInterpreterThunk(currentBuffer, finalAddr, finalEpilogStart, epilogSize, interpreterThunk);
currentBuffer += HeaderSize;
bytesRemaining -= HeaderSize;
// Copy call buffer
DWORD callSize = sizeof(Call);
// Emit call stubs while one more stub still ends strictly before the epilog.
while (currentBuffer < epilogStart - callSize)
{
js_memcpy_s(currentBuffer, bytesRemaining, Call, callSize);
// Patch the stub's branch so that it lands on the shared epilog.
#if _M_ARM
int offset = (epilogStart - (currentBuffer + JmpOffset));
Assert(offset >= 0);
DWORD encodedOffset = EncoderMD::BranchOffset_T2_24(offset);
DWORD encodedBranch = /*opcode=*/ 0x9000F000 | encodedOffset;
Emit(currentBuffer, JmpOffset, encodedBranch);
#elif _M_ARM64
int64 offset = (epilogStart - (currentBuffer + JmpOffset));
Assert(offset >= 0);
DWORD encodedOffset = EncoderMD::BranchOffset_26(offset);
DWORD encodedBranch = /*opcode=*/ 0x14000000 | encodedOffset;
Emit(currentBuffer, JmpOffset, encodedBranch);
#else
// jump requires an offset from the end of the jump instruction.
int offset = (int)(epilogStart - (currentBuffer + JmpOffset + sizeof(int)));
Assert(offset >= 0);
Emit(currentBuffer, JmpOffset, offset);
#endif
currentBuffer += callSize;
bytesRemaining -= callSize;
thunks++;
}
// Fill any gap till start of epilog
bytesWritten = FillDebugBreak(currentBuffer, (DWORD)(epilogStart - currentBuffer));
bytesRemaining -= bytesWritten;
currentBuffer += bytesWritten;
// Copy epilog
bytesWritten = CopyWithAlignment(currentBuffer, bytesRemaining, epilog, epilogSize, EMIT_BUFFER_ALIGNMENT);
currentBuffer += bytesWritten;
bytesRemaining -= bytesWritten;
// Generate and register PDATA
#if PDATA_ENABLED
BYTE* epilogEnd = epilogStart + epilogSize;
// The function covered by the unwind data spans from the start of the block to the end of the epilog.
DWORD functionSize = (DWORD)(epilogEnd - buffer);
Assert(pdataStart == currentBuffer);
#ifdef _M_X64
Assert(bytesRemaining >= pdataSize);
BYTE* pdata = prologEncoder.Finalize(buffer, functionSize, pdataStart);
bytesWritten = CopyWithAlignment(pdataStart, bytesRemaining, pdata, pdataSize, EMIT_BUFFER_ALIGNMENT);
#elif defined(_M_ARM32_OR_ARM64)
RUNTIME_FUNCTION pdata;
GeneratePdata(buffer, functionSize, &pdata);
bytesWritten = CopyWithAlignment(pdataStart, bytesRemaining, (const BYTE*)&pdata, pdataSize, EMIT_BUFFER_ALIGNMENT);
#endif
*pdataTableStart = (PRUNTIME_FUNCTION)finalPdataStart;
// NOTE(review): despite the parameter name, this stores the final address of the epilog START
// (finalEpilogStart), not start + epilogSize - confirm this is what callers expect.
*epilogEndAddr = finalEpilogStart;
#endif
*thunkCount = thunks;
}
#if _M_ARM
// ARM (Thumb-2): patches the copy of the InterpreterThunk header in thunkBuffer with run-time values.
//
// thunkBuffer             - writable copy of the header template.
// thunkBufferStartAddress - final address of the start of the thunk block; the call stubs begin
//                           HeaderSize bytes after it.
// epilogStart             - final address of the epilog; bounds the range-checked call-stub area.
// epilogSize              - not referenced in this implementation.
// interpreterThunk        - address of the static interpreter thunk, loaded into r1 by MOVW/MOVT.
void InterpreterThunkEmitter::EncodeInterpreterThunk(
__in_bcount(InterpreterThunkSize) BYTE* thunkBuffer,
__in const intptr_t thunkBufferStartAddress,
__in const intptr_t epilogStart,
__in const DWORD epilogSize,
__in const intptr_t interpreterThunk)
{
// Encode MOVW
DWORD lowerThunkBits = (uint32)interpreterThunk & 0x0000FFFF;
DWORD movW = EncodeMove(/*Opcode*/ 0x0000F240, /*register*/1, lowerThunkBits);
Emit(thunkBuffer,ThunkAddressOffset, movW);
// Encode MOVT
DWORD higherThunkBits = ((uint32)interpreterThunk & 0xFFFF0000) >> 16;
DWORD movT = EncodeMove(/*Opcode*/ 0x0000F2C0, /*register*/1, higherThunkBits);
Emit(thunkBuffer, ThunkAddressOffset + sizeof(movW), movT);
// Encode LDR - Load of function Body
// Only a single byte of each LDR is written, so each of these field offsets is stored truncated to a byte.
thunkBuffer[FunctionInfoOffset] = Js::JavascriptFunction::GetOffsetOfFunctionInfo();
thunkBuffer[FunctionProxyOffset] = Js::FunctionInfo::GetOffsetOfFunctionProxy();
// Encode LDR - Load of interpreter thunk number
thunkBuffer[DynamicThunkAddressOffset] = Js::FunctionBody::GetOffsetOfDynamicInterpreterThunk();
// Encode MOVW R12, CallBlockStartAddress
uintptr_t callBlockStartAddress = (uintptr_t)thunkBufferStartAddress + HeaderSize;
// Size of the call-stub area: from the first call stub up to the epilog.
uint totalThunkSize = (uint)(epilogStart - callBlockStartAddress);
DWORD lowerCallBlockStartAddress = callBlockStartAddress & 0x0000FFFF;
DWORD movWblockStart = EncodeMove(/*Opcode*/ 0x0000F240, /*register*/12, lowerCallBlockStartAddress);
Emit(thunkBuffer,CallBlockStartAddressInstrOffset, movWblockStart);
// Encode MOVT R12, CallBlockStartAddress
DWORD higherCallBlockStartAddress = (callBlockStartAddress & 0xFFFF0000) >> 16;
DWORD movTblockStart = EncodeMove(/*Opcode*/ 0x0000F2C0, /*register*/12, higherCallBlockStartAddress);
Emit(thunkBuffer, CallBlockStartAddressInstrOffset + sizeof(movWblockStart), movTblockStart);
//Encode MOV R12, CallBlockSize
// NOTE(review): only a MOVW (16-bit immediate) is emitted, so totalThunkSize is assumed to fit in 16 bits.
DWORD movBlockSize = EncodeMove(/*Opcode*/ 0x0000F240, /*register*/12, (DWORD)totalThunkSize);
Emit(thunkBuffer, CallThunkSizeInstrOffset, movBlockSize);
// Patch the error code used when the range check fails.
Emit(thunkBuffer, ErrorOffset, (BYTE) FAST_FAIL_INVALID_ARG);
}
// Assembles a 32-bit move-immediate instruction word from its parts: the destination register is placed
// at bit 24, the 16-bit immediate is encoded into its instruction bit positions, and the opcode bits are
// OR'ed in last. The operand bits are asserted not to collide with the opcode bits.
DWORD InterpreterThunkEmitter::EncodeMove(DWORD opCode, int reg, DWORD imm16)
{
    DWORD instruction = reg << 24;
#if _M_ARM
    DWORD immediateBits = 0;
    EncoderMD::EncodeImmediate16(imm16, &immediateBits);
    instruction |= immediateBits;
#elif _M_ARM64
    // ToDo (SaAgarwa) - From Aaron change. Validate for ARM64
    instruction |= (imm16 & 0xFFFF) << 5;
#endif
    AssertMsg((instruction & opCode) == 0, "Any bits getting overwritten?");
    return instruction | opCode;
}
// ARM: fills in a packed-unwind RUNTIME_FUNCTION record describing the thunk block as a single function of
// functionSize bytes. entryPoint is not referenced; BeginAddress is written as a fixed offset instead.
void InterpreterThunkEmitter::GeneratePdata(_In_ const BYTE* entryPoint, _In_ const DWORD functionSize, _Out_ RUNTIME_FUNCTION* function)
{
function->BeginAddress = 0x1; // Since our base address is the start of the function - this is offset from the base address
function->Flag = 1; // Packed unwind data is used
function->FunctionLength = functionSize / 2; // presumably expressed in 2-byte units - confirm against the ARM .pdata format
function->Ret = 0; // Return via Pop
function->H = 1; // Homes parameters
function->Reg = 7; // No saved registers - R11 is the frame pointer - not considered here
function->R = 1; // No registers are being saved.
function->L = 1; // Save/restore LR register
function->C = 1; // Frame pointer chain in R11 established
function->StackAdjust = 0; // Stack allocation for the function
}
#elif _M_ARM64
// ARM64: patches the copy of the InterpreterThunk header in thunkBuffer with run-time values: the four
// MOVZ/MOVK instructions that load the static interpreter thunk address into x1, and the immediate
// offsets of the three LDR instructions.
//
// thunkBuffer      - writable copy of the header template.
// interpreterThunk - address of the static interpreter thunk.
// thunkBufferStartAddress, epilogStart and epilogSize are not referenced in this implementation
// (no range check is emitted for ARM64).
void InterpreterThunkEmitter::EncodeInterpreterThunk(
__in_bcount(InterpreterThunkSize) BYTE* thunkBuffer,
__in const intptr_t thunkBufferStartAddress,
__in const intptr_t epilogStart,
__in const DWORD epilogSize,
__in const intptr_t interpreterThunk)
{
int addrOffset = ThunkAddressOffset;
// Following 4 MOV Instrs are to move the 64-bit address of the InterpreterThunk address into register x1.
// Encode MOVZ (movz x1, #<interpreterThunk 16-0 bits>)
DWORD lowerThunkBits = (uint64)interpreterThunk & 0x0000FFFF;
DWORD movZ = EncodeMove(/*Opcode*/ 0xD2800000, /*register x1*/1, lowerThunkBits); // no shift; hw = 00
Emit(thunkBuffer,addrOffset, movZ);
static_assert(sizeof(movZ) == 4, "movZ has to be 32-bit encoded");
addrOffset+= sizeof(movZ);
// Encode MOVK (movk x1, #<interpreterThunk 32-16 bits>, lsl #16)
DWORD higherThunkBits = ((uint64)interpreterThunk & 0xFFFF0000) >> 16;
DWORD movK = EncodeMove(/*Opcode*/ 0xF2A00000, /*register x1*/1, higherThunkBits); // left shift 16 bits; hw = 01
Emit(thunkBuffer, addrOffset, movK);
static_assert(sizeof(movK) == 4, "movK has to be 32-bit encoded");
addrOffset+= sizeof(movK);
// Encode MOVK (movk x1, #<interpreterThunk 48-32 bits>, lsl #32)
higherThunkBits = ((uint64)interpreterThunk & 0xFFFF00000000) >> 32;
movK = EncodeMove(/*Opcode*/ 0xF2C00000, /*register x1*/1, higherThunkBits); // left shift 32 bits; hw = 02
Emit(thunkBuffer, addrOffset, movK);
addrOffset += sizeof(movK);
// Encode MOVK (movk x1, #<interpreterThunk 64-48 bits>, lsl #48)
higherThunkBits = ((uint64)interpreterThunk & 0xFFFF000000000000) >> 48;
movK = EncodeMove(/*Opcode*/ 0xF2E00000, /*register x1*/1, higherThunkBits); // left shift 48 bits; hw = 03
Emit(thunkBuffer, addrOffset, movK);
// Encode LDR - Load of function Body
// Each LDR immediate is the byte offset divided by 8, OR'ed into the instruction word starting at bit 10.
ULONG offsetOfFunctionInfo = Js::JavascriptFunction::GetOffsetOfFunctionInfo();
AssertMsg(offsetOfFunctionInfo % 8 == 0, "Immediate offset for LDR must be 8 byte aligned");
AssertMsg(offsetOfFunctionInfo < 0x8000, "Immediate offset for LDR must be less than 0x8000");
*(PULONG)&thunkBuffer[FunctionInfoOffset] |= (offsetOfFunctionInfo / 8) << 10;
ULONG offsetOfFunctionProxy = Js::FunctionInfo::GetOffsetOfFunctionProxy();
AssertMsg(offsetOfFunctionProxy % 8 == 0, "Immediate offset for LDR must be 8 byte aligned");
AssertMsg(offsetOfFunctionProxy < 0x8000, "Immediate offset for LDR must be less than 0x8000");
*(PULONG)&thunkBuffer[FunctionProxyOffset] |= (offsetOfFunctionProxy / 8) << 10;
// Encode LDR - Load of interpreter thunk number
ULONG offsetOfDynamicInterpreterThunk = Js::FunctionBody::GetOffsetOfDynamicInterpreterThunk();
AssertMsg(offsetOfDynamicInterpreterThunk % 8 == 0, "Immediate offset for LDR must be 8 byte aligned");
AssertMsg(offsetOfDynamicInterpreterThunk < 0x8000, "Immediate offset for LDR must be less than 0x8000");
*(PULONG)&thunkBuffer[DynamicThunkAddressOffset] |= (offsetOfDynamicInterpreterThunk / 8) << 10;
}
// Assembles a 32-bit move-immediate instruction word from its parts: the destination register occupies
// the lowest bits, the 16-bit immediate is placed starting at bit 5, and the opcode bits are OR'ed in
// last. The operand bits are asserted not to collide with the opcode bits.
DWORD InterpreterThunkEmitter::EncodeMove(DWORD opCode, int reg, DWORD imm16)
{
    DWORD instruction = reg;
#if _M_ARM
    DWORD immediateBits = 0;
    EncoderMD::EncodeImmediate16(imm16, &immediateBits);
    instruction |= immediateBits;
#elif _M_ARM64
    // ToDo (SaAgarwa) - From Aaron change. Validate for ARM64
    instruction |= (imm16 & 0xFFFF) << 5;
#endif
    AssertMsg((instruction & opCode) == 0, "Any bits getting overwritten?");
    return instruction | opCode;
}
// ARM64: fills in a packed-unwind RUNTIME_FUNCTION record describing the thunk block as a single function
// of functionSize bytes. entryPoint is not referenced; BeginAddress is written as offset 0.
void InterpreterThunkEmitter::GeneratePdata(_In_ const BYTE* entryPoint, _In_ const DWORD functionSize, _Out_ RUNTIME_FUNCTION* function)
{
function->BeginAddress = 0x0; // Since our base address is the start of the function - this is offset from the base address
function->Flag = 1; // Packed unwind data is used
function->FunctionLength = functionSize / 4; // presumably expressed in 4-byte instruction units - confirm against the ARM64 .pdata format
function->RegF = 0; // number of non-volatile FP registers (d8-d15) saved in the canonical stack location
function->RegI = 0; // number of non-volatile INT registers (x19-x28) saved in the canonical stack location
function->H = 1; // Homes parameters
// (indicating whether the function "homes" the integer parameter registers (x0-x7) by storing them at the very start of the function)
function->CR = 3; // chained function, a store/load pair instruction is used in prolog/epilog <x29,lr>
function->FrameSize = 5; // the number of bytes of stack that is allocated for this function divided by 16 (5 * 16 = 80)
}
#else
// x86 / x64: patches the copy of the InterpreterThunk header in thunkBuffer with run-time values.
//
// thunkBuffer             - writable copy of the header template.
// thunkBufferStartAddress - final address of the start of the thunk block; the call stubs begin
//                           HeaderSize bytes after it.
// epilogStart             - final address of the epilog; bounds the range-checked call-stub area.
// epilogSize              - not referenced in this implementation.
// interpreterThunk        - address of the static interpreter thunk.
void InterpreterThunkEmitter::EncodeInterpreterThunk(
    __in_bcount(InterpreterThunkSize) BYTE* thunkBuffer,
    __in const intptr_t thunkBufferStartAddress,
    __in const intptr_t epilogStart,
    __in const DWORD epilogSize,
    __in const intptr_t interpreterThunk)
{
    // Single-byte displacements of the three chained loads that fetch the dynamic thunk address.
    thunkBuffer[FunctionInfoOffset] = Js::JavascriptFunction::GetOffsetOfFunctionInfo();
    thunkBuffer[FunctionProxyOffset] = Js::FunctionInfo::GetOffsetOfFunctionProxy();
    thunkBuffer[DynamicThunkAddressOffset] = Js::FunctionBody::GetOffsetOfDynamicInterpreterThunk();

    // Range check operands: start of the call-stub area and its size (up to the epilog).
    const intptr_t callBlockStart = thunkBufferStartAddress + HeaderSize;
    Emit(thunkBuffer, CallBlockStartAddrOffset, (uintptr_t)callBlockStart);
    uint totalThunkSize = (uint)(epilogStart - callBlockStart);
    Emit(thunkBuffer, ThunkSizeOffset, totalThunkSize);

    // Error code used when the range check fails.
    Emit(thunkBuffer, ErrorOffset, (BYTE) FAST_FAIL_INVALID_ARG);

    // Address of the static interpreter thunk that the call stubs invoke.
    Emit(thunkBuffer, ThunkAddressOffset, (uintptr_t)interpreterThunk);
}
#endif
// Fills count bytes at dest by delegating to CustomHeap::FillDebugBreak and returns
// count unchanged.
//   dest  - buffer to fill.
//   count - number of bytes to fill; asserted to be a multiple of 2 when _M_ARM is
//           defined and a multiple of 4 when _M_ARM64 is defined.
/*static*/
DWORD InterpreterThunkEmitter::FillDebugBreak(_Out_writes_bytes_all_(count) BYTE* dest, _In_ DWORD count)
{
#if defined(_M_ARM)
Assert(count % 2 == 0);
#elif defined(_M_ARM64)
Assert(count % 4 == 0);
#endif
CustomHeap::FillDebugBreak(dest, count);
return count;
}
// Copies srcSize bytes from src to dest, then pads the copy up to the next multiple
// of alignment using FillDebugBreak.
//   dest        - destination buffer.
//   sizeInBytes - capacity of dest, passed to js_memcpy_s as the destination size.
//   src/srcSize - bytes to copy.
//   alignment   - alignment the copied size is rounded up to.
// Returns the number of bytes written: srcSize plus the padding when padding was
// needed and fits in the remaining capacity, otherwise just srcSize.
/*static*/
DWORD InterpreterThunkEmitter::CopyWithAlignment(
    _Out_writes_bytes_all_(sizeInBytes) BYTE* dest,
    _In_ const DWORD sizeInBytes,
    _In_reads_bytes_(srcSize) const BYTE* src,
    _In_ const DWORD srcSize,
    _In_ const DWORD alignment)
{
    js_memcpy_s(dest, sizeInBytes, src, srcSize);

    const DWORD alignPad = Math::Align(srcSize, alignment) - srcSize;
    Assert(alignPad <= (sizeInBytes - srcSize));

    // Nothing to pad, or the padding would not fit in what is left of the buffer.
    if (alignPad == 0 || alignPad > (sizeInBytes - srcSize))
    {
        return srcSize;
    }

    // The padding starts immediately after the copied bytes.
    FillDebugBreak(dest + srcSize, alignPad);
    return srcSize + alignPad;
}
#if DBG
// Reports whether address belongs to the interpreter thunk heap.
// With out-of-process JIT enabled the question is forwarded to the JIT server;
// otherwise the local emitBufferManager answers it.
bool
InterpreterThunkEmitter::IsInHeap(void* address)
{
#ifdef ENABLE_OOP_NATIVE_CODEGEN
    JITManager* const jitManager = JITManager::GetJITManager();
    if (jitManager->IsOOPJITEnabled())
    {
        PSCRIPTCONTEXT_HANDLE remoteScript = this->scriptContext->GetRemoteScriptAddr(false);
        const bool serverReachable = remoteScript && jitManager->IsConnected();
        if (!serverReachable)
        {
            // This method backs asserts that validate entry points. If the JIT
            // process crashed, answer true so that those asserts do not fire.
            return true;
        }

        boolean result;
        HRESULT hr = jitManager->IsInterpreterThunkAddr(remoteScript, (intptr_t)address, this->isAsmInterpreterThunk, &result);
        if (!JITManager::HandleServerCallResult(hr, RemoteCallType::HeapQuery))
        {
            // The server call was not handled successfully; answer true here as well.
            return true;
        }
        return result != FALSE;
    }
#endif
    return emitBufferManager.IsInHeap(address);
}
#endif
// Tears down the emitter's thunk bookkeeping and decommits the thunk memory.
// Decommit is deferred until close because code in the thunks might still be
// running before then. The eventual release of the memory is left to the
// destructor of emitBufferManager.
void InterpreterThunkEmitter::Close()
{
#if PDATA_ENABLED
    // Unregister the pdata of every block, whether or not it is on the free list.
    auto dropPdata = [](const ThunkBlock& thunkBlock)
    {
        PDataManager::UnregisterPdata((PRUNTIME_FUNCTION) thunkBlock.GetPdata());
    };
    thunkBlocks.Iterate(dropPdata);
    freeListedThunkBlocks.Iterate(dropPdata);
#endif

    this->thunkBlocks.Clear(allocator);
    this->freeListedThunkBlocks.Clear(allocator);

#ifdef ENABLE_OOP_NATIVE_CODEGEN
    JITManager* const jitManager = JITManager::GetJITManager();
    if (jitManager->IsOOPJITEnabled())
    {
        // Ask the JIT server to decommit, but only if there is a remote script
        // context and the connection is still up.
        PSCRIPTCONTEXT_HANDLE remoteScript = this->scriptContext->GetRemoteScriptAddr(false);
        if (remoteScript && jitManager->IsConnected())
        {
            jitManager->DecommitInterpreterBufferManager(remoteScript, this->isAsmInterpreterThunk);
        }
    }
    else
#endif
    {
        emitBufferManager.Decommit();
    }

    this->thunkCount = 0;
    this->thunkBuffer = nullptr;
}
// Returns a thunk to the free list of the block that contains it.
//   thunkAddress  - address of the thunk being released.
//   addtoFreeList - when false the call does nothing.
// A block found only in thunkBlocks is moved to freeListedThunkBlocks first.
void InterpreterThunkEmitter::Release(BYTE* thunkAddress, bool addtoFreeList)
{
    if (addtoFreeList)
    {
        auto ownsThunk = [thunkAddress](const ThunkBlock& candidate)
        {
            return candidate.Contains(thunkAddress);
        };

        // Prefer a block that is already free-listed; otherwise migrate the owning
        // block over from the regular list.
        ThunkBlock* owner = freeListedThunkBlocks.Find(ownsThunk);
        if (owner == nullptr)
        {
            owner = thunkBlocks.MoveTo(&freeListedThunkBlocks, ownsThunk);
        }

        // If EnsureFreeList fails in an OOM scenario, the thunk is simply leaked.
        if (owner != nullptr && owner->EnsureFreeList(allocator))
        {
            owner->Release(thunkAddress);
        }
    }
}
// Hands out a previously released thunk from the head block of freeListedThunkBlocks.
//   ppDynamicInterpreterThunk - receives the address of the allocated thunk.
// Returns the start address of the block the thunk came from.
// When _M_ARM is set, both addresses have their low bit set (presumably the Thumb
// mode marker - confirm against the ARM thunk code).
BYTE* InterpreterThunkEmitter::AllocateFromFreeList(PVOID* ppDynamicInterpreterThunk )
{
    ThunkBlock& headBlock = this->freeListedThunkBlocks.Head();

    BYTE* dynamicThunk = headBlock.AllocateFromFreeList();
#if _M_ARM
    dynamicThunk = (BYTE*)((DWORD)dynamicThunk | 0x01);
#endif
    *ppDynamicInterpreterThunk = dynamicThunk;

    // A block with nothing left to hand out goes back to the regular block list.
    if (headBlock.IsFreeListEmpty())
    {
        this->freeListedThunkBlocks.MoveHeadTo(&this->thunkBlocks);
    }

    BYTE* blockEntry = headBlock.GetStart();
#if _M_ARM
    blockEntry = (BYTE*)((DWORD)blockEntry | 0x01);
#endif
    return blockEntry;
}
// Returns true when address lies in the half-open range
// [start, start + InterpreterThunkEmitter::BlockSize).
bool ThunkBlock::Contains(BYTE* address) const
{
    if (address < start)
    {
        return false;
    }
    return address < (start + InterpreterThunkEmitter::BlockSize);
}
// Marks the thunk at address as free by setting its bit in the free list.
// The address must belong to this block and the free list must already exist.
void ThunkBlock::Release(BYTE* address)
{
    Assert(Contains(address));
    Assert(this->freeList);

    this->freeList->Set(FromThunkAddress(address));
}
// Takes the thunk for the index returned by freeList->GetNextBit(0), clears its
// free bit, and returns the thunk's address. The free list must already exist.
BYTE* ThunkBlock::AllocateFromFreeList()
{
    Assert(this->freeList);

    const BVIndex freeSlot = this->freeList->GetNextBit(0);
    BYTE* const thunkAddress = ToThunkAddress(freeSlot);
    this->freeList->Clear(freeSlot);
    return thunkAddress;
}
// Converts a thunk address inside this block to its thunk index: the offset from
// start, less HeaderSize, divided by InterpreterThunkEmitter::ThunkSize.
BVIndex ThunkBlock::FromThunkAddress(BYTE* address)
{
    const uint offsetInBlock = (uint)(address - start);
    const uint index = (offsetInBlock - HeaderSize) / InterpreterThunkEmitter::ThunkSize;
    Assert(index < this->thunkCount);
    return index;
}
// Converts a thunk index to the address of that thunk: thunks follow the header
// at intervals of InterpreterThunkEmitter::ThunkSize bytes.
BYTE* ThunkBlock::ToThunkAddress(BVIndex index)
{
    Assert(index < this->thunkCount);

    BYTE* const firstThunk = start + HeaderSize;
    return firstThunk + InterpreterThunkEmitter::ThunkSize * index;
}
// Creates the free-list bit vector on first use, sized to thunkCount and allocated
// from allocator via BVFixed::NewNoThrow.
// Returns true when a free list exists on exit, false when it could not be created.
bool ThunkBlock::EnsureFreeList(ArenaAllocator* allocator)
{
    if (this->freeList != nullptr)
    {
        return true;
    }

    this->freeList = BVFixed::NewNoThrow(this->thunkCount, allocator);
    return this->freeList != nullptr;
}
// Returns true when no bit is set in the free list, i.e. there is no released
// thunk to hand out. The free list must already exist.
bool ThunkBlock::IsFreeListEmpty() const
{
    Assert(this->freeList);

    const bool nothingFree = this->freeList->IsAllClear();
    return nothingFree;
}
#endif // ENABLE_NATIVE_CODEGEN