-
Notifications
You must be signed in to change notification settings - Fork 48
/
x86.js
1642 lines (1449 loc) · 64.2 KB
/
x86.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Copyright (C) 2017-2018 deroad, elicn
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
module.exports = (function() {
const Base = require('libdec/core/base');
const Variable = require('libdec/core/variable');
const Extra = require('libdec/core/extra');
const Syscalls = require('libdec/db/syscalls');
const Long = require('libdec/long');
/**
* Maps a return register to its corresponding size in bits. This is used to
* determine the size of the returned value according to the register that is
* used to return it.
* @type {Object<string,number>}
*/
var _return_regs_bits = {
// keys are the accumulator register names that may hold a return value; values are their widths in bits.
// 'ah' is not listed, so a lookup for it yields undefined and it is never treated as a return register.
'al': 8,
'ax': 16,
'eax': 32,
'rax': 64,
};
/**
* General purpose registers, plus a few others
* @type {Array.<string>}
*/
var _x86_x64_registers = [
// the list is flat (an array of strings); every source row below holds the names that alias
// one physical register, starting with its 64-bit name
// legacy registers that also expose a high-byte alias ('ah', 'bh', 'ch', 'dh')
'rax', 'eax', 'ax', 'al', 'ah',
'rbx', 'ebx', 'bx', 'bl', 'bh',
'rcx', 'ecx', 'cx', 'cl', 'ch',
'rdx', 'edx', 'dx', 'dl', 'dh',
// index, frame pointer and stack pointer registers
'rsi', 'esi', 'si', 'sil',
'rdi', 'edi', 'di', 'dil',
'rbp', 'ebp', 'bp', 'bpl',
'rsp', 'esp', 'sp', 'spl',
// numbered registers, with their 'd' / 'w' / 'b' suffixed sub-registers
'r8', 'r8d', 'r8w', 'r8b',
'r9', 'r9d', 'r9w', 'r9b',
'r10', 'r10d', 'r10w', 'r10b',
'r11', 'r11d', 'r11w', 'r11b',
'r12', 'r12d', 'r12w', 'r12b',
'r13', 'r13d', 'r13w', 'r13b',
'r14', 'r14d', 'r14w', 'r14b',
'r15', 'r15d', 'r15w', 'r15b'
];
var _REGEX_STACK_REG = /^[re]?sp$/;
/**
 * Indicates whether a register name is the system's stack pointer, in any of
 * its widths (`sp`, `esp` or `rsp`).
 * @param {string} name Register name to test; may be empty or undefined
 * @returns {boolean} `true` if `name` is a stack pointer register, `false` otherwise
 */
var _is_stack_reg = function(name) {
    // coerce to a strict boolean, so a missing or empty name yields `false`
    // rather than being echoed back to the caller
    return !!name && _REGEX_STACK_REG.test(name);
};
/**
 * Indicates whether an operand's token begins with 'xmm'.
 * @param {Object} op Operand structure; only its `token` property is inspected
 * @returns {*} Result of the prefix test, or the falsy `token` itself when there is nothing to test
 */
var _is_xmm = function(op) {
    var token = op.token;

    // nothing to test: hand back the falsy token as is
    if (!token) {
        return token;
    }

    return token.startsWith('xmm');
};
var _REGEX_FRAME_REG = /^[re]?bp$/;
/**
 * Indicates whether a register name is the system's frame pointer, in any of
 * its widths (`bp`, `ebp` or `rbp`).
 * @param {string} name Register name to test; may be empty or undefined
 * @returns {boolean} `true` if `name` is a frame pointer register, `false` otherwise
 */
var _is_frame_reg = function(name) {
    // coerce to a strict boolean, so a missing or empty name yields `false`
    // rather than being echoed back to the caller
    return !!name && _REGEX_FRAME_REG.test(name);
};
/**
* Indicates whether the current function has an argument with a specified name.
* @param {string} name A string literal
* @param {Object} context Context object; its `args` array is searched
* @returns {boolean} `true` if function has an argument named `name`, `false` otherwise
*/
var _is_func_arg = function(name, context) {
    // "some argument is named `name`" is expressed as "not every argument has another name"
    var all_differ = context.args.every(function(arg) {
        return arg.name !== name;
    });

    return !all_differ;
};
/**
* Indicates whether the current function has a local variable named `name`.
* @param {string} name A string literal
* @param {Object} context
* @returns {boolean}
*/
var _is_local_var = function(name, context) {
    var locals = context.vars;

    // linear scan that stops at the first variable carrying the requested name
    for (var idx = 0; idx < locals.length; idx++) {
        if (locals[idx].name === name) {
            return true;
        }
    }

    return false;
};
/**
 * Indicates whether the current function has a local variable named `name` whose
 * reference base (`ref.base`) is the stack pointer register.
 * @param {string} name A string literal
 * @param {Object} context Context object; its `vars` array is searched
 * @returns {boolean} `true` if such a variable exists, `false` otherwise
 */
var _is_stack_based_local_var = function(name, context) {
    var locals = context.vars;

    for (var idx = 0; idx < locals.length; idx++) {
        var entry = locals[idx];

        // the base register is only inspected for variables carrying the requested name
        if ((entry.name === name) && _is_stack_reg(entry.ref.base)) {
            return true;
        }
    }

    return false;
};
/**
* For a given local variable `name`, retrieve its offset from the frame pointer. This becomes handy when
* local variables are referred to by their name, but there is a need to query their offset; e.g. returns
* 16 when a variable is referred to by rbp + 16.
* @param {string} name A string literal
* @param {Object} context
* @returns {number} Offset from frame pointer (in bytes), or undefined if no such variable name or
* variable exists, but is not on stack
*/
var _get_var_offset = function(name, context) {
    // TODO: could be done simply with 'find' on ES6, unfortunately Duktape does not recognize it yet
    var matches = context.vars.filter(function(candidate) {
        return candidate.name == name;
    });

    // when several variables share the name, the one appearing last in the list wins
    var chosen = matches[matches.length - 1];

    if (!chosen) {
        return undefined;
    }

    return chosen.ref.offset.low;
};
/**
* Updates function's return value properties if necessary. This is used to
* track the result register in order to determine what the function is going
* to return (if any) in terms of exact register name and size.
* If given register is not a return value register, nothing is changed.
* @param {string} reg Modified register name
* @param {boolean} signed Value is signed?
* @param {object} context Context object
*/
var _has_changed_return = function(reg, signed, context) {
    var width = _return_regs_bits[reg];
    var returns = context.returns;

    // `width` is undefined for registers that are not return registers, in which case the
    // comparison is false and the tracked return properties are left alone
    if (!(width > returns.bits)) {
        return;
    }

    // a wider return register was written: remember its size and signedness
    returns.bits = width;
    returns.signed = signed;
};
/**
* Queries whether a given instruction is the last instruction in function.
* @param {Object} instr Instruction instance to check
* @param {Array<Object>} instructions Array of all instructions in the enclosing function
* @returns {boolean} `true` if given instruction appears as the last instruction in function, `false` otherwise
*/
var _is_last_instruction = function(instr, instructions) {
    var count = instructions.length;

    // compare against the last element directly: this avoids scanning the whole list, and
    // also avoids reporting `true` for an empty list (where a failed indexOf lookup, -1,
    // would coincide with `length - 1`)
    return (count > 0) && (instructions[count - 1] === instr);
};
/**
* Queries whether a given instruction jumps to a location external to its enclosing function.
* @param {Object} instr Instruction instance to check
* @param {Array<Object>} instructions Array of all instructions in the enclosing function
* @returns {boolean} `true` if given instruction jumps outside of its enclosing function, `false` otherwise
*/
var _is_jumping_externally = function(instr, instructions) {
    var first_inst = instructions[0];
    var last_inst = instructions[instructions.length - 1];

    // both bounds must be compared against instruction *locations*; comparing the jump
    // target against the instruction object itself does not yield a meaningful result.
    // when the instruction has no jump target, that falsy value is returned as is
    return instr.jump && (instr.jump.gt(last_inst.location) || instr.jump.lt(first_inst.location));
};
/**
* Determines the size (in bits) of a given register name.
* @param {string} reg Register name
* @returns {!number}
*/
var _find_bits = function(reg) {
    // vector registers are recognized on the full register name. this must happen before
    // the generic split below, whose "name" part holds two characters at most and
    // therefore can never match a full 'xmmN' / 'ymmN' / 'zmmN' name
    var avx_elems = reg.match(/^([xyz])mm\d+/);

    if (avx_elems) {
        return {
            'x': 128,
            'y': 256,
            'z': 512
        }[avx_elems[1]];
    }

    // byte-sized numbered registers (r8b .. r15b): the 'b' suffix is not covered by the
    // generic split below, which would otherwise size them by their 'r' prefix
    if (/^r(?:[89]|1[0-5])b$/.test(reg)) {
        return 8;
    }

    var elems = reg.match(/([re])?(.?[^dwhl]?)([dwhl])?/);

    // reg string is split into an array of 4, where:
    //   [0]: match string
    //   [1]: prefix (either 'r', 'e' or undefined)
    //   [2]: reg name
    //   [3]: suffix (either 'h', 'l', 'w', 'd' or undefined)
    //
    // to determine the register size, the elements are inspected in a fixed order and the
    // first one that is defined wins: suffix -> prefix -> name
    var suffix = elems[3];
    var prefix = elems[1];

    if (suffix != undefined) {
        return {
            'h': 8,
            'l': 8,
            'w': 16,
            'd': 32
        }[suffix];
    }

    if (prefix != undefined) {
        return {
            'e': 32,
            'r': 64
        }[prefix];
    }

    // neither suffix nor prefix: a plain 16-bit register name
    return 16;
};
/**
* Get the number of operands populated for this instruction.
* @param {Object} p Parsed instruction structure
* @returns {number} Number of populated operands
*/
var _num_operands = function(p) {
    var opds = p.opd;

    // walk from the last operand slot towards the first; the position of the last
    // populated slot determines the number of operands
    for (var idx = opds.length - 1; idx >= 0; idx--) {
        if (opds[idx].token != undefined) {
            return idx + 1;
        }
    }

    return 0;
};
/**
* Handles most of arithmetic and bitwise operations.
* @param {Object} p Parsed instruction structure
* @param {Object} op Operator constructor to use
* @param {boolean} flags Whether this operation affects system's flags (for conditions)
* @param {Object} context Context object
* @returns {Object} Instruction instance representing the required operation
*/
var _math_common = function(p, op, flags, context) {
    var dst = p.opd[0];
    var src = p.opd[1];
    var is_signed = context.returns.signed;

    // stack pointer manipulations are ignored
    if (_is_stack_reg(dst.token)) {
        return null;
    }

    _has_changed_return(dst.token, is_signed, context);

    // memory operands are wrapped as pointers; anything else is passed by its token
    var as_argument = function(operand) {
        if (operand.mem_access) {
            return Variable.pointer(operand.token, operand.mem_access, is_signed);
        }

        return operand.token;
    };

    var dst_arg = as_argument(dst);
    var src_arg = as_argument(src);

    // operations that affect flags leave their result to be compared against zero
    if (flags) {
        context.cond.a = dst_arg;
        context.cond.b = '0';
    }

    // dst = dst op src
    return op(dst_arg, dst_arg, src_arg);
};
/**
* Handles arithmetic divisions.
* @param {Object} p Parsed instruction structure
* @param {boolean} signed Signed operation or operands
* @param {Object} context Context object
*/
var _math_divide = function(p, signed, context) {
    var divisor = p.opd[0];
    var divisor_is_ptr = !!divisor.mem_access;

    // operation size: taken from the memory access size, or from the divisor register name
    var osize = divisor.mem_access || _find_bits(divisor.token);

    // implicit registers taking part in a division, keyed by operation size
    var implicit = {
        8: { dividend: ['ax'], remainder: 'ah', quotient: 'al' },
        16: { dividend: ['dx', 'ax'], remainder: 'dx', quotient: 'ax' },
        32: { dividend: ['edx', 'eax'], remainder: 'edx', quotient: 'eax' },
        64: { dividend: ['rdx', 'rax'], remainder: 'rdx', quotient: 'rax' }
    }[osize] || {};

    _has_changed_return(implicit.quotient, signed, context);

    // NOTE(review): the dividend is typed as a pointer whenever the *divisor* is a memory
    // operand - confirm this is intended
    var make_dividend = divisor_is_ptr ? 'pointer' : 'local';

    var arg_dividend = Variable[make_dividend](implicit.dividend.join(':'), osize, signed);
    var arg_quotient = Variable.local(implicit.quotient, osize, signed);
    var arg_remainder = Variable.local(implicit.remainder, osize, signed);

    // quotient = dividend / divisor
    var division = new Base.divide(arg_quotient, arg_dividend, divisor.token);

    // remainder = dividend % divisor
    var modulo = new Base.module(arg_remainder, arg_dividend, divisor.token);

    return Base.composed([division, modulo]);
};
/**
* Handles arithmetic multiplications.
* @param {Object} p Parsed instruction structure
* @param {boolean} signed Signed operation or operands
* @param {Object} context Context object
* @returns {Object} Multiply instruction instance
*/
var _math_multiply = function(p, signed, context) {
    var first = p.opd[0];

    // operation size: this is determined by the size of the first operand
    var osize = first.mem_access || _find_bits(first.token);

    var multiplier;
    var multiplicand;
    var destination;

    // "mul" has a single form with one operand, whereas "imul" comes in three forms, with
    // one, two or three operands; the role of each operand depends on the form in use
    var form = _num_operands(p);

    if (form === 3) {
        // destination, multiplicand, multiplier
        multiplier = p.opd[2];
        multiplicand = p.opd[1];
        destination = [first.token];
    } else if (form === 2) {
        // destination (which is also the multiplicand), multiplier
        multiplier = p.opd[1];
        multiplicand = first;
        destination = [first.token];
    } else if (form === 1) {
        // multiplier only; multiplicand and destination are implied by the operation size
        multiplier = first;
        multiplicand = {
            token: {
                8: 'al',
                16: 'ax',
                32: 'eax',
                64: 'rax'
            }[osize]
        };
        destination = {
            8: ['ax'],
            16: ['dx', 'ax'],
            32: ['edx', 'eax'],
            64: ['rdx', 'rax']
        }[osize];
    }

    // the last destination register is the one that may serve as a return register
    _has_changed_return(destination[destination.length - 1], signed, context);

    var make_multiplicand = multiplicand.mem_access ? 'pointer' : 'local';
    var make_multiplier = multiplier.mem_access ? 'pointer' : 'local';

    // the destination is as wide as all of its registers put together
    var arg_destination = Variable.local(destination.join(':'), osize * destination.length, signed, false, false);
    var arg_multiplicand = Variable[make_multiplicand](multiplicand.token, osize, signed);
    var arg_multiplier = Variable[make_multiplier](multiplier.token, osize, signed);

    // destination = multiplicand * multiplier
    return Base.multiply(arg_destination, arg_multiplicand, arg_multiplier);
};
/**
* Handles bitwise rotation operations.
* @param {Object} p Parsed instruction structure
* @param {Object} op Operator constructor to use
* @param {Object} context Context object
* @returns {Object} Bitwise rotation instruction instance
*/
var _bitwise_rotate = function(p, op, context) {
    var target = p.opd[0];
    var amount = p.opd[1];
    var is_signed = context.returns.signed;

    _has_changed_return(target.token, is_signed, context);

    // memory operands are wrapped as pointers; anything else is passed by its token
    var as_argument = function(operand) {
        if (operand.mem_access) {
            return Variable.pointer(operand.token, operand.mem_access, is_signed);
        }

        return operand.token;
    };

    var target_arg = as_argument(target);
    var amount_arg = as_argument(amount);

    // width of the rotated value, in bits
    var width = target.mem_access || _find_bits(target.token);

    // target = target op amount
    return op(target_arg, target_arg, amount_arg, width);
};
/**
* Handles SETcc (conditional set) instructions.
* @param {Object} p Parsed instruction structure
* @param {boolean} signed Signed operation or operands
* @param {string} condition Operation string literal
* @param {Object} context Context object
*/
var _setcc_common = function(p, signed, condition, context) {
    var target = p.opd[0].token;
    var cond = context.cond;

    _has_changed_return(target, signed, context);

    // target = (cond.a <condition> cond.b) ? '1' : '0'
    return Base.conditional_assign(target, cond.a, cond.b, condition, '1', '0');
};
/**
* Handles Jcc (conditional jump) instructions.
* @param {Object} instr Instruction instance to attach the condition to
* @param {Object} context Context object
* @param {string} type Condition type symbol
*/
var _jcc_common = function(instr, context, type) {
    var cond = context.cond;

    // the jump itself emits nothing; the tracked comparison operands are attached to the
    // instruction along with the condition type
    instr.conditional(cond.a, cond.b, type);

    return Base.nop();
};
/**
* Handles CMOV (conditional mov) instructions.
* @param {Object} instr Instruction instance to handle
* @param {Object} context Context object
* @param {Array<Object>} instrs Array of function's instructions
* @param {string} type Condition type symbol
*/
var _cmov_common = function(instr, context, instrs, type) {
    var cond = context.cond;

    instr.conditional(cond.a, cond.b, type);

    // the instruction's jump target is set to the location of the instruction that follows it
    var following = instrs[instrs.indexOf(instr) + 1];
    instr.jump = following.location;

    // the assignment itself is handled like a plain "mov"
    return _standard_mov(instr, context, instrs);
};
// TODO: the following function should be moved to a higher analysis level, and be applied by operand size
// rather than the number of bits in the architecture.
/**
* Convert known magic values known to represent negative numbers.
* @param {string} x Value string
* @returns {string} Negative representation of `x` if known to be a negative value, `x` otherwise
*/
var _check_known_neg = function(x) {
    var bits = Global.evars.archbits;
    var all_ones;

    // the all-ones pattern, in the width of the architecture, stands for -1
    if (bits === 64) {
        all_ones = '0xffffffffffffffff';
    } else if (bits === 32) {
        all_ones = '0xffffffff';
    } else if (bits === 16) {
        all_ones = '0xffff';
    }

    if (x === all_ones) {
        return '-1';
    }

    return x;
};
/**
* Try to guess the number of arguments passed to a specific cdecl function call, when
* number of arguments is either unknown or may vary (i.e. like in variadic functions).
* @param {Array<Object>} instrs Array of instructions preceding the function call
* @param {Object} context Context object
* @returns {number} Number of guessed arguments passed in this cdecl function call
*/
var _guess_cdecl_nargs = function(instrs, context) {
    var nargs = 0;

    // scan preceding instructions backwards, looking for evidence of passed arguments
    for (var idx = instrs.length - 1; idx >= 0; idx--) {
        var parsed = instrs[idx].parsed;
        var first = parsed.opd[0];

        switch (parsed.mnem) {
        case 'push':
            // a "push" instruction which is not the function's prologue is probably
            // setting up an argument
            if (!_is_frame_reg(first.token)) {
                nargs++;
            }
            break;
        case 'mov':
            // an argument may also be written straight to the stack, either through the
            // stack pointer or through a stack based variable
            if ((first.mem_access && _is_stack_reg(first.token)) || _is_stack_based_local_var(first.token, context)) {
                nargs++;
            }
            break;
        case 'add':
            // reached the previous function call cleanup, stop searching
            if (_is_stack_reg(first.token)) {
                return nargs;
            }
            break;
        case 'call':
            // reached the previous function call, stop searching
            return nargs;
        }
    }

    return nargs;
};
/**
* Try to guess the number of arguments passed to a specific amd64 systemv function call,
* when number of arguments is either unknown or may vary (i.e. like in variadic functions).
* @param {Array<Object>} instrs Array of instructions preceding the function call
* @param {Object} context Context object
* @returns {number} Number of guessed arguments passed in this amd64 systemv function call
*/
var _guess_amd64_nargs = function(instrs, context) {
    // TODO: implement this
    // until then, no arguments are guessed regardless of the preceding instructions
    return 0;
};
/**
* Return a list of the cdecl function call arguments.
* @param {Array<Object>} instrs Array of instructions preceding the function call
* @param {number} nargs Number of arguments expected for this function call
* @param {Object} context Context object
* @returns {Array<Variable>} An array of arguments instances, ordered as declared in callee
*/
var _populate_cdecl_call_args = function(instrs, nargs, context) {
// collected arguments. slots are filled by stack offset (for "mov") or by encounter order
// (for "push"), so the list may be sparse; it is compacted before being returned
var args = [];
// next free slot for arguments that are passed using "push"
var argidx = 0;
var arg;
// names of all known local variables, used to detect a stack slot that is masked by a variable name
var varsname = context.vars.map(function(x) {
return x.name;
});
// walk backwards from the call site, until all the expected arguments are collected
for (var i = instrs.length - 1; i >= 0 && nargs > 0; i--) {
arg = null;
var mnem = instrs[i].parsed.mnem;
var opd1 = instrs[i].parsed.opd[0];
var opd2 = instrs[i].parsed.opd[1];
// stop at any instruction that has a jump target, or at a preceding call
if (instrs[i].jump || mnem == 'call') {
break;
}
// passing argument by referring to stack pointer directly rather than pushing
if (mnem === 'mov') {
// normally arguments will be passed in the order they are defined at the callee declaration. however
// it is not guaranteed, so we will need the stack offset that is used to determine which argument
// is being set; for example, "mov [esp + 12], val" indicates that the 3rd argument is being set
var offset, idx;
// opd1.token may be set to a variable name, and therefore mask the stack pointer dereference. for that
// reason we also check whether it appears as a stack variable, to extract its offset from stack pointer.
// [another option would be undefining that variable manually using the "afvs-" r2 command]
// check whether this is a plain stack pointer dereference, or a stack pointer dereference masked by a
// variable name. if the former, extract the offset manually; if the latter, use r2 data to retrieve
// that value.
idx = varsname.indexOf(opd1.token);
if (idx >= 0) {
// NOTE(review): any known variable name lands here, so the stack based variable branch below is only
// reachable for names missing from varsname; using the remaining count as the slot index - confirm intent
offset = nargs;
} else if (opd1.mem_access && _is_stack_reg(opd1.token)) {
// NOTE(review): the pattern accepts decimal offsets only, and parseInt is called without a radix.
// a token that passed _is_stack_reg is a bare register name, so deref is presumably always null here
var deref = opd1.token.match(/[er]?[bs]p(?:\s+\+\s+(\d+))/);
// convert the byte offset into an argument index, using the architecture's word size
offset = deref ? (parseInt(deref[1]) / (Global.evars.archbits / 8)) : 0;
} else if (_is_stack_based_local_var(opd1.token, context)) {
offset = Math.abs(_get_var_offset(opd1.token, context)) / (Global.evars.archbits / 8);
} else {
// an irrelevant 'mov' instruction; nothing to do here
continue;
}
// prefer the string attached to the instruction, if any, over the raw source operand
arg = instrs[i].string ?
Variable.string(instrs[i].string) :
Variable[opd2.mem_access ? 'pointer' : 'local'](opd2.token, Extra.to.type(opd2.mem_access, false));
// the instruction was consumed as an argument setup
// (presumably, clearing 'valid' keeps it out of the emitted output - TODO confirm)
instrs[i].valid = false;
args[offset] = arg;
nargs--;
} else if (mnem === 'push') {
// passing argument by pushing them to stack
// when the pushed register was loaded by a "lea" just before the push, fold the two
// instructions together and use the "lea" source operand as the argument
if (instrs[i - 1] &&
['lea'].indexOf(instrs[i - 1].parsed.mnem) >= 0 &&
opd1.token == instrs[i - 1].parsed.opd[0].token) {
instrs[i - 1].valid = false;
opd2 = instrs[i - 1].parsed.opd[1];
arg = instrs[i - 1].string ?
Variable.string(instrs[i - 1].string) :
Variable[opd2.mem_access ? 'pointer' : 'local'](opd2.token, Extra.to.type(opd2.mem_access, false));
} else {
arg = instrs[i].string ?
Variable.string(instrs[i].string) :
Variable[opd1.mem_access ? 'pointer' : 'local'](opd1.token, Extra.to.type(opd1.mem_access, false));
}
instrs[i].valid = false;
// pushed arguments are stored in the order they are encountered, i.e. last pushed first
// NOTE(review): "push" and "mov" arguments share the same list but index it differently, so a
// mix of both styles for one call may overwrite a slot - confirm this cannot happen
args[argidx++] = arg;
nargs--;
}
}
// drop the slots that were never filled
return args.filter(function(x) {
return !!x;
});
};
/**
* Return a list of the amd64 systemv function call arguments.
* @param {Array<Object>} instrs Array of instructions preceding the function call
* @param {number} nargs Number of arguments expected for this function call
* @param {Object} context Context object (not used)
* @returns {Array<Variable>} An array of arguments instances, ordered as declared in callee
*/
var _populate_amd64_call_args = function(instrs, nargs, context) {
    var _regs64 = ['rdi', 'rsi', 'rdx', 'rcx', 'r8', 'r9'];
    var _regs32 = ['edi', 'esi', 'edx', 'ecx', 'r8d', 'r9d'];

    // kernel interface uses r10 instead of rcx for the 4th argument
    var _krnl = ['r10', 'r10d'];
    var _krnl_slot = 3;

    // maps every argument register name to its argument slot. an explicit map is used rather
    // than position arithmetic over concatenated lists: array literals padded with elisions
    // drop their trailing hole, which shifts the names that follow to a wrong slot
    var slot_of = {};

    _regs64.forEach(function(reg, slot) {
        slot_of[reg] = slot;
    });

    _regs32.forEach(function(reg, slot) {
        slot_of[reg] = slot;
    });

    _krnl.forEach(function(reg) {
        slot_of[reg] = _krnl_slot;
    });

    // arguments are set to default values which will be used in case we cannot find any reference to them
    // in the preceding assembly code. for example, the caller passes its first argument ('rdi') as the first
    // argument to the callee; in such case we won't find its initialization instruction, so we'll just use 'rdi'.
    var args = _regs64.slice(0, nargs);
    var seen_regs = []; // regs can be used only once.

    // scan the preceding instructions to find where args registers are used, to take their values
    for (var i = (instrs.length - 1); (i >= 0) && (nargs > 0); i--) {
        // stop at any instruction that has a jump target, or at a preceding call
        if (instrs[i].jump || instrs[i].parsed.mnem == 'call') {
            break;
        }

        var opd1 = instrs[i].parsed.opd[0];
        var opd2 = instrs[i].parsed.opd[1];

        // look for an instruction that has two arguments. we assume that such an instruction would use
        // its second operand to set the value of the first. although this is not an accurate observation,
        // it could be used to replace the argument with its value on the arguments list
        if (opd2.token && ['mov', 'xor', 'lea'].indexOf(instrs[i].parsed.mnem) >= 0) {
            var argidx = Object.prototype.hasOwnProperty.call(slot_of, opd1.token) ? slot_of[opd1.token] : (-1);
            var argvalue = opd2.token;
            var argsize = opd2.mem_access;
            var notseen = seen_regs.indexOf(opd2.token) < 0;

            if (notseen) {
                seen_regs.push(opd2.token);

                // xor'ing a register with itself zeroes it
                if (instrs[i].parsed.mnem == 'xor' && opd1.token == opd2.token) {
                    argvalue = '0';
                }
            } else {
                // the source was already consumed; fall back to the destination operand
                argvalue = opd1.token;
                argsize = opd1.mem_access;
            }

            // is destination operand an amd64 systemv argument which has not been considered yet?
            // (a slot that still holds its default register name is a plain string)
            if ((argidx > (-1)) && (typeof args[argidx] === 'string')) {
                // take the second operand value, that is likely to be used as the first operand's
                // initialization value.
                var arg = instrs[i].string ?
                    Variable.string(instrs[i].string) :
                    Variable[opd2.mem_access ? 'pointer' : 'local'](argvalue, Extra.to.type(argsize, false));

                instrs[i].valid = !notseen;
                args[argidx] = arg;
                nargs--;
            }
        }
    }

    return args.filter(function(x) {
        return !!x;
    });
};
/**
* Handles function calls: determines whether the call is a tail call, whether its return
* value is used by the following instructions, collects the call arguments from the
* preceding instructions and builds the call expression.
* @param {Object} instr Call instruction instance
* @param {Object} context Context object
* @param {Array<Object>} instrs Array of all instructions in the enclosing function
* @param {boolean} is_pointer Whether the call target should be wrapped as a function pointer
* @returns {Object} The call itself, a return statement wrapping it (a tail call through a register),
* or an assignment of the call to the register in which its result is used
*/
var _call_function = function(instr, context, instrs, is_pointer) {
var start = instrs.indexOf(instr);
// indicates the function call return type (if used)
var returnval = undefined;
var tailcall = false;
// is this a tail call?
if (_is_last_instruction(instr, instrs)) {
tailcall = true;
} else {
// scan the instructions down the road to see whether the function's call return
// value is used or ignored. if it is used, use that information to infer the return type
// TODO: to do this properly, we need to follow possible branches rather than scan sequentially
for (var i = (start + 1); i < instrs.length; i++) {
var mnem = instrs[i].parsed.mnem;
var dst = instrs[i].parsed.opd[0].token;
var src = instrs[i].parsed.opd[1].token;
// determining whether an instruction reads or writes a gpr is not trivial at this point.
// assuming that the lhand (first) operand is always overwritten and the rhand (second)
// operand is always read, is far from being accurate as many instructions may read the
// lhand operand before overwriting it (i.e. when updating the first operand, but not only).
//
// the following code tries to work around this, quite poorly though, by listing the
// instructions that only write their first operand; any other instruction is assumed to
// read its first operand as well
var insn_uses_dst_as_src = (mnem.match(/pop|lea|c?mov\w*|set\w+/) == null);
if (src in _return_regs_bits) {
// the return register is read as a source operand
returnval = src;
_has_changed_return(src, false, context);
break;
} else if (insn_uses_dst_as_src && (dst in _return_regs_bits)) {
// the return register is read (and updated) as a destination operand
returnval = dst;
_has_changed_return(dst, false, context);
break;
} else if (dst in _return_regs_bits) {
// register used to store returned value is overwritten
break;
} else if (mnem.match(/\b(call|i?div|i?mul|lods[bwdq]?|in(?:s[bwd]?)?)\b/)) {
// register used to store returned value is clobbered
break;
}
}
}
var callsite = instr.parsed.opd[0];
// prefer the symbol attached to the instruction, if any, over the raw operand
var callname = instr.symbol || callsite.token;
var nargs, args = [];
var callee = instr.callee;
if (callee) {
// callee information is available: pick the helpers matching its calling convention
var guess_nargs = {
'cdecl': _guess_cdecl_nargs,
'amd64': _guess_amd64_nargs
}[callee.calltype];
var populate_call_args = {
'cdecl': _populate_cdecl_call_args,
'amd64': _populate_amd64_call_args
}[callee.calltype];
// NOTE(review): a calltype other than 'cdecl' or 'amd64' leaves both helpers undefined, and the
// calls below would then throw - confirm no other calltype can show up here
// every non-import callee has a known number of arguments
// for imported libc functions, get the number of arguments out of a predefined list
nargs = callee.name.startsWith('sym.') ?
Extra.find.arguments_number(callee.name) :
callee.nargs;
// if number of arguments is unknown (either an unrecognized or a variadic function),
// try to guess the number of arguments
if (nargs == (-1)) {
nargs = guess_nargs(instrs.slice(0, start), context);
}
args = populate_call_args(instrs.slice(0, start), nargs, context);
} else {
// trying to identify the fcn..
nargs = callname.startsWith('sym.') ?
Extra.find.arguments_number(callname) : -1;
// if number of arguments is unknown (either an unrecognized or a variadic function),
// try to guess the number of arguments
// from here on, 'callee' is reused to hold the function that populates the arguments
if (nargs == (-1)) {
nargs = _guess_cdecl_nargs(instrs.slice(0, start), context);
callee = _populate_cdecl_call_args;
}
// NOTE(review): _guess_cdecl_nargs counts upwards from zero and never yields -1, so this
// amd64 fallback looks unreachable - confirm
if (nargs == (-1)) {
nargs = _guess_amd64_nargs(instrs.slice(0, start), context);
callee = _populate_amd64_call_args;
}
// NOTE(review): when the number of arguments was found in the predefined list, 'callee' is
// still unset at this point and no arguments are collected - confirm this is intended
if (callee) {
args = callee(instrs.slice(0, start), nargs, context);
}
}
// calls to a raw address, or through a register, are expressed as function pointers
if (callname.startsWith('0x')) {
callname = Variable.functionPointer(callname, callsite.mem_access, args);
} else if (is_pointer || (!callsite.mem_access && _x86_x64_registers.indexOf(callname) > (-1))) {
callname = Variable.functionPointer(callname, 0, args);
}
var call = Base.call(callname, args);
if (tailcall) {
// ControlFlow does not interpret well the specific case of a tail jmp through
// a register. in this case, we will need to emit an explicit return statement
if (_x86_x64_registers.indexOf(callsite.token) > (-1)) {
return Base.return(call);
}
return call;
}
// if return value is used, assign it. otherwise just emit the call
if (returnval) {
return Base.assign(returnval, call);
} else {
return call;
}
};
/**
 * Handles plain assignments ("mov" and alike): memory writes, memory reads and
 * register to register assignments.
 * @param {Object} instr Instruction instance to handle
 * @param {Object} context Context object
 * @param {Array<Object>} instructions Array of all instructions in the enclosing function
 * @returns {Object} Instruction instance representing the assignment, or `null` when the
 * assignment targets the stack pointer or the frame pointer
 */
var _standard_mov = function(instr, context, instructions) {
    var parsed = instr.parsed;
    var dst = parsed.opd[0];
    var src = parsed.opd[1];
    var prev = instructions[instructions.indexOf(instr) - 1];

    _has_changed_return(dst.token, context.returns.signed, context);

    // writing to memory
    if (dst.mem_access) {
        var written = instr.string ? Variable.string(instr.string) : src.token;

        return Base.write_memory(dst.token, written, dst.mem_access, true);
    }

    // reading from memory
    if (src.mem_access) {
        return Base.read_memory(src.token, dst.token, src.mem_access, true);
    }

    // stack pointer and frame pointer setup is not emitted
    if (_is_stack_reg(dst.token) || _is_frame_reg(dst.token)) {
        return null;
    }

    // when the previous instruction is of the same kind and has just set the register that
    // serves as the source here, use the value it was set to as the source instead
    var follows_assignment = prev &&
        (prev.parsed.mnem == parsed.mnem) &&
        (prev.parsed.opd[0].token == src.token) &&
        !prev.parsed.opd[0].mem_access &&
        !src.mem_access;

    if (follows_assignment) {
        src = parsed.opd[1] = prev.parsed.opd[1];
    }

    var value;

    if (instr.string) {
        value = Variable.string(instr.string);
    } else {
        value = Variable[src.mem_access ? 'pointer' : 'local'](src.token, src.mem_access, false);
    }

    return Base.assign(dst.token, value);
};
/**
* Handles assignments that require size extension.
* @param {object} p Parsed instruction structure
* @param {boolean} signed Signed operation
* @param {object} context Context structure
*/
var _extended_mov = function(p, signed, context) {
    var target = p.opd[0];
    var source = p.opd[1];

    _has_changed_return(target.token, signed, context);

    // register source: cast it to a type as wide as the target register
    if (!source.mem_access) {
        var target_type = Extra.to.type(_find_bits(target.token), true);

        return Base.cast(target.token, source.token, target_type);
    }

    // memory source: read it using the requested signedness
    return Base.read_memory(source.token, target.token, source.mem_access, signed);
};
/**
 * Handles string instructions ("lods", "stos" and "movs"), with or without a "rep" prefix.
 * @param {Object} instr Instruction instance to handle
 * @param {Object} context Context object (not used)
 * @returns {Object} Composed instruction holding the assignment, the loop counter decrement
 * (for repeated instructions) and the pointers adjustments
 */
var _string_common = function(instr, context) {
    // possible instructions:
    //  o lods : lhand = rhand; rhand += osize;
    //  o stos : lhand = rhand; lhand += osize;
    //  o movs : lhand = rhand; rhand += osize; lhand += osize;
    //  o cmps : $zf = cmp(lhand, rhand); rhand += osize; lhand += osize;
    //  o scas : $zf = cmp(lhand, rhand); rhand += osize;
    var parsed = instr.parsed;
    var first = parsed.opd[0];
    var second = parsed.opd[1];

    // operands, as they appear in the disassembly:
    //   scasd eax, dword es:[edi]
    //   cmpsd dword [esi], dword ptr es:[edi]
    //   lodsd eax, dword [esi]
    //   stosd dword es:[edi], eax
    //   movsd dword es:[edi], dword ptr [esi]

    // each recipe lists: destination, source, and the operands whose pointers are adjusted
    var recipes = {
        'lods': [first, second, [second]],
        'stos': [first, second, [first]],
        'movs': [first, second, [second, first]],
        // 'cmps': [$zf, cmp(lhand, rhand), [rhand, lhand]],
        // 'scas': [$zf, cmp(lhand, rhand) ,[rhand]]
    };
    var recipe = recipes[parsed.mnem.substr(0, 4)];

    // TODO: the direction in which the source and destination pointers are going depends on the value of the
    // direction flag. normally the direction flag is cleared just before a string operation using the "cld"
    // instruction, but this is not necessarily the case. however, since we do not keep track of the df value
    // (yet), we have no way to know for sure whether it is set (pointers are decreasing) or cleared (pointers
    // are increasing).
    //
    // tracking the "cld" and "std" instructions may not be sufficient since the flags register might be
    // modified in various ways, e.g. by combining a "pushf" and a "popf" instructions with some bitwise
    // manipulation in between. until this is taken care of, we may just assume that the direction flag
    // is cleared.
    var dflag = 0;
    var adjust;

    if (dflag) {
        adjust = Base.decrease;
    } else {
        adjust = Base.increase;
    }

    // the register used as the repetitions counter depends on the architecture's width
    var counter = {
        16: 'cx',
        32: 'ecx',
        64: 'rcx'
    }[Global.evars.archbits];

    // possible prefixes:
    //  o rep
    //  o repe / repz
    //  o repne / repnz
    // TODO: e|z and ne|nz suffixes are relevant only for "scas" and "cmps", which are currently not supported
    var repeated = parsed.pref && parsed.pref.match(/(rep)(n)?([ze])?/);

    // a repeated instruction loops back to itself for as long as the counter is not zero
    if (repeated) {
        instr.conditional(counter, '0', 'NE');
        instr.jump = instr.location;
    }

    var dst = recipe[0];
    var src = recipe[1];
    var adjusted = recipe[2];
    var ops = [];

    // assignment
    var assign_dst = Variable[dst.mem_access ? 'pointer' : 'local'](dst.token, dst.mem_access, false);
    var assign_src = Variable[src.mem_access ? 'pointer' : 'local'](src.token, src.mem_access, false);

    ops.push(Base.assign(assign_dst, assign_src));

    // loop counter decrement
    if (repeated) {
        ops.push(Base.decrease(counter, 1));
    }

    // source and destination pointers increment / decrement, by the operation size in bytes
    var step = (dst.mem_access || src.mem_access) / 8;

    for (var idx = 0; idx < adjusted.length; idx++) {
        ops.push(adjust(adjusted[idx].token, step));
    }

    // TODO: if (repeated[3]) add a condition that tests $zf and break if (repeated[2] ? clear : set)
    return Base.composed(ops);
};
/**
 * Looks up the value recorded for a register. When there is no value recorded for the
 * register name itself, the names aliasing the same physical register are tried as well
 * (e.g. a value recorded for 'rax' is found when 'eax' is requested).
 * @param {string} expected Register name to look up
 * @param {Object} map Maps register names to their recorded values
 * @returns {*} The recorded value, or `null` if there is none
 */
var _get_reg_value_from_map = function(expected, map) {
    if (map[expected]) {
        return map[expected];
    }

    // the registers list is flat, so the alias families are rebuilt from it: every family
    // starts at its 64-bit name (e.g. 'rax' or 'r8') and extends up to the next one
    var families = [];

    _x86_x64_registers.forEach(function(reg) {
        if ((families.length === 0) || /^r(?:[a-z]{2}|\d+)$/.test(reg)) {
            families.push([]);
        }

        families[families.length - 1].push(reg);
    });

    // find the family that the requested register belongs to
    var family = null;

    for (var i = 0; i < families.length; i++) {
        if (families[i].indexOf(expected) >= 0) {
            family = families[i];
            break;
        }
    }

    if (!family) {
        return null;
    }

    // return the first value recorded for any of the register's aliases
    for (var j = 0; j < family.length; j++) {
        var data = map[family[j]];

        if (data) {
            return data;
        }
    }

    return null;
};
var _syscall_common = function(instr, instructions, sysinfo, regs) {
if (!sysinfo) {
return null;
}
if (sysinfo.comment) {
instr.comments.push(sysinfo.comment);
}
if (!sysinfo.table) {
return null;
}
var reglist = {};
var pos = instructions.indexOf(instr);
var end = pos - regs.length;
// push pop variable to save the register.
var push_instr = null;
var dst, src;
for (var i = pos - 1; i >= end; i--) {
var prev = instructions[i] || {};
if (!prev || !prev.parsed) {
continue;
}
if (prev.parsed.mnem == 'int' || prev.parsed.mnem == 'syscall') {
break;
}
if (prev.parsed.mnem == 'pop' && end > 0) {
end--;
push_instr = prev;
}
if (prev.parsed.mnem != 'mov' && prev.parsed.mnem != 'push') {
continue;
}
if (prev.parsed.mnem == 'push') {
if (!push_instr) {
break;
}
dst = push_instr.parsed.opd[0];
push_instr = null;
src = prev.parsed.opd[0];
} else {
dst = prev.parsed.opd[0];
src = prev.parsed.opd[1];
}
if (prev.string) {