From cd692923282561219fc4cff994a1bd3fdde4cb6d Mon Sep 17 00:00:00 2001 From: Windel Bouwman Date: Mon, 24 Jun 2019 22:29:33 +0200 Subject: [PATCH] Change C string literal type to array of char. --- .hgignore | 11 ++ docs/development.rst | 11 ++ examples/c/hello/std.c | 2 +- examples/c/hello/std.h | 2 +- examples/python/python_snake.py | 45 ++--- librt/libc/assert.h | 3 +- ppci/lang/c/codegenerator.py | 286 ++++++++++++++++-------------- ppci/lang/c/context.py | 62 +------ ppci/lang/c/eval.py | 109 ++++++++++++ ppci/lang/c/nodes/declarations.py | 9 +- ppci/lang/c/nodes/expressions.py | 26 ++- ppci/lang/c/nodes/types.py | 5 + ppci/lang/c/nodes/visitor.py | 9 +- ppci/lang/c/parser.py | 2 + ppci/lang/c/scope.py | 4 +- ppci/lang/c/semantics.py | 24 ++- ppci/lang/c/synthesize.py | 54 ++++-- ppci/lang/python/ir2py.py | 2 + ppci/wasm/arch.py | 3 +- ppci/wasm/ppci2wasm.py | 26 ++- readme.rst | 1 + test/lang/c/test_c.py | 14 ++ tools/compile_lcc.py | 6 +- tools/ppci_explorer.py | 20 ++- 24 files changed, 469 insertions(+), 267 deletions(-) create mode 100644 ppci/lang/c/eval.py diff --git a/.hgignore b/.hgignore index 76ca4390..cb6f1321 100644 --- a/.hgignore +++ b/.hgignore @@ -22,7 +22,16 @@ docs/_build docs/examples.zip examples/linux64/hello/hello examples/linux64/snake/snake +examples/linux64/fib/main +examples/linux64/algos/main +examples/linux64/wasm_fac/wasm_fact +examples/m68k/demo +examples/microblaze/hello/trace.txt +examples/python/generated_python_structs.py +examples/python/python_snake2.py +examples/riscvpicorv32/picorv examples/toydsl/dslenv +examples/toydsl/example .pytest_cache/ test/.coverage test/listings @@ -31,6 +40,8 @@ test/FORTRAN .mypy_cache *report.html tools/report*.html +tools/ppci_explorer_source.txt +tools/arch_info.html .cache .coverage htmlcov diff --git a/docs/development.rst b/docs/development.rst index 3dfdcd8d..8ab518aa 100644 --- a/docs/development.rst +++ b/docs/development.rst @@ -108,6 +108,17 @@ pyprof2calltree. 
$ pip install pyprof2calltree $ pyprof2calltree -i profiled.out -k +Debugging tests +~~~~~~~~~~~~~~~ + +To debug test cases, a handy trick is to use pudb (when not using a fancy IDE +like VS Code or PyCharm). To do this, specify the debugger to use with pytest +like this: + +.. code:: bash + + $ pytest -v --pdb --pdbcls pudb.debugger:Debugger --capture=no + Debugging dynamic code ~~~~~~~~~~~~~~~~~~~~~~ diff --git a/examples/c/hello/std.c b/examples/c/hello/std.c index c16c2c82..5bf6a214 100644 --- a/examples/c/hello/std.c +++ b/examples/c/hello/std.c @@ -1,6 +1,6 @@ #include "std.h" -void printf() +void printf(char* x) { } diff --git a/examples/c/hello/std.h b/examples/c/hello/std.h index 063edc65..db516870 100644 --- a/examples/c/hello/std.h +++ b/examples/c/hello/std.h @@ -1,2 +1,2 @@ -void printf(); +void printf(char*); diff --git a/examples/python/python_snake.py b/examples/python/python_snake.py index 5b0c8201..77300703 100644 --- a/examples/python/python_snake.py +++ b/examples/python/python_snake.py @@ -1,35 +1,40 @@ import io -from ppci.api import ir_to_python, c3toir, get_arch +from ppci.api import ir_to_python, c3_to_ir, get_arch def run_it(): - arch = get_arch('example') - bsp = io.StringIO(""" + arch = get_arch("example") + bsp = io.StringIO( + """ module bsp; public function void sleep(int ms); public function void putc(byte c); public function bool get_key(int* key); - """) + """ + ) - ircode = c3toir( - ['../src/snake/game.c3', '../src/snake/main.c3', '../../librt/io.c3'], - [bsp], arch) + ircode = c3_to_ir( + ["../src/snake/game.c3", "../src/snake/main.c3", "../../librt/io.c3"], + [bsp], + arch, + ) - with open('python_snake2.py', 'w') as f: - print('import time', file=f) - print('import sys', file=f) - print('import threading', file=f) - ir_to_python(ircode, f) + with open("python_snake2.py", "w") as f: + print("import time", file=f) + print("import sys", file=f) + print("import threading", file=f) + ir_to_python([ircode], f) - print('', file=f) - 
print('def bsp_putc(c):', file=f) + print("", file=f) + print("def bsp_putc(c):", file=f) print(' print(chr(c), end="")', file=f) - print('def bsp_get_key(x):', file=f) - print(' return 0', file=f) - print('def bsp_sleep(x):', file=f) - print(' time.sleep(x*0.001)', file=f) - print('main_main()', file=f) + print("def bsp_get_key(x):", file=f) + print(" return 0", file=f) + print("def bsp_sleep(x):", file=f) + print(" time.sleep(x*0.001)", file=f) + print("main_main()", file=f) + + print("Now run python_snake2.py !") - print('Now run python_snake2.py !') run_it() diff --git a/librt/libc/assert.h b/librt/libc/assert.h index 1e185bc9..d20bea37 100644 --- a/librt/libc/assert.h +++ b/librt/libc/assert.h @@ -1,6 +1,7 @@ #ifndef ASSERT_H #define ASSERT_H -#define assert(x) +// do nothing for now.. +#define assert(x) (0) #endif diff --git a/ppci/lang/c/codegenerator.py b/ppci/lang/c/codegenerator.py index 40a0dbc3..f9cba2a2 100644 --- a/ppci/lang/c/codegenerator.py +++ b/ppci/lang/c/codegenerator.py @@ -15,6 +15,7 @@ from .nodes.types import BasicType from .scope import RootScope from ...utils.bitfun import value_to_bits, bits_to_bytes +from .eval import ConstantExpressionEvaluator class CCodeGenerator: @@ -34,6 +35,7 @@ def __init__(self, context): self.continue_block_stack = [] # A stack of for loops self.labeled_blocks = {} self.switch_options = None + self.unreachable = False self.static_counter = 0 # Unique number to make static vars unique int_types = {2: ir.i16, 4: ir.i32, 8: ir.i64} uint_types = {2: ir.i16, 4: ir.u32, 8: ir.u64} @@ -54,6 +56,7 @@ def __init__(self, context): BasicType.DOUBLE: (ir.f64, 8), BasicType.LONGDOUBLE: (ir.f64, 8), # TODO: is this correct? 
} + self._constant_evaluator = LinkTimeExpressionEvaluator(self) def get_label_block(self, name): """ Get the ir block for a given label, and create it if necessary """ @@ -125,7 +128,7 @@ def error(self, message, location): def gen_global_variable(self, var_decl): """ Generate code for a global variable """ assert isinstance(var_decl, declarations.VariableDeclaration) - if var_decl.storage_class == "extern": + if var_decl.storage_class == "extern" and not var_decl.is_definition(): # create an external variable: ir_var = ir.ExternalVariable(var_decl.name) self.builder.module.add_external(ir_var) @@ -173,26 +176,7 @@ def gen_global_ival(self, typ, ival): def gen_global_initialize_expression(self, typ, expr): """ Generate memory slab for global expression. """ - # First check for string literals and variable / function references - if isinstance(expr, expressions.VariableAccess): - declaration = expr.variable.declaration - if isinstance( - declaration, - ( - declarations.VariableDeclaration, - declarations.FunctionDeclaration, - ), - ): - # emit reference to global symbol - cval = (ir.ptr, declaration.name) - else: - cval = self.context.eval_expr(expr) - elif isinstance(expr, expressions.StringLiteral): - # Now emit new global variable, and return pointer to it. 
- text_var = self.gen_global_string_constant(expr) - cval = (ir.ptr, text_var.name) - else: - cval = self.context.eval_expr(expr) + cval = self._constant_evaluator.eval_expr(expr) if isinstance(cval, tuple): assert cval[0] is ir.ptr and len(cval) == 2 @@ -952,34 +936,11 @@ def gen_expr(self, expr, rvalue=False): elif isinstance(expr, expressions.TernaryOperator): value = self.gen_ternop(expr) elif isinstance(expr, expressions.VariableAccess): - declaration = expr.variable.declaration - if isinstance( - declaration, - ( - declarations.VariableDeclaration, - declarations.ParameterDeclaration, - declarations.ConstantDeclaration, - declarations.FunctionDeclaration, - ), - ): - value = self.ir_var_map[declaration] - elif isinstance(declaration, declarations.EnumConstantDeclaration): - # Enum value declaration! - constant_value = self.context.get_enum_value( - declaration.typ, declaration - ) - ir_typ = self.get_ir_type(expr.typ) - value = self.emit( - ir.Const(constant_value, declaration.name, ir_typ) - ) - else: # pragma: no cover - raise NotImplementedError(str(declaration)) + value = self.gen_variable_access(expr) elif isinstance(expr, expressions.FunctionCall): value = self.gen_call(expr) elif isinstance(expr, expressions.StringLiteral): - # Construct nifty 0-terminated string into memory! 
- encoding = "latin1" - data = expr.value[1:-1].encode(encoding) + bytes([0]) + data = expr.to_bytes() value = self.emit(ir.LiteralData(data, "cstr")) value = self.emit(ir.AddressOf(value, "dptr")) elif isinstance(expr, expressions.CharLiteral): @@ -1000,36 +961,11 @@ def gen_expr(self, expr, rvalue=False): elif isinstance(expr, expressions.InitializerList): self.error("Illegal initializer list", expr.location) elif isinstance(expr, expressions.Cast): - a = self.gen_expr(expr.expr, rvalue=True) - if expr.to_typ.is_void: - value = None - else: - ir_typ = self.get_ir_type(expr.to_typ) - value = self.emit( - ir.Cast(a, "typecast", ir_typ), location=expr.location - ) + value = self.gen_cast(expr) elif isinstance(expr, expressions.Sizeof): value = self.gen_sizeof(expr) elif isinstance(expr, expressions.FieldSelect): - base = self.gen_expr(expr.base, rvalue=False) - field_offsets = self.context.get_field_offsets(expr.base.typ)[1] - offset = field_offsets[expr.field] - if expr.field.is_bitfield: - offset, bitshift = offset // 8, offset % 8 - offset = self.emit(ir.Const(offset, "offset", ir.ptr)) - value = self.emit( - ir.Binop(base, "+", offset, "offset", ir.ptr) - ) - bitsize = self.context.eval_expr(expr.field.bitsize) - signed = expr.field.typ.is_signed - value = BitFieldAccess(value, bitshift, bitsize, signed) - else: - assert offset % 8 == 0 - offset //= 8 - offset = self.emit(ir.Const(offset, "offset", ir.ptr)) - value = self.emit( - ir.Binop(base, "+", offset, "offset", ir.ptr) - ) + value = self.gen_field_select(expr) elif isinstance(expr, expressions.ArrayIndex): value = self.gen_array_index(expr) elif isinstance(expr, expressions.BuiltIn): @@ -1043,8 +979,6 @@ def gen_expr(self, expr, rvalue=False): # If we need an rvalue, load it! if rvalue and expr.lvalue: - # Array handling is a special case! - # when accessed, arrays turn into pointers to its first element. 
value = self._load_value(value, expr.typ) elif not rvalue: @@ -1053,8 +987,8 @@ def gen_expr(self, expr, rvalue=False): def _load_value(self, lvalue, ctyp): """ Load a value from given l-value """ - if isinstance(ctyp, types.ArrayType): - self.logger.debug("Array type accessed %s", ctyp) + + if ctyp.is_array: value = lvalue else: # TODO: inject evil bitfield manipulation code here: @@ -1198,29 +1132,10 @@ def gen_unop(self, expr: expressions.UnaryOperator): """ Generate code for unary operator """ if expr.op in ["x++", "x--", "--x", "++x"]: # Increment and decrement in pre and post form - ir_a = self.gen_expr(expr.a, rvalue=False) - assert expr.a.lvalue - - ir_typ = self.get_ir_type(expr.typ) - loaded = self._load_value(ir_a, expr.typ) - # for pointers, this is not one, but sizeof - if isinstance(expr.typ, types.PointerType): - size = self.context.sizeof(expr.typ.element_type) - one = self.emit(ir.Const(size, "one_element", ir_typ)) - else: - one = self.emit(ir.Const(1, "one", ir_typ)) - # Determine increment or decrement: op = expr.op[1] - changed = self.emit(ir.Binop(loaded, op, one, "inc", ir_typ)) - self._store_value(changed, ir_a) - - # Determine pre or post form: pre = expr.op[0] == "x" - if pre: - value = loaded - else: - value = changed + value = self.gen_inplace_mutation(expr, op, pre) elif expr.op == "*": value = self.gen_expr(expr.a, rvalue=True) assert expr.lvalue @@ -1237,6 +1152,27 @@ def gen_unop(self, expr: expressions.UnaryOperator): raise NotImplementedError(str(expr.op)) return value + def gen_inplace_mutation(self, expr, op, pre): + """ Generate code for x++ or --y. 
""" + ir_a = self.gen_expr(expr.a, rvalue=False) + assert expr.a.lvalue + + ir_typ = self.get_ir_type(expr.typ) + loaded = self._load_value(ir_a, expr.typ) + # for pointers, this is not one, but sizeof + if isinstance(expr.typ, types.PointerType): + size = self.context.sizeof(expr.typ.element_type) + one = self.emit(ir.Const(size, "one_element", ir_typ)) + else: + one = self.emit(ir.Const(1, "one", ir_typ)) + + changed = self.emit(ir.Binop(loaded, op, one, "inc", ir_typ)) + self._store_value(changed, ir_a) + + # Determine pre or post form: + value = loaded if pre else changed + return value + def gen_binop(self, expr: expressions.BinaryOperator): """ Generate code for binary operation expression """ if expr.op in ["-", "*", "/", "%", "^", "|", "&", ">>", "<<"]: @@ -1366,6 +1302,32 @@ def gen_ternop(self, expr: expressions.TernaryOperator): raise NotImplementedError(str(expr.op)) return value + def gen_variable_access(self, expr: expressions.VariableAccess): + """ Generate code for accessing a variable. """ + declaration = expr.variable.declaration + if isinstance( + declaration, + ( + declarations.VariableDeclaration, + declarations.ParameterDeclaration, + declarations.ConstantDeclaration, + declarations.FunctionDeclaration, + ), + ): + value = self.ir_var_map[declaration] + elif isinstance(declaration, declarations.EnumConstantDeclaration): + # Enum value declaration! 
+ constant_value = self.context.get_enum_value( + declaration.typ, declaration + ) + ir_typ = self.get_ir_type(expr.typ) + value = self.emit( + ir.Const(constant_value, declaration.name, ir_typ) + ) + else: # pragma: no cover + raise NotImplementedError(str(declaration)) + return value + def gen_call(self, expr: expressions.FunctionCall): """ Generate code for a function call """ if isinstance(expr.callee.typ, types.FunctionType): @@ -1373,13 +1335,46 @@ def gen_call(self, expr: expressions.FunctionCall): else: ftyp = expr.callee.typ.element_type + ir_arguments, rval_alloc = self.prepare_arguments(ftyp, expr.args) + + # Get function pointer or label: + if isinstance(expr.callee.typ, types.FunctionType): + # Normal call, get global value: + # print(expr.callee, expr.location) + # if isinstance(expr.callee + # ir_function = self.gen_expr(expr.callee, rvalue=False) + ir_function = self.ir_var_map[expr.callee.variable.declaration] + elif isinstance(expr.callee.typ, types.PointerType) and isinstance( + expr.callee.typ.element_type, types.FunctionType + ): + ir_function = self.gen_expr(expr.callee, rvalue=True) + else: # pragma: no cover + raise NotImplementedError() + + # Use function or procedure call depending on return type: + if ftyp.return_type.is_void: + self.emit(ir.ProcedureCall(ir_function, ir_arguments)) + value = None + elif ftyp.return_type.is_struct: + self.emit(ir.ProcedureCall(ir_function, ir_arguments)) + value = rval_alloc + else: + ir_typ = self.get_ir_type(expr.typ) + value = self.emit( + ir.FunctionCall(ir_function, ir_arguments, "result", ir_typ) + ) + + return value + + def prepare_arguments(self, ftyp, args): + """ Generate code to evaluate arguments. 
""" # Determine fixed and variable arguments: if ftyp.is_vararg: - x = len(ftyp.arguments) - fixed_args = expr.args[:x] - var_args = expr.args[x:] + fixed_amount = len(ftyp.arguments) + fixed_args = args[:fixed_amount] + var_args = args[fixed_amount:] else: - fixed_args = expr.args + fixed_args = args var_args = [] # Evaluate arguments: @@ -1392,6 +1387,8 @@ def gen_call(self, expr: expressions.FunctionCall): rval_alloc = self.emit(ir.Alloc("rval_alloc", size, alignment)) rval_ptr = self.emit(ir.AddressOf(rval_alloc, "rval_ptr")) ir_arguments.append(rval_ptr) + else: + rval_alloc = None # Place other arguments: for argument in fixed_args: @@ -1405,33 +1402,7 @@ def gen_call(self, expr: expressions.FunctionCall): else: assert not var_args - # Get function pointer or label: - if isinstance(expr.callee.typ, types.FunctionType): - # Normal call, get global value: - ir_function = self.ir_var_map[ - expr.callee.variable.declaration - ] - elif isinstance(expr.callee.typ, types.PointerType) and isinstance( - expr.callee.typ.element_type, types.FunctionType - ): - ir_function = self.gen_expr(expr.callee, rvalue=True) - else: # pragma: no cover - raise NotImplementedError() - - # Use function or procedure call depending on return type: - if ftyp.return_type.is_void: - self.emit(ir.ProcedureCall(ir_function, ir_arguments)) - value = None - elif ftyp.return_type.is_struct: - self.emit(ir.ProcedureCall(ir_function, ir_arguments)) - value = rval_alloc - else: - ir_typ = self.get_ir_type(expr.typ) - value = self.emit( - ir.FunctionCall(ir_function, ir_arguments, "result", ir_typ) - ) - - return value + return ir_arguments, rval_alloc def gen_fill_varargs(self, var_args): """ Generate code to fill variable arguments. 
@@ -1458,7 +1429,7 @@ def gen_fill_varargs(self, var_args): for argument in var_args: value = self.gen_expr(argument, rvalue=True) va_size = self.context.sizeof(argument.typ) - va_alignment = self.context.alignment(va.typ) + va_alignment = self.context.alignment(argument.typ) # handle alignment: padding = required_padding(offset, va_alignment) @@ -1494,6 +1465,25 @@ def gen_compound_literal(self, expr): self.gen_local_init(ir_addr, expr.typ, expr.init) return ir_addr + def gen_field_select(self, expr: expressions.FieldSelect): + """ Generate code for field select operation. """ + base = self.gen_expr(expr.base, rvalue=False) + field_offsets = self.context.get_field_offsets(expr.base.typ)[1] + offset = field_offsets[expr.field] + if expr.field.is_bitfield: + offset, bitshift = offset // 8, offset % 8 + offset = self.emit(ir.Const(offset, "offset", ir.ptr)) + value = self.emit(ir.Binop(base, "+", offset, "offset", ir.ptr)) + bitsize = self.context.eval_expr(expr.field.bitsize) + signed = expr.field.typ.is_signed + value = BitFieldAccess(value, bitshift, bitsize, signed) + else: + assert offset % 8 == 0 + offset //= 8 + offset = self.emit(ir.Const(offset, "offset", ir.ptr)) + value = self.emit(ir.Binop(base, "+", offset, "offset", ir.ptr)) + return value + def gen_array_index(self, expr: expressions.ArrayIndex): """ Generate code for array indexing """ # Load base as an rvalue, to make sure we load pointers values. @@ -1579,6 +1569,26 @@ def gen_offsetof(self, expr: expressions.BuiltInOffsetOf): value = self.emit(ir.Const(offset, "offset", ir_typ)) return value + def gen_cast(self, expr: expressions.Cast): + """ Generate code for casting operation. """ + if expr.is_array_decay(): + # Pointer decay! 
+ # assert expr.expr.lvalue + value = self.gen_expr(expr.expr, rvalue=True) + # if expr.expr.lvalue: + # value = self.emit(ir.AddressOf(value, "decay_ptr")) + else: + value = self.gen_expr(expr.expr, rvalue=True) + + if expr.to_typ.is_void: + value = None + else: + ir_typ = self.get_ir_type(expr.to_typ) + value = self.emit( + ir.Cast(value, "typecast", ir_typ), location=expr.location + ) + return value + def gen_sizeof(self, expr: expressions.Sizeof): """ Generate code for sizeof construction """ if isinstance(expr.sizeof_typ, types.CType): @@ -1678,3 +1688,21 @@ def __init__(self, address, bitshift, bitsize, signed): self.bitshift = bitshift self.bitsize = bitsize self.signed = signed + + +class LinkTimeExpressionEvaluator(ConstantExpressionEvaluator): + """ Special purpose evaluator for link time constant expressions. """ + + def __init__(self, codegenerator): + super().__init__(codegenerator.context) + self.codegenerator = codegenerator + + def eval_global_access(self, declaration): + # emit reference to global symbol + cval = (ir.ptr, declaration.name) + return cval + + def eval_string_literal(self, expr: expressions.StringLiteral): + text_var = self.codegenerator.gen_global_string_constant(expr) + cval = (ir.ptr, text_var.name) + return cval diff --git a/ppci/lang/c/context.py b/ppci/lang/c/context.py index bfca3abe..39441747 100644 --- a/ppci/lang/c/context.py +++ b/ppci/lang/c/context.py @@ -9,8 +9,9 @@ from ...arch.arch_info import Endianness from ... 
import ir from .nodes.types import BasicType -from .nodes import types, expressions, declarations +from .nodes import types, expressions from .utils import required_padding +from .eval import ConstantExpressionEvaluator class CContext: @@ -21,6 +22,7 @@ class CContext: def __init__(self, coptions, arch_info): self.coptions = coptions self.arch_info = arch_info + self._expression_evaluator = ConstantExpressionEvaluator(self) self._field_offsets = {} self._enum_values = {} @@ -256,60 +258,4 @@ def warning(self, message, location, hints=None): def eval_expr(self, expr): """ Evaluate an expression right now! (=at compile time) """ - if isinstance(expr, expressions.BinaryOperator): - lhs = self.eval_expr(expr.a) - rhs = self.eval_expr(expr.b) - op = expr.op - - op_map = { - "+": lambda x, y: x + y, - "-": lambda x, y: x - y, - "*": lambda x, y: x * y, - } - - # Ensure division is integer division: - if expr.typ.is_integer: - op_map["/"] = lambda x, y: x // y - op_map[">>"] = lambda x, y: x >> y - op_map["<<"] = lambda x, y: x << y - else: - op_map["/"] = lambda x, y: x / y - - value = op_map[op](lhs, rhs) - elif isinstance(expr, expressions.UnaryOperator): - if expr.op in ["-"]: - a = self.eval_expr(expr.a) - op_map = {"-": lambda x: -x} - value = op_map[expr.op](a) - else: # pragma: no cover - raise NotImplementedError(str(expr)) - elif isinstance(expr, expressions.VariableAccess): - declaration = expr.variable.declaration - if isinstance(declaration, declarations.EnumConstantDeclaration): - value = self.get_enum_value(declaration.typ, declaration) - else: - raise NotImplementedError(str(expr.variable)) - elif isinstance(expr, expressions.NumericLiteral): - value = expr.value - elif isinstance(expr, expressions.CharLiteral): - value = expr.value - elif isinstance(expr, expressions.Cast): - value = self.eval_expr(expr.expr) - - # do some real casting: - if expr.typ.is_integer: - value = int(value) - elif expr.typ.is_float or expr.typ.is_double: - value = float(value) - 
else: - pass - elif isinstance(expr, expressions.Sizeof): - if isinstance(expr.sizeof_typ, types.CType): - value = self.sizeof(expr.sizeof_typ) - else: - value = self.sizeof(expr.sizeof_typ.typ) - elif isinstance(expr, int): - value = expr - else: # pragma: no cover - raise NotImplementedError(str(expr)) - return value + return self._expression_evaluator.eval_expr(expr) diff --git a/ppci/lang/c/eval.py b/ppci/lang/c/eval.py new file mode 100644 index 00000000..feb16920 --- /dev/null +++ b/ppci/lang/c/eval.py @@ -0,0 +1,109 @@ +from .nodes import types, expressions, declarations + + +class ConstantExpressionEvaluator: + """ Class which is capable of evaluating expressions. """ + + def __init__(self, context): + self.context = context + + def eval_expr(self, expr): + """ Evaluate an expression right now! (=at compile time) """ + if isinstance(expr, expressions.BinaryOperator): + value = self.eval_binop(expr) + elif isinstance(expr, expressions.UnaryOperator): + value = self.eval_unop(expr) + elif isinstance(expr, expressions.VariableAccess): + value = self.eval_variable_access(expr) + elif isinstance(expr, expressions.NumericLiteral): + value = expr.value + elif isinstance(expr, expressions.CharLiteral): + value = expr.value + elif isinstance(expr, expressions.StringLiteral): + value = self.eval_string_literal(expr) + elif isinstance(expr, expressions.Cast): + value = self.eval_cast(expr) + elif isinstance(expr, expressions.Sizeof): + if isinstance(expr.sizeof_typ, types.CType): + value = self.context.sizeof(expr.sizeof_typ) + else: + value = self.context.sizeof(expr.sizeof_typ.typ) + elif isinstance(expr, int): + value = expr + else: # pragma: no cover + raise NotImplementedError(str(expr)) + return value + + def eval_variable_access(self, expr): + """ Evaluate variable access. 
""" + declaration = expr.variable.declaration + if isinstance(declaration, declarations.EnumConstantDeclaration): + value = self.eval_enum(declaration) + elif isinstance( + declaration, + ( + declarations.VariableDeclaration, + declarations.FunctionDeclaration, + ), + ): + value = self.eval_global_access(declaration) + else: + raise NotImplementedError(str(expr.variable)) + return value + + def eval_enum(self, declaration): + """ Evaluate enum value. """ + value = self.context.get_enum_value(declaration.typ, declaration) + return value + + def eval_global_access(self, declaration): + raise NotImplementedError() + + def eval_string_literal(self, expr): + raise NotImplementedError() + + def eval_cast(self, expr): + """ Evaluate cast expression. """ + value = self.eval_expr(expr.expr) + + # do some real casting: + if expr.typ.is_integer: + value = int(value) + elif expr.typ.is_float or expr.typ.is_double: + value = float(value) + else: + pass + return value + + def eval_unop(self, expr): + """ Evaluate unary operation. """ + if expr.op in ["-"]: + a = self.eval_expr(expr.a) + op_map = {"-": lambda x: -x} + value = op_map[expr.op](a) + else: # pragma: no cover + raise NotImplementedError(str(expr)) + return value + + def eval_binop(self, expr): + """ Evaluate binary operator. 
""" + lhs = self.eval_expr(expr.a) + rhs = self.eval_expr(expr.b) + op = expr.op + + op_map = { + "+": lambda x, y: x + y, + "-": lambda x, y: x - y, + "*": lambda x, y: x * y, + } + + # Ensure division is integer division: + if expr.typ.is_integer: + op_map["/"] = lambda x, y: x // y + op_map[">>"] = lambda x, y: x >> y + op_map["<<"] = lambda x, y: x << y + else: + op_map["/"] = lambda x, y: x / y + + value = op_map[op](lhs, rhs) + return value diff --git a/ppci/lang/c/nodes/declarations.py b/ppci/lang/c/nodes/declarations.py index 68f2b22e..f27a6109 100644 --- a/ppci/lang/c/nodes/declarations.py +++ b/ppci/lang/c/nodes/declarations.py @@ -28,10 +28,11 @@ def is_definition(self): class StorageClass: """ Sort of enum with all options for storage classes. """ - AUTO = 'auto' - EXTERN = 'extern' - REGISTER = 'register' - STATIC = 'static' + + AUTO = "auto" + EXTERN = "extern" + REGISTER = "register" + STATIC = "static" class Typedef(CDeclaration): diff --git a/ppci/lang/c/nodes/expressions.py b/ppci/lang/c/nodes/expressions.py index 7018e431..42610ba3 100644 --- a/ppci/lang/c/nodes/expressions.py +++ b/ppci/lang/c/nodes/expressions.py @@ -138,6 +138,10 @@ def __init__(self, expr, typ, lvalue, location): def __repr__(self): return "Cast {}".format(self.to_typ) + def is_array_decay(self): + """ Test if this cast is a pointer decay. 
""" + return self.to_typ.is_pointer and self.expr.typ.is_array + class ImplicitCast(Cast): """ An implicit cast """ @@ -195,10 +199,6 @@ def __repr__(self): class Literal(CExpression): """ Literal value such as 'h' or 1.22 """ - def __init__(self, value, typ, location): - super().__init__(typ, False, location) - self.value = value - def __repr__(self): return "Literal {} <{}>".format(self.value, self.typ) @@ -206,13 +206,21 @@ def __repr__(self): class CharLiteral(Literal): """ A character literal """ + def __init__(self, value, typ, location): + super().__init__(typ, False, location) + self.value = value + def __repr__(self): - return "Char literal {}".format(self.value) + return "Char literal '{}'".format(self.value) class NumericLiteral(Literal): """ A numeric literal """ + def __init__(self, value, typ, location): + super().__init__(typ, False, location) + self.value = value + def __repr__(self): return "Numeric literal {} <{}>".format(self.value, self.typ) @@ -220,13 +228,17 @@ def __repr__(self): class StringLiteral(Literal): """ A string literal """ + def __init__(self, value, typ, location): + super().__init__(typ, True, location) + self.value = value + def __repr__(self): - return "String literal {}".format(self.value) + return 'String literal "{}"'.format(self.value) def to_bytes(self): """ Convert this string literal to zero terminated byte string. """ encoding = "latin1" - data = self.value[1:-1].encode(encoding) + bytes([0]) + data = self.value.encode(encoding) + bytes([0]) return data diff --git a/ppci/lang/c/nodes/types.py b/ppci/lang/c/nodes/types.py index d78c9ee0..94a4a864 100644 --- a/ppci/lang/c/nodes/types.py +++ b/ppci/lang/c/nodes/types.py @@ -70,6 +70,11 @@ def is_void(self): """ See if this type is void """ return is_void(self) + @property + def is_pointer(self): + """ Test if this type is of pointer type. 
""" + return isinstance(self, PointerType) + @property def is_float(self): """ See if this type is float """ diff --git a/ppci/lang/c/nodes/visitor.py b/ppci/lang/c/nodes/visitor.py index 5656cb68..e582ecb8 100644 --- a/ppci/lang/c/nodes/visitor.py +++ b/ppci/lang/c/nodes/visitor.py @@ -5,6 +5,7 @@ class Visitor: """ Recursively visit all nodes """ def visit(self, node): + """ Recursively visit node's child nodes. """ if isinstance(node, nodes.CompilationUnit): for d in node.declarations: self.visit(d) @@ -30,10 +31,12 @@ def visit(self, node): elif isinstance(node, expressions.BinaryOperator): self.visit(node.a) self.visit(node.b) + self.visit(node.typ) elif isinstance(node, expressions.UnaryOperator): self.visit(node.a) + self.visit(node.typ) elif isinstance(node, expressions.Literal): - pass + self.visit(node.typ) elif isinstance(node, expressions.InitializerList): for element in node.elements: self.visit(element) @@ -45,6 +48,7 @@ def visit(self, node): elif isinstance(node, expressions.ArrayIndex): self.visit(node.base) self.visit(node.index) + self.visit(node.typ) elif isinstance(node, expressions.FieldSelect): self.visit(node.base) elif isinstance(node, expressions.FunctionCall): @@ -136,3 +140,6 @@ def visit(self, node): self.visit(node.value) else: # pragma: no cover raise NotImplementedError(str(type(node))) + + def visit_type(self, node): + raise NotImplementedError("todo: to reduce visit function size") diff --git a/ppci/lang/c/parser.py b/ppci/lang/c/parser.py index 01bf0abb..b2133ad2 100644 --- a/ppci/lang/c/parser.py +++ b/ppci/lang/c/parser.py @@ -110,6 +110,8 @@ def __init__(self, coptions, semantics): self.keywords |= self.type_specifiers # Define a priority map for operators: + # See also: + # https://en.cppreference.com/w/cpp/language/operator_precedence self.prio_map = { ",": (LEFT_ASSOCIATIVE, 3), "=": (RIGHT_ASSOCIATIVE, 10), diff --git a/ppci/lang/c/scope.py b/ppci/lang/c/scope.py index fc8c7cb9..7eef1073 100644 --- a/ppci/lang/c/scope.py 
+++ b/ppci/lang/c/scope.py @@ -89,7 +89,9 @@ def equal_types(self, typ1: types.CType, typ2, unqualified=False): ) ) elif isinstance(typ1, types.IndexableType): - if isinstance(typ2, types.IndexableType): + if isinstance(typ2, types.IndexableType) and type(typ1) is type( + typ2 + ): return self.equal_types(typ1.element_type, typ2.element_type) elif isinstance(typ1, types.UnionType): if isinstance(typ2, types.UnionType): diff --git a/ppci/lang/c/semantics.py b/ppci/lang/c/semantics.py index bd7d6f1d..8c8b3153 100644 --- a/ppci/lang/c/semantics.py +++ b/ppci/lang/c/semantics.py @@ -38,7 +38,6 @@ def __init__(self, context): # Define the type for a string: self.int_type = self.get_type(["int"]) self.char_type = self.get_type(["char"]) - self.cstr_type = types.PointerType(self.char_type) self.intptr_type = types.PointerType(self.int_type) # Working variables: @@ -83,7 +82,7 @@ def end_function(self, body): self.current_function = None if self.scope.is_definition(function.name): - self.error('invalid redefinition', function.location) + self.error("invalid redefinition", function.location) function.body = body assert not self.switch_stack @@ -157,7 +156,7 @@ def on_variable_initialization(self, variable, expression): # Check double initializations if self.scope.is_definition(variable.name): - self.error('Invalid redefinition.', variable.location) + self.error("Invalid redefinition.", variable.location) variable.initial_value = expression @@ -318,22 +317,17 @@ def check_redeclaration_storage_class(self, sym, declaration): old_storage_class = sym.declaration.storage_class new_storage_class = declaration.storage_class # None == automatic storage class. - invalid_combos = [ - (None, 'static'), - ('extern', 'static'), - ] + invalid_combos = [(None, "static"), ("extern", "static")] combo = (old_storage_class, new_storage_class) if combo in invalid_combos: - message = 'Invalid redefine of storage class. 
Was {}, but now {}'.format(old_storage_class, new_storage_class) - self.invalid_redeclaration( - sym, declaration, message, + message = "Invalid redefine of storage class. Was {}, but now {}".format( + old_storage_class, new_storage_class ) + self.invalid_redeclaration(sym, declaration, message) if not declaration.storage_class: if sym.declaration.storage_class: - declaration.storage_class = ( - sym.declaration.storage_class - ) + declaration.storage_class = sym.declaration.storage_class def invalid_redeclaration( self, sym, declaration, message="Invalid redefinition" @@ -538,7 +532,9 @@ def on_return(self, value, location): # Expressions! def on_string(self, value, location): """ React on string literal """ - return expressions.StringLiteral(value, self.cstr_type, location) + value = value[1:-1] # Strip off " chars. + cstr_type = types.ArrayType(self.char_type, len(value) + 1) + return expressions.StringLiteral(value, cstr_type, location) def on_number(self, value, location): """ React on numeric literal """ diff --git a/ppci/lang/c/synthesize.py b/ppci/lang/c/synthesize.py index 9a01e43c..c837909c 100644 --- a/ppci/lang/c/synthesize.py +++ b/ppci/lang/c/synthesize.py @@ -25,13 +25,24 @@ def syn_function(self, function): def syn_block(self, block): """ Synthesize an ir block into C """ - inner_statements = [] + self._inner_statements = [] for instruction in block: - inner_statements.append(self.syn_instruction(instruction)) + self.syn_instruction(instruction) loc = None - compound = statements.Compound(inner_statements, loc) + compound = statements.Compound(self._inner_statements, loc) statements.Label(block.name, compound, None) + def emit_statement(self, statement): + self._inner_statements.append(statement) + + def emit_expression(self, expression): + self.emit_statement(statements.ExpressionStatement(expression)) + + def emit_store(self, lhs, rhs): + typ = self.get_ctype() + expression = expressions.BinaryOperator(lhs, "=", rhs, typ, True, None) +
self.emit_expression(expression) + def get_var(self, ir_val): if ir_val not in self.var_map: # Create a variable now. @@ -50,6 +61,10 @@ def get_var_ref(self, ir_val): access = expressions.VariableAccess(var, typ, True, loc) return access + def get_ctype(self): + typ = self.voidptr_type + return typ + def syn_instruction(self, instruction): """ Convert ir instruction to its corresponding C counterpart """ if isinstance(instruction, ir.Alloc): @@ -57,15 +72,23 @@ def syn_instruction(self, instruction): declaration = declarations.VariableDeclaration( None, ctyp, instruction.name, None, None ) - statement = statements.DeclarationStatement(declaration, None) + self.emit_statement( + statements.DeclarationStatement(declaration, None) + ) elif isinstance(instruction, ir.Store): lhs = self.get_var_ref(instruction.address) value = self.get_var_ref(instruction.value) - typ = self.voidptr_type + typ = self.get_ctype() expression = expressions.BinaryOperator( lhs, "=", value, typ, True, None ) - statement = statements.ExpressionStatement(expression) + self.emit_expression(expression) + elif isinstance(instruction, ir.Load): + lhs = self.get_var_ref(instruction) + address = self.get_var_ref(instruction.address) + typ = self.get_ctype() + rhs = expressions.UnaryOperator("*", address, typ, False, None) + self.emit_store(lhs, rhs) elif isinstance(instruction, ir.Binop): lhs = instruction.name op = instruction.op @@ -74,32 +97,35 @@ def syn_instruction(self, instruction): typ = instruction.typ rhs = expressions.BinaryOperator(a, op, b, typ, False, None) print(lhs, rhs) - # expression = expressions.Binop('=', a, b, d) - statement = statements.ExpressionStatement(expression) + self.emit_store(lhs, rhs) elif isinstance(instruction, ir.AddressOf): typ = self.voidptr_type loc = None src = self.get_var_ref(instruction.src) expression = expressions.UnaryOperator("&", src, typ, False, loc) - statement = statements.ExpressionStatement(expression) + self.emit_expression(expression) + elif 
isinstance(instruction, ir.Cast): + dst = self.get_var_ref(instruction) + src = self.get_var_ref(instruction.src) + # TODO: do some casting? + self.emit_store(dst, src) elif isinstance(instruction, ir.LiteralData): value = instruction.data typ = self.voidptr_type loc = None expression = expressions.StringLiteral(value, typ, loc) - statement = statements.ExpressionStatement(expression) + self.emit_expression(expression) elif isinstance(instruction, ir.ProcedureCall): callee = self.get_var_ref(instruction.callee) args = [] typ = self.voidptr_type loc = None call = expressions.FunctionCall(callee, args, typ, False, loc) - statement = statements.ExpressionStatement(call) + self.emit_expression(call) elif isinstance(instruction, ir.Exit): - statement = statements.Return(None, None) + self.emit_statement(statements.Return(None, None)) elif isinstance(instruction, ir.Return): value = expressions.VariableAccess() - statement = statements.Return(value, None) + self.emit_statement(statements.Return(value, None)) else: # pragma: no cover raise NotImplementedError(str(instruction)) - return statement diff --git a/ppci/lang/python/ir2py.py b/ppci/lang/python/ir2py.py index 5e5ac1ff..a21add06 100644 --- a/ppci/lang/python/ir2py.py +++ b/ppci/lang/python/ir2py.py @@ -22,6 +22,8 @@ def ir_to_python(ir_modules, f, reporter=None): generator = IrToPythonCompiler(f, reporter) generator.header() for ir_module in ir_modules: + if not isinstance(ir_module, ir.Module): + raise TypeError('ir_modules must be list of ir.Module') generator.generate(ir_module) if reporter: diff --git a/ppci/wasm/arch.py b/ppci/wasm/arch.py index 4cb6790c..79fe4e26 100644 --- a/ppci/wasm/arch.py +++ b/ppci/wasm/arch.py @@ -24,6 +24,7 @@ class F64Register(Register): bitsize = 64 +# TODO: Note: having u64 in an i64 register might be wrong.. 
register_classes = [ RegisterClass( "i32", @@ -31,7 +32,7 @@ class F64Register(Register): I32Register, None, ), - RegisterClass("i64", [ir.u32, ir.i64], I64Register, None), + RegisterClass("i64", [ir.u32, ir.i64, ir.u64], I64Register, None), RegisterClass("f32", [ir.f32], F32Register, None), RegisterClass("f64", [ir.f64], F64Register, None), ] diff --git a/ppci/wasm/ppci2wasm.py b/ppci/wasm/ppci2wasm.py index 264c6d4f..195ca99b 100644 --- a/ppci/wasm/ppci2wasm.py +++ b/ppci/wasm/ppci2wasm.py @@ -126,6 +126,9 @@ def compile(self, ir_module): (type_ref,), ) ) + elif isinstance(ir_external, ir.ExternalVariable): + if ir_external.is_used: + raise NotImplementedError(str(ir_external)) else: raise NotImplementedError(str(ir_external)) @@ -481,6 +484,7 @@ def do_block(self, ir_block): "STRI32": "i32.store", "STRU32": "i64.store32", # TODO "STRI64": "i64.store", + "STRU64": "i64.store", # Dubious, is this correct? "STRF32": "f32.store", "STRF64": "f64.store", } @@ -493,6 +497,7 @@ def do_block(self, ir_block): "LDRI32": "i32.load", "LDRU32": "i64.load32_u", "LDRI64": "i64.load", + "LDRU64": "i64.load", "LDRF32": "f32.load", "LDRF64": "f64.load", } @@ -511,6 +516,17 @@ def do_block(self, ir_block): } cast_operators = { + # 64 -- 64 + "I64TOI64", + "I64TOU64", + "U64TOI64", + "U64TOU64", + # 32 -- 64 + "U32TOI64", + "U32TOU64", + # 64 -- 32 + "U64TOU32", + "I64TOU32", # 32 --- 32 "I32TOI32", "U32TOU32", @@ -548,8 +564,12 @@ def do_block(self, ir_block): # float to float: "F64TOF32": ["f32.demote/f64"], "F32TOF64": ["f64.promote/f32"], - # i64 - # 'I64TOI32': + # 32 -- 64 + "I32TOI64": ['i64.extend_s/i32'], + "I32TOU64": ['i64.extend_u/i32'], + # i64 -- 32 + "U64TOI32": ["i32.wrap/i64"], + "I64TOI32": ["i32.wrap/i64"], # Store u32 in i64 type: "I32TOU32": ["i64.extend_s/i32"], "U32TOI32": ["i32.wrap/i64"], @@ -573,7 +593,7 @@ def do_block(self, ir_block): "REGI32", "REGU32", "REGI64", - "REGU32", + "REGU64", "REGF32", "REGF64", } diff --git a/readme.rst b/readme.rst index 
9f9ce210..7b82d371 100644 --- a/readme.rst +++ b/readme.rst @@ -13,6 +13,7 @@ The project contains the following: - Language front-ends: Brainfuck, c3, C, WASM - Backends: 6500, arm, avr, m68k, microblaze, msp430, openrisc, risc-v, stm8, x86_64, xtensa +- Other backends: wasm, python .. warning:: diff --git a/test/lang/c/test_c.py b/test/lang/c/test_c.py index 61b26419..9bca2abe 100644 --- a/test/lang/c/test_c.py +++ b/test/lang/c/test_c.py @@ -177,6 +177,16 @@ def test_expressions(self): """ self.do(src) + def test_comma_operator(self): + """ Test comma operator """ + src = """ + void main() { + int a,b,c,d; + a = 2, b=3; + } + """ + self.do(src) + def test_4(self): """ Test expressions """ src = """ @@ -408,6 +418,7 @@ def test_literal_data(self): i = 10l; s = "Hello!" "World!"; c = ' '; + s = &"bla"[2]; // This is fine! } """ self.do(src) @@ -704,6 +715,9 @@ def test_function_pointer_passing(self): void main() { register_callback(callback); + callback(); // direct call + cb(); // via function pointer + // TODO: (*cb)(); // again via function pointer } """ self.do(src) diff --git a/tools/compile_lcc.py b/tools/compile_lcc.py index 9d621ddb..5579d800 100644 --- a/tools/compile_lcc.py +++ b/tools/compile_lcc.py @@ -50,15 +50,13 @@ def main(): this_dir = os.path.abspath(os.path.dirname(__file__)) report_filename = os.path.join(this_dir, "report_lcc.html") libc_includes = os.path.join(this_dir, "..", "librt", "libc") - include_paths = [ - libc_includes - ] + include_paths = [libc_includes] arch = "x86_64" t1 = time.time() failed = 0 passed = 0 - sources = glob.glob(os.path.join(lcc_folder, 'src', '*.c')) + sources = glob.glob(os.path.join(lcc_folder, "src", "*.c")) objs = [] with open(report_filename, "w") as f, HtmlReportGenerator(f) as reporter: for filename in sources: diff --git a/tools/ppci_explorer.py b/tools/ppci_explorer.py index be5422f4..9f376992 100644 --- a/tools/ppci_explorer.py +++ b/tools/ppci_explorer.py @@ -16,6 +16,7 @@ import io import logging 
+import os from itertools import cycle import traceback @@ -74,6 +75,8 @@ def apply_transformation(self, transformation_input): class PpciExplorer: """ Ppci explorer. """ + cache_filename = "ppci_explorer_source.txt" + def __init__(self): available_archs = [ "arm", @@ -99,6 +102,8 @@ def __init__(self): @kb.add(Keys.F10, eager=True) def quit_(event): + with open(self.cache_filename, "w") as f: + f.write(self.source_buffer.text) event.app.exit() kb.add(Keys.F6, eager=True)(self.cycle_stage) @@ -172,7 +177,12 @@ def quit_(event): layout=layout, key_bindings=kb, style=style, full_screen=True ) - self.source_buffer.text = DEMO_SOURCE + if os.path.exists(self.cache_filename): + with open(self.cache_filename, "r") as f: + src = f.read() + else: + src = DEMO_SOURCE + self.source_buffer.text = src def on_change(self, source_buffer): self.do_compile() @@ -250,14 +260,8 @@ def ppci_explorer(): DEMO_SOURCE = """ -static char *msg[] = { - "Hi", "bonjour" -}; - -int g=23; - int add(int a, int b) { - return a + b - g; + return a + b; } """