transformer: refactor + apply transform to (hopefully) all nodes (#13216)

timbasel · web-flow · commit 14b33baa3b42 · 2022-01-20T08:40:16.000+02:00
diff --git a/vlib/v/ast/ast.v b/vlib/v/ast/ast.v
diff --git a/vlib/v/checker/checker.v b/vlib/v/checker/checker.v
@@ -1821,7 +1821,7 @@ fn (mut c Checker) stmt(node ast.Stmt) {
 			node.typ = c.expr(node.expr)
 			c.expected_type = ast.void_type
 			mut or_typ := ast.void_type
-			match node.expr {
+			match mut node.expr {
 				ast.IndexExpr {
 					if node.expr.or_expr.kind != .absent {
 						node.is_expr = true
@@ -1837,7 +1837,7 @@ fn (mut c Checker) stmt(node ast.Stmt) {
 				else {}
 			}
 			if !c.pref.is_repl && (c.stmt_level == 1 || (c.stmt_level > 1 && !c.is_last_stmt)) {
-				if node.expr is ast.InfixExpr {
+				if mut node.expr is ast.InfixExpr {
 					if node.expr.op == .left_shift {
 						left_sym := c.table.final_sym(node.expr.left_type)
 						if left_sym.kind != .array {
@@ -2422,7 +2422,7 @@ pub fn (mut c Checker) expr(node ast.Expr) ast.Type {
 				c.error('expected `string` instead of `$expr_sym.name` (e.g. `field.name`)',
 					node.field_expr.position())
 			}
-			if node.field_expr is ast.SelectorExpr {
+			if mut node.field_expr is ast.SelectorExpr {
 				left_pos := node.field_expr.expr.position()
 				if c.comptime_fields_type.len == 0 {
 					c.error('compile time field access can only be used when iterating over `T.fields`',
@@ -3025,7 +3025,7 @@ pub fn (mut c Checker) ident(mut node ast.Ident) ast.Type {
 						c.inside_const = false
 						c.mod = old_c_mod
 
-						if obj.expr is ast.CallExpr {
+						if mut obj.expr is ast.CallExpr {
 							if obj.expr.or_block.kind != .absent {
 								typ = typ.clear_flag(.optional)
 							}
diff --git a/vlib/v/checker/for.v b/vlib/v/checker/for.v
@@ -147,10 +147,10 @@ fn (mut c Checker) for_stmt(mut node ast.ForStmt) {
 	if !node.is_inf && typ.idx() != ast.bool_type_idx && !c.pref.translated {
 		c.error('non-bool used as for condition', node.pos)
 	}
-	if node.cond is ast.InfixExpr {
+	if mut node.cond is ast.InfixExpr {
 		infix := node.cond
 		if infix.op == .key_is {
-			if infix.left in [ast.Ident, ast.SelectorExpr] && infix.right is ast.TypeNode {
+			if infix.right is ast.TypeNode && infix.left in [ast.Ident, ast.SelectorExpr] {
 				is_variable := if mut infix.left is ast.Ident {
 					infix.left.kind == .variable
 				} else {
diff --git a/vlib/v/checker/if.v b/vlib/v/checker/if.v
@@ -50,7 +50,7 @@ pub fn (mut c Checker) if_expr(mut node ast.IfExpr) ast.Type {
 		if node.is_comptime { // Skip checking if needed
 			// smartcast field type on comptime if
 			mut comptime_field_name := ''
-			if branch.cond is ast.InfixExpr {
+			if mut branch.cond is ast.InfixExpr {
 				if branch.cond.op == .key_is {
 					if branch.cond.right !is ast.TypeNode {
 						c.error('invalid `\$if` condition: expected a type', branch.cond.right.position())
@@ -106,7 +106,7 @@ pub fn (mut c Checker) if_expr(mut node ast.IfExpr) ast.Type {
 				}
 			} else if c.pref.output_cross_c {
 				mut is_freestanding_block := false
-				if branch.cond is ast.Ident {
+				if mut branch.cond is ast.Ident {
 					if branch.cond.name == 'freestanding' {
 						is_freestanding_block = true
 					}
diff --git a/vlib/v/checker/match.v b/vlib/v/checker/match.v
@@ -8,7 +8,7 @@ import strings
 pub fn (mut c Checker) match_expr(mut node ast.MatchExpr) ast.Type {
 	node.is_expr = c.expected_type != ast.void_type
 	node.expected_type = c.expected_type
-	if node.cond is ast.ParExpr && !c.pref.translated {
+	if mut node.cond is ast.ParExpr && !c.pref.translated {
 		c.error('unnecessary `()` in `match` condition, use `match expr {` instead of `match (expr) {`.',
 			node.cond.pos)
 	}
diff --git a/vlib/v/parser/parse_type.v b/vlib/v/parser/parse_type.v
@@ -27,13 +27,13 @@ pub fn (mut p Parser) parse_array_type(expecting token.Kind) ast.Type {
 				}
 				ast.Ident {
 					mut show_non_const_error := false
-					if const_field := p.table.global_scope.find_const('${p.mod}.$size_expr.name') {
-						if const_field.expr is ast.IntegerLiteral {
+					if mut const_field := p.table.global_scope.find_const('${p.mod}.$size_expr.name') {
+						if mut const_field.expr is ast.IntegerLiteral {
 							fixed_size = const_field.expr.val.int()
 						} else {
-							if const_field.expr is ast.InfixExpr {
+							if mut const_field.expr is ast.InfixExpr {
 								mut t := transformer.new_transformer(p.pref)
-								folded_expr := t.infix_expr(const_field.expr)
+								folded_expr := t.infix_expr(mut const_field.expr)
 
 								if folded_expr is ast.IntegerLiteral {
 									fixed_size = folded_expr.val.int()
diff --git a/vlib/v/transformer/index_state.v b/vlib/v/transformer/index_state.v
@@ -0,0 +1,123 @@
+module transformer
+
+struct KeyVal {
+	key   string
+	value int
+}
+
+[if debug_bounds_checking ?]
+fn debug_bounds_checking(str string) {
+	println(str)
+}
+
+// IndexState is used to track the index analysis performed when parsing the code
+// `IndexExpr` nodes are annotated with `is_direct`, indicating that the array index can be safely directly accessed.
+
+// The c_gen code check will handle this annotation and perform this direct memory access. The following cases are considered valid for this optimisation:
+// 1. the array size is known and has a `len` larger than the index requested
+// 2. the array was previously accessed with a higher value which would have reported the issue already
+// 3. the array was created from a range expression a := range[10..13] and the offset'ed indexes are safe
+
+// Current limitations:
+//  * any function using break/continue or goto/label is stopped from being optimised as soon as the relevant AST nodes are found as the code can not be ensured to be sequential
+//  * `enum` and `const` indexes are not optimised (they could probably be looked up)
+//  * for loops with multiple var in their init and/or inc are not analysed
+//  * mut array are not analysed as their size can be reduced, but self-assignment in a single line
+
+pub struct IndexState {
+mut:
+	// max_index has the biggest array index accessed for the named array
+	// so if a[2] was set or read, it will be 2
+	// A new array with no .len will be recorded as -1 (accessing a[0] would be invalid)
+	// the value -2 is used to indicate that the array should not be analysed
+	// this is used for a mut array
+	max_index map[string]int
+	// We need to snapshot when entering `if` and `for` blocks and restore on exit
+	// as the statements may not be run. This is managed by indent() & unindent().
+	saved_disabled []bool
+	saved_key_vals [][]KeyVal
+pub mut:
+	// on encountering goto/break/continue statements we stop any analysis
+	// for the current function (as the code is not linear anymore)
+	disabled bool
+	level    int
+}
+
+// we are remembering the last array accessed and checking if the value is safe
+// the node is updated with this information which can then be used by the code generators
+fn (mut i IndexState) safe_access(key string, new int) bool {
+	$if no_bounds_checking {
+		return false
+	}
+	if i.disabled {
+		return false
+	}
+	old := i.max_index[key] or {
+		debug_bounds_checking('$i.level ${key}.len = $new')
+		i.max_index[key] = new
+		return false
+	}
+	if new > old {
+		if old < -1 {
+			debug_bounds_checking('$i.level $key[$new] unsafe (mut array)')
+			return false
+		}
+		debug_bounds_checking('$i.level $key[$new] unsafe (index was $old)')
+		i.max_index[key] = new
+		return false
+	}
+	debug_bounds_checking('$i.level $key[$new] safe (index is $old)')
+	return true
+}
+
+// safe_offset returns, for a previously seen array, the highest
+// offset we ever accessed for that identifier
+fn (mut i IndexState) safe_offset(key string) int {
+	$if no_bounds_checking {
+		return -2
+	}
+	if i.disabled {
+		return -2
+	}
+	return i.max_index[key] or { -1 }
+}
+
+// indent is used for when encountering new code blocks (if, for and functions)
+// The code analysis needs to take into consideration blocks of code which
+// may not run at runtime (if/for) and therefore even if a new maximum for an
+// index access is found on an if branch it can not be used within the parent
+// code. The same is true with for blocks. indent() snapshots the current state,
+// to allow restoration with unindent()
+// Also within a function, analysis must be `disabled` when goto or break are
+// encountered as the code flow is then not linear, and only restarted when a
+// new function analysis is started.
+[if !no_bounds_checking]
+fn (mut i IndexState) indent(is_function bool) {
+	mut kvs := []KeyVal{cap: i.max_index.len}
+	for k, v in i.max_index {
+		kvs << KeyVal{k, v}
+	}
+	i.saved_disabled << i.disabled
+	i.saved_key_vals << kvs
+	if is_function {
+		i.disabled = false
+	}
+	i.level += 1
+}
+
+// restoring the data as it was before the if/for/unsafe block
+[if !no_bounds_checking]
+fn (mut i IndexState) unindent() {
+	i.level -= 1
+	mut keys := []string{cap: i.max_index.len}
+	for k, _ in i.max_index {
+		keys << k
+	}
+	for k in keys {
+		i.max_index.delete(k)
+	}
+	for saved in i.saved_key_vals.pop() {
+		i.max_index[saved.key] = saved.value
+	}
+	i.disabled = i.saved_disabled.pop()
+}
diff --git a/vlib/v/transformer/transformer.v b/vlib/v/transformer/transformer.v

Original file line number	Diff line number	Diff line change
`@@ -8,7 +8,7 @@ import strings`
`8`	`8`	`pub fn (mut c Checker) match_expr(mut node ast.MatchExpr) ast.Type {`
`9`	`9`	`node.is_expr = c.expected_type != ast.void_type`
`10`	`10`	`node.expected_type = c.expected_type`
`11`		`- if node.cond is ast.ParExpr && !c.pref.translated {`
	`11`	`+ if mut node.cond is ast.ParExpr && !c.pref.translated {`
`12`	`12`	c.error('unnecessary `()` in `match` condition, use `match expr {` instead of `match (expr) {`.',
`13`	`13`	`node.cond.pos)`
`14`	`14`	`}`