From 387af74302fbfdd3d8ad980e334fc20e9372b3fe Mon Sep 17 00:00:00 2001
From: Turiiya <34311583+ttytm@users.noreply.github.com>
Date: Sat, 4 May 2024 23:18:39 +0200
Subject: [PATCH] vet, parser: rewrite vet error handling (improve parser
 performance extend vvet) p1 (#21417)

---
 cmd/tools/install_binaryen.vsh |  8 +--
 cmd/tools/vvet/filter.v        | 27 ++++++++++
 cmd/tools/vvet/vvet.v          | 99 ++++++++++++++++++++++++++++++----
 vlib/v/parser/parser.v         | 40 --------------
 4 files changed, 121 insertions(+), 53 deletions(-)
 create mode 100644 cmd/tools/vvet/filter.v

diff --git a/cmd/tools/install_binaryen.vsh b/cmd/tools/install_binaryen.vsh
index 72546c037c0b6e..cae685284aa308 100755
--- a/cmd/tools/install_binaryen.vsh
+++ b/cmd/tools/install_binaryen.vsh
@@ -26,10 +26,10 @@ fn main() {
 	// TODO: add retries here, github requests can fail
 	jq := http.get_text('https://api.github.com/repos/WebAssembly/binaryen/releases/latest')
 	tag := json.decode(JQ, jq)!.tag_name
-        if github_job != '' {
-                dump(jq)
-                dump(tag)
-        }
+	     if github_job != '' {
+	             dump(jq)
+	             dump(tag)
+	     }
 	*/
 	tag := 'version_112'
 
diff --git a/cmd/tools/vvet/filter.v b/cmd/tools/vvet/filter.v
new file mode 100644
index 00000000000000..fe81614b63b8c7
--- /dev/null
+++ b/cmd/tools/vvet/filter.v
@@ -0,0 +1,27 @@
+module main
+
+import v.token
+import v.vet
+
+type FilteredLines = map[vet.ErrorType]map[int]bool
+
+fn (mut fl FilteredLines) comments(is_multi bool, pos token.Pos) {
+	if !is_multi {
+		return
+	}
+	for ln in pos.line_nr + 1 .. pos.last_line + 1 {
+		fl[.space_indent][ln] = true
+	}
+}
+
+fn (mut fl FilteredLines) assigns(pos token.Pos) {
+	if pos.line_nr == pos.last_line {
+		return
+	}
+	for ln in pos.line_nr + 1 .. pos.last_line {
+		fl[.trailing_space][ln] = true
+		fl[.space_indent][ln] = true
+	}
+	fl[.trailing_space][pos.line_nr] = true
+	fl[.space_indent][pos.last_line] = true
+}
diff --git a/cmd/tools/vvet/vvet.v b/cmd/tools/vvet/vvet.v
index 69aab234d3a1d6..ebd10d86281ea2 100644
--- a/cmd/tools/vvet/vvet.v
+++ b/cmd/tools/vvet/vvet.v
@@ -13,12 +13,13 @@ import v.help
 import term
 
 struct Vet {
-	opt Options
 mut:
-	errors  []vet.Error
-	warns   []vet.Error
-	notices []vet.Error
-	file    string
+	opt            Options
+	errors         []vet.Error
+	warns          []vet.Error
+	notices        []vet.Error
+	file           string
+	filtered_lines FilteredLines
 }
 
 struct Options {
@@ -28,6 +29,8 @@ struct Options {
 	show_warnings       bool
 	use_color           bool
 	doc_private_fns_too bool
+mut:
+	is_vfmt_off bool
 }
 
 const term_colors = term.can_show_color_on_stderr()
@@ -107,20 +110,42 @@ fn (mut vt Vet) vet_file(path string) {
 	prefs.is_vsh = path.ends_with('.vsh')
 	mut table := ast.new_table()
 	vt.vprintln("vetting file '${path}'...")
-	_, errors, notices := parser.parse_vet_file(path, mut table, prefs)
+	file, errors, notices := parser.parse_vet_file(path, mut table, prefs)
+	vt.stmts(file.stmts)
 	// Transfer errors from scanner and parser
 	vt.errors << errors
 	vt.notices << notices
-	// Scan each line in file for things to improve
 	source_lines := os.read_lines(vt.file) or { []string{} }
-	for lnumber, line in source_lines {
-		vt.vet_line(source_lines, line, lnumber)
+	for ln, line in source_lines {
+		vt.vet_line(source_lines, line, ln)
 	}
 }
 
 // vet_line vets the contents of `line` from `vet.file`.
 fn (mut vt Vet) vet_line(lines []string, line string, lnumber int) {
 	vt.vet_fn_documentation(lines, line, lnumber)
+	vt.vet_space_usage(line, lnumber)
+}
+
+fn (mut vt Vet) vet_space_usage(line string, lnumber int) {
+	if line.starts_with('// vfmt off') {
+		vt.opt.is_vfmt_off = true
+	} else if line.starts_with('// vfmt on') {
+		vt.opt.is_vfmt_off = false
+	}
+	if vt.opt.is_vfmt_off {
+		return
+	}
+	if lnumber !in vt.filtered_lines[.space_indent] {
+		if line.starts_with(' ') {
+			vt.error('Looks like you are using spaces for indentation.', lnumber, .vfmt)
+		}
+	}
+	if lnumber !in vt.filtered_lines[.trailing_space] {
+		if line.ends_with(' ') {
+			vt.error('Looks like you have trailing whitespace.', lnumber, .unknown)
+		}
+	}
 }
 
 // vet_fn_documentation ensures that functions are documented
@@ -229,6 +254,62 @@ fn (mut vt Vet) vet_fn_documentation(lines []string, line string, lnumber int) {
 	}
 }
 
+fn (mut vt Vet) stmts(stmts []ast.Stmt) {
+	for stmt in stmts {
+		vt.stmt(stmt)
+	}
+}
+
+fn (mut vt Vet) stmt(stmt ast.Stmt) {
+	match stmt {
+		ast.ConstDecl { vt.exprs(stmt.fields.map(it.expr)) }
+		ast.ExprStmt { vt.expr(stmt.expr) }
+		ast.Return { vt.exprs(stmt.exprs) }
+		ast.AssertStmt { vt.expr(stmt.expr) }
+		ast.AssignStmt { vt.exprs(stmt.right) }
+		ast.FnDecl { vt.stmts(stmt.stmts) }
+		else {}
+	}
+}
+
+fn (mut vt Vet) exprs(exprs []ast.Expr) {
+	for expr in exprs {
+		vt.expr(expr)
+	}
+}
+
+fn (mut vt Vet) expr(expr ast.Expr) {
+	match expr {
+		ast.Comment {
+			vt.filtered_lines.comments(expr.is_multi, expr.pos)
+		}
+		ast.StringLiteral, ast.StringInterLiteral {
+			vt.filtered_lines.assigns(expr.pos)
+		}
+		ast.ArrayInit {
+			vt.filtered_lines.assigns(expr.pos)
+		}
+		ast.InfixExpr {
+			vt.expr(expr.right)
+		}
+		ast.CallExpr {
+			vt.expr(expr.left)
+			vt.exprs(expr.args.map(it.expr))
+		}
+		ast.MatchExpr {
+			for b in expr.branches {
+				vt.stmts(b.stmts)
+			}
+		}
+		ast.IfExpr {
+			for b in expr.branches {
+				vt.stmts(b.stmts)
+			}
+		}
+		else {}
+	}
+}
+
 fn (vt &Vet) vprintln(s string) {
 	if !vt.opt.is_verbose {
 		return
diff --git a/vlib/v/parser/parser.v b/vlib/v/parser/parser.v
index 86e9b91f3c05d8..2f438a9741cb14 100644
--- a/vlib/v/parser/parser.v
+++ b/vlib/v/parser/parser.v
@@ -278,28 +278,6 @@ pub fn parse_vet_file(path string, mut table_ ast.Table, pref_ &pref.Preferences
 		warnings: []errors.Warning{}
 	}
 	p.set_path(path)
-	if p.scanner.text.contains_any_substr(['\n  ', ' \n']) {
-		source_lines := os.read_lines(path) or { []string{} }
-		mut is_vfmt_off := false
-		for lnumber, line in source_lines {
-			if line.starts_with('// vfmt off') {
-				is_vfmt_off = true
-			} else if line.starts_with('// vfmt on') {
-				is_vfmt_off = false
-			}
-			if is_vfmt_off {
-				continue
-			}
-			if line.starts_with('  ') {
-				p.vet_error('Looks like you are using spaces for indentation.', lnumber,
-					.vfmt, .space_indent)
-			}
-			if line.ends_with(' ') {
-				p.vet_error('Looks like you have trailing whitespace.', lnumber, .unknown,
-					.trailing_space)
-			}
-		}
-	}
 	p.vet_errors << p.scanner.vet_errors
 	file := p.parse()
 	unsafe { p.free_scanner() }
@@ -910,11 +888,6 @@ fn (mut p Parser) comment() ast.Comment {
 	is_multi := num_newlines > 0
 	pos.last_line = pos.line_nr + num_newlines
 	p.next()
-	// Filter out false positive space indent vet errors inside comments
-	if p.vet_errors.len > 0 && is_multi {
-		p.vet_errors = p.vet_errors.filter(it.typ != .space_indent
-			|| it.pos.line_nr - 1 > pos.last_line || it.pos.line_nr - 1 <= pos.line_nr)
-	}
 	return ast.Comment{
 		text: text
 		is_multi: is_multi
@@ -3467,17 +3440,6 @@ fn (mut p Parser) enum_val() ast.EnumVal {
 	}
 }
 
-fn (mut p Parser) filter_string_vet_errors(pos token.Pos) {
-	if p.vet_errors.len == 0 {
-		return
-	}
-	p.vet_errors = p.vet_errors.filter(
-		(it.typ == .trailing_space && it.pos.line_nr - 1 >= pos.last_line)
-		|| (it.typ != .trailing_space && it.pos.line_nr - 1 > pos.last_line)
-		|| (it.typ == .space_indent && it.pos.line_nr - 1 <= pos.line_nr)
-		|| (it.typ != .space_indent && it.pos.line_nr - 1 < pos.line_nr))
-}
-
 fn (mut p Parser) string_expr() ast.Expr {
 	is_raw := p.tok.kind == .name && p.tok.lit == 'r'
 	is_cstr := p.tok.kind == .name && p.tok.lit == 'c'
@@ -3490,7 +3452,6 @@ fn (mut p Parser) string_expr() ast.Expr {
 	pos.last_line = pos.line_nr + val.count('\n')
 	if p.peek_tok.kind != .str_dollar {
 		p.next()
-		p.filter_string_vet_errors(pos)
 		node = ast.StringLiteral{
 			val: val
 			is_raw: is_raw
@@ -3570,7 +3531,6 @@ fn (mut p Parser) string_expr() ast.Expr {
 		fposs << p.prev_tok.pos()
 	}
 	pos = pos.extend(p.prev_tok.pos())
-	p.filter_string_vet_errors(pos)
 	node = ast.StringInterLiteral{
 		vals: vals
 		exprs: exprs