From e03ea993441cba574f04ef608ca2066f393b1152 Mon Sep 17 00:00:00 2001
From: Ryota <rytswd@gmail.com>
Date: Fri, 1 Oct 2021 18:59:38 +0100
Subject: [PATCH] Fix incorrect marker removal (#64)

---
 internal/file/process.go        | 49 +++++++++++++++++++++++++++------
 internal/file/process_test.go   | 42 +++++++++++++++++++++++++++-
 internal/marker/marker_regex.go |  2 +-
 internal/regexpplus/map.go      | 13 +++++++++
 4 files changed, 96 insertions(+), 10 deletions(-)

diff --git a/internal/file/process.go b/internal/file/process.go
index 3cf524e..fc0237b 100644
--- a/internal/file/process.go
+++ b/internal/file/process.go
@@ -8,6 +8,7 @@ import (
 	"regexp"
 
 	"github.com/upsidr/importer/internal/marker"
+	"github.com/upsidr/importer/internal/regexpplus"
 )
 
 const br = byte('\n')
@@ -78,13 +79,47 @@ func (f *File) RemoveMarkers() {
 	scanner := bufio.NewScanner(bytes.NewReader(f.ContentAfter))
 	for scanner.Scan() {
 		currentLine := scanner.Bytes()
-		if s := importerRe.Find(currentLine); s != nil {
-			// Importer Marker found, ignore
-			continue
+
+		if s := importerRe.Find(currentLine); len(s) != 0 {
+			matches, err := regexpplus.MapWithNamedSubgroupsRegexp(string(currentLine), importerRe)
+			if err != nil {
+				panic(err) // Unknown error, should not happen
+			}
+			precedingData := []byte("")
+			// If the match has an importer_marker_indentation subgroup, keep it as is.
+			if m, ok := matches["importer_marker_indentation"]; ok {
+				precedingData = []byte(m)
+			}
+
+			markerRemoved := importerRe.ReplaceAll(currentLine, precedingData)
+
+			// If the given line only contains marker and some spaces, simply
+			// remove the entire line.
+			if b := bytes.TrimSpace(markerRemoved); len(b) == 0 {
+				continue
+			}
+			currentLine = markerRemoved
 		}
-		if s := exporterRe.Find(currentLine); s != nil {
-			// Exporter Marker found, ignore
-			continue
+
+		if s := exporterRe.Find(currentLine); len(s) != 0 {
+			matches, err := regexpplus.MapWithNamedSubgroupsRegexp(string(currentLine), exporterRe)
+			if err != nil {
+				panic(err) // Unknown error, should not happen
+			}
+			precedingData := []byte("")
+			// Keep the text preceding the marker, captured as export_marker_indent.
+			if m, ok := matches["export_marker_indent"]; ok {
+				precedingData = []byte(m)
+			}
+
+			markerRemoved := exporterRe.ReplaceAll(currentLine, precedingData)
+
+			// If the given line only contains marker and some spaces, simply
+			// remove the entire line.
+			if b := bytes.TrimSpace(markerRemoved); len(b) == 0 {
+				continue
+			}
+			currentLine = markerRemoved
 		}
 
 		currentLine = append(currentLine, []byte("\n")...)
@@ -92,6 +127,4 @@ func (f *File) RemoveMarkers() {
 	}
 
 	f.ContentAfter = newResult
-
-	return
 }
diff --git a/internal/file/process_test.go b/internal/file/process_test.go
index aaa9625..16d06a6 100644
--- a/internal/file/process_test.go
+++ b/internal/file/process_test.go
@@ -184,6 +184,46 @@ a:
 a:
   b:
     c: data
+`),
+		},
+		"yaml: importer marker is removed but the rest of the line is kept": {
+			file: &File{
+				FileName: "test-file.yaml",
+				ContentAfter: []byte(`
+data:
+  - # == i: abc / begin from: some-file.yaml#[abc] ==
+    a:
+      b:
+        c: data
+    # == i: abc / end ==
+`),
+			},
+			want: []byte(`
+data:
+  - 
+    a:
+      b:
+        c: data
+`),
+		},
+		"yaml: exporter marker is removed but the rest of the line is kept": {
+			file: &File{
+				FileName: "test-file.yaml",
+				ContentAfter: []byte(`
+data:
+  - # == e: abc / begin ==
+    a:
+      b:
+        c: data
+    # == i: abc / end ==
+`),
+			},
+			want: []byte(`
+data:
+  - 
+    a:
+      b:
+        c: data
 `),
 		},
 		"unknown file type: keep input as is": {
@@ -214,7 +254,7 @@ a:
 	for name, tc := range cases {
 		t.Run(name, func(t *testing.T) {
 			tc.file.RemoveMarkers()
-			if diff := cmp.Diff(tc.want, tc.file.ContentAfter); diff != "" {
+			if diff := cmp.Diff(string(tc.want), string(tc.file.ContentAfter)); diff != "" {
 				t.Errorf("parsed result didn't match (-want / +got)\n%s", diff)
 			}
 		})
diff --git a/internal/marker/marker_regex.go b/internal/marker/marker_regex.go
index d09ed17..44b8976 100644
--- a/internal/marker/marker_regex.go
+++ b/internal/marker/marker_regex.go
@@ -44,5 +44,5 @@ var (
 	//     # == export: random_data / begin ==
 	//     random-data: this is exported
 	//     # == export: random_data / end ==
-	ExporterMarkerYAML = `(?P<export_marker_indent>\s*)# == (exptr|export|exporter|e): (?P<export_marker_name>\S+) \/ (?P<exporter_marker_condition>begin|end) ==`
+	ExporterMarkerYAML = `(?P<export_marker_indent>.*)# == (exptr|export|exporter|e): (?P<export_marker_name>\S+) \/ (?P<exporter_marker_condition>begin|end) ==`
 )
diff --git a/internal/regexpplus/map.go b/internal/regexpplus/map.go
index 6d9606a..7b18dde 100644
--- a/internal/regexpplus/map.go
+++ b/internal/regexpplus/map.go
@@ -21,6 +21,19 @@ var (
 // create a map that will not have all the matched components.
 func MapWithNamedSubgroups(targetLine string, expression string) (map[string]string, error) {
 	re := regexp.MustCompile(expression)
+	return MapWithNamedSubgroupsRegexp(targetLine, re)
+}
+
+// MapWithNamedSubgroupsRegexp runs FindStringSubmatch of the given compiled
+// regexp against the `targetLine` input, and returns a map representation. The
+// map uses the subgroup name as the key, and the matched data as the value.
+//
+// If there is no match found, an error of ErrNoMatch will be returned.
+//
+// This can be used with a regular expression that does not have any subgroups,
+// but as it is designed specifically for subgroup-based use cases, the
+// resulting map will not contain all the matched components.
+func MapWithNamedSubgroupsRegexp(targetLine string, re *regexp.Regexp) (map[string]string, error) {
 	ms := re.FindStringSubmatch(targetLine)
 
 	if len(ms) == 0 {