From e03ea993441cba574f04ef608ca2066f393b1152 Mon Sep 17 00:00:00 2001 From: Ryota Date: Fri, 1 Oct 2021 18:59:38 +0100 Subject: [PATCH] Fix incorrect marker removal (#64) --- internal/file/process.go | 49 +++++++++++++++++++++++++++------ internal/file/process_test.go | 42 +++++++++++++++++++++++++++- internal/marker/marker_regex.go | 2 +- internal/regexpplus/map.go | 13 +++++++++ 4 files changed, 96 insertions(+), 10 deletions(-) diff --git a/internal/file/process.go b/internal/file/process.go index 3cf524e..fc0237b 100644 --- a/internal/file/process.go +++ b/internal/file/process.go @@ -8,6 +8,7 @@ import ( "regexp" "github.com/upsidr/importer/internal/marker" + "github.com/upsidr/importer/internal/regexpplus" ) const br = byte('\n') @@ -78,13 +79,47 @@ func (f *File) RemoveMarkers() { scanner := bufio.NewScanner(bytes.NewReader(f.ContentAfter)) for scanner.Scan() { currentLine := scanner.Bytes() - if s := importerRe.Find(currentLine); s != nil { - // Importer Marker found, ignore - continue + + if s := importerRe.Find(currentLine); len(s) != 0 { + matches, err := regexpplus.MapWithNamedSubgroupsRegexp(string(currentLine), importerRe) + if err != nil { + panic(err) // Unknown error, should not happen + } + precedingData := []byte("") + // If regexp contains importer_marker_indentation, keep that untouched. + if m, ok := matches["importer_marker_indentation"]; ok { + precedingData = []byte(m) + } + + markerRemoved := importerRe.ReplaceAll(currentLine, precedingData) + + // If the given line only contains marker and some spaces, simply + // remove the entire line. 
+ if b := bytes.TrimSpace(markerRemoved); len(b) == 0 { + continue + } + currentLine = markerRemoved } - if s := exporterRe.Find(currentLine); s != nil { - // Exporter Marker found, ignore - continue + + if s := exporterRe.Find(currentLine); len(s) != 0 { + matches, err := regexpplus.MapWithNamedSubgroupsRegexp(string(currentLine), exporterRe) + if err != nil { + panic(err) // Unknown error, should not happen + } + precedingData := []byte("") + // If regexp contains export_marker_indent, keep that untouched. + if m, ok := matches["export_marker_indent"]; ok { + precedingData = []byte(m) + } + + markerRemoved := exporterRe.ReplaceAll(currentLine, precedingData) + + // If the given line only contains marker and some spaces, simply + // remove the entire line. + if b := bytes.TrimSpace(markerRemoved); len(b) == 0 { + continue + } + currentLine = markerRemoved } currentLine = append(currentLine, []byte("\n")...) @@ -92,6 +127,4 @@ func (f *File) RemoveMarkers() { } f.ContentAfter = newResult - - return } diff --git a/internal/file/process_test.go b/internal/file/process_test.go index aaa9625..16d06a6 100644 --- a/internal/file/process_test.go +++ b/internal/file/process_test.go @@ -184,6 +184,46 @@ a: a: b: c: data +`), + }, + "yaml: importer marker is removed but the rest of the line is kept": { + file: &File{ + FileName: "test-file.yaml", + ContentAfter: []byte(` +data: + - # == i: abc / begin from: some-file.yaml#[abc] == + a: + b: + c: data + # == i: abc / end == +`), + }, + want: []byte(` +data: + - + a: + b: + c: data +`), + }, + "yaml: exporter marker is removed but the rest of the line is kept": { + file: &File{ + FileName: "test-file.yaml", + ContentAfter: []byte(` +data: + - # == e: abc / begin == + a: + b: + c: data + # == i: abc / end == +`), + }, + want: []byte(` +data: + - + a: + b: + c: data `), }, "unknown file type: keep input as is": { @@ -214,7 +254,7 @@ a: for name, tc := range cases { t.Run(name, func(t *testing.T) { tc.file.RemoveMarkers() - if 
diff := cmp.Diff(tc.want, tc.file.ContentAfter); diff != "" { + if diff := cmp.Diff(string(tc.want), string(tc.file.ContentAfter)); diff != "" { t.Errorf("parsed result didn't match (-want / +got)\n%s", diff) } }) diff --git a/internal/marker/marker_regex.go b/internal/marker/marker_regex.go index d09ed17..44b8976 100644 --- a/internal/marker/marker_regex.go +++ b/internal/marker/marker_regex.go @@ -44,5 +44,5 @@ var ( // # == export: random_data / begin == // random-data: this is exported // # == export: random_data / end == - ExporterMarkerYAML = `(?P<export_marker_indent>\s*)# == (exptr|export|exporter|e): (?P<export_marker_name>\S+) \/ (?P<exporter_marker_condition>begin|end) ==` + ExporterMarkerYAML = `(?P<export_marker_indent>.*)# == (exptr|export|exporter|e): (?P<export_marker_name>\S+) \/ (?P<exporter_marker_condition>begin|end) ==` ) diff --git a/internal/regexpplus/map.go b/internal/regexpplus/map.go index 6d9606a..7b18dde 100644 --- a/internal/regexpplus/map.go +++ b/internal/regexpplus/map.go @@ -21,6 +21,19 @@ var ( // create a map that will not have all the matched components. func MapWithNamedSubgroups(targetLine string, expression string) (map[string]string, error) { re := regexp.MustCompile(expression) + return MapWithNamedSubgroupsRegexp(targetLine, re) +} + +// MapWithNamedSubgroupsRegexp runs regexp FindStringSubmatch against +// `targetLine` input, and returns a map representation. The map contains the +// key as the subgroup name, and value for the matched data. +// +// If there is no match found, an error of ErrNoMatch will be returned. +// +// This can be used for regular expression which does not have any subgroup, +// but as it is designed specifically for subgroup based use cases, it will +// create a map that will not have all the matched components. +func MapWithNamedSubgroupsRegexp(targetLine string, re *regexp.Regexp) (map[string]string, error) { ms := re.FindStringSubmatch(targetLine) if len(ms) == 0 {