From cee4acbaf4d5624ed32c43c6759777bebf4a9ae7 Mon Sep 17 00:00:00 2001 From: Denis Blokhin Date: Wed, 27 May 2026 23:26:58 +0300 Subject: [PATCH 1/3] feat: parse multi-file git diffs piped to stdin --- app/diff/multiparse.go | 153 ++++++++++++++++++++ app/diff/multiparse_test.go | 250 +++++++++++++++++++++++++++++++++ app/diff/stdin.go | 89 +++++++++++- app/diff/stdin_test.go | 268 ++++++++++++++++++++++++++++++++++++ app/stdin.go | 19 ++- 5 files changed, 777 insertions(+), 2 deletions(-) create mode 100644 app/diff/multiparse.go create mode 100644 app/diff/multiparse_test.go create mode 100644 app/diff/stdin_test.go diff --git a/app/diff/multiparse.go b/app/diff/multiparse.go new file mode 100644 index 00000000..1e33b9d9 --- /dev/null +++ b/app/diff/multiparse.go @@ -0,0 +1,153 @@ +package diff + +import ( + "errors" + "regexp" + "strings" +) + +// rawFileSection holds a raw diff section before parsing. +type rawFileSection struct { + path string // new-side path, extracted by parseFileHeader + status FileStatus // derived from mode/rename headers + diffText string // full section text to pass to parseUnifiedDiff +} + +// unifiedDiffSniffLimit caps how many leading bytes IsUnifiedDiff inspects. +const unifiedDiffSniffLimit = 4096 + +// IsUnifiedDiff reports whether the content looks like a git unified diff. +// Detection criteria: +// - starts with "diff --git a/" +// - has "@@ -" hunk header pattern within the sniff limit +func IsUnifiedDiff(content string) bool { + if content == "" { + return false + } + + // inspect only the leading bytes for efficiency + sample := content[:min(unifiedDiffSniffLimit, len(content))] + + // primary: git diff header + if strings.Contains(sample, "diff --git a/") { + return true + } + + // secondary: hunk header pattern + return strings.Contains(sample, "@@ -") +} + +// diffGitHeaderRe matches "diff --git a/path b/path", including quoted paths with spaces. +// Handles git's path forms: +// - diff --git a/path b/path (simple) +// - diff --git "a/path with spaces" "b/path with spaces" (quoted whole path) +// - diff --git a/"path with spaces" b/"path with spaces" (quoted path after prefix) +var diffGitHeaderRe = regexp.MustCompile(`^diff --git ("?a/[^"]*"?|"?a/.*?") ("?b/[^"]*"?|"?b/.*?")`) + +// splitMultiFileDiff splits a multi-file unified diff into per-file sections. +// it handles git format: "diff --git a/path b/path". +// each section includes the full diff header through to the next file boundary +// (next "diff --git" or end of input). path and status are resolved once per +// section by parseFileHeader — there is no separate inline header parse. +func splitMultiFileDiff(raw string) ([]rawFileSection, error) { + if raw == "" { + return nil, errors.New("empty input") + } + + var sections []rawFileSection + var current strings.Builder + inSection := false + + // flush appends the accumulated section text, resolving path/status from its header. + // sections without a parseable new-side path are skipped. + flush := func() { + if !inSection { + return + } + text := current.String() + path, status := parseFileHeader(text) + if path == "" { + return + } + sections = append(sections, rawFileSection{path: path, status: status, diffText: text}) + } + + for line := range strings.SplitSeq(raw, "\n") { + if strings.HasPrefix(line, "diff --git ") { + // new file boundary: flush the previous section and start a fresh one + flush() + current.Reset() + inSection = true + } + if inSection { + current.WriteString(line) + current.WriteString("\n") + } + } + flush() + + if len(sections) == 0 { + return nil, errors.New("no file sections found") + } + + return sections, nil +} + +// cleanPath strips a leading "a/" or "b/" prefix and surrounding quotes from a path. +// Handles both "b/path with spaces" (quotes outside prefix) and b/"path with spaces" +// (quotes inside prefix) by stripping the prefix, then the quotes, then the prefix again. +func cleanPath(path string) string { + path = strings.TrimPrefix(strings.TrimPrefix(path, "b/"), "a/") + path = strings.Trim(path, `"`) + return strings.TrimPrefix(strings.TrimPrefix(path, "b/"), "a/") +} + +// parseFileHeader extracts the new-side path and change status from a diff +// section's header lines. it parses: +// - "diff --git a/old b/new" → path +// - "new file mode" → FileAdded +// - "deleted file mode" → FileDeleted +// - "rename from/to" → FileRenamed (uses the new path) +// +// status defaults to FileModified. +func parseFileHeader(section string) (path string, status FileStatus) { + status = FileModified + + for line := range strings.SplitSeq(section, "\n") { + // header ends at the first hunk + if strings.HasPrefix(line, "@@") { + break + } + + switch { + case strings.HasPrefix(line, "new file mode"): + status = FileAdded + case strings.HasPrefix(line, "deleted file mode"): + status = FileDeleted + case strings.HasPrefix(line, "rename from"): + status = FileRenamed + case strings.HasPrefix(line, "diff --git "): + if m := diffGitHeaderRe.FindStringSubmatch(line); len(m) >= 3 { + // use the b-side (new) path + path = cleanPath(m[2]) + } else if rest := strings.TrimPrefix(line, "diff --git "); rest != "" { + // fallback for paths the regex cannot split: take the last field as b-path + if parts := strings.Fields(rest); len(parts) >= 2 { + path = cleanPath(parts[len(parts)-1]) + } + } + case strings.HasPrefix(line, "rename to "): + // rename target overrides the diff --git path + if parts := strings.Fields(line); len(parts) >= 3 { + path = strings.Join(parts[2:], " ") + } + case strings.HasPrefix(line, "+++ "): + // fallback: derive the path from the +++ line when the header lacked one + if path == "" { + path = cleanPath(strings.TrimSpace(strings.TrimPrefix(line, "+++ "))) + } + } + } + + return path, status +} diff --git a/app/diff/multiparse_test.go b/app/diff/multiparse_test.go new file mode 100644 index 00000000..77b8bb98 --- /dev/null +++ b/app/diff/multiparse_test.go @@ -0,0 +1,250 @@ +package diff + +import ( + "testing" +) + +func TestIsUnifiedDiff(t *testing.T) { + tests := []struct { + name string + content string + want bool + }{ + { + name: "empty", + content: "", + want: false, + }, + { + name: "raw text", + content: "This is just plain text\nwithout any diff markers\n", + want: false, + }, + { + name: "git diff header", + content: "diff --git a/file.go b/file.go\nindex abc..def\n", + want: true, + }, + { + name: "hunk header", + content: "--- a/file.go\n+++ b/file.go\n@@ -1,3 +1,4 @@\n", + want: true, + }, + { + name: "diff-like text in code", + content: "func TestDiff() {\n // check @@ format\n s := \"diff --git\"\n}\n", + want: false, // Should not detect as diff (no "diff --git a/" marker) + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := IsUnifiedDiff(tt.content); got != tt.want { + t.Errorf("IsUnifiedDiff() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestSplitMultiFileDiff(t *testing.T) { + tests := []struct { + name string + raw string + want int // number of sections + wantErr bool + }{ + { + name: "empty", + raw: "", + wantErr: true, + }, + { + name: "single file", + raw: `diff --git a/file.go b/file.go +index abc..def +--- a/file.go ++++ b/file.go +@@ -1,1 +1,2 @@ + line1 ++line2 +`, + want: 1, + }, + { + name: "two files", + raw: `diff --git a/file1.go b/file1.go +index abc..def +--- a/file1.go ++++ b/file1.go +@@ -1,1 +1,2 @@ + line1 ++line2 + +diff --git a/file2.go b/file2.go +index ghi..jkl +--- a/file2.go ++++ b/file2.go +@@ -1,1 +1,1 @@ +-old ++new +`, + want: 2, + }, + { + name: "three files with new and deleted", + raw: `diff --git a/new.go b/new.go +new file mode 100644 +index 0000000..abc1234 +--- /dev/null ++++ b/new.go +@@ -0,0 +1,1 @@ ++new content + +diff --git a/existing.go b/existing.go +index def..ghi +--- a/existing.go ++++ b/existing.go +@@ -1,1 +1,1 @@ +-old ++new + +diff --git a/deleted.go b/deleted.go +deleted file mode 100644 +index jkl..0000000 +--- a/deleted.go ++++ /dev/null +@@ -1,1 +0,0 @@ +-deleted content +`, + want: 3, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := splitMultiFileDiff(tt.raw) + if (err != nil) != tt.wantErr { + t.Errorf("splitMultiFileDiff() error = %v, wantErr %v", err, tt.wantErr) + return + } + if !tt.wantErr && len(got) != tt.want { + t.Errorf("splitMultiFileDiff() got %d sections, want %d", len(got), tt.want) + } + }) + } +} + +func TestParseFileHeader(t *testing.T) { + tests := []struct { + name string + section string + wantPath string + wantStatus FileStatus + }{ + { + name: "modified file", + section: `diff --git a/file.go b/file.go +index abc..def +--- a/file.go ++++ b/file.go +@@ -1,1 +1,2 @@`, + wantPath: "file.go", + wantStatus: FileModified, + }, + { + name: "new file", + section: `diff --git a/new.go b/new.go +new file mode 100644 +index 0000000..abc1234 +--- /dev/null ++++ b/new.go +@@ -0,0 +1,1 @@`, + wantPath: "new.go", + wantStatus: FileAdded, + }, + { + name: "deleted file", + section: `diff --git a/deleted.go b/deleted.go +deleted file mode 100644 +index abc..0000000 +--- a/deleted.go ++++ /dev/null +@@ -1,1 +0,0 @@`, + wantPath: "deleted.go", + wantStatus: FileDeleted, + }, + { + name: "renamed file", + section: `diff --git a/old.go b/new.go +similarity index 100% +rename from old.go +rename to new.go`, + wantPath: "new.go", + wantStatus: FileRenamed, + }, + { + name: "file with spaces", + section: `diff --git "a/path with spaces/file.go" "b/path with spaces/file.go" +index abc..def +--- "a/path with spaces/file.go" ++++ "b/path with spaces/file.go" +@@ -1,1 +1,1 @@`, + wantPath: "path with spaces/file.go", + wantStatus: FileModified, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + path, status := parseFileHeader(tt.section) + if path != tt.wantPath { + t.Errorf("parseFileHeader() path = %q, want %q", path, tt.wantPath) + } + if status != tt.wantStatus { + t.Errorf("parseFileHeader() status = %v, want %v", status, tt.wantStatus) + } + }) + } +} + +func TestCleanPath(t *testing.T) { + tests := []struct { + name string + input string + want string + }{ + { + name: "b prefix", + input: "b/path/file.go", + want: "path/file.go", + }, + { + name: "a prefix", + input: "a/path/file.go", + want: "path/file.go", + }, + { + name: "quoted path", + input: `"path/file.go"`, + want: "path/file.go", + }, + { + name: "b prefix with quotes", + input: `b/"path with spaces/file.go"`, + want: "path with spaces/file.go", + }, + { + name: "no prefix", + input: "path/file.go", + want: "path/file.go", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := cleanPath(tt.input); got != tt.want { + t.Errorf("cleanPath() = %q, want %q", got, tt.want) + } + }) + } +} diff --git a/app/diff/stdin.go b/app/diff/stdin.go index da212aa3..0f6176a4 100644 --- a/app/diff/stdin.go +++ b/app/diff/stdin.go @@ -1,6 +1,12 @@ package diff -import "io" +import ( + "errors" + "fmt" + "io" + "os" + "strings" +) // StdinReader is an in-memory renderer for scratch-buffer review mode. type StdinReader struct { @@ -22,6 +28,16 @@ func NewStdinReaderFromReader(name string, r io.Reader) (*StdinReader, error) { return NewStdinReader(name, lines), nil } +// NewStdinReaderFromString creates a StdinReader from string content. +// Used when content has already been read for multi-file detection. +func NewStdinReaderFromString(name, content string) (*StdinReader, error) { + lines, err := readReaderAsContext(strings.NewReader(content)) + if err != nil { + return nil, err + } + return NewStdinReader(name, lines), nil +} + // ChangedFiles returns the single synthetic filename. func (r *StdinReader) ChangedFiles(_ string, _ bool) ([]FileEntry, error) { return []FileEntry{{Path: r.name}}, nil @@ -35,3 +51,74 @@ func (r *StdinReader) FileDiff(_, file string, _ bool, _ int) ([]DiffLine, error } return r.lines, nil } + +// MultiFileStdinReader implements Renderer for multi-file unified diffs from stdin. +type MultiFileStdinReader struct { + sections map[string]parsedSection // path -> parsed diff lines + order []string // preserve file order from diff +} + +// parsedSection holds parsed diff lines and status for one file. +type parsedSection struct { + lines []DiffLine + status FileStatus +} + +// NewMultiFileStdinReader parses multi-file unified diff content. +// Returns (*MultiFileStdinReader, nil) on success. +// Falls back to StdinReader via the caller if detection fails. +func NewMultiFileStdinReader(content string) (*MultiFileStdinReader, error) { + sections, err := splitMultiFileDiff(content) + if err != nil { + return nil, fmt.Errorf("split multi-file diff: %w", err) + } + + r := &MultiFileStdinReader{ + sections: make(map[string]parsedSection, len(sections)), + order: make([]string, 0, len(sections)), + } + + for _, sec := range sections { + // reuse existing parseUnifiedDiff for each file section + lines, parseErr := parseUnifiedDiff(sec.diffText, 0) + if parseErr != nil { + // warn, skip this section, continue with the others + fmt.Fprintf(os.Stderr, "warning: failed to parse section %s: %v\n", sec.path, parseErr) + continue + } + r.sections[sec.path] = parsedSection{ + lines: lines, + status: sec.status, + } + r.order = append(r.order, sec.path) + } + + if len(r.sections) == 0 { + return nil, errors.New("no valid file sections parsed") + } + + return r, nil +} + +// ChangedFiles returns file entries in original diff order. +func (r *MultiFileStdinReader) ChangedFiles(_ string, _ bool) ([]FileEntry, error) { + entries := make([]FileEntry, 0, len(r.order)) + for _, path := range r.order { + sec := r.sections[path] + entries = append(entries, FileEntry{ + Path: path, + Status: sec.status, + }) + } + return entries, nil +} + +// FileDiff returns pre-parsed diff lines for the requested file. +// contextLines is ignored — sections are pre-parsed from the original diff. +func (r *MultiFileStdinReader) FileDiff(_, file string, _ bool, _ int) ([]DiffLine, error) { + sec, ok := r.sections[file] + if !ok { + return nil, nil + } + return sec.lines, nil +} diff --git a/app/diff/stdin_test.go b/app/diff/stdin_test.go new file mode 100644 index 00000000..643821b0 --- /dev/null +++ b/app/diff/stdin_test.go @@ -0,0 +1,268 @@ +package diff + +import ( + "testing" +) + +func TestMultiFileStdinReader_TwoFiles(t *testing.T) { + content := `diff --git a/file1.go b/file1.go +index abc..def +--- a/file1.go ++++ b/file1.go +@@ -1,1 +1,2 @@ + line1 ++line2 + +diff --git a/file2.go b/file2.go +index ghi..jkl +--- a/file2.go ++++ b/file2.go +@@ -1,1 +1,1 @@ +-old ++new +` + + r, err := NewMultiFileStdinReader(content) + if err != nil { + t.Fatalf("NewMultiFileStdinReader() error = %v", err) + } + + // test ChangedFiles + files, err := r.ChangedFiles("", false) + if err != nil { + t.Fatalf("ChangedFiles() error = %v", err) + } + if len(files) != 2 { + t.Errorf("ChangedFiles() returned %d files, want 2", len(files)) + } + if files[0].Path != "file1.go" { + t.Errorf("files[0].Path = %q, want %q", files[0].Path, "file1.go") + } + if files[1].Path != "file2.go" { + t.Errorf("files[1].Path = %q, want %q", files[1].Path, "file2.go") + } + + // test FileDiff for file1 + lines1, err := r.FileDiff("", "file1.go", false, 0) + if err != nil { + t.Fatalf("FileDiff(file1.go) error = %v", err) + } + if len(lines1) == 0 { + t.Error("FileDiff(file1.go) returned no lines") + } + + // test FileDiff for file2 + lines2, err := r.FileDiff("", "file2.go", false, 0) + if err != nil { + t.Fatalf("FileDiff(file2.go) error = %v", err) + } + if len(lines2) == 0 { + t.Error("FileDiff(file2.go) returned no lines") + } + + // test FileDiff for non-existent file + linesNone, err := r.FileDiff("", "nonexistent.go", false, 0) + if err != nil { + t.Fatalf("FileDiff(nonexistent.go) error = %v", err) + } + if linesNone != nil { + t.Error("FileDiff(nonexistent.go) should return nil") + } +} + +func TestMultiFileStdinReader_BinaryFile(t *testing.T) { + content := `diff --git a/text.go b/text.go +index abc..def +--- a/text.go ++++ b/text.go +@@ -1,1 +1,1 @@ +-old ++new + +diff --git a/image.png b/image.png +new file mode 100644 +index 0000000..mno7890 +Binary files /dev/null and b/image.png differ +` + + r, err := NewMultiFileStdinReader(content) + if err != nil { + t.Fatalf("NewMultiFileStdinReader() error = %v", err) + } + + files, err := r.ChangedFiles("", false) + if err != nil { + t.Fatalf("ChangedFiles() error = %v", err) + } + if len(files) != 2 { + t.Errorf("ChangedFiles() returned %d files, want 2", len(files)) + } + + // check that binary file was parsed + lines, err := r.FileDiff("", "image.png", false, 0) + if err != nil { + t.Fatalf("FileDiff(image.png) error = %v", err) + } + if len(lines) == 0 { + t.Error("FileDiff(image.png) returned no lines") + } + if len(lines) > 0 && !lines[0].IsBinary { + t.Error("FileDiff(image.png) should have IsBinary=true") + } +} + +func TestMultiFileStdinReader_NewDeletedFiles(t *testing.T) { + content := `diff --git a/new.go b/new.go +new file mode 100644 +index 0000000..abc1234 +--- /dev/null ++++ b/new.go +@@ -0,0 +1,1 @@ ++new content + +diff --git a/deleted.go b/deleted.go +deleted file mode 100644 +index jkl..0000000 +--- a/deleted.go ++++ /dev/null +@@ -1,1 +0,0 @@ +-deleted content +` + + r, err := NewMultiFileStdinReader(content) + if err != nil { + t.Fatalf("NewMultiFileStdinReader() error = %v", err) + } + + files, err := r.ChangedFiles("", false) + if err != nil { + t.Fatalf("ChangedFiles() error = %v", err) + } + if len(files) != 2 { + t.Errorf("ChangedFiles() returned %d files, want 2", len(files)) + } + + // check status + if files[0].Status != FileAdded { + t.Errorf("files[0].Status = %v, want FileAdded", files[0].Status) + } + if files[1].Status != FileDeleted { + t.Errorf("files[1].Status = %v, want FileDeleted", files[1].Status) + } +} + +func TestMultiFileStdinReader_RenamedFile(t *testing.T) { + content := `diff --git a/old.go b/new.go +similarity index 100% +rename from old.go +rename to new.go +` + + r, err := NewMultiFileStdinReader(content) + if err != nil { + t.Fatalf("NewMultiFileStdinReader() error = %v", err) + } + + files, err := r.ChangedFiles("", false) + if err != nil { + t.Fatalf("ChangedFiles() error = %v", err) + } + if len(files) != 1 { + t.Errorf("ChangedFiles() returned %d files, want 1", len(files)) + } + + // check that renamed file uses new name + if files[0].Path != "new.go" { + t.Errorf("files[0].Path = %q, want %q", files[0].Path, "new.go") + } + if files[0].Status != FileRenamed { + t.Errorf("files[0].Status = %v, want FileRenamed", files[0].Status) + } +} + +func TestMultiFileStdinReader_EmptyInput(t *testing.T) { + _, err := NewMultiFileStdinReader("") + if err == nil { + t.Error("NewMultiFileStdinReader(\"\") should return error") + } +} + +func TestMultiFileStdinReader_PreservesOrder(t *testing.T) { + content := `diff --git a/z.go b/z.go +index abc..def +--- a/z.go ++++ b/z.go +@@ -1,1 +1,1 @@ +-old ++new + +diff --git a/a.go b/a.go +index abc..def +--- a/a.go ++++ b/a.go +@@ -1,1 +1,1 @@ +-old ++new + +diff --git a/m.go b/m.go +index abc..def +--- a/m.go ++++ b/m.go +@@ -1,1 +1,1 @@ +-old ++new +` + + r, err := NewMultiFileStdinReader(content) + if err != nil { + t.Fatalf("NewMultiFileStdinReader() error = %v", err) + } + + files, err := r.ChangedFiles("", false) + if err != nil { + t.Fatalf("ChangedFiles() error = %v", err) + } + + // files should be in diff order, not alphabetical + if len(files) != 3 { + t.Fatalf("ChangedFiles() returned %d files, want 3", len(files)) + } + if files[0].Path != "z.go" { + t.Errorf("files[0].Path = %q, want %q", files[0].Path, "z.go") + } + if files[1].Path != "a.go" { + t.Errorf("files[1].Path = %q, want %q", files[1].Path, "a.go") + } + if files[2].Path != "m.go" { + t.Errorf("files[2].Path = %q, want %q", files[2].Path, "m.go") + } +} + +func TestNewStdinReaderFromString(t *testing.T) { + content := "line1\nline2\nline3\n" + name := "test.txt" + + r, err := NewStdinReaderFromString(name, content) + if err != nil { + t.Fatalf("NewStdinReaderFromString() error = %v", err) + } + + files, err := r.ChangedFiles("", false) + if err != nil { + t.Fatalf("ChangedFiles() error = %v", err) + } + if len(files) != 1 { + t.Errorf("ChangedFiles() returned %d files, want 1", len(files)) + } + if files[0].Path != name { + t.Errorf("files[0].Path = %q, want %q", files[0].Path, name) + } + + lines, err := r.FileDiff("", name, false, 0) + if err != nil { + t.Fatalf("FileDiff() error = %v", err) + } + if len(lines) == 0 { + t.Error("FileDiff() returned no lines") + } +} diff --git a/app/stdin.go b/app/stdin.go index 59383d75..f2aba189 100644 --- a/app/stdin.go +++ b/app/stdin.go @@ -3,6 +3,7 @@ package main import ( "errors" "fmt" + "io" "os" "github.com/umputun/revdiff/app/diff" @@ -80,11 +81,27 @@ func prepareStdinMode(opts options, stdin *os.File) (ui.Renderer, *os.File, erro return nil, nil, err } - renderer, err := diff.NewStdinReaderFromReader(stdinName(opts.StdinName), stdin) + // read all stdin into memory for detection + content, err := io.ReadAll(stdin) if err != nil { return nil, nil, fmt.Errorf("read stdin: %w", err) } + var renderer ui.Renderer + if diff.IsUnifiedDiff(string(content)) { + // try multi-file diff parsing; nil renderer falls through to context mode below + if multi, mErr := diff.NewMultiFileStdinReader(string(content)); mErr == nil { + renderer = multi + } + } + if renderer == nil { + // raw text, or fallback after a multi-file parse failure + renderer, err = diff.NewStdinReaderFromString(stdinName(opts.StdinName), string(content)) + if err != nil { + return nil, nil, fmt.Errorf("create stdin reader: %w", err) + } + } + tty, err := openTTY() if err != nil { return nil, nil, err From 3961ee0784dba34c32836a62bca1091f62de4432 Mon Sep 17 00:00:00 2001 From: Denis Blokhin Date: Thu, 28 May 2026 09:34:52 +0300 Subject: [PATCH 2/3] fix(stdin): harden multi-file diff detection and size-cap stdin Address PR #216 review: - fail NewMultiFileStdinReader on any per-section parse error so the caller falls back to raw-text mode for the whole input instead of silently dropping files behind an invisible stderr warning - cap stdin reads at 64 MiB via io.LimitReader and convert the buffer to a string once instead of three times - anchor the unified-diff sniffer to line-start (matching "diff --git a/" at column 0) and drop the loose "@@ -" substring fallback so markdown prose mentioning the marker is not reclassified - expose ErrNotUnifiedDiff sentinel for silent fallback; log non-sentinel parse failures via log.Printf so they survive the alt-screen - split MultiFileStdinReader into its own file, drop the redundant NewStdinReaderFromString constructor, unexport the sniffer - docs updated: README, site/docs.html, usage reference, all three SKILL.md copies (with gh pr diff / git format-patch guidance), ARCHITECTURE renderer table --- .claude-plugin/skills/revdiff/SKILL.md | 19 +++ .../skills/revdiff/references/usage.md | 10 +- README.md | 10 +- app/diff/multiparse.go | 26 ++-- app/diff/multiparse_test.go | 29 +++- app/diff/multistdin.go | 89 ++++++++++++ app/diff/stdin.go | 85 ----------- app/diff/stdin_test.go | 135 ++++++++++++++---- app/stdin.go | 73 +++++++--- app/stdin_test.go | 89 ++++++++++++ docs/ARCHITECTURE.md | 3 +- plugins/codex/skills/revdiff/SKILL.md | 19 +++ plugins/pi/skills/revdiff/SKILL.md | 3 + site/docs.html | 10 +- 14 files changed, 440 insertions(+), 160 deletions(-) create mode 100644 app/diff/multistdin.go diff --git a/.claude-plugin/skills/revdiff/SKILL.md b/.claude-plugin/skills/revdiff/SKILL.md index 953af735..314f7f5f 100644 --- a/.claude-plugin/skills/revdiff/SKILL.md +++ b/.claude-plugin/skills/revdiff/SKILL.md @@ -42,6 +42,25 @@ The script resolves the history dir from `$REVDIFF_HISTORY_DIR` (default `~/.con When the user asks to open an in-session review in revdiff (the conversation already contains review comments produced earlier in the session), write those comments to a temp file (e.g. `/tmp/revdiff-review-XXXXXX.md`) using the format documented in `references/usage.md` ("Output Format" section), then run the normal launcher flow (Step 1 ref detection, Step 2 invocation) with `--annotations=` appended. Step 3 onward handles the curated annotations as usual. +## Reviewing a Diff That Lives Outside the Working Tree + +Some review targets are not the current repo state: a GitHub PR diff, a patch file on disk, or `git format-patch -1 --stdout` output. Pipe the unified diff into `revdiff --stdin` and the input is parsed as a real multi-file diff (one tree entry per file, hunk navigation, per-file annotations) instead of a context-only buffer. revdiff auto-detects the unified-diff signature; on a malformed patch the input falls back silently to raw-text mode. + +Use this instead of the normal launcher flow when: +- the user asks to "review PR #N", "review this patch", "review `gh pr diff` output", or supplies a patch URL/path +- the diff describes commits that are not checked out locally (e.g. someone else's branch on a remote-only PR) +- the user pastes a unified diff and asks for a review of *that diff*, not the working tree + +Example invocations (route through the same launcher resolver as the normal flow): + +```bash +gh pr diff 123 | "$("${CLAUDE_SKILL_DIR}/scripts/resolve-launcher.sh" launch-revdiff.sh "${CLAUDE_PLUGIN_DATA}")" --stdin +git format-patch -1 --stdout | "$("${CLAUDE_SKILL_DIR}/scripts/resolve-launcher.sh" launch-revdiff.sh "${CLAUDE_PLUGIN_DATA}")" --stdin +cat /tmp/feature.patch | "$("${CLAUDE_SKILL_DIR}/scripts/resolve-launcher.sh" launch-revdiff.sh "${CLAUDE_PLUGIN_DATA}")" --stdin +``` + +`--stdin` is mutually exclusive with refs, `--staged`, `--only`, `--all-files`, `--include`, `--exclude`, and `--annotations`, so do not combine with the Step 1 ref detection — go directly to Step 3 once the launcher returns. Annotations come back keyed by the real file paths from the diff (not by `--stdin-name`). + ## How It Works 1. Launch revdiff in a terminal overlay (tmux popup, Zellij floating pane, kitty overlay, wezterm/Kaku split-pane, cmux split, ghostty split+zoom, iTerm2 split pane, or Emacs vterm frame) diff --git a/.claude-plugin/skills/revdiff/references/usage.md b/.claude-plugin/skills/revdiff/references/usage.md index 85464fd1..fe056ed2 100644 --- a/.claude-plugin/skills/revdiff/references/usage.md +++ b/.claude-plugin/skills/revdiff/references/usage.md @@ -99,11 +99,17 @@ revdiff --compare-old=a.txt --compare-new=b.txt ## Scratch-Buffer Review -Use `--stdin` to review arbitrary piped or redirected text as one synthetic file. All lines are treated as context, so single-file mode, inline annotations, file-level notes, search, wrap, collapsed mode, and structured output all work unchanged. +Use `--stdin` to review arbitrary piped or redirected text. revdiff sniffs the input for a git unified-diff signature: when a line beginning with `diff --git a/` is found near the start, the input is parsed as a real multi-file diff (one tree entry per file, with `+`/`-` markers, hunk navigation, word-diff, compact mode, per-file annotations). Otherwise the input is shown as a single context-only buffer — single-file mode, inline annotations, file-level notes, search, wrap, collapsed mode, and structured output all work unchanged. - `--stdin` is explicit and requires piped or redirected input -- `--stdin-name` sets the synthetic filename used in annotations and syntax highlighting +- `--stdin-name` sets the synthetic filename used by the context-only buffer (ignored in multi-file diff mode, where real paths are shown) - `--stdin` conflicts with refs, `--staged`, `--only`, `--all-files`, `--include`, and `--exclude` +- Any per-section parse failure falls the whole input back to raw-text mode so a malformed patch never silently drops files +- Input is capped at 64 MiB + +Examples piping a real diff: +- `gh pr diff 123 | revdiff --stdin` — review a GitHub PR end-to-end +- `git format-patch -1 --stdout | revdiff --stdin` — review the latest commit as a multi-file diff ## Key Bindings diff --git a/README.md b/README.md index b4d07e57..5dc41472 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ Built for a specific use case: reviewing code changes, plans, and documents with - Markdown TOC navigation: single-file markdown files in context-only mode show a table-of-contents pane with header navigation and active section tracking - All-files mode: browse and annotate all tracked files with `--all-files` (git `ls-files` or jj `file list`), filter with `--include` and `--exclude` - No-VCS file review: `--only` files outside a VCS repo (or not in any diff) are shown as context-only with full annotation support -- Scratch-buffer review: annotate arbitrary piped or redirected text with `--stdin`, optionally naming it with `--stdin-name` +- Scratch-buffer review: annotate arbitrary piped or redirected text with `--stdin`, optionally naming it with `--stdin-name`. When the piped content sniffs as a git unified diff, revdiff parses it as a real multi-file diff (review `gh pr diff` or `git format-patch -1 --stdout` output directly); otherwise the input is shown as a single context-only buffer. - Pi package: launch revdiff from pi, capture annotations, and send them to the agent immediately for the normal review loop - Review history: auto-saves annotations and diffs to `~/.config/revdiff/history/` on quit as a safety net - Fully customizable colors via environment variables, CLI flags, or config file @@ -589,16 +589,20 @@ revdiff --compare-old=a.txt --compare-new=b.txt ### Scratch-Buffer Review -Use `--stdin` to review arbitrary piped or redirected text as a single synthetic file. All lines are shown as context, so the normal single-file review flow still works: annotations, file-level notes, search, wrap, collapsed mode, and structured output. +Use `--stdin` to review arbitrary piped or redirected text. revdiff sniffs the input for a git unified-diff signature: when a line beginning with `diff --git a/` is found near the start, the input is parsed as a real multi-file diff (one tree entry per file, with `+`/`-` markers, hunk navigation, word-diff, compact mode, and per-file annotations); otherwise the input is shown as a single context-only buffer with all lines as context, supporting annotations, file-level notes, search, wrap, collapsed mode, and structured output. Any per-section parse failure falls the whole input back to raw-text mode so a malformed patch never silently drops files. Input is capped at 64 MiB. `--stdin` is explicit and mutually exclusive with refs, `--staged`, `--only`, `--all-files`, `--include`, `--exclude`, and `--annotations`. stdin mode requires piped or redirected input; plain terminal stdin is rejected to avoid accidentally launching an empty scratch buffer. -Use `--stdin-name` to control the synthetic filename. This gives annotation output a stable key and enables filename-based syntax highlighting or markdown TOC activation: +Use `--stdin-name` to control the synthetic filename for the context-only case (it is ignored in multi-file diff mode, where the tree shows the real paths). This gives annotation output a stable key and enables filename-based syntax highlighting or markdown TOC activation: ```bash echo "plain text" | revdiff --stdin printf '# Plan\n\nBody\n' | revdiff --stdin --stdin-name plan.md git show HEAD~1:README.md | revdiff --stdin --stdin-name README.md + +# multi-file diff parsing — tree shows real paths, per-file annotations +gh pr diff 123 | revdiff --stdin +git format-patch -1 --stdout | revdiff --stdin ``` ### Review Description diff --git a/app/diff/multiparse.go b/app/diff/multiparse.go index 1e33b9d9..72b7824a 100644 --- a/app/diff/multiparse.go +++ b/app/diff/multiparse.go @@ -13,14 +13,17 @@ type rawFileSection struct { diffText string // full section text to pass to parseUnifiedDiff } -// unifiedDiffSniffLimit caps how many leading bytes IsUnifiedDiff inspects. +// unifiedDiffSniffLimit caps how many leading bytes isUnifiedDiff inspects. const unifiedDiffSniffLimit = 4096 -// IsUnifiedDiff reports whether the content looks like a git unified diff. -// Detection criteria: -// - starts with "diff --git a/" -// - has "@@ -" hunk header pattern within the sniff limit -func IsUnifiedDiff(content string) bool { +// isUnifiedDiff reports whether the content looks like a git unified diff. +// A line in the leading sniff window must START with "diff --git a/" (or the +// quoted form "diff --git \"a/" used by git for paths containing spaces). The +// previous substring check falsely classified prose that merely mentioned the +// marker (e.g. a markdown file documenting diff output). No "@@ -" fallback: +// revdiff only knows how to split sections by "diff --git" boundaries, so +// hunk-only input would mis-render anyway. +func isUnifiedDiff(content string) bool { if content == "" { return false } @@ -28,13 +31,12 @@ func IsUnifiedDiff(content string) bool { // inspect only the leading bytes for efficiency sample := content[:min(unifiedDiffSniffLimit, len(content))] - // primary: git diff header - if strings.Contains(sample, "diff --git a/") { - return true + for line := range strings.SplitSeq(sample, "\n") { + if strings.HasPrefix(line, "diff --git a/") || strings.HasPrefix(line, `diff --git "a/`) { + return true + } } - - // secondary: hunk header pattern - return strings.Contains(sample, "@@ -") + return false } // diffGitHeaderRe matches "diff --git a/path b/path", including quoted paths with spaces. diff --git a/app/diff/multiparse_test.go b/app/diff/multiparse_test.go index 77b8bb98..dbf99cd5 100644 --- a/app/diff/multiparse_test.go +++ b/app/diff/multiparse_test.go @@ -26,21 +26,36 @@ func TestIsUnifiedDiff(t *testing.T) { want: true, }, { - name: "hunk header", - content: "--- a/file.go\n+++ b/file.go\n@@ -1,3 +1,4 @@\n", + name: "format-patch with leading mail headers", + content: "From abc Mon Sep 17\nFrom: A\nSubject: foo\n\ndiff --git a/file.go b/file.go\n@@ -1,1 +1,1 @@\n", want: true, }, { - name: "diff-like text in code", - content: "func TestDiff() {\n // check @@ format\n s := \"diff --git\"\n}\n", - want: false, // Should not detect as diff (no "diff --git a/" marker) + name: "hunk header only (no diff --git)", + content: "--- a/file.go\n+++ b/file.go\n@@ -1,3 +1,4 @@\n", + want: false, // no diff --git boundary, splitter can't section it + }, + { + name: "diff-like text in code, no line-start marker", + content: "func TestDiff() {\n // check @@ format\n s := \"diff --git a/foo b/foo\"\n}\n", + want: false, // marker is mid-line inside a quoted string + }, + { + name: "marker mentioned in prose, not line-anchored", + content: "The header is `diff --git a/x b/x` and separates files.\n", + want: false, + }, + { + name: "marker only at line start but after blank line", + content: "\n\ndiff --git a/foo b/foo\n@@ -1,1 +1,1 @@\n", + want: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - if got := IsUnifiedDiff(tt.content); got != tt.want { - t.Errorf("IsUnifiedDiff() = %v, want %v", got, tt.want) + if got := isUnifiedDiff(tt.content); got != tt.want { + t.Errorf("isUnifiedDiff() = %v, want %v", got, tt.want) } }) } diff --git a/app/diff/multistdin.go b/app/diff/multistdin.go new file mode 100644 index 00000000..d14966bf --- /dev/null +++ b/app/diff/multistdin.go @@ -0,0 +1,89 @@ +package diff + +import ( + "errors" + "fmt" +) + +// ErrNotUnifiedDiff is returned by NewMultiFileStdinReader when the input does +// not look like a git unified diff. Callers use errors.Is to silently fall +// back to the raw-text StdinReader without logging — the sniff failing is the +// expected path for plain text input, not a parse error worth surfacing. +var ErrNotUnifiedDiff = errors.New("input is not a unified diff") + +// MultiFileStdinReader implements Renderer for multi-file unified diffs from stdin. +type MultiFileStdinReader struct { + sections map[string]parsedSection // path -> parsed diff lines + order []string // preserve file order from diff +} + +// parsedSection holds parsed diff lines and status for one file. +type parsedSection struct { + lines []DiffLine + status FileStatus +} + +// NewMultiFileStdinReader parses multi-file unified diff content. The sniff +// is internal: when content does not look like a unified diff the call +// returns ErrNotUnifiedDiff so the caller can silently route to the raw-text +// StdinReader. Any per-section parse failure fails the whole call — partial +// success would leak a tree where one file's hunks are silently dropped, +// which is a review-integrity hazard. +func NewMultiFileStdinReader(content string) (*MultiFileStdinReader, error) { + if !isUnifiedDiff(content) { + return nil, ErrNotUnifiedDiff + } + + sections, err := splitMultiFileDiff(content) + if err != nil { + return nil, fmt.Errorf("split multi-file diff: %w", err) + } + + r := &MultiFileStdinReader{ + sections: make(map[string]parsedSection, len(sections)), + order: make([]string, 0, len(sections)), + } + + for _, sec := range sections { + lines, parseErr := parseUnifiedDiff(sec.diffText, 0) + if parseErr != nil { + // fail the whole reader so the caller falls back to raw-text mode + // for the entire input rather than silently dropping one file's hunks. + return nil, fmt.Errorf("parse section %q: %w", sec.path, parseErr) + } + r.sections[sec.path] = parsedSection{ + lines: lines, + status: sec.status, + } + r.order = append(r.order, sec.path) + } + + if len(r.sections) == 0 { + return nil, errors.New("no valid file sections parsed") + } + + return r, nil +} + +// ChangedFiles returns file entries in original diff order. +func (r *MultiFileStdinReader) ChangedFiles(_ string, _ bool) ([]FileEntry, error) { + entries := make([]FileEntry, 0, len(r.order)) + for _, path := range r.order { + sec := r.sections[path] + entries = append(entries, FileEntry{ + Path: path, + Status: sec.status, + }) + } + return entries, nil +} + +// FileDiff returns pre-parsed diff lines for the requested file. +// contextLines is ignored — sections are pre-parsed from the original diff. +func (r *MultiFileStdinReader) FileDiff(_, file string, _ bool, _ int) ([]DiffLine, error) { + sec, ok := r.sections[file] + if !ok { + return nil, nil + } + return sec.lines, nil +} diff --git a/app/diff/stdin.go b/app/diff/stdin.go index 0f6176a4..fa071252 100644 --- a/app/diff/stdin.go +++ b/app/diff/stdin.go @@ -1,11 +1,7 @@ package diff import ( - "errors" - "fmt" "io" - "os" - "strings" ) // StdinReader is an in-memory renderer for scratch-buffer review mode. @@ -28,16 +24,6 @@ func NewStdinReaderFromReader(name string, r io.Reader) (*StdinReader, error) { return NewStdinReader(name, lines), nil } -// NewStdinReaderFromString creates a StdinReader from string content. -// Used when content has already been read for multi-file detection. -func NewStdinReaderFromString(name, content string) (*StdinReader, error) { - lines, err := readReaderAsContext(strings.NewReader(content)) - if err != nil { - return nil, err - } - return NewStdinReader(name, lines), nil -} - // ChangedFiles returns the single synthetic filename. func (r *StdinReader) ChangedFiles(_ string, _ bool) ([]FileEntry, error) { return []FileEntry{{Path: r.name}}, nil @@ -51,74 +37,3 @@ func (r *StdinReader) FileDiff(_, file string, _ bool, _ int) ([]DiffLine, error } return r.lines, nil } - -// MultiFileStdinReader implements Renderer for multi-file unified diffs from stdin. -type MultiFileStdinReader struct { - sections map[string]parsedSection // path -> parsed diff lines - order []string // preserve file order from diff -} - -// parsedSection holds parsed diff lines and status for one file. -type parsedSection struct { - lines []DiffLine - status FileStatus -} - -// NewMultiFileStdinReader parses multi-file unified diff content. -// Returns (*MultiFileStdinReader, nil) on success. -// Falls back to StdinReader via the caller if detection fails. -func NewMultiFileStdinReader(content string) (*MultiFileStdinReader, error) { - sections, err := splitMultiFileDiff(content) - if err != nil { - return nil, fmt.Errorf("split multi-file diff: %w", err) - } - - r := &MultiFileStdinReader{ - sections: make(map[string]parsedSection, len(sections)), - order: make([]string, 0, len(sections)), - } - - for _, sec := range sections { - // reuse existing parseUnifiedDiff for each file section - lines, parseErr := parseUnifiedDiff(sec.diffText, 0) - if parseErr != nil { - // warn, skip this section, continue with the others - fmt.Fprintf(os.Stderr, "warning: failed to parse section %s: %v\n", sec.path, parseErr) - continue - } - r.sections[sec.path] = parsedSection{ - lines: lines, - status: sec.status, - } - r.order = append(r.order, sec.path) - } - - if len(r.sections) == 0 { - return nil, errors.New("no valid file sections parsed") - } - - return r, nil -} - -// ChangedFiles returns file entries in original diff order. -func (r *MultiFileStdinReader) ChangedFiles(_ string, _ bool) ([]FileEntry, error) { - entries := make([]FileEntry, 0, len(r.order)) - for _, path := range r.order { - sec := r.sections[path] - entries = append(entries, FileEntry{ - Path: path, - Status: sec.status, - }) - } - return entries, nil -} - -// FileDiff returns pre-parsed diff lines for the requested file. -// contextLines is ignored — sections are pre-parsed from the original diff. -func (r *MultiFileStdinReader) FileDiff(_, file string, _ bool, _ int) ([]DiffLine, error) { - sec, ok := r.sections[file] - if !ok { - return nil, nil - } - return sec.lines, nil -} diff --git a/app/diff/stdin_test.go b/app/diff/stdin_test.go index 643821b0..1b13dfa1 100644 --- a/app/diff/stdin_test.go +++ b/app/diff/stdin_test.go @@ -1,6 +1,8 @@ package diff import ( + "errors" + "strings" "testing" ) @@ -182,8 +184,111 @@ rename to new.go func TestMultiFileStdinReader_EmptyInput(t *testing.T) { _, err := NewMultiFileStdinReader("") + if !errors.Is(err, ErrNotUnifiedDiff) { + t.Errorf("NewMultiFileStdinReader(\"\") error = %v, want ErrNotUnifiedDiff", err) + } +} + +func TestMultiFileStdinReader_NotUnifiedDiffSentinel(t *testing.T) { + // plain text returns the sentinel so the caller can silently fall back + _, err := NewMultiFileStdinReader("just some plain text\nnothing diff-like here\n") + if !errors.Is(err, ErrNotUnifiedDiff) { + t.Errorf("plain text NewMultiFileStdinReader error = %v, want ErrNotUnifiedDiff", err) + } +} + +func TestMultiFileStdinReader_ProseMentioningMarker(t *testing.T) { + // the marker is referenced inside a sentence (not at line start) — must NOT sniff true + content := "Documentation: the header `diff --git a/foo b/foo` separates file sections.\n" + + "It is followed by `@@ -1,1 +1,1 @@` and the hunk body.\n" + _, err := NewMultiFileStdinReader(content) + if !errors.Is(err, ErrNotUnifiedDiff) { + t.Errorf("prose mention NewMultiFileStdinReader error = %v, want ErrNotUnifiedDiff", err) + } +} + +func TestMultiFileStdinReader_HunkOnlyNoDiffGit(t *testing.T) { + // hunk header without a "diff --git" boundary cannot be sectioned; reject sniff + content := "--- a/file.go\n+++ b/file.go\n@@ -1,1 +1,1 @@\n-old\n+new\n" + _, err := NewMultiFileStdinReader(content) + if !errors.Is(err, ErrNotUnifiedDiff) { + t.Errorf("hunk-only NewMultiFileStdinReader error = %v, want ErrNotUnifiedDiff", err) + } +} + +func TestMultiFileStdinReader_MalformedHunkHeaderFails(t *testing.T) { + // hunk header start exceeds int64 range — matches the regex but Atoi fails, + // triggering parseUnifiedDiff's only practical error path. The whole call + // must fail so the caller falls back to raw-text mode rather than silently + // dropping the bad section. + content := `diff --git a/bad.go b/bad.go +index abc..def +--- a/bad.go ++++ b/bad.go +@@ -99999999999999999999999,1 +1,1 @@ +-old ++new +` + _, err := NewMultiFileStdinReader(content) if err == nil { - t.Error("NewMultiFileStdinReader(\"\") should return error") + t.Fatal("malformed hunk header NewMultiFileStdinReader should return error") + } + if errors.Is(err, ErrNotUnifiedDiff) { + t.Errorf("malformed hunk header should NOT return ErrNotUnifiedDiff, got %v", err) + } +} + +func TestMultiFileStdinReader_PartialFailureFailsWhole(t *testing.T) { + // first section parses cleanly, second section has a malformed hunk; + // reader must fail so the caller falls back rather than rendering a + // tree with one file silently dropped. + content := `diff --git a/good.go b/good.go +index abc..def +--- a/good.go ++++ b/good.go +@@ -1,1 +1,1 @@ +-old ++new + +diff --git a/bad.go b/bad.go +index def..ghi +--- a/bad.go ++++ b/bad.go +@@ -99999999999999999999999,1 +1,1 @@ +-old ++new +` + _, err := NewMultiFileStdinReader(content) + if err == nil { + t.Fatal("partial failure NewMultiFileStdinReader should return error") + } + if !strings.Contains(err.Error(), "bad.go") { + t.Errorf("error %q should reference the failing section path", err) + } +} + +func TestMultiFileStdinReader_RenameTargetWithSpaces(t *testing.T) { + content := `diff --git "a/old name.go" "b/new name.go" +similarity index 100% +rename from old name.go +rename to new name.go +` + r, err := NewMultiFileStdinReader(content) + if err != nil { + t.Fatalf("NewMultiFileStdinReader() error = %v", err) + } + files, err := r.ChangedFiles("", false) + if err != nil { + t.Fatalf("ChangedFiles() error = %v", err) + } + if len(files) != 1 { + t.Fatalf("ChangedFiles() returned %d files, want 1", len(files)) + } + if files[0].Path != "new name.go" { + t.Errorf("files[0].Path = %q, want %q", files[0].Path, "new name.go") + } + if files[0].Status != FileRenamed { + t.Errorf("files[0].Status = %v, want FileRenamed", files[0].Status) } } @@ -238,31 +343,3 @@ index abc..def } } -func TestNewStdinReaderFromString(t *testing.T) { - content := "line1\nline2\nline3\n" - name := "test.txt" - - r, err := NewStdinReaderFromString(name, content) - if err != nil { - t.Fatalf("NewStdinReaderFromString() error = %v", err) - } - - files, err := r.ChangedFiles("", false) - if err != nil { - t.Fatalf("ChangedFiles() error = %v", err) - } - if len(files) != 1 { - t.Errorf("ChangedFiles() returned %d files, want 1", len(files)) - } - if files[0].Path != name { - t.Errorf("files[0].Path = %q, want %q", files[0].Path, name) - } - - lines, err := r.FileDiff("", name, false, 0) - if err != nil { - t.Fatalf("FileDiff() error = %v", err) - } - if len(lines) == 0 { - t.Error("FileDiff() returned no lines") - } -} diff --git a/app/stdin.go b/app/stdin.go index f2aba189..069b71d6 100644 --- a/app/stdin.go +++ b/app/stdin.go @@ -4,7 +4,9 @@ import ( "errors" "fmt" "io" + "log" "os" + "strings" "github.com/umputun/revdiff/app/diff" "github.com/umputun/revdiff/app/ui" @@ -12,6 +14,15 @@ import ( const defaultScratchBufferName = "scratch-buffer" +// maxStdinSize bounds the in-memory buffer used for stdin diff detection and +// parsing. The pre-PR path streamed stdin straight into context lines and only +// the final []DiffLine survived; the multi-file path needs the full byte slice +// (plus its string view) resident for sniff+split+parse. The cap protects +// against unbounded reads like `cat /dev/zero | revdiff --stdin` while still +// leaving plenty of room for real-world multi-file patches (64 MiB easily +// covers an entire repository's worth of churn). +const maxStdinSize = 64 * 1024 * 1024 + type stdinStat interface { Stat() (os.FileInfo, error) } @@ -76,30 +87,56 @@ func openTTY() (*os.File, error) { return tty, nil } +// readStdinCapped reads up to maxStdinSize bytes from r and returns the +// content as a string. Reads one byte past the cap so callers can detect +// overflow without relying on possibly-stale stream length. +func readStdinCapped(r io.Reader) (string, error) { + data, err := io.ReadAll(io.LimitReader(r, maxStdinSize+1)) + if err != nil { + return "", fmt.Errorf("read stdin: %w", err) + } + if len(data) > maxStdinSize { + return "", fmt.Errorf("--stdin input exceeds %d-byte cap", maxStdinSize) + } + return string(data), nil +} + +// selectStdinRenderer picks the renderer for piped stdin content: the +// multi-file unified-diff reader when the content sniffs as a diff and parses +// cleanly, otherwise the raw-text StdinReader. Non-sentinel errors from the +// multi-file path are logged before fall-through so partial parse failures +// are visible to operators despite the alt-screen swallowing stderr. +func selectStdinRenderer(opts options, content string) (ui.Renderer, error) { + multi, mErr := diff.NewMultiFileStdinReader(content) + switch { + case mErr == nil: + return multi, nil + case errors.Is(mErr, diff.ErrNotUnifiedDiff): + // expected fall-through for plain text input + default: + log.Printf("[WARN] stdin: multi-file diff parse failed, falling back to raw text: %v", mErr) + } + + renderer, err := diff.NewStdinReaderFromReader(stdinName(opts.StdinName), strings.NewReader(content)) + if err != nil { + return nil, fmt.Errorf("create stdin reader: %w", err) + } + return renderer, nil +} + func prepareStdinMode(opts options, stdin *os.File) (ui.Renderer, *os.File, error) { if err := validateStdinInput(opts, stdin); err != nil { return nil, nil, err } - // read all stdin into memory for detection - content, err := io.ReadAll(stdin) + content, err := readStdinCapped(stdin) + if err != nil { + return nil, nil, err + } + + renderer, err := selectStdinRenderer(opts, content) if err != nil { - return nil, nil, fmt.Errorf("read stdin: %w", err) - } - - var renderer ui.Renderer - if diff.IsUnifiedDiff(string(content)) { - // try multi-file diff parsing; nil renderer falls through to context mode below - if multi, mErr := diff.NewMultiFileStdinReader(string(content)); mErr == nil { - renderer = multi - } - } - if renderer == nil { - // raw text, or fallback after a multi-file parse failure - renderer, err = diff.NewStdinReaderFromString(stdinName(opts.StdinName), string(content)) - if err != nil { - return nil, nil, fmt.Errorf("create stdin reader: %w", err) - } + return nil, nil, err } tty, err := openTTY() diff --git a/app/stdin_test.go b/app/stdin_test.go index 0c73acee..a50e2cbe 100644 --- a/app/stdin_test.go +++ b/app/stdin_test.go @@ -2,12 +2,16 @@ package main import ( "errors" + "io" "os" + "strings" "testing" "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "github.com/umputun/revdiff/app/diff" ) type fakeFileInfo struct { @@ -54,3 +58,88 @@ func TestValidateStdinInput(t *testing.T) { assert.Contains(t, err.Error(), "device gone") }) } + +func TestReadStdinCapped(t *testing.T) { + t.Run("under cap succeeds", func(t *testing.T) { + got, err := readStdinCapped(strings.NewReader("hello\n")) + require.NoError(t, err) + assert.Equal(t, "hello\n", got) + }) + + t.Run("over cap rejected with clear error", func(t *testing.T) { + // produce maxStdinSize+1 bytes without holding two copies + r := io.LimitReader(zeroReader{}, maxStdinSize+1) + _, err := readStdinCapped(r) + require.Error(t, err) + assert.Contains(t, err.Error(), "exceeds") + }) +} + +type zeroReader struct{} + +func (zeroReader) Read(p []byte) (int, error) { + for i := range p { + p[i] = 'x' + } + return len(p), nil +} + +func TestSelectStdinRenderer(t *testing.T) { + multiFile := `diff --git a/file1.go b/file1.go +index abc..def +--- a/file1.go ++++ b/file1.go +@@ -1,1 +1,2 @@ + line1 ++line2 + +diff --git a/file2.go b/file2.go +index ghi..jkl +--- a/file2.go ++++ b/file2.go +@@ -1,1 +1,1 @@ +-old ++new +` + + t.Run("multi-file diff selects MultiFileStdinReader", func(t *testing.T) { + r, err := selectStdinRenderer(options{Stdin: true}, multiFile) + require.NoError(t, err) + _, ok := r.(*diff.MultiFileStdinReader) + assert.True(t, ok, "expected *MultiFileStdinReader, got %T", r) + }) + + t.Run("plain text falls back to StdinReader silently", func(t *testing.T) { + r, err := selectStdinRenderer(options{Stdin: true, StdinName: "note.txt"}, "just plain text\nno diff markers\n") + require.NoError(t, err) + _, ok := r.(*diff.StdinReader) + assert.True(t, ok, "expected *StdinReader, got %T", r) + }) + + t.Run("sniff true but parse fails falls back to raw text", func(t *testing.T) { + // sniff matches "diff --git a/", parseUnifiedDiff fails on malformed hunk header. + // caller must still get a working raw-text renderer (locks in #1's fallback contract). + bad := `diff --git a/bad.go b/bad.go +index abc..def +--- a/bad.go ++++ b/bad.go +@@ -99999999999999999999999,1 +1,1 @@ +-old ++new +` + r, err := selectStdinRenderer(options{Stdin: true, StdinName: "bad.diff"}, bad) + require.NoError(t, err) + _, ok := r.(*diff.StdinReader) + assert.True(t, ok, "expected *StdinReader fallback, got %T", r) + }) + + t.Run("markdown with diff snippet inside prose stays raw text", func(t *testing.T) { + // the marker appears mid-line inside prose; sniffer must NOT classify this + // as a diff or surrounding prose would be dropped from rendering. + md := "# Title\n\nSome prose mentioning `diff --git a/x b/x` and `@@ -1,1 +1,1 @@` markers.\n\nMore prose.\n" + r, err := selectStdinRenderer(options{Stdin: true, StdinName: "doc.md"}, md) + require.NoError(t, err) + _, ok := r.(*diff.StdinReader) + assert.True(t, ok, "expected *StdinReader, got %T", r) + }) +} diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 8d004c9b..0ec40f23 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -383,7 +383,8 @@ Several mutually exclusive input sources, validated at parse time: | Staged changes | `--staged` | `Git` or `Hg` | Cannot combine with refs | | All tracked files | `--all-files` / `-A` | `DirectoryReader` | Git only, not with refs/staged/only | | Single file(s) | `--only` / `-F` | `FileReader` | Not with include | -| Stdin | `--stdin` | `StdinReader` | Reads before TUI, reopens `/dev/tty` | +| Stdin (raw text) | `--stdin` | `StdinReader` | Sniff fails or returns `ErrNotUnifiedDiff` | +| Stdin (multi-file diff) | `--stdin` | `MultiFileStdinReader` | Sniffs unified-diff signature, parses each section via `parseUnifiedDiff`; any per-section parse error fails the whole call so the caller falls back to `StdinReader` for the entire input | Filters stack: `--include` narrows first, then `--exclude` removes. Both wrap any renderer as decorators. diff --git a/plugins/codex/skills/revdiff/SKILL.md b/plugins/codex/skills/revdiff/SKILL.md index 2dcc6c2a..c8b8b525 100644 --- a/plugins/codex/skills/revdiff/SKILL.md +++ b/plugins/codex/skills/revdiff/SKILL.md @@ -57,6 +57,25 @@ The script resolves the history dir from `$REVDIFF_HISTORY_DIR` (default `~/.con When the user asks to open an in-session review in revdiff (the conversation already contains review comments produced earlier in the session), write those comments to a temp file (e.g. `/tmp/revdiff-review-XXXXXX.md`) using the format documented in `references/usage.md` ("Output Format" section), then run the normal launcher flow (Step 1 ref detection, Step 2 invocation) with `--annotations=` appended. Step 3 onward handles the curated annotations as usual. +## Reviewing a Diff That Lives Outside the Working Tree + +Some review targets are not the current repo state: a GitHub PR diff, a patch file on disk, or `git format-patch -1 --stdout` output. Pipe the unified diff into `revdiff --stdin` and the input is parsed as a real multi-file diff (one tree entry per file, hunk navigation, per-file annotations) instead of a context-only buffer. revdiff auto-detects the unified-diff signature; on a malformed patch the input falls back silently to raw-text mode. + +Use this instead of the normal launcher flow when: +- the user asks to "review PR #N", "review this patch", "review `gh pr diff` output", or supplies a patch URL/path +- the diff describes commits that are not checked out locally (e.g. someone else's branch on a remote-only PR) +- the user pastes a unified diff and asks for a review of *that diff*, not the working tree + +Example invocations (route through the same launcher as the normal flow): + +```bash +gh pr diff 123 | $SCRIPT_DIR/launch-revdiff.sh --stdin +git format-patch -1 --stdout | $SCRIPT_DIR/launch-revdiff.sh --stdin +cat /tmp/feature.patch | $SCRIPT_DIR/launch-revdiff.sh --stdin +``` + +`--stdin` is mutually exclusive with refs, `--staged`, `--only`, `--all-files`, `--include`, `--exclude`, and `--annotations`, so do not combine with the Step 1 ref detection — go directly to Step 3 once the launcher returns. Annotations come back keyed by the real file paths from the diff (not by `--stdin-name`). + ## How It Works 1. Launch revdiff in a terminal overlay (tmux popup, Zellij floating pane, kitty overlay, wezterm/Kaku split-pane, cmux split, ghostty split+zoom, iTerm2 split pane, or Emacs vterm frame) diff --git a/plugins/pi/skills/revdiff/SKILL.md b/plugins/pi/skills/revdiff/SKILL.md index 9cc22140..e129a0a2 100644 --- a/plugins/pi/skills/revdiff/SKILL.md +++ b/plugins/pi/skills/revdiff/SKILL.md @@ -36,6 +36,7 @@ Tool examples: - `args: "--description='why this refactor matters' main"`: include review context in the info popup - `args: "--description-file=/tmp/revdiff-desc.md main"`: include longer markdown review context - `args: "--annotations=/tmp/revdiff-review.md main"`: preload in-session review notes +- `args: "--stdin"` with a unified diff piped to stdin (e.g. `gh pr diff 123`, `git format-patch -1 --stdout`, a `.patch` file): review a multi-file diff that lives outside the working tree — revdiff parses it as a real diff (one tree entry per file, hunk navigation, per-file annotations) instead of a context-only buffer After `revdiff_review` returns annotations, address them directly from the tool result content. Do not read revdiff history after a successful captured-annotation result. Exit code `10` is success-with-annotations and is handled by the extension; do not report it as a failure. If it returns no annotations, report that no annotations were captured and stop. Do not relaunch revdiff after any no-annotation result unless the user explicitly asks for another review. @@ -75,6 +76,8 @@ When annotations arrive from `/revdiff` or `revdiff_review`: /revdiff HEAD~3 --description="why this refactor matters" /revdiff HEAD~3 --description-file=/tmp/revdiff-desc.md /revdiff main --annotations=/tmp/revdiff-review.md +gh pr diff 123 | revdiff --stdin +git format-patch -1 --stdout | revdiff --stdin ``` ## Recommended natural-language examples diff --git a/site/docs.html b/site/docs.html index d70086be..583da1ac 100644 --- a/site/docs.html +++ b/site/docs.html @@ -265,12 +265,16 @@

Two-file diff

--compare-old and --compare-new must be used together and are mutually exclusive with refs, --staged, --only, --all-files, --stdin, --include, --exclude, and --annotations. All standard features work: word-diff, compact mode, syntax highlighting, scrollbar, and inline annotations.

Scratch-buffer review

-

Use --stdin to review arbitrary piped or redirected text as a single synthetic file. All lines are treated as context, so the normal single-file review flow still works: annotations, file-level notes, search, wrap, collapsed mode, and structured output.

+

Use --stdin to review arbitrary piped or redirected text. revdiff sniffs the input for a git unified-diff signature: when a line beginning with diff --git a/ is found near the start, the input is parsed as a real multi-file diff with one tree entry per file, +/- markers, hunk navigation, word-diff, compact mode, and per-file annotations. Otherwise the input is shown as a single context-only buffer: all lines are treated as context, supporting annotations, file-level notes, search, wrap, collapsed mode, and structured output. Any per-section parse failure falls the whole input back to raw-text mode so a malformed patch never silently drops files. Input is capped at 64 MiB.

--stdin is explicit, requires piped or redirected input, and is mutually exclusive with refs, --staged, --only, --all-files, --include, --exclude, and --annotations.

-

Use --stdin-name to control the synthetic filename for annotation output and extension-based highlighting or markdown TOC activation.

+

Use --stdin-name to control the synthetic filename for the context-only case (it is ignored in multi-file diff mode, where the tree shows the real paths). The name gives annotation output a stable key and enables extension-based highlighting or markdown TOC activation.

echo "plain text" | revdiff --stdin printf '# Plan\n\nBody\n' | revdiff --stdin --stdin-name plan.md -git show HEAD~1:README.md | revdiff --stdin --stdin-name README.md
+git show HEAD~1:README.md | revdiff --stdin --stdin-name README.md + +# multi-file diff parsing — tree shows real paths, per-file annotations +gh pr diff 123 | revdiff --stdin +git format-patch -1 --stdout | revdiff --stdin

Review description

Use --description (or --description-file=path.md) to attach prose context to a review. The text is rendered at the top of the info popup (i key) using the same markdown-highlighting path as .md files in the diff view. Useful when an agent or a script launches revdiff on your behalf and you come back to it later — the popup tells you what the change is and why.

From 12d571e3795b23755226e9756864301469e4056a Mon Sep 17 00:00:00 2001 From: Denis Blokhin Date: Fri, 29 May 2026 06:32:12 +0300 Subject: [PATCH 3/3] fix(stdin): fail whole reader on empty-path or hunkless sections splitMultiFileDiff now errors when parseFileHeader yields no path, and NewMultiFileStdinReader rejects zero-line sections that carry no structural marker (rename, mode, binary, hunk). Both used to silently drop a section, hiding real stdin behind an empty tree entry. Also: dedupe duplicate paths in MultiFileStdinReader.order, route `rename to` through cleanPath, fix cleanPath double-stripping a top-level dir named a/ or b/, rename stdin_test.go to multistdin_test.go, correct selectStdinRenderer godoc rationale, revert paren-wrap on the single-import in app/diff/stdin.go, and document the 4 KiB sniff-window tradeoff for format-patch input. --- app/diff/multiparse.go | 72 ++++++-- app/diff/multiparse_test.go | 21 +++ app/diff/multistdin.go | 41 ++++- .../{stdin_test.go => multistdin_test.go} | 156 ++++++++++++++++++ app/diff/stdin.go | 4 +- app/stdin.go | 2 +- app/stdin_test.go | 33 ++++ 7 files changed, 309 insertions(+), 20 deletions(-) rename app/diff/{stdin_test.go => multistdin_test.go} (64%) diff --git a/app/diff/multiparse.go b/app/diff/multiparse.go index 72b7824a..36e2ca9e 100644 --- a/app/diff/multiparse.go +++ b/app/diff/multiparse.go @@ -2,6 +2,7 @@ package diff import ( "errors" + "fmt" "regexp" "strings" ) @@ -14,6 +15,12 @@ type rawFileSection struct { } // unifiedDiffSniffLimit caps how many leading bytes isUnifiedDiff inspects. +// 4 KiB comfortably covers `git diff` output (the boundary marker is the very +// first line) but can miss `git format-patch` output when a long commit body +// or large mail header pushes the first `diff --git` past this window — those +// inputs fall back to raw-text rendering. Raising the cap trades worst-case +// sniff cost for broader format-patch coverage; pick a larger value if that +// tradeoff changes. const unifiedDiffSniffLimit = 4096 // isUnifiedDiff reports whether the content looks like a git unified diff. @@ -23,6 +30,9 @@ const unifiedDiffSniffLimit = 4096 // marker (e.g. a markdown file documenting diff output). No "@@ -" fallback: // revdiff only knows how to split sections by "diff --git" boundaries, so // hunk-only input would mis-render anyway. +// +// Only the first unifiedDiffSniffLimit bytes are inspected — see the const +// comment for the format-patch caveat. func isUnifiedDiff(content string) bool { if content == "" { return false @@ -51,6 +61,11 @@ var diffGitHeaderRe = regexp.MustCompile(`^diff --git ("?a/[^"]*"?|"?a/.*?") ("? // each section includes the full diff header through to the next file boundary // (next "diff --git" or end of input). path and status are resolved once per // section by parseFileHeader — there is no separate inline header parse. +// +// A section whose header yields no parseable new-side path fails the whole +// call so the caller can fall back to raw-text mode. Silently skipping the +// section would let a single crafted "diff --git" line followed by prose +// drop real content from the rendering with no in-TUI signal. func splitMultiFileDiff(raw string) ([]rawFileSection, error) { if raw == "" { return nil, errors.New("empty input") @@ -60,24 +75,27 @@ func splitMultiFileDiff(raw string) ([]rawFileSection, error) { var current strings.Builder inSection := false - // flush appends the accumulated section text, resolving path/status from its header. - // sections without a parseable new-side path are skipped. - flush := func() { + // flush resolves path/status from the accumulated section text and appends + // it. An empty path is a hard failure: the caller falls back to raw-text mode. + flush := func() error { if !inSection { - return + return nil } text := current.String() path, status := parseFileHeader(text) if path == "" { - return + return fmt.Errorf("section %q has no parseable new-side path", firstLine(text)) } sections = append(sections, rawFileSection{path: path, status: status, diffText: text}) + return nil } for line := range strings.SplitSeq(raw, "\n") { if strings.HasPrefix(line, "diff --git ") { // new file boundary: flush the previous section and start a fresh one - flush() + if err := flush(); err != nil { + return nil, err + } current.Reset() inSection = true } @@ -86,7 +104,9 @@ func splitMultiFileDiff(raw string) ([]rawFileSection, error) { current.WriteString("\n") } } - flush() + if err := flush(); err != nil { + return nil, err + } if len(sections) == 0 { return nil, errors.New("no file sections found") @@ -95,13 +115,34 @@ func splitMultiFileDiff(raw string) ([]rawFileSection, error) { return sections, nil } +// firstLine returns the first line of s, used to identify a malformed section +// in error messages without dumping the whole diff text into the log. +func firstLine(s string) string { + first, _, _ := strings.Cut(s, "\n") + return first +} + // cleanPath strips a leading "a/" or "b/" prefix and surrounding quotes from a path. -// Handles both "b/path with spaces" (quotes outside prefix) and b/"path with spaces" -// (quotes inside prefix) by stripping the prefix, then the quotes, then the prefix again. +// Handles both git path forms: +// - "b/path with spaces" — quotes wrap the prefix; strip quotes first, then prefix +// - b/"path with spaces" — prefix wraps the quotes; strip prefix first, then quotes +// +// Strips the prefix exactly once so a legitimate top-level directory literally +// named "a" or "b" (e.g. "b/b/weird.go" representing repo path b/weird.go) +// resolves to "b/weird.go", not "weird.go". func cleanPath(path string) string { - path = strings.TrimPrefix(strings.TrimPrefix(path, "b/"), "a/") - path = strings.Trim(path, `"`) - return strings.TrimPrefix(strings.TrimPrefix(path, "b/"), "a/") + // outer-quotes form: "a/foo" or "b/foo" + if len(path) >= 2 && path[0] == '"' && path[len(path)-1] == '"' { + path = path[1 : len(path)-1] + } + switch { + case strings.HasPrefix(path, "a/"): + path = path[2:] + case strings.HasPrefix(path, "b/"): + path = path[2:] + } + // inner-quotes form: prefix already stripped, surviving quotes wrap the body + return strings.Trim(path, `"`) } // parseFileHeader extracts the new-side path and change status from a diff @@ -139,9 +180,10 @@ func parseFileHeader(section string) (path string, status FileStatus) { } } case strings.HasPrefix(line, "rename to "): - // rename target overrides the diff --git path - if parts := strings.Fields(line); len(parts) >= 3 { - path = strings.Join(parts[2:], " ") + // rename target overrides the diff --git path; route through cleanPath + // so quoted paths shed their quotes like every other branch does. + if rest := strings.TrimSpace(strings.TrimPrefix(line, "rename to ")); rest != "" { + path = cleanPath(rest) } case strings.HasPrefix(line, "+++ "): // fallback: derive the path from the +++ line when the header lacked one diff --git a/app/diff/multiparse_test.go b/app/diff/multiparse_test.go index dbf99cd5..ba97f6e8 100644 --- a/app/diff/multiparse_test.go +++ b/app/diff/multiparse_test.go @@ -1,6 +1,7 @@ package diff import ( + "strings" "testing" ) @@ -50,6 +51,11 @@ func TestIsUnifiedDiff(t *testing.T) { content: "\n\ndiff --git a/foo b/foo\n@@ -1,1 +1,1 @@\n", want: true, }, + { + name: "marker pushed past sniff window by long preamble", + content: strings.Repeat("preamble line that pads past the 4 KiB sniff cap\n", 100) + "diff --git a/file.go b/file.go\n", + want: false, // documented tradeoff: format-patch output with very long bodies misses the sniff + }, } for _, tt := range tests { @@ -248,11 +254,26 @@ func TestCleanPath(t *testing.T) { input: `b/"path with spaces/file.go"`, want: "path with spaces/file.go", }, + { + name: "quoted b/path with spaces", + input: `"b/path with spaces/file.go"`, + want: "path with spaces/file.go", + }, { name: "no prefix", input: "path/file.go", want: "path/file.go", }, + { + name: "top-level b dir preserved", + input: "b/b/weird.go", + want: "b/weird.go", + }, + { + name: "top-level a dir preserved", + input: "a/a/weird.go", + want: "a/weird.go", + }, } for _, tt := range tests { diff --git a/app/diff/multistdin.go b/app/diff/multistdin.go index d14966bf..055e9dc6 100644 --- a/app/diff/multistdin.go +++ b/app/diff/multistdin.go @@ -3,6 +3,7 @@ package diff import ( "errors" "fmt" + "strings" ) // ErrNotUnifiedDiff is returned by NewMultiFileStdinReader when the input does @@ -51,11 +52,22 @@ func NewMultiFileStdinReader(content string) (*MultiFileStdinReader, error) { // for the entire input rather than silently dropping one file's hunks. return nil, fmt.Errorf("parse section %q: %w", sec.path, parseErr) } + // A zero-line section is acceptable only when the header records a + // structural change (rename, mode flip, new/deleted empty file) — those + // surface in the file tree via status alone. A zero-line section with + // no structural marker means we matched a stray "diff --git" line in + // prose; failing here lets the caller fall back to raw text instead of + // hiding the real stdin behind an empty tree entry. + if len(lines) == 0 && !hasStructuralChange(sec.diffText) { + return nil, fmt.Errorf("section %q has no renderable content", sec.path) + } + if _, exists := r.sections[sec.path]; !exists { + r.order = append(r.order, sec.path) + } r.sections[sec.path] = parsedSection{ lines: lines, status: sec.status, } - r.order = append(r.order, sec.path) } if len(r.sections) == 0 { @@ -65,6 +77,33 @@ func NewMultiFileStdinReader(content string) (*MultiFileStdinReader, error) { return r, nil } +// hasStructuralChange reports whether section text carries a marker that +// renders meaningfully in the tree even without diff lines: a hunk header, a +// binary marker, a mode flip, or a rename/copy header. Used to keep +// rename-only / mode-only / new-empty / deleted-empty sections valid while +// rejecting bare "diff --git" lines surrounded by prose. +func hasStructuralChange(section string) bool { + for line := range strings.SplitSeq(section, "\n") { + switch { + case strings.HasPrefix(line, "@@ "): + return true + case strings.HasPrefix(line, "Binary files "): + return true + case strings.HasPrefix(line, "new file mode"), + strings.HasPrefix(line, "deleted file mode"), + strings.HasPrefix(line, "old mode "), + strings.HasPrefix(line, "new mode "): + return true + case strings.HasPrefix(line, "rename from"), + strings.HasPrefix(line, "rename to"), + strings.HasPrefix(line, "copy from"), + strings.HasPrefix(line, "copy to"): + return true + } + } + return false +} + // ChangedFiles returns file entries in original diff order. func (r *MultiFileStdinReader) ChangedFiles(_ string, _ bool) ([]FileEntry, error) { entries := make([]FileEntry, 0, len(r.order)) diff --git a/app/diff/stdin_test.go b/app/diff/multistdin_test.go similarity index 64% rename from app/diff/stdin_test.go rename to app/diff/multistdin_test.go index 1b13dfa1..815c3758 100644 --- a/app/diff/stdin_test.go +++ b/app/diff/multistdin_test.go @@ -292,6 +292,162 @@ rename to new name.go } } +func TestMultiFileStdinReader_RenameTargetQuoted(t *testing.T) { + // git's actual quoted format for paths with special chars; the rename-to + // line carries the quotes. The branch must route through cleanPath so the + // resolved tree entry has no surviving quote chars. + content := "diff --git \"a/old name.go\" \"b/new name.go\"\n" + + "similarity index 100%\n" + + "rename from \"old name.go\"\n" + + "rename to \"new name.go\"\n" + r, err := NewMultiFileStdinReader(content) + if err != nil { + t.Fatalf("NewMultiFileStdinReader() error = %v", err) + } + files, err := r.ChangedFiles("", false) + if err != nil { + t.Fatalf("ChangedFiles() error = %v", err) + } + if len(files) != 1 { + t.Fatalf("ChangedFiles() returned %d files, want 1", len(files)) + } + if files[0].Path != "new name.go" { + t.Errorf("files[0].Path = %q, want %q", files[0].Path, "new name.go") + } +} + +func TestMultiFileStdinReader_HunklessSectionFails(t *testing.T) { + // section is just "diff --git a/foo b/foo" with no body — no hunks, no + // structural change marker. Currently produced an empty tree entry, + // hiding real prose behind it; must now fail so the caller falls back. + content := "diff --git a/foo b/foo\nsome prose follows that the reader has no way to render\n" + _, err := NewMultiFileStdinReader(content) + if err == nil { + t.Fatal("hunkless section NewMultiFileStdinReader should return error") + } + if errors.Is(err, ErrNotUnifiedDiff) { + t.Errorf("hunkless section should NOT return ErrNotUnifiedDiff, got %v", err) + } +} + +func TestMultiFileStdinReader_ValidPlusHunklessFailsWhole(t *testing.T) { + // first section parses cleanly; second is a bare "diff --git" boundary + // with no body. Whole reader must fail rather than render a one-file tree + // with the second file silently dropped. + content := `diff --git a/good.go b/good.go +index abc..def +--- a/good.go ++++ b/good.go +@@ -1,1 +1,1 @@ +-old ++new + +diff --git a/empty b/empty +` + _, err := NewMultiFileStdinReader(content) + if err == nil { + t.Fatal("valid + hunkless NewMultiFileStdinReader should return error") + } +} + +func TestMultiFileStdinReader_EmptyPathSectionFailsWhole(t *testing.T) { + // first section parses cleanly; second `diff --git` line is malformed so + // parseFileHeader yields no path. The split layer must fail the whole + // call so the caller falls back to raw text instead of silently dropping + // the second section. + content := `diff --git a/good.go b/good.go +index abc..def +--- a/good.go ++++ b/good.go +@@ -1,1 +1,1 @@ +-old ++new + +diff --git malformed-no-prefix +` + _, err := NewMultiFileStdinReader(content) + if err == nil { + t.Fatal("empty-path section NewMultiFileStdinReader should return error") + } + if errors.Is(err, ErrNotUnifiedDiff) { + t.Errorf("empty-path section should NOT return ErrNotUnifiedDiff, got %v", err) + } +} + +func TestMultiFileStdinReader_NewEmptyFileSucceeds(t *testing.T) { + // new empty file: zero hunks but the `new file mode` marker means the + // section still renders meaningfully in the tree. Must NOT trip the + // hunkless-section guard. + content := `diff --git a/empty.txt b/empty.txt +new file mode 100644 +index 0000000..e69de29 +` + r, err := NewMultiFileStdinReader(content) + if err != nil { + t.Fatalf("new empty file NewMultiFileStdinReader error = %v", err) + } + files, err := r.ChangedFiles("", false) + if err != nil { + t.Fatalf("ChangedFiles() error = %v", err) + } + if len(files) != 1 || files[0].Path != "empty.txt" || files[0].Status != FileAdded { + t.Errorf("ChangedFiles() = %+v, want one FileAdded entry for empty.txt", files) + } +} + +func TestMultiFileStdinReader_ModeOnlyChangeSucceeds(t *testing.T) { + // mode-only change: zero hunks but `old mode` / `new mode` markers mean + // the section still renders meaningfully in the tree. + content := `diff --git a/script.sh b/script.sh +old mode 100644 +new mode 100755 +` + r, err := NewMultiFileStdinReader(content) + if err != nil { + t.Fatalf("mode-only NewMultiFileStdinReader error = %v", err) + } + files, err := r.ChangedFiles("", false) + if err != nil { + t.Fatalf("ChangedFiles() error = %v", err) + } + if len(files) != 1 || files[0].Path != "script.sh" { + t.Errorf("ChangedFiles() = %+v, want one entry for script.sh", files) + } +} + +func TestMultiFileStdinReader_DuplicatePathDeduped(t *testing.T) { + // crafted diff with two sections resolving to the same path. The tree + // must list the path once; without dedupe the user would see a duplicate + // row in the navigation pane. + content := `diff --git a/same.go b/same.go +index abc..def +--- a/same.go ++++ b/same.go +@@ -1,1 +1,1 @@ +-first ++first-changed + +diff --git a/same.go b/same.go +index def..ghi +--- a/same.go ++++ b/same.go +@@ -1,1 +1,1 @@ +-second ++second-changed +` + r, err := NewMultiFileStdinReader(content) + if err != nil { + t.Fatalf("duplicate path NewMultiFileStdinReader error = %v", err) + } + files, err := r.ChangedFiles("", false) + if err != nil { + t.Fatalf("ChangedFiles() error = %v", err) + } + if len(files) != 1 { + t.Errorf("ChangedFiles() returned %d entries, want 1 (deduped)", len(files)) + } +} + func TestMultiFileStdinReader_PreservesOrder(t *testing.T) { content := `diff --git a/z.go b/z.go index abc..def diff --git a/app/diff/stdin.go b/app/diff/stdin.go index fa071252..da212aa3 100644 --- a/app/diff/stdin.go +++ b/app/diff/stdin.go @@ -1,8 +1,6 @@ package diff -import ( - "io" -) +import "io" // StdinReader is an in-memory renderer for scratch-buffer review mode. type StdinReader struct { diff --git a/app/stdin.go b/app/stdin.go index 069b71d6..87f2f4d8 100644 --- a/app/stdin.go +++ b/app/stdin.go @@ -105,7 +105,7 @@ func readStdinCapped(r io.Reader) (string, error) { // multi-file unified-diff reader when the content sniffs as a diff and parses // cleanly, otherwise the raw-text StdinReader. Non-sentinel errors from the // multi-file path are logged before fall-through so partial parse failures -// are visible to operators despite the alt-screen swallowing stderr. +// are surfaced rather than silently routed to raw text. func selectStdinRenderer(opts options, content string) (ui.Renderer, error) { multi, mErr := diff.NewMultiFileStdinReader(content) switch { diff --git a/app/stdin_test.go b/app/stdin_test.go index a50e2cbe..e72d9f2b 100644 --- a/app/stdin_test.go +++ b/app/stdin_test.go @@ -142,4 +142,37 @@ index abc..def _, ok := r.(*diff.StdinReader) assert.True(t, ok, "expected *StdinReader, got %T", r) }) + + t.Run("valid + malformed section falls back to raw text", func(t *testing.T) { + // first section is a valid hunk, second is a bare `diff --git` boundary + // with no body. The whole reader must fail so the caller gets a raw-text + // StdinReader showing the full input, not a one-file tree silently + // dropping the second section. + content := "diff --git a/good.go b/good.go\n" + + "index abc..def\n" + + "--- a/good.go\n" + + "+++ b/good.go\n" + + "@@ -1,1 +1,1 @@\n" + + "-old\n" + + "+new\n" + + "\n" + + "diff --git a/orphan b/orphan\n" + r, err := selectStdinRenderer(options{Stdin: true, StdinName: "mixed.diff"}, content) + require.NoError(t, err) + _, ok := r.(*diff.StdinReader) + assert.True(t, ok, "expected *StdinReader fallback, got %T", r) + }) + + t.Run("line-start diff --git with no hunk falls back to raw text", func(t *testing.T) { + // document starts with what looks like a diff header but has no hunks + // or structural markers. Without the hunkless-section guard the tree + // would silently render an empty file entry, hiding the real text. + content := "diff --git a/note b/note\n" + + "this is actually free-form notes that just happen to start with\n" + + "a line that looks like a diff boundary; there are no hunks here.\n" + r, err := selectStdinRenderer(options{Stdin: true, StdinName: "note.txt"}, content) + require.NoError(t, err) + _, ok := r.(*diff.StdinReader) + assert.True(t, ok, "expected *StdinReader fallback, got %T", r) + }) }