From 2c988fab54cadec8281786dec63c54d74bb159b9 Mon Sep 17 00:00:00 2001 From: Oliver Bone <owbone@github.com> Date: Fri, 21 Oct 2022 14:18:01 +0000 Subject: [PATCH 01/28] test: fix tests use of 'file' protocol In response to CVE-2022-39253, Git now considers the `file://` protocol to be unsafe by default. The default value of the `protocol.file.allow` config variable was changed to `user` [1], meaning that a file URL or a local path is only trusted if it came directly from user input, and not if it came through a command which executes a clone/fetch/push internally. The tests fall foul of this new requirement by attempting to run a `git submodule add` with a local directory. Internally, this performs a clone, which is no longer trusted because of the change described above. This results in the command failing with a "transport 'file' not allowed" message. Since this is only the case for a single command, then fix the test by setting `protocol.file.allow` to `always` when we run it. [1] https://github.blog/2022-10-18-git-security-vulnerabilities-announced/#cve-2022-39253 --- git_sizer_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 580268a..6ab132f 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -701,7 +701,7 @@ func TestSubmodule(t *testing.T) { require.NoError(t, cmd.Run(), "creating main commit") // Make subm a submodule of main: - cmd = mainRepo.GitCommand(t, "submodule", "add", submRepo.Path, "sub") + cmd = mainRepo.GitCommand(t, "-c", "protocol.file.allow=always", "submodule", "add", submRepo.Path, "sub") cmd.Dir = mainRepo.Path require.NoError(t, cmd.Run(), "adding submodule") From 9e95b4b8a63c71f19dfd62b940eedb5927bd5dd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20=C3=81ngel=20Pastor=20Olivar?= <migue@github.com> Date: Mon, 28 Nov 2022 16:05:50 +0100 Subject: [PATCH 02/28] This code has been moved into the go-pipe library --- internal/pipe/command.go | 175 ------ internal/pipe/command_unix.go | 66 --- internal/pipe/command_windows.go | 24 - internal/pipe/filter-error.go | 135 ----- internal/pipe/function.go | 66 --- internal/pipe/iocopier.go | 62 --- internal/pipe/linewise.go | 74 --- internal/pipe/pipeline.go | 272 ---------- internal/pipe/pipeline_test.go | 883 ------------------------------- internal/pipe/print.go | 37 -- internal/pipe/scanner.go | 67 --- internal/pipe/stage.go | 34 -- 12 files changed, 1895 deletions(-) delete mode 100644 internal/pipe/command.go delete mode 100644 internal/pipe/command_unix.go delete mode 100644 internal/pipe/command_windows.go delete mode 100644 internal/pipe/filter-error.go delete mode 100644 internal/pipe/function.go delete mode 100644 internal/pipe/iocopier.go delete mode 100644 internal/pipe/linewise.go delete mode 100644 internal/pipe/pipeline.go delete mode 100644 internal/pipe/pipeline_test.go delete mode 100644 internal/pipe/print.go delete mode 100644 internal/pipe/scanner.go delete mode 100644 internal/pipe/stage.go diff --git a/internal/pipe/command.go b/internal/pipe/command.go deleted file mode 100644 index d370e28..0000000 --- a/internal/pipe/command.go +++ /dev/null @@ -1,175 +0,0 @@ -package pipe - -import ( - "bytes" - "context" - "errors" - "io" - "os" - "os/exec" - "sync/atomic" - "syscall" - - "golang.org/x/sync/errgroup" -) - -// commandStage is a pipeline `Stage` based on running an external -// command and piping the data through its stdin and stdout. -type commandStage struct { - name string - stdin io.Closer - cmd *exec.Cmd - done chan struct{} - wg errgroup.Group - stderr bytes.Buffer - - // If the context expired and we attempted to kill the command, - // `ctx.Err()` is stored here. - ctxErr atomic.Value -} - -// Command returns a pipeline `Stage` based on the specified external -// `command`, run with the given command-line `args`. Its stdin and -// stdout are handled as usual, and its stderr is collected and -// included in any `*exec.ExitError` that the command might emit. -func Command(command string, args ...string) Stage { - if len(command) == 0 { - panic("attempt to create command with empty command") - } - - cmd := exec.Command(command, args...) - return CommandStage(command, cmd) -} - -// Command returns a pipeline `Stage` with the name `name`, based on -// the specified `cmd`. Its stdin and stdout are handled as usual, and -// its stderr is collected and included in any `*exec.ExitError` that -// the command might emit. -func CommandStage(name string, cmd *exec.Cmd) Stage { - return &commandStage{ - name: name, - cmd: cmd, - done: make(chan struct{}), - } -} - -func (s *commandStage) Name() string { - return s.name -} - -func (s *commandStage) Start( - ctx context.Context, env Env, stdin io.ReadCloser, -) (io.ReadCloser, error) { - if s.cmd.Dir == "" { - s.cmd.Dir = env.Dir - } - - if stdin != nil { - s.cmd.Stdin = stdin - // Also keep a copy so that we can close it when the command exits: - s.stdin = stdin - } - - stdout, err := s.cmd.StdoutPipe() - if err != nil { - return nil, err - } - - // If the caller hasn't arranged otherwise, read the command's - // standard error into our `stderr` field: - if s.cmd.Stderr == nil { - // We can't just set `s.cmd.Stderr = &s.stderr`, because if we - // do then `s.cmd.Wait()` doesn't wait to be sure that all - // error output has been captured. By doing this ourselves, we - // can be sure. - p, err := s.cmd.StderrPipe() - if err != nil { - return nil, err - } - s.wg.Go(func() error { - _, err := io.Copy(&s.stderr, p) - // We don't consider `ErrClosed` an error (FIXME: is this - // correct?): - if err != nil && !errors.Is(err, os.ErrClosed) { - return err - } - return nil - }) - } - - // Put the command in its own process group, if possible: - s.runInOwnProcessGroup() - - if err := s.cmd.Start(); err != nil { - return nil, err - } - - // Arrange for the process to be killed (gently) if the context - // expires before the command exits normally: - go func() { - select { - case <-ctx.Done(): - s.kill(ctx.Err()) - case <-s.done: - // Process already done; no need to kill anything. - } - }() - - return stdout, nil -} - -// filterCmdError interprets `err`, which was returned by `Cmd.Wait()` -// (possibly `nil`), possibly modifying it or ignoring it. It returns -// the error that should actually be returned to the caller (possibly -// `nil`). -func (s *commandStage) filterCmdError(err error) error { - if err == nil { - return nil - } - - eErr, ok := err.(*exec.ExitError) - if !ok { - return err - } - - ctxErr, ok := s.ctxErr.Load().(error) - if ok { - // If the process looks like it was killed by us, substitute - // `ctxErr` for the process's own exit error. Note that this - // doesn't do anything on Windows, where the `Signaled()` - // method isn't implemented (it is hardcoded to return - // `false`). - ps, ok := eErr.ProcessState.Sys().(syscall.WaitStatus) - if ok && ps.Signaled() && - (ps.Signal() == syscall.SIGTERM || ps.Signal() == syscall.SIGKILL) { - return ctxErr - } - } - - eErr.Stderr = s.stderr.Bytes() - return eErr -} - -func (s *commandStage) Wait() error { - defer close(s.done) - - // Make sure that any stderr is copied before `s.cmd.Wait()` - // closes the read end of the pipe: - wErr := s.wg.Wait() - - err := s.cmd.Wait() - err = s.filterCmdError(err) - - if err == nil && wErr != nil { - err = wErr - } - - if s.stdin != nil { - cErr := s.stdin.Close() - if cErr != nil && err == nil { - return cErr - } - } - - return err -} diff --git a/internal/pipe/command_unix.go b/internal/pipe/command_unix.go deleted file mode 100644 index c84bcf5..0000000 --- a/internal/pipe/command_unix.go +++ /dev/null @@ -1,66 +0,0 @@ -//go:build !windows -// +build !windows - -package pipe - -import ( - "syscall" - "time" -) - -// runInOwnProcessGroup arranges for `cmd` to be run in its own -// process group. -func (s *commandStage) runInOwnProcessGroup() { - // Put the command in its own process group: - if s.cmd.SysProcAttr == nil { - s.cmd.SysProcAttr = &syscall.SysProcAttr{} - } - s.cmd.SysProcAttr.Setpgid = true -} - -// kill is called to kill the process if the context expires. `err` is -// the corresponding value of `Context.Err()`. -func (s *commandStage) kill(err error) { - // I believe that the calls to `syscall.Kill()` in this method are - // racy. It could be that s.cmd.Wait() succeeds immediately before - // this call, in which case the process group wouldn't exist - // anymore. But I don't see any way to avoid this without - // duplicating a lot of code from `exec.Cmd`. (`os.Cmd.Kill()` and - // `os.Cmd.Signal()` appear to be race-free, but only because they - // use internal synchronization. But those methods only kill the - // process, not the process group, so they are not suitable here. - - // We started the process with PGID == PID: - pid := s.cmd.Process.Pid - select { - case <-s.done: - // Process has ended; no need to kill it again. - return - default: - } - - // Record the `ctx.Err()`, which will be used as the error result - // for this stage. - s.ctxErr.Store(err) - - // First try to kill using a relatively gentle signal so that - // the processes have a chance to clean up after themselves: - _ = syscall.Kill(-pid, syscall.SIGTERM) - - // Well-behaved processes should commit suicide after the above, - // but if they don't exit within 2s, murder the whole lot of them: - go func() { - // Use an explicit `time.Timer` rather than `time.After()` so - // that we can stop it (freeing resources) promptly if the - // command exits before the timer triggers. - timer := time.NewTimer(2 * time.Second) - defer timer.Stop() - - select { - case <-s.done: - // Process has ended; no need to kill it again. - case <-timer.C: - _ = syscall.Kill(-pid, syscall.SIGKILL) - } - }() -} diff --git a/internal/pipe/command_windows.go b/internal/pipe/command_windows.go deleted file mode 100644 index 55af6e3..0000000 --- a/internal/pipe/command_windows.go +++ /dev/null @@ -1,24 +0,0 @@ -//go:build windows -// +build windows - -package pipe - -// runInOwnProcessGroup is not supported on Windows. -func (s *commandStage) runInOwnProcessGroup() {} - -// kill is called to kill the process if the context expires. `err` is -// the corresponding value of `Context.Err()`. -func (s *commandStage) kill(err error) { - select { - case <-s.done: - // Process has ended; no need to kill it again. - return - default: - } - - // Record the `ctx.Err()`, which will be used as the error result - // for this stage. - s.ctxErr.Store(err) - - s.cmd.Process.Kill() -} diff --git a/internal/pipe/filter-error.go b/internal/pipe/filter-error.go deleted file mode 100644 index 6e2bdd5..0000000 --- a/internal/pipe/filter-error.go +++ /dev/null @@ -1,135 +0,0 @@ -package pipe - -import ( - "errors" - "io" - "os/exec" - "syscall" -) - -// ErrorFilter is a function that can filter errors from -// `Stage.Wait()`. The original error (possibly nil) is passed in as -// an argument, and whatever the function returns is the error -// (possibly nil) that is actually emitted. -type ErrorFilter func(err error) error - -func FilterError(s Stage, filter ErrorFilter) Stage { - return efStage{Stage: s, filter: filter} -} - -type efStage struct { - Stage - filter ErrorFilter -} - -func (s efStage) Wait() error { - return s.filter(s.Stage.Wait()) -} - -// ErrorMatcher decides whether its argument matches some class of -// errors (e.g., errors that we want to ignore). The function will -// only be invoked for non-nil errors. -type ErrorMatcher func(err error) bool - -// IgnoreError creates a stage that acts like `s` except that it -// ignores any errors that are matched by `em`. Use like -// -// p.Add(pipe.IgnoreError( -// someStage, -// func(err error) bool { -// var myError *MyErrorType -// return errors.As(err, &myError) && myError.foo == 42 -// }, -// ) -// -// The second argument can also be one of the `ErrorMatcher`s that are -// provided by this package (e.g., `IsError(target)`, -// IsSignal(signal), `IsSIGPIPE`, `IsEPIPE`, `IsPipeError`), or one of -// the functions from the standard library that has the same signature -// (e.g., `os.IsTimeout`), or some combination of these (e.g., -// `AnyError(IsSIGPIPE, os.IsTimeout)`). -func IgnoreError(s Stage, em ErrorMatcher) Stage { - return FilterError(s, - func(err error) error { - if err == nil || em(err) { - return nil - } - return err - }, - ) -} - -// AnyError returns an `ErrorMatcher` that returns true for an error -// that matches any of the `ems`. -func AnyError(ems ...ErrorMatcher) ErrorMatcher { - return func(err error) bool { - if err == nil { - return false - } - for _, em := range ems { - if em(err) { - return true - } - } - return false - } -} - -// IsError returns an ErrorIdentifier for the specified target error, -// matched using `errors.Is()`. Use like -// -// p.Add(pipe.IgnoreError(someStage, IsError(io.EOF))) -func IsError(target error) ErrorMatcher { - return func(err error) bool { - return errors.Is(err, target) - } -} - -// IsSIGPIPE returns an `ErrorMatcher` that matches `*exec.ExitError`s -// that were caused by the specified signal. The match for -// `*exec.ExitError`s uses `errors.As()`. Note that under Windows this -// always returns false, because on that platform -// `WaitStatus.Signaled()` isn't implemented (it is hardcoded to -// return `false`). -func IsSignal(signal syscall.Signal) ErrorMatcher { - return func(err error) bool { - var eErr *exec.ExitError - - if !errors.As(err, &eErr) { - return false - } - - status, ok := eErr.Sys().(syscall.WaitStatus) - return ok && status.Signaled() && status.Signal() == signal - } -} - -var ( - // IsSIGPIPE is an `ErrorMatcher` that matches `*exec.ExitError`s - // that were caused by SIGPIPE. The match for `*exec.ExitError`s - // uses `errors.As()`. Use like - // - // p.Add(IgnoreError(someStage, IsSIGPIPE)) - IsSIGPIPE = IsSignal(syscall.SIGPIPE) - - // IsEPIPE is an `ErrorMatcher` that matches `syscall.EPIPE` using - // `errors.Is()`. Use like - // - // p.Add(IgnoreError(someStage, IsEPIPE)) - IsEPIPE = IsError(syscall.EPIPE) - - // IsErrClosedPipe is an `ErrorMatcher` that matches - // `io.ErrClosedPipe` using `errors.Is()`. (`io.ErrClosedPipe` is - // the error that results from writing to a closed - // `*io.PipeWriter`.) Use like - // - // p.Add(IgnoreError(someStage, IsErrClosedPipe)) - IsErrClosedPipe = IsError(io.ErrClosedPipe) - - // IsPipeError is an `ErrorMatcher` that matches a few different - // errors that typically result if a stage writes to a subsequent - // stage that has stopped reading from its stdin. Use like - // - // p.Add(IgnoreError(someStage, IsPipeError)) - IsPipeError = AnyError(IsSIGPIPE, IsEPIPE, IsErrClosedPipe) -) diff --git a/internal/pipe/function.go b/internal/pipe/function.go deleted file mode 100644 index bc5d0bd..0000000 --- a/internal/pipe/function.go +++ /dev/null @@ -1,66 +0,0 @@ -package pipe - -import ( - "context" - "fmt" - "io" -) - -// StageFunc is a function that can be used to power a `goStage`. It -// should read its input from `stdin` and write its output to -// `stdout`. `stdin` and `stdout` will be closed automatically (if -// necessary) once the function returns. -// -// Neither `stdin` nor `stdout` are necessarily buffered. If the -// `StageFunc` requires buffering, it needs to arrange that itself. -// -// A `StageFunc` is run in a separate goroutine, so it must be careful -// to synchronize any data access aside from reading and writing. -type StageFunc func(ctx context.Context, env Env, stdin io.Reader, stdout io.Writer) error - -// Function returns a pipeline `Stage` that will run a `StageFunc` in -// a separate goroutine to process the data. See `StageFunc` for more -// information. -func Function(name string, f StageFunc) Stage { - return &goStage{ - name: name, - f: f, - done: make(chan struct{}), - } -} - -// goStage is a `Stage` that does its work by running an arbitrary -// `stageFunc` in a goroutine. -type goStage struct { - name string - f StageFunc - done chan struct{} - err error -} - -func (s *goStage) Name() string { - return s.name -} - -func (s *goStage) Start(ctx context.Context, env Env, stdin io.ReadCloser) (io.ReadCloser, error) { - r, w := io.Pipe() - go func() { - s.err = s.f(ctx, env, stdin, w) - if err := w.Close(); err != nil && s.err == nil { - s.err = fmt.Errorf("error closing output pipe for stage %q: %w", s.Name(), err) - } - if stdin != nil { - if err := stdin.Close(); err != nil && s.err == nil { - s.err = fmt.Errorf("error closing stdin for stage %q: %w", s.Name(), err) - } - } - close(s.done) - }() - - return r, nil -} - -func (s *goStage) Wait() error { - <-s.done - return s.err -} diff --git a/internal/pipe/iocopier.go b/internal/pipe/iocopier.go deleted file mode 100644 index 26d5b0f..0000000 --- a/internal/pipe/iocopier.go +++ /dev/null @@ -1,62 +0,0 @@ -package pipe - -import ( - "context" - "errors" - "io" - "os" -) - -// ioCopier is a stage that copies its stdin to a specified -// `io.Writer`. It generates no stdout itself. -type ioCopier struct { - w io.WriteCloser - done chan struct{} - err error -} - -func newIOCopier(w io.WriteCloser) *ioCopier { - return &ioCopier{ - w: w, - done: make(chan struct{}), - } -} - -func (s *ioCopier) Name() string { - return "ioCopier" -} - -// This method always returns `nil, nil`. -func (s *ioCopier) Start(ctx context.Context, _ Env, r io.ReadCloser) (io.ReadCloser, error) { - go func() { - _, err := io.Copy(s.w, r) - // We don't consider `ErrClosed` an error (FIXME: is this - // correct?): - if err != nil && !errors.Is(err, os.ErrClosed) { - s.err = err - } - if err := r.Close(); err != nil && s.err == nil { - s.err = err - } - if err := s.w.Close(); err != nil && s.err == nil { - s.err = err - } - close(s.done) - }() - - // FIXME: if `s.w.Write()` is blocking (e.g., because there is a - // downstream process that is not reading from the other side), - // there's no way to terminate the copy when the context expires. - // This is not too bad, because the `io.Copy()` call will exit by - // itself when its input is closed. - // - // We could, however, be smarter about exiting more quickly if the - // context expires but `s.w.Write()` is not blocking. - - return nil, nil -} - -func (s *ioCopier) Wait() error { - <-s.done - return s.err -} diff --git a/internal/pipe/linewise.go b/internal/pipe/linewise.go deleted file mode 100644 index 7b5c6ef..0000000 --- a/internal/pipe/linewise.go +++ /dev/null @@ -1,74 +0,0 @@ -package pipe - -import ( - "bufio" - "bytes" - "context" - "io" -) - -// LinewiseStageFunc is a function that can be embedded in a -// `goStage`. It is called once per line in the input (where "line" -// can be defined via any `bufio.Scanner`). It should process the line -// and may write whatever it likes to `stdout`, which is a buffered -// writer whose contents are forwarded to the input of the next stage -// of the pipeline. The function needn't write one line of output per -// line of input. -// -// The function mustn't retain copies of `line`, since it may be -// overwritten every time the function is called. -// -// The function needn't flush or close `stdout` (this will be done -// automatically when all of the input has been processed). -// -// If there is an error parsing the input into lines, or if this -// function returns an error, then the whole pipeline will be aborted -// with that error. However, if the function returns the special error -// `pipe.FinishEarly`, the stage will stop processing immediately with -// a `nil` error value. -// -// The function will be called in a separate goroutine, so it must be -// careful to synchronize any data access aside from writing to -// `stdout`. -type LinewiseStageFunc func( - ctx context.Context, env Env, line []byte, stdout *bufio.Writer, -) error - -// LinewiseFunction returns a function-based `Stage`. The input will -// be split into LF-terminated lines and passed to the function one -// line at a time (without the LF). The function may emit output to -// its `stdout` argument. See the definition of `LinewiseStageFunc` -// for more information. -// -// Note that the stage will emit an error if any line (including its -// end-of-line terminator) exceeds 64 kiB in length. If this is too -// short, use `ScannerFunction()` directly with your own -// `NewScannerFunc` as argument, or use `Function()` directly with -// your own `StageFunc`. -func LinewiseFunction(name string, f LinewiseStageFunc) Stage { - return ScannerFunction( - name, - func(r io.Reader) (Scanner, error) { - scanner := bufio.NewScanner(r) - // Split based on strict LF (we don't accept CRLF): - scanner.Split(ScanLFTerminatedLines) - return scanner, nil - }, - f, - ) -} - -// ScanLFTerminatedLines is a `bufio.SplitFunc` that splits its input -// into lines at LF characters (not treating CR specially). -func ScanLFTerminatedLines(data []byte, atEOF bool) (advance int, token []byte, err error) { - if atEOF && len(data) == 0 { - return 0, nil, nil - } - if i := bytes.IndexByte(data, '\n'); i != -1 { - return i + 1, data[0:i], nil - } - if atEOF { - return len(data), data, nil - } - return 0, nil, nil -} diff --git a/internal/pipe/pipeline.go b/internal/pipe/pipeline.go deleted file mode 100644 index 4725907..0000000 --- a/internal/pipe/pipeline.go +++ /dev/null @@ -1,272 +0,0 @@ -package pipe - -import ( - "bytes" - "context" - "errors" - "fmt" - "io" - "sync/atomic" -) - -// Env represents the environment that a pipeline stage should run in. -// It is passed to `Stage.Start()`. -type Env struct { - // The directory in which external commands should be executed by - // default. - Dir string -} - -// FinishEarly is an error that can be returned by a `Stage` to -// request that the iteration be ended early (possibly without reading -// all of its input). This "error" is considered a successful return, -// and is not reported to the caller. -//nolint:errname -var FinishEarly = errors.New("finish stage early") - -// Pipeline represents a Unix-like pipe that can include multiple -// stages, including external processes but also and stages written in -// Go. -type Pipeline struct { - env Env - - stdin io.Reader - stdout io.WriteCloser - stages []Stage - cancel func() - - // Atomically written and read value, nonzero if the pipeline has - // been started. This is only used for lifecycle sanity checks but - // does not guarantee that clients are using the class correctly. - started uint32 -} - -type nopWriteCloser struct { - io.Writer -} - -func (w nopWriteCloser) Close() error { - return nil -} - -// NewPipeline returns a Pipeline struct with all of the `options` -// applied. -func New(options ...Option) *Pipeline { - p := &Pipeline{} - - for _, option := range options { - option(p) - } - - return p -} - -// Option is a type alias for Pipeline functional options. -type Option func(*Pipeline) - -// WithDir sets the default directory for running external commands. -func WithDir(dir string) Option { - return func(p *Pipeline) { - p.env.Dir = dir - } -} - -// WithStdin assigns stdin to the first command in the pipeline. -func WithStdin(stdin io.Reader) Option { - return func(p *Pipeline) { - p.stdin = stdin - } -} - -// WithStdout assigns stdout to the last command in the pipeline. -func WithStdout(stdout io.Writer) Option { - return func(p *Pipeline) { - p.stdout = nopWriteCloser{stdout} - } -} - -// WithStdoutCloser assigns stdout to the last command in the -// pipeline, and closes stdout when it's done. -func WithStdoutCloser(stdout io.WriteCloser) Option { - return func(p *Pipeline) { - p.stdout = stdout - } -} - -func (p *Pipeline) hasStarted() bool { - return atomic.LoadUint32(&p.started) != 0 -} - -// Add appends one or more stages to the pipeline. -func (p *Pipeline) Add(stages ...Stage) { - if p.hasStarted() { - panic("attempt to modify a pipeline that has already started") - } - - p.stages = append(p.stages, stages...) -} - -// AddWithIgnoredError appends one or more stages that are ignoring -// the passed in error to the pipeline. -func (p *Pipeline) AddWithIgnoredError(em ErrorMatcher, stages ...Stage) { - if p.hasStarted() { - panic("attempt to modify a pipeline that has already started") - } - - for _, stage := range stages { - p.stages = append(p.stages, IgnoreError(stage, em)) - } -} - -// Start starts the commands in the pipeline. If `Start()` exits -// without an error, `Wait()` must also be called, to allow all -// resources to be freed. -func (p *Pipeline) Start(ctx context.Context) error { - if p.hasStarted() { - panic("attempt to start a pipeline that has already started") - } - - atomic.StoreUint32(&p.started, 1) - ctx, p.cancel = context.WithCancel(ctx) - - var nextStdin io.ReadCloser - if p.stdin != nil { - // We don't want the first stage to actually close this, and - // it's not even an `io.ReadCloser`, so fake it: - nextStdin = io.NopCloser(p.stdin) - } - - for i, s := range p.stages { - var err error - stdout, err := s.Start(ctx, p.env, nextStdin) - if err != nil { - // Close the pipe that the previous stage was writing to. - // That should cause it to exit even if it's not minding - // its context. - if nextStdin != nil { - _ = nextStdin.Close() - } - - // Kill and wait for any stages that have been started - // already to finish: - p.cancel() - for _, s := range p.stages[:i] { - _ = s.Wait() - } - return fmt.Errorf("starting pipeline stage %q: %w", s.Name(), err) - } - nextStdin = stdout - } - - // If the pipeline was configured with a `stdout`, add a synthetic - // stage to copy the last stage's stdout to that writer: - if p.stdout != nil { - c := newIOCopier(p.stdout) - p.stages = append(p.stages, c) - // `ioCopier.Start()` never fails: - _, _ = c.Start(ctx, p.env, nextStdin) - } - - return nil -} - -func (p *Pipeline) Output(ctx context.Context) ([]byte, error) { - var buf bytes.Buffer - p.stdout = nopWriteCloser{&buf} - err := p.Run(ctx) - return buf.Bytes(), err -} - -// Wait waits for each stage in the pipeline to exit. -func (p *Pipeline) Wait() error { - if !p.hasStarted() { - panic("unable to wait on a pipeline that has not started") - } - - // Make sure that all of the cleanup eventually happens: - defer p.cancel() - - var earliestStageErr error - var earliestFailedStage Stage - - finishedEarly := false - for i := len(p.stages) - 1; i >= 0; i-- { - s := p.stages[i] - err := s.Wait() - - // Handle errors: - switch { - case err == nil: - // No error to handle. But unset the `finishedEarly` flag, - // because earlier stages shouldn't be affected by the - // later stage that finished early. - finishedEarly = false - continue - - case errors.Is(err, FinishEarly): - // We ignore `FinishEarly` errors because that is how a - // stage informs us that it intentionally finished early. - // Moreover, if we see a `FinishEarly` error, ignore any - // pipe error from the immediately preceding stage, - // because it probably came from trying to write to this - // stage after this stage closed its stdin. - finishedEarly = true - continue - - case IsPipeError(err): - switch { - case finishedEarly: - // A successor stage finished early. It is common for - // this to cause earlier stages to fail with pipe - // errors. Such errors are uninteresting, so ignore - // them. Leave the `finishedEarly` flag set, because - // the preceding stage might get a pipe error from - // trying to write to this one. - case earliestStageErr != nil: - // A later stage has already reported an error. This - // means that we don't want to report the error from - // this stage: - // - // * If the later error was also a pipe error: we want - // to report the _last_ pipe error seen, which would - // be the one already recorded. - // - // * If the later error was not a pipe error: non-pipe - // errors are always considered more important than - // pipe errors, so again we would want to keep the - // error that is already recorded. - default: - // In this case, the pipe error from this stage is the - // most important error that we have seen so far, so - // remember it: - earliestFailedStage, earliestStageErr = s, err - } - - default: - // This stage exited with a non-pipe error. If multiple - // stages exited with such errors, we want to report the - // one that is most informative. We take that to be the - // error from the earliest failing stage. Since we are - // iterating through stages in reverse order, overwrite - // any existing remembered errors (which would have come - // from a later stage): - earliestFailedStage, earliestStageErr = s, err - finishedEarly = false - } - } - - if earliestStageErr != nil { - return fmt.Errorf("%s: %w", earliestFailedStage.Name(), earliestStageErr) - } - - return nil -} - -// Run starts and waits for the commands in the pipeline. -func (p *Pipeline) Run(ctx context.Context) error { - if err := p.Start(ctx); err != nil { - return err - } - - return p.Wait() -} diff --git a/internal/pipe/pipeline_test.go b/internal/pipe/pipeline_test.go deleted file mode 100644 index faf1e31..0000000 --- a/internal/pipe/pipeline_test.go +++ /dev/null @@ -1,883 +0,0 @@ -package pipe_test - -import ( - "bufio" - "bytes" - "context" - "errors" - "fmt" - "io" - "io/ioutil" - "os" - "path/filepath" - "runtime" - "strconv" - "strings" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "go.uber.org/goleak" - - "github.com/github/git-sizer/internal/pipe" -) - -func TestMain(m *testing.M) { - // Check whether this package's test suite leaks any goroutines: - goleak.VerifyTestMain(m) -} - -func TestPipelineFirstStageFailsToStart(t *testing.T) { - t.Parallel() - ctx := context.Background() - - startErr := errors.New("foo") - - p := pipe.New() - p.Add( - ErrorStartingStage{startErr}, - ErrorStartingStage{errors.New("this error should never happen")}, - ) - assert.ErrorIs(t, p.Run(ctx), startErr) -} - -func TestPipelineSecondStageFailsToStart(t *testing.T) { - t.Parallel() - ctx := context.Background() - - startErr := errors.New("foo") - - p := pipe.New() - p.Add( - seqFunction(20000), - ErrorStartingStage{startErr}, - ) - assert.ErrorIs(t, p.Run(ctx), startErr) -} - -func TestPipelineSingleCommandOutput(t *testing.T) { - t.Parallel() - ctx := context.Background() - - p := pipe.New() - p.Add(pipe.Command("echo", "hello world")) - out, err := p.Output(ctx) - if assert.NoError(t, err) { - assert.EqualValues(t, "hello world\n", out) - } -} - -func TestPipelineSingleCommandWithStdout(t *testing.T) { - t.Parallel() - ctx := context.Background() - - stdout := &bytes.Buffer{} - - p := pipe.New(pipe.WithStdout(stdout)) - p.Add(pipe.Command("echo", "hello world")) - if assert.NoError(t, p.Run(ctx)) { - assert.Equal(t, "hello world\n", stdout.String()) - } -} - -func TestNontrivialPipeline(t *testing.T) { - t.Parallel() - ctx := context.Background() - - p := pipe.New() - p.Add( - pipe.Command("echo", "hello world"), - pipe.Command("sed", "s/hello/goodbye/"), - ) - out, err := p.Output(ctx) - if assert.NoError(t, err) { - assert.EqualValues(t, "goodbye world\n", out) - } -} - -func TestPipelineReadFromSlowly(t *testing.T) { - t.Parallel() - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) - defer cancel() - - r, w := io.Pipe() - - var buf []byte - readErr := make(chan error, 1) - - go func() { - time.Sleep(200 * time.Millisecond) - var err error - buf, err = io.ReadAll(r) - readErr <- err - }() - - p := pipe.New(pipe.WithStdout(w)) - p.Add(pipe.Command("echo", "hello world")) - assert.NoError(t, p.Run(ctx)) - - time.Sleep(100 * time.Millisecond) - // It's not super-intuitive, but `w` has to be closed here so that - // the `ioutil.ReadAll()` call above knows that it's done: - _ = w.Close() - - assert.NoError(t, <-readErr) - assert.Equal(t, "hello world\n", string(buf)) -} - -func TestPipelineReadFromSlowly2(t *testing.T) { - if runtime.GOOS == "windows" { - t.Skip("FIXME: test skipped on Windows: 'seq' unavailable") - } - - t.Parallel() - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) - defer cancel() - - r, w := io.Pipe() - - var buf []byte - readErr := make(chan error, 1) - - go func() { - time.Sleep(100 * time.Millisecond) - for { - var c [1]byte - _, err := r.Read(c[:]) - if err != nil { - if err == io.EOF { - readErr <- nil - return - } - readErr <- err - return - } - buf = append(buf, c[0]) - time.Sleep(1 * time.Millisecond) - } - }() - - p := pipe.New(pipe.WithStdout(w)) - p.Add(pipe.Command("seq", "100")) - assert.NoError(t, p.Run(ctx)) - - time.Sleep(200 * time.Millisecond) - // It's not super-intuitive, but `w` has to be closed here so that - // the `ioutil.ReadAll()` call above knows that it's done: - _ = w.Close() - - assert.NoError(t, <-readErr) - assert.Equal(t, 292, len(buf)) -} - -func TestPipelineTwoCommandsPiping(t *testing.T) { - t.Parallel() - ctx := context.Background() - - p := pipe.New() - p.Add(pipe.Command("echo", "hello world")) - assert.Panics(t, func() { p.Add(pipe.Command("")) }) - out, err := p.Output(ctx) - if assert.NoError(t, err) { - assert.EqualValues(t, "hello world\n", out) - } -} - -func TestPipelineDir(t *testing.T) { - t.Parallel() - ctx := context.Background() - - wdir, err := os.Getwd() - require.NoError(t, err) - dir, err := ioutil.TempDir(wdir, "pipeline-test-") - require.NoError(t, err) - defer os.RemoveAll(dir) - - p := pipe.New(pipe.WithDir(dir)) - switch runtime.GOOS { - case "windows": - p.Add(pipe.Command("bash", "-c", "pwd -W")) - default: - p.Add(pipe.Command("pwd")) - } - - out, err := p.Output(ctx) - if assert.NoError(t, err) { - assert.Equal(t, filepath.Clean(dir), filepath.Clean(strings.TrimSuffix(string(out), "\n"))) - } -} - -func TestPipelineExit(t *testing.T) { - t.Parallel() - ctx := context.Background() - - p := pipe.New() - p.Add( - pipe.Command("false"), - pipe.Command("true"), - ) - assert.EqualError(t, p.Run(ctx), "false: exit status 1") -} - -func TestPipelineStderr(t *testing.T) { - t.Parallel() - ctx := context.Background() - - dir, err := ioutil.TempDir("", "pipeline-test-") - require.NoError(t, err) - defer os.RemoveAll(dir) - - p := pipe.New(pipe.WithDir(dir)) - p.Add(pipe.Command("ls", "doesnotexist")) - - _, err = p.Output(ctx) - if assert.Error(t, err) { - assert.Contains(t, err.Error(), "ls: exit status") - } -} - -func TestPipelineInterrupted(t *testing.T) { - if runtime.GOOS == "windows" { - t.Skip("FIXME: test skipped on Windows: 'sleep' unavailable") - } - - t.Parallel() - stdout := &bytes.Buffer{} - - p := pipe.New(pipe.WithStdout(stdout)) - p.Add(pipe.Command("sleep", "10")) - - ctx, cancel := context.WithTimeout(context.Background(), 20*time.Millisecond) - defer cancel() - - err := p.Start(ctx) - require.NoError(t, err) - - err = p.Wait() - assert.ErrorIs(t, err, context.DeadlineExceeded) -} - -func TestPipelineCanceled(t *testing.T) { - if runtime.GOOS == "windows" { - t.Skip("FIXME: test skipped on Windows: 'sleep' unavailable") - } - - t.Parallel() - - stdout := &bytes.Buffer{} - - p := pipe.New(pipe.WithStdout(stdout)) - p.Add(pipe.Command("sleep", "10")) - - ctx, cancel := context.WithCancel(context.Background()) - - err := p.Start(ctx) - require.NoError(t, err) - - cancel() - - err = p.Wait() - assert.ErrorIs(t, err, context.Canceled) -} - -// Verify the correct error if a command in the pipeline exits before -// reading all of its predecessor's output. Note that the amount of -// unread output in this case *does fit* within the OS-level pipe -// buffer. -func TestLittleEPIPE(t *testing.T) { - if runtime.GOOS == "windows" { - t.Skip("FIXME: test skipped on Windows: 'sleep' unavailable") - } - - t.Parallel() - - p := pipe.New() - p.Add( - pipe.Command("sh", "-c", "sleep 1; echo foo"), - pipe.Command("true"), - ) - - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) - defer cancel() - err := p.Run(ctx) - assert.EqualError(t, err, "sh: signal: broken pipe") -} - -// Verify the correct error if one command in the pipeline exits -// before reading all of its predecessor's output. Note that the -// amount of unread output in this case *does not fit* within the -// OS-level pipe buffer. -func TestBigEPIPE(t *testing.T) { - if runtime.GOOS == "windows" { - t.Skip("FIXME: test skipped on Windows: 'seq' unavailable") - } - - t.Parallel() - - p := pipe.New() - p.Add( - pipe.Command("seq", "100000"), - pipe.Command("true"), - ) - - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) - defer cancel() - err := p.Run(ctx) - assert.EqualError(t, err, "seq: signal: broken pipe") -} - -// Verify the correct error if one command in the pipeline exits -// before reading all of its predecessor's output. Note that the -// amount of unread output in this case *does not fit* within the -// OS-level pipe buffer. -func TestIgnoredSIGPIPE(t *testing.T) { - if runtime.GOOS == "windows" { - t.Skip("FIXME: test skipped on Windows: 'seq' unavailable") - } - - t.Parallel() - - p := pipe.New() - p.Add( - pipe.IgnoreError(pipe.Command("seq", "100000"), pipe.IsSIGPIPE), - pipe.Command("echo", "foo"), - ) - - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) - defer cancel() - out, err := p.Output(ctx) - assert.NoError(t, err) - assert.EqualValues(t, "foo\n", out) -} - -func TestFunction(t *testing.T) { - t.Parallel() - ctx := context.Background() - - p := pipe.New() - p.Add( - pipe.Print("hello world"), - pipe.Function( - "farewell", - func(_ context.Context, _ pipe.Env, stdin io.Reader, stdout io.Writer) error { - buf, err := io.ReadAll(stdin) - if err != nil { - return err - } - if string(buf) != "hello world" { - return fmt.Errorf("expected \"hello world\"; got %q", string(buf)) - } - _, err = stdout.Write([]byte("goodbye, cruel world")) - return err - }, - ), - ) - - out, err := p.Output(ctx) - assert.NoError(t, err) - assert.EqualValues(t, "goodbye, cruel world", out) -} - -func TestPipelineWithFunction(t *testing.T) { - t.Parallel() - ctx := context.Background() - - p := pipe.New() - p.Add( - pipe.Command("echo", "-n", "hello world"), - pipe.Function( - "farewell", - func(_ context.Context, _ pipe.Env, stdin io.Reader, stdout io.Writer) error { - buf, err := io.ReadAll(stdin) - if err != nil { - return err - } - if string(buf) != "hello world" { - return fmt.Errorf("expected \"hello world\"; got %q", string(buf)) - } - _, err = stdout.Write([]byte("goodbye, cruel world")) - return err - }, - ), - pipe.Command("tr", "a-z", "A-Z"), - ) - - out, err := p.Output(ctx) - assert.NoError(t, err) - assert.EqualValues(t, "GOODBYE, CRUEL WORLD", out) -} - -type ErrorStartingStage struct { - err error -} - -func (s ErrorStartingStage) Name() string { - return "errorStartingStage" -} - -func (s ErrorStartingStage) Start( - ctx context.Context, env pipe.Env, stdin io.ReadCloser, -) (io.ReadCloser, error) { - return io.NopCloser(&bytes.Buffer{}), s.err -} - -func (s ErrorStartingStage) Wait() error { - return nil -} - -func seqFunction(n int) pipe.Stage { - return pipe.Function( - "seq", - func(_ context.Context, _ pipe.Env, _ io.Reader, stdout io.Writer) error { - for i := 1; i <= n; i++ { - _, err := fmt.Fprintf(stdout, "%d\n", i) - if err != nil { - return err - } - } - return nil - }, - ) -} - -func TestPipelineWithLinewiseFunction(t *testing.T) { - t.Parallel() - ctx := context.Background() - - p := pipe.New() - // Print the numbers from 1 to 20 (generated from scratch): - p.Add( - seqFunction(20), - // Discard all but the multiples of 5, and emit the results - // separated by spaces on one line: - pipe.LinewiseFunction( - "multiples-of-5", - func(_ context.Context, _ pipe.Env, line []byte, w *bufio.Writer) error { - n, err := strconv.Atoi(string(line)) - if err != nil { - return err - } - if n%5 != 0 { - return nil - } - _, err = fmt.Fprintf(w, " %d", n) - return err - }, - ), - // Read the words and square them, emitting the results one per - // line: - pipe.ScannerFunction( - "square-multiples-of-5", - func(r io.Reader) (pipe.Scanner, error) { - scanner := bufio.NewScanner(r) - scanner.Split(bufio.ScanWords) - return scanner, nil - }, - func(_ context.Context, _ pipe.Env, line []byte, w *bufio.Writer) error { - n, err := strconv.Atoi(string(line)) - if err != nil { - return err - } - _, err = fmt.Fprintf(w, "%d\n", n*n) - return err - }, - ), - ) - - out, err := p.Output(ctx) - assert.NoError(t, err) - assert.EqualValues(t, "25\n100\n225\n400\n", out) -} - -func TestScannerAlwaysFlushes(t *testing.T) { - t.Parallel() - ctx := context.Background() - - var length int64 - - p := pipe.New() - // Print the numbers from 1 to 20 (generated from scratch): - p.Add( - pipe.IgnoreError( - seqFunction(20), - pipe.IsPipeError, - ), - // Pass the numbers through up to 7, then exit with an - // ignored error: - pipe.IgnoreError( - pipe.LinewiseFunction( - "error-after-7", - func(_ context.Context, _ pipe.Env, line []byte, w *bufio.Writer) error { - fmt.Fprintf(w, "%s\n", line) - if string(line) == "7" { - return errors.New("ignore") - } - return nil - }, - ), - func(err error) bool { - return err.Error() == "ignore" - }, - ), - // Read the numbers and add them into the sum: - pipe.Function( - "compute-length", - func(_ context.Context, _ pipe.Env, stdin io.Reader, _ io.Writer) error { - var err error - length, err = io.Copy(io.Discard, stdin) - return err - }, - ), - ) - - err := p.Run(ctx) - assert.NoError(t, err) - // Make sure that all of the bytes emitted before the second - // stage's error were received by the third stage: - assert.EqualValues(t, 14, length) -} - -func TestScannerFinishEarly(t *testing.T) { - t.Parallel() - ctx := context.Background() - - var length int64 - - p := pipe.New() - p.Add( - // Print the numbers from 1 to 20 (generated from scratch): - seqFunction(20), - - // Pass the numbers through up to 7, then exit with an ignored - // error: - pipe.LinewiseFunction( - "finish-after-7", - func(_ context.Context, _ pipe.Env, line []byte, w *bufio.Writer) error { - fmt.Fprintf(w, "%s\n", line) - if string(line) == "7" { - return pipe.FinishEarly - } - return nil - }, - ), - - // Read the numbers and add them into the sum: - pipe.Function( - "compute-length", - func(_ context.Context, _ pipe.Env, stdin io.Reader, _ io.Writer) error { - var err error - length, err = io.Copy(io.Discard, stdin) - return err - }, - ), - ) - - err := p.Run(ctx) - assert.NoError(t, err) - // Make sure that all of the bytes emitted before the second - // stage's error were received by the third stage: - assert.EqualValues(t, 14, length) -} - -func TestPrintln(t *testing.T) { - t.Parallel() - ctx := context.Background() - - p := pipe.New() - p.Add(pipe.Println("Look Ma, no hands!")) - out, err := p.Output(ctx) - if assert.NoError(t, err) { - assert.EqualValues(t, "Look Ma, no hands!\n", out) - } -} - -func TestPrintf(t *testing.T) { - t.Parallel() - ctx := context.Background() - - p := pipe.New() - p.Add(pipe.Printf("Strangely recursive: %T", p)) - out, err := p.Output(ctx) - if assert.NoError(t, err) { - assert.EqualValues(t, "Strangely recursive: *pipe.Pipeline", out) - } -} - -func TestErrors(t *testing.T) { - t.Parallel() - ctx := context.Background() - - err1 := errors.New("error1") - err2 := errors.New("error2") - - for _, tc := range []struct { - name string - stages []pipe.Stage - expectedErr error - }{ - { - name: "no-error", - stages: []pipe.Stage{ - pipe.Function("noop1", genErr(nil)), - pipe.Function("noop2", genErr(nil)), - pipe.Function("noop3", genErr(nil)), - }, - expectedErr: nil, - }, - { - name: "lonely-error", - stages: []pipe.Stage{ - pipe.Function("err1", genErr(err1)), - }, - expectedErr: err1, - }, - { - name: "error", - stages: []pipe.Stage{ - pipe.Function("noop1", genErr(nil)), - pipe.Function("err1", genErr(err1)), - pipe.Function("noop2", genErr(nil)), - }, - expectedErr: err1, - }, - { - name: "two-consecutive-errors", - stages: []pipe.Stage{ - pipe.Function("noop1", genErr(nil)), - pipe.Function("err1", genErr(err1)), - pipe.Function("err2", genErr(err2)), - pipe.Function("noop2", genErr(nil)), - }, - expectedErr: err1, - }, - { - name: "pipe-then-error", - stages: []pipe.Stage{ - pipe.Function("noop1", genErr(nil)), - pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), - pipe.Function("err1", genErr(err1)), - pipe.Function("noop2", genErr(nil)), - }, - expectedErr: err1, - }, - { - name: "error-then-pipe", - stages: []pipe.Stage{ - pipe.Function("noop1", genErr(nil)), - pipe.Function("err1", genErr(err1)), - pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), - pipe.Function("noop2", genErr(nil)), - }, - expectedErr: err1, - }, - { - name: "two-spaced-errors", - stages: []pipe.Stage{ - pipe.Function("noop1", genErr(nil)), - pipe.Function("err1", genErr(err1)), - pipe.Function("noop2", genErr(nil)), - pipe.Function("err2", genErr(err2)), - pipe.Function("noop3", genErr(nil)), - }, - expectedErr: err1, - }, - { - name: "finish-early-ignored", - stages: []pipe.Stage{ - pipe.Function("noop1", genErr(nil)), - pipe.Function("finish-early1", genErr(pipe.FinishEarly)), - pipe.Function("noop2", genErr(nil)), - pipe.Function("finish-early2", genErr(pipe.FinishEarly)), - pipe.Function("noop3", genErr(nil)), - }, - expectedErr: nil, - }, - { - name: "error-before-finish-early", - stages: []pipe.Stage{ - pipe.Function("err1", genErr(err1)), - pipe.Function("finish-early", genErr(pipe.FinishEarly)), - }, - expectedErr: err1, - }, - { - name: "error-after-finish-early", - stages: []pipe.Stage{ - pipe.Function("finish-early", genErr(pipe.FinishEarly)), - pipe.Function("err1", genErr(err1)), - }, - expectedErr: err1, - }, - { - name: "pipe-then-finish-early", - stages: []pipe.Stage{ - pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), - pipe.Function("finish-early", genErr(pipe.FinishEarly)), - }, - expectedErr: nil, - }, - { - name: "pipe-then-two-finish-early", - stages: []pipe.Stage{ - pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), - pipe.Function("finish-early1", genErr(pipe.FinishEarly)), - pipe.Function("finish-early2", genErr(pipe.FinishEarly)), - }, - expectedErr: nil, - }, - { - name: "two-pipe-then-finish-early", - stages: []pipe.Stage{ - pipe.Function("pipe-error1", genErr(io.ErrClosedPipe)), - pipe.Function("pipe-error2", genErr(io.ErrClosedPipe)), - pipe.Function("finish-early", genErr(pipe.FinishEarly)), - }, - expectedErr: nil, - }, - { - name: "pipe-then-finish-early-with-gap", - stages: []pipe.Stage{ - pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), - pipe.Function("noop", genErr(nil)), - pipe.Function("finish-early1", genErr(pipe.FinishEarly)), - }, - expectedErr: io.ErrClosedPipe, - }, - { - name: "finish-early-then-pipe", - stages: []pipe.Stage{ - pipe.Function("finish-early", genErr(pipe.FinishEarly)), - pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), - }, - expectedErr: io.ErrClosedPipe, - }, - { - name: "error-then-pipe-then-finish-early", - stages: []pipe.Stage{ - pipe.Function("err1", genErr(err1)), - pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), - pipe.Function("finish-early", genErr(pipe.FinishEarly)), - }, - expectedErr: err1, - }, - { - name: "pipe-then-error-then-finish-early", - stages: []pipe.Stage{ - pipe.Function("pipe-error", genErr(io.ErrClosedPipe)), - pipe.Function("err1", genErr(err1)), - pipe.Function("finish-early", genErr(pipe.FinishEarly)), - }, - expectedErr: err1, - }, - } { - tc := tc - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - - p := pipe.New() - p.Add(tc.stages...) - err := p.Run(ctx) - if tc.expectedErr == nil { - assert.NoError(t, err) - } else { - assert.ErrorIs(t, err, tc.expectedErr) - } - }) - } -} - -func BenchmarkSingleProgram(b *testing.B) { - ctx := context.Background() - - for i := 0; i < b.N; i++ { - p := pipe.New() - p.Add( - pipe.Command("true"), - ) - assert.NoError(b, p.Run(ctx)) - } -} - -func BenchmarkTenPrograms(b *testing.B) { - ctx := context.Background() - - for i := 0; i < b.N; i++ { - p := pipe.New() - p.Add( - pipe.Command("echo", "hello world"), - pipe.Command("cat"), - pipe.Command("cat"), - pipe.Command("cat"), - pipe.Command("cat"), - pipe.Command("cat"), - pipe.Command("cat"), - pipe.Command("cat"), - pipe.Command("cat"), - pipe.Command("cat"), - ) - out, err := p.Output(ctx) - if assert.NoError(b, err) { - assert.EqualValues(b, "hello world\n", out) - } - } -} - -func BenchmarkTenFunctions(b *testing.B) { - ctx := context.Background() - - for i := 0; i < b.N; i++ { - p := pipe.New() - p.Add( - pipe.Println("hello world"), - pipe.Function("copy1", catFn), - pipe.Function("copy2", catFn), - pipe.Function("copy3", catFn), - pipe.Function("copy4", catFn), - pipe.Function("copy5", catFn), - pipe.Function("copy6", catFn), - pipe.Function("copy7", catFn), - pipe.Function("copy8", catFn), - pipe.Function("copy9", catFn), - ) - out, err := p.Output(ctx) - if assert.NoError(b, err) { - assert.EqualValues(b, "hello world\n", out) - } - } -} - -func BenchmarkTenMixedStages(b *testing.B) { - ctx := context.Background() - - for i := 0; i < b.N; i++ { - p := pipe.New() - p.Add( - pipe.Command("echo", "hello world"), - pipe.Function("copy1", catFn), - pipe.Command("cat"), - pipe.Function("copy2", catFn), - pipe.Command("cat"), - pipe.Function("copy3", catFn), - pipe.Command("cat"), - pipe.Function("copy4", catFn), - pipe.Command("cat"), - pipe.Function("copy5", catFn), - ) - out, err := p.Output(ctx) - if assert.NoError(b, err) { - assert.EqualValues(b, "hello world\n", out) - } - } -} - -func catFn(_ context.Context, _ pipe.Env, stdin io.Reader, stdout io.Writer) error { - _, err := io.Copy(stdout, stdin) - return err -} - -func genErr(err error) pipe.StageFunc { - return func(_ context.Context, _ pipe.Env, _ io.Reader, _ io.Writer) error { - return err - } -} diff --git a/internal/pipe/print.go b/internal/pipe/print.go deleted file mode 100644 index 766418d..0000000 --- a/internal/pipe/print.go +++ /dev/null @@ -1,37 +0,0 @@ -package pipe - -import ( - "context" - "fmt" - "io" -) - -func Print(a ...interface{}) Stage { - return Function( - "print", - func(_ context.Context, _ Env, _ io.Reader, stdout io.Writer) error { - _, err := fmt.Fprint(stdout, a...) - return err - }, - ) -} - -func Println(a ...interface{}) Stage { - return Function( - "println", - func(_ context.Context, _ Env, _ io.Reader, stdout io.Writer) error { - _, err := fmt.Fprintln(stdout, a...) - return err - }, - ) -} - -func Printf(format string, a ...interface{}) Stage { - return Function( - "printf", - func(_ context.Context, _ Env, _ io.Reader, stdout io.Writer) error { - _, err := fmt.Fprintf(stdout, format, a...) - return err - }, - ) -} diff --git a/internal/pipe/scanner.go b/internal/pipe/scanner.go deleted file mode 100644 index b56b58c..0000000 --- a/internal/pipe/scanner.go +++ /dev/null @@ -1,67 +0,0 @@ -package pipe - -import ( - "bufio" - "context" - "io" -) - -// Scanner defines the interface (which is implemented by -// `bufio.Scanner`) that is needed by `AddScannerFunction()`. See -// `bufio.Scanner` for how these methods should behave. -type Scanner interface { - Scan() bool - Bytes() []byte - Err() error -} - -// NewScannerFunc is used to create a `Scanner` for scanning input -// that is coming from `r`. -type NewScannerFunc func(r io.Reader) (Scanner, error) - -// ScannerFunction creates a function-based `Stage`. The function will -// be passed input, one line at a time, and may emit output. See the -// definition of `LinewiseStageFunc` for more information. -func ScannerFunction( - name string, newScanner NewScannerFunc, f LinewiseStageFunc, -) Stage { - return Function( - name, - func(ctx context.Context, env Env, stdin io.Reader, stdout io.Writer) (theErr error) { - scanner, err := newScanner(stdin) - if err != nil { - return err - } - - var out *bufio.Writer - if stdout != nil { - out = bufio.NewWriter(stdout) - defer func() { - err := out.Flush() - if err != nil && theErr == nil { - // Note: this sets the named return value, - // thereby causing the whole stage to report - // the error. - theErr = err - } - }() - } - - for scanner.Scan() { - if ctx.Err() != nil { - return ctx.Err() - } - err := f(ctx, env, scanner.Bytes(), out) - if err != nil { - return err - } - } - if err := scanner.Err(); err != nil { - return err - } - - return nil - // `p.AddFunction()` arranges for `stdout` to be closed. - }, - ) -} diff --git a/internal/pipe/stage.go b/internal/pipe/stage.go deleted file mode 100644 index f3d74d9..0000000 --- a/internal/pipe/stage.go +++ /dev/null @@ -1,34 +0,0 @@ -package pipe - -import ( - "context" - "io" -) - -// Stage is an element of a `Pipeline`. -type Stage interface { - // Name returns the name of the stage. - Name() string - - // Start starts the stage in the background, in the environment - // described by `env`, and using `stdin` as input. (`stdin` should - // be set to `nil` if the stage is to receive no input, which - // might be the case for the first stage in a pipeline.) It - // returns an `io.ReadCloser` from which the stage's output can be - // read (or `nil` if it generates no output, which should only be - // the case for the last stage in a pipeline). It is the stages' - // responsibility to close `stdin` (if it is not nil) when it has - // read all of the input that it needs, and to close the write end - // of its output reader when it is done, as that is generally how - // the subsequent stage knows that it has received all of its - // input and can finish its work, too. - // - // If `Start()` returns without an error, `Wait()` must also be - // called, to allow all resources to be freed. - Start(ctx context.Context, env Env, stdin io.ReadCloser) (io.ReadCloser, error) - - // Wait waits for the stage to be done, either because it has - // finished or because it has been killed due to the expiration of - // the context passed to `Start()`. - Wait() error -} From 9849429feceedf21184ce57de4723fdef99b92e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20=C3=81ngel=20Pastor=20Olivar?= <migue@github.com> Date: Tue, 29 Nov 2022 09:47:23 +0100 Subject: [PATCH 03/28] Add the go-pipe dependency v1.0.1 --- go.mod | 11 +++++++---- go.sum | 26 +++++++++++++++++--------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/go.mod b/go.mod index 926a1c8..0cba7b5 100644 --- a/go.mod +++ b/go.mod @@ -6,12 +6,15 @@ require ( github.com/cli/safeexec v1.0.0 github.com/davecgh/go-spew v1.1.1 // indirect github.com/spf13/pflag v1.0.5 - github.com/stretchr/testify v1.7.0 - go.uber.org/goleak v1.1.12 - golang.org/x/sync v0.0.0-20210220032951-036812b2e83c + github.com/stretchr/testify v1.8.1 + golang.org/x/sync v0.1.0 // indirect ) +require github.com/github/go-pipe v1.0.1 + require ( + github.com/kr/pretty v0.1.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect + gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index d977b15..f17f036 100644 --- a/go.sum +++ b/go.sum @@ -1,26 +1,33 @@ github.com/cli/safeexec v1.0.0 h1:0VngyaIyqACHdcMNWfo6+KdUYnqEr2Sg+bSP1pdF+dI= github.com/cli/safeexec v1.0.0/go.mod h1:Z/D4tTN8Vs5gXYHDCbaM1S/anmEDnJb1iW0+EJ5zx3Q= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/github/go-pipe v1.0.1 h1:EX3LIhOueA/GptjP8KoIQh/2tvKqKY5nn3QI5adpa+M= +github.com/github/go-pipe v1.0.1/go.mod h1:/GvNLA516QlfGGMtfv4PC/5/CdzL9X4af/AJYhmLD54= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= -go.uber.org/goleak v1.1.12 h1:gZAh5/EyT/HQwlpkCy6wTpqfH9H8Lz8zbm3dZh+OyzA= -go.uber.org/goleak v1.1.12/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= +go.uber.org/goleak v1.2.0 h1:xqgm/S+aQvhWFTtR0XK3Jvg7z8kGV8P4X14IzwN3Eqk= +go.uber.org/goleak v1.2.0/go.mod h1:XJYK+MuIchqpmGmUSAzotztawfKvYLUIgg7guXrwVUo= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/lint v0.0.0-20190930215403-16217165b5de h1:5hukYrvBGR8/eNkX5mdUezrA6JiaEZDtJb9Ei+1LlBs= golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= @@ -28,8 +35,9 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20210220032951-036812b2e83c h1:5KslGYwFpkhGh+Q16bwMP3cOontH8FOep7tGV86Y7SQ= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -41,7 +49,6 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.1.5 h1:ouewzE6p+/VEB31YYnTbEJdi8pFqKp4P4n85vwo3DHA= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -49,5 +56,6 @@ golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8T gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From 36e307e503182d987c8487e7f590cede24a94d48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20=C3=81ngel=20Pastor=20Olivar?= <migue@github.com> Date: Tue, 29 Nov 2022 09:48:05 +0100 Subject: [PATCH 04/28] Update the source code to use the abstraction provided by the go-pipe dependency --- git/batch_obj_iter.go | 2 +- git/obj_iter.go | 2 +- git/ref_iter.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/git/batch_obj_iter.go b/git/batch_obj_iter.go index ee17337..05c6928 100644 --- a/git/batch_obj_iter.go +++ b/git/batch_obj_iter.go @@ -6,7 +6,7 @@ import ( "fmt" "io" - "github.com/github/git-sizer/internal/pipe" + "github.com/github/go-pipe/pipe" ) type ObjectRecord struct { diff --git a/git/obj_iter.go b/git/obj_iter.go index 268280b..cecdc2a 100644 --- a/git/obj_iter.go +++ b/git/obj_iter.go @@ -6,7 +6,7 @@ import ( "fmt" "io" - "github.com/github/git-sizer/internal/pipe" + "github.com/github/go-pipe/pipe" ) // ObjectIter iterates over objects in a Git repository. diff --git a/git/ref_iter.go b/git/ref_iter.go index 955499b..74e8415 100644 --- a/git/ref_iter.go +++ b/git/ref_iter.go @@ -6,7 +6,7 @@ import ( "fmt" "io" - "github.com/github/git-sizer/internal/pipe" + "github.com/github/go-pipe/pipe" ) // ReferenceIter is an iterator that interates over references. From 35f14cc271d73f65f049fb2fae64d6d736b27592 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20=C3=81ngel=20Pastor=20Olivar?= <migue@github.com> Date: Tue, 29 Nov 2022 13:40:06 +0100 Subject: [PATCH 05/28] Use the main branch in go-pipe If everything works as expected we will tag 1.0.2 in go-pipe --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 0cba7b5..ddb40e0 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( golang.org/x/sync v0.1.0 // indirect ) -require github.com/github/go-pipe v1.0.1 +require github.com/github/go-pipe v1.0.2-0.20221129123738-3f37633cc05b require ( github.com/kr/pretty v0.1.0 // indirect diff --git a/go.sum b/go.sum index f17f036..349c471 100644 --- a/go.sum +++ b/go.sum @@ -4,8 +4,8 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/github/go-pipe v1.0.1 h1:EX3LIhOueA/GptjP8KoIQh/2tvKqKY5nn3QI5adpa+M= -github.com/github/go-pipe v1.0.1/go.mod h1:/GvNLA516QlfGGMtfv4PC/5/CdzL9X4af/AJYhmLD54= +github.com/github/go-pipe v1.0.2-0.20221129123738-3f37633cc05b h1:aPBpibgZUa5ITdtlMWWgkv9VrYAk5Kpd2Djo0E2Bo2w= +github.com/github/go-pipe v1.0.2-0.20221129123738-3f37633cc05b/go.mod h1:/GvNLA516QlfGGMtfv4PC/5/CdzL9X4af/AJYhmLD54= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= From 842cb88a3eac240a8b1c45f2eaaa2c60bf419534 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20=C3=81ngel=20Pastor=20Olivar?= <migue@github.com> Date: Tue, 29 Nov 2022 13:45:29 +0100 Subject: [PATCH 06/28] Update go-pipe to v1.0.2. This new versions fixes the compilation problems found in Windows --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index ddb40e0..9db294d 100644 --- a/go.mod +++ b/go.mod @@ -10,7 +10,7 @@ require ( golang.org/x/sync v0.1.0 // indirect ) -require github.com/github/go-pipe v1.0.2-0.20221129123738-3f37633cc05b +require github.com/github/go-pipe v1.0.2 require ( github.com/kr/pretty v0.1.0 // indirect diff --git a/go.sum b/go.sum index 349c471..5c5d0a9 100644 --- a/go.sum +++ b/go.sum @@ -4,8 +4,8 @@ github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ3 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/github/go-pipe v1.0.2-0.20221129123738-3f37633cc05b h1:aPBpibgZUa5ITdtlMWWgkv9VrYAk5Kpd2Djo0E2Bo2w= -github.com/github/go-pipe v1.0.2-0.20221129123738-3f37633cc05b/go.mod h1:/GvNLA516QlfGGMtfv4PC/5/CdzL9X4af/AJYhmLD54= +github.com/github/go-pipe v1.0.2 h1:befTXflsc6ir/h9f6Q7QCDmfojoBswD1MfQrPhmmSoA= +github.com/github/go-pipe v1.0.2/go.mod h1:/GvNLA516QlfGGMtfv4PC/5/CdzL9X4af/AJYhmLD54= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= From 6238db186d89690d02f4a1ebfd9c499eff0a9b9e Mon Sep 17 00:00:00 2001 From: Michael Haggerty <mhagger@github.com> Date: Sat, 3 Dec 2022 15:20:38 +0100 Subject: [PATCH 07/28] git.Repository: invoke `git config` more backwards-compatibly Some `git-config` options that we were using (`--default`, `--type=bool`, and `--type=int`) were only added in git 2.18, released 2018-06-21. This means that some fairly recent platforms, like Ubuntu 18.04 "bionic", don't have those features in their default `git`. Change `git.Repository` to invoke `git config` without using those newer options. --- git/gitconfig.go | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/git/gitconfig.go b/git/gitconfig.go index d3378ae..76b8422 100644 --- a/git/gitconfig.go +++ b/git/gitconfig.go @@ -4,6 +4,7 @@ import ( "bytes" "errors" "fmt" + "os/exec" "strconv" "strings" ) @@ -114,14 +115,18 @@ func configKeyMatchesPrefix(key, prefix string) (bool, string) { } func (repo *Repository) ConfigStringDefault(key string, defaultValue string) (string, error) { + // Note that `git config --get` didn't get `--default` until Git + // 2.18 (released 2018-06-21). cmd := repo.GitCommand( - "config", - "--default", defaultValue, - key, + "config", "--get", key, ) out, err := cmd.Output() if err != nil { + if err, ok := err.(*exec.ExitError); ok && err.ExitCode() == 1 { + // This indicates that the value was not found. + return defaultValue, nil + } return defaultValue, fmt.Errorf("running 'git config': %w", err) } @@ -133,15 +138,18 @@ func (repo *Repository) ConfigStringDefault(key string, defaultValue string) (st } func (repo *Repository) ConfigBoolDefault(key string, defaultValue bool) (bool, error) { + // Note that `git config --get` didn't get `--type=bool` or + // `--default` until Git 2.18 (released 2018-06-21). cmd := repo.GitCommand( - "config", - "--type", "bool", - "--default", strconv.FormatBool(defaultValue), - key, + "config", "--get", "--bool", key, ) out, err := cmd.Output() if err != nil { + if err, ok := err.(*exec.ExitError); ok && err.ExitCode() == 1 { + // This indicates that the value was not found. + return defaultValue, nil + } return defaultValue, fmt.Errorf("running 'git config': %w", err) } @@ -155,15 +163,18 @@ func (repo *Repository) ConfigBoolDefault(key string, defaultValue bool) (bool, } func (repo *Repository) ConfigIntDefault(key string, defaultValue int) (int, error) { + // Note that `git config --get` didn't get `--type=int` or + // `--default` until Git 2.18 (released 2018-06-21). cmd := repo.GitCommand( - "config", - "--type", "int", - "--default", strconv.Itoa(defaultValue), - key, + "config", "--get", "--int", key, ) out, err := cmd.Output() if err != nil { + if err, ok := err.(*exec.ExitError); ok && err.ExitCode() == 1 { + // This indicates that the value was not found. + return defaultValue, nil + } return defaultValue, fmt.Errorf("running 'git config': %w", err) } From 9ed78b17d5dd41a9525bd6a57b2dc321b806f265 Mon Sep 17 00:00:00 2001 From: Michael Haggerty <mhagger@github.com> Date: Mon, 14 Aug 2023 14:03:17 +0200 Subject: [PATCH 08/28] Graph.rg: remove member It wasn't used. --- sizes/graph.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sizes/graph.go b/sizes/graph.go index 7e923f6..9187907 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -69,7 +69,7 @@ func ScanRepositoryUsingGraph( ctx, cancel := context.WithCancel(context.TODO()) defer cancel() - graph := NewGraph(rg, nameStyle) + graph := NewGraph(nameStyle) refIter, err := repo.NewReferenceIter(ctx) if err != nil { @@ -337,8 +337,6 @@ func ScanRepositoryUsingGraph( // Graph is an object graph that is being built up. type Graph struct { - rg RefGrouper - blobLock sync.Mutex blobSizes map[git.OID]BlobSize @@ -361,10 +359,8 @@ type Graph struct { } // NewGraph creates and returns a new `*Graph` instance. -func NewGraph(rg RefGrouper, nameStyle NameStyle) *Graph { +func NewGraph(nameStyle NameStyle) *Graph { return &Graph{ - rg: rg, - blobSizes: make(map[git.OID]BlobSize), treeRecords: make(map[git.OID]*treeRecord), From 559b030c9aa7b8fbc8803863e20aae4a720cbb18 Mon Sep 17 00:00:00 2001 From: Michael Haggerty <mhagger@github.com> Date: Mon, 14 Aug 2023 16:57:25 +0200 Subject: [PATCH 09/28] Collect references before starting the object traversal This provides a better separation of concerns, which will be taken advantage of shortly. --- sizes/graph.go | 79 +++++----------------------------------------- sizes/grouper.go | 82 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 71 deletions(-) create mode 100644 sizes/grouper.go diff --git a/sizes/graph.go b/sizes/graph.go index 9187907..a56cbc2 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -11,50 +11,6 @@ import ( "github.com/github/git-sizer/meter" ) -// RefGroupSymbol is the string "identifier" that is used to refer to -// a refgroup, for example in the gitconfig. Nesting of refgroups is -// inferred from their names, using "." as separator between -// components. For example, if there are three refgroups with symbols -// "tags", "tags.releases", and "foo.bar", then "tags.releases" is -// considered to be nested within "tags", and "foo.bar" is considered -// to be nested within "foo", the latter being created automatically -// if it was not configured explicitly. -type RefGroupSymbol string - -// RefGroup is a group of references, for example "branches" or -// "tags". Reference groups might overlap. -type RefGroup struct { - // Symbol is the unique string by which this `RefGroup` is - // identified and configured. It consists of dot-separated - // components, which implicitly makes a nested tree-like - // structure. - Symbol RefGroupSymbol - - // Name is the name for this `ReferenceGroup` to be presented - // in user-readable output. - Name string -} - -// RefGrouper describes a type that can collate reference names into -// groups and decide which ones to walk. -type RefGrouper interface { - // Categorize tells whether `refname` should be walked at all, - // and if so, the symbols of the reference groups to which it - // belongs. - Categorize(refname string) (bool, []RefGroupSymbol) - - // Groups returns the list of `ReferenceGroup`s, in the order - // that they should be presented. The return value might - // depend on which references have been seen so far. - Groups() []RefGroup -} - -type refSeen struct { - git.Reference - walked bool - groups []RefGroupSymbol -} - // ScanRepositoryUsingGraph scans `repo`, using `rg` to decide which // references to scan and how to group them. `nameStyle` specifies // whether the output should include full names, hashes only, or @@ -71,9 +27,9 @@ func ScanRepositoryUsingGraph( graph := NewGraph(nameStyle) - refIter, err := repo.NewReferenceIter(ctx) + refsSeen, err := CollectReferences(ctx, repo, rg) if err != nil { - return HistorySize{}, err + return HistorySize{}, fmt.Errorf("reading references: %w", err) } objIter, err := repo.NewObjectIter(context.TODO()) @@ -82,41 +38,22 @@ func ScanRepositoryUsingGraph( } errChan := make(chan error, 1) - var refsSeen []refSeen - // Feed the references that we want into the stdin of the object - // iterator: + // Feed the references that we want to walk into the stdin of the + // object iterator: go func() { defer objIter.Close() errChan <- func() error { - for { - ref, ok, err := refIter.Next() - if err != nil { - return err - } - if !ok { - return nil - } - - walk, groups := rg.Categorize(ref.Refname) - - refsSeen = append( - refsSeen, - refSeen{ - Reference: ref, - walked: walk, - groups: groups, - }, - ) - - if !walk { + for _, refSeen := range refsSeen { + if !refSeen.walked { continue } - if err := objIter.AddRoot(ref.OID); err != nil { + if err := objIter.AddRoot(refSeen.OID); err != nil { return err } } + return nil }() }() diff --git a/sizes/grouper.go b/sizes/grouper.go new file mode 100644 index 0000000..a5b8a26 --- /dev/null +++ b/sizes/grouper.go @@ -0,0 +1,82 @@ +package sizes + +import ( + "context" + + "github.com/github/git-sizer/git" +) + +// RefGroupSymbol is the string "identifier" that is used to refer to +// a refgroup, for example in the gitconfig. Nesting of refgroups is +// inferred from their names, using "." as separator between +// components. For example, if there are three refgroups with symbols +// "tags", "tags.releases", and "foo.bar", then "tags.releases" is +// considered to be nested within "tags", and "foo.bar" is considered +// to be nested within "foo", the latter being created automatically +// if it was not configured explicitly. +type RefGroupSymbol string + +// RefGroup is a group of references, for example "branches" or +// "tags". Reference groups might overlap. +type RefGroup struct { + // Symbol is the unique string by which this `RefGroup` is + // identified and configured. It consists of dot-separated + // components, which implicitly makes a nested tree-like + // structure. + Symbol RefGroupSymbol + + // Name is the name for this `ReferenceGroup` to be presented + // in user-readable output. + Name string +} + +// RefGrouper describes a type that can collate reference names into +// groups and decide which ones to walk. +type RefGrouper interface { + // Categorize tells whether `refname` should be walked at all, + // and if so, the symbols of the reference groups to which it + // belongs. + Categorize(refname string) (bool, []RefGroupSymbol) + + // Groups returns the list of `ReferenceGroup`s, in the order + // that they should be presented. The return value might + // depend on which references have been seen so far. + Groups() []RefGroup +} + +type refSeen struct { + git.Reference + walked bool + groups []RefGroupSymbol +} + +func CollectReferences( + ctx context.Context, repo *git.Repository, rg RefGrouper, +) ([]refSeen, error) { + refIter, err := repo.NewReferenceIter(ctx) + if err != nil { + return nil, err + } + + var refsSeen []refSeen + for { + ref, ok, err := refIter.Next() + if err != nil { + return nil, err + } + if !ok { + return refsSeen, nil + } + + walk, groups := rg.Categorize(ref.Refname) + + refsSeen = append( + refsSeen, + refSeen{ + Reference: ref, + walked: walk, + groups: groups, + }, + ) + } +} From fdfa791791c392324ec0cde0e42d070f6c9b96c3 Mon Sep 17 00:00:00 2001 From: Michael Haggerty <mhagger@github.com> Date: Mon, 14 Aug 2023 17:57:31 +0200 Subject: [PATCH 10/28] ScanRepositoryUsingGraph(): take a context argument --- git-sizer.go | 10 +++++++--- git_sizer_test.go | 11 ++++++----- sizes/graph.go | 6 ++---- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index d1e075c..6c9e7a3 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -1,6 +1,7 @@ package main import ( + "context" "encoding/json" "errors" "fmt" @@ -93,14 +94,17 @@ var ReleaseVersion string var BuildVersion string func main() { - err := mainImplementation(os.Stdout, os.Stderr, os.Args[1:]) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + err := mainImplementation(ctx, os.Stdout, os.Stderr, os.Args[1:]) if err != nil { fmt.Fprintf(os.Stderr, "error: %s\n", err) os.Exit(1) } } -func mainImplementation(stdout, stderr io.Writer, args []string) error { +func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []string) error { var nameStyle sizes.NameStyle = sizes.NameStyleFull var cpuprofile string var jsonOutput bool @@ -288,7 +292,7 @@ func mainImplementation(stdout, stderr io.Writer, args []string) error { progressMeter = meter.NewProgressMeter(stderr, 100*time.Millisecond) } - historySize, err := sizes.ScanRepositoryUsingGraph(repo, rg, nameStyle, progressMeter) + historySize, err := sizes.ScanRepositoryUsingGraph(ctx, repo, rg, nameStyle, progressMeter) if err != nil { return fmt.Errorf("error scanning repository: %w", err) } diff --git a/git_sizer_test.go b/git_sizer_test.go index 6ab132f..b08985b 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -2,6 +2,7 @@ package main_test import ( "bytes" + "context" "encoding/json" "fmt" "io" @@ -563,7 +564,7 @@ func TestBomb(t *testing.T) { newGitBomb(t, repo, 10, 10, "boom!\n") h, err := sizes.ScanRepositoryUsingGraph( - repo.Repository(t), + context.Background(), repo.Repository(t), refGrouper{}, sizes.NameStyleFull, meter.NoProgressMeter, ) require.NoError(t, err) @@ -636,7 +637,7 @@ func TestTaggedTags(t *testing.T) { require.NoError(t, cmd.Run(), "creating tag 3") h, err := sizes.ScanRepositoryUsingGraph( - repo.Repository(t), + context.Background(), repo.Repository(t), refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") @@ -658,7 +659,7 @@ func TestFromSubdir(t *testing.T) { require.NoError(t, cmd.Run(), "creating commit") h, err := sizes.ScanRepositoryUsingGraph( - repo.Repository(t), + context.Background(), repo.Repository(t), refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") @@ -711,7 +712,7 @@ func TestSubmodule(t *testing.T) { // Analyze the main repo: h, err := sizes.ScanRepositoryUsingGraph( - mainRepo.Repository(t), + context.Background(), mainRepo.Repository(t), refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") @@ -724,7 +725,7 @@ func TestSubmodule(t *testing.T) { Path: filepath.Join(mainRepo.Path, "sub"), } h, err = sizes.ScanRepositoryUsingGraph( - submRepo2.Repository(t), + context.Background(), submRepo2.Repository(t), refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") diff --git a/sizes/graph.go b/sizes/graph.go index a56cbc2..1b908cc 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -19,12 +19,10 @@ import ( // // It returns the size data for the repository. func ScanRepositoryUsingGraph( + ctx context.Context, repo *git.Repository, rg RefGrouper, nameStyle NameStyle, progressMeter meter.Progress, ) (HistorySize, error) { - ctx, cancel := context.WithCancel(context.TODO()) - defer cancel() - graph := NewGraph(nameStyle) refsSeen, err := CollectReferences(ctx, repo, rg) @@ -32,7 +30,7 @@ func ScanRepositoryUsingGraph( return HistorySize{}, fmt.Errorf("reading references: %w", err) } - objIter, err := repo.NewObjectIter(context.TODO()) + objIter, err := repo.NewObjectIter(ctx) if err != nil { return HistorySize{}, err } From 1a2c0b51069b8eedecac2fccf532b7e6da11a1d3 Mon Sep 17 00:00:00 2001 From: Michael Haggerty <mhagger@github.com> Date: Mon, 14 Aug 2023 18:00:39 +0200 Subject: [PATCH 11/28] refSeen: make type and its members public and rename it to `RefRoot` --- sizes/graph.go | 12 ++++++------ sizes/grouper.go | 16 ++++++++-------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/sizes/graph.go b/sizes/graph.go index 1b908cc..59a6365 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -25,7 +25,7 @@ func ScanRepositoryUsingGraph( ) (HistorySize, error) { graph := NewGraph(nameStyle) - refsSeen, err := CollectReferences(ctx, repo, rg) + refRoots, err := CollectReferences(ctx, repo, rg) if err != nil { return HistorySize{}, fmt.Errorf("reading references: %w", err) } @@ -42,12 +42,12 @@ func ScanRepositoryUsingGraph( defer objIter.Close() errChan <- func() error { - for _, refSeen := range refsSeen { - if !refSeen.walked { + for _, refRoot := range refRoots { + if !refRoot.Walk { continue } - if err := objIter.AddRoot(refSeen.OID); err != nil { + if err := objIter.AddRoot(refRoot.OID); err != nil { return err } } @@ -261,9 +261,9 @@ func ScanRepositoryUsingGraph( } progressMeter.Start("Processing references: %d") - for _, refSeen := range refsSeen { + for _, refRoot := range refRoots { progressMeter.Inc() - graph.RegisterReference(refSeen.Reference, refSeen.walked, refSeen.groups) + graph.RegisterReference(refRoot.Reference, refRoot.Walk, refRoot.Groups) } progressMeter.Done() diff --git a/sizes/grouper.go b/sizes/grouper.go index a5b8a26..3807b0e 100644 --- a/sizes/grouper.go +++ b/sizes/grouper.go @@ -44,21 +44,21 @@ type RefGrouper interface { Groups() []RefGroup } -type refSeen struct { +type RefRoot struct { git.Reference - walked bool - groups []RefGroupSymbol + Walk bool + Groups []RefGroupSymbol } func CollectReferences( ctx context.Context, repo *git.Repository, rg RefGrouper, -) ([]refSeen, error) { +) ([]RefRoot, error) { refIter, err := repo.NewReferenceIter(ctx) if err != nil { return nil, err } - var refsSeen []refSeen + var refsSeen []RefRoot for { ref, ok, err := refIter.Next() if err != nil { @@ -72,10 +72,10 @@ func CollectReferences( refsSeen = append( refsSeen, - refSeen{ + RefRoot{ Reference: ref, - walked: walk, - groups: groups, + Walk: walk, + Groups: groups, }, ) } From 757866b5adda4d0cff52d917d48eab0dc92275ae Mon Sep 17 00:00:00 2001 From: Michael Haggerty <mhagger@github.com> Date: Mon, 14 Aug 2023 18:13:34 +0200 Subject: [PATCH 12/28] ScanRepositoryUsingGraph(): take a list of `RefRoot`s as argument --- git-sizer.go | 9 +++- git_sizer_test.go | 110 ++++++++++++++++++++++++++++++---------------- sizes/graph.go | 7 +-- 3 files changed, 81 insertions(+), 45 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 6c9e7a3..0336d13 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -292,7 +292,14 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st progressMeter = meter.NewProgressMeter(stderr, 100*time.Millisecond) } - historySize, err := sizes.ScanRepositoryUsingGraph(ctx, repo, rg, nameStyle, progressMeter) + refRoots, err := sizes.CollectReferences(ctx, repo, rg) + if err != nil { + return fmt.Errorf("determining which reference to scan: %w", err) + } + + historySize, err := sizes.ScanRepositoryUsingGraph( + ctx, repo, refRoots, nameStyle, progressMeter, + ) if err != nil { return fmt.Errorf("error scanning repository: %w", err) } diff --git a/git_sizer_test.go b/git_sizer_test.go index b08985b..54d90d5 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -558,14 +558,21 @@ func (rg refGrouper) Groups() []sizes.RefGroup { func TestBomb(t *testing.T) { t.Parallel() - repo := testutils.NewTestRepo(t, true, "bomb") - t.Cleanup(func() { repo.Remove(t) }) + ctx := context.Background() + + testRepo := testutils.NewTestRepo(t, true, "bomb") + t.Cleanup(func() { testRepo.Remove(t) }) + + newGitBomb(t, testRepo, 10, 10, "boom!\n") - newGitBomb(t, repo, 10, 10, "boom!\n") + repo := testRepo.Repository(t) + + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) h, err := sizes.ScanRepositoryUsingGraph( - context.Background(), repo.Repository(t), - refGrouper{}, sizes.NameStyleFull, meter.NoProgressMeter, + ctx, repo, + refRoots, sizes.NameStyleFull, meter.NoProgressMeter, ) require.NoError(t, err) @@ -613,32 +620,39 @@ func TestBomb(t *testing.T) { func TestTaggedTags(t *testing.T) { t.Parallel() - repo := testutils.NewTestRepo(t, false, "tagged-tags") - defer repo.Remove(t) + ctx := context.Background() + + testRepo := testutils.NewTestRepo(t, false, "tagged-tags") + defer testRepo.Remove(t) timestamp := time.Unix(1112911993, 0) - cmd := repo.GitCommand(t, "commit", "-m", "initial", "--allow-empty") + cmd := testRepo.GitCommand(t, "commit", "-m", "initial", "--allow-empty") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating commit") // The lexicographical order of these tags is important, hence // their strange names. - cmd = repo.GitCommand(t, "tag", "-m", "tag 1", "tag", "master") + cmd = testRepo.GitCommand(t, "tag", "-m", "tag 1", "tag", "master") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating tag 1") - cmd = repo.GitCommand(t, "tag", "-m", "tag 2", "bag", "tag") + cmd = testRepo.GitCommand(t, "tag", "-m", "tag 2", "bag", "tag") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating tag 2") - cmd = repo.GitCommand(t, "tag", "-m", "tag 3", "wag", "bag") + cmd = testRepo.GitCommand(t, "tag", "-m", "tag 3", "wag", "bag") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating tag 3") + repo := testRepo.Repository(t) + + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) + h, err := sizes.ScanRepositoryUsingGraph( - context.Background(), repo.Repository(t), - refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), repo, + refRoots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(3), h.MaxTagDepth, "tag depth") @@ -647,20 +661,27 @@ func TestTaggedTags(t *testing.T) { func TestFromSubdir(t *testing.T) { t.Parallel() - repo := testutils.NewTestRepo(t, false, "subdir") - defer repo.Remove(t) + ctx := context.Background() + + testRepo := testutils.NewTestRepo(t, false, "subdir") + defer testRepo.Remove(t) timestamp := time.Unix(1112911993, 0) - repo.AddFile(t, "subdir/file.txt", "Hello, world!\n") + testRepo.AddFile(t, "subdir/file.txt", "Hello, world!\n") - cmd := repo.GitCommand(t, "commit", "-m", "initial") + cmd := testRepo.GitCommand(t, "commit", "-m", "initial") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating commit") + repo := testRepo.Repository(t) + + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) + h, err := sizes.ScanRepositoryUsingGraph( - context.Background(), repo.Repository(t), - refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), testRepo.Repository(t), + refRoots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.MaxPathDepth, "max path depth") @@ -669,6 +690,8 @@ func TestFromSubdir(t *testing.T) { func TestSubmodule(t *testing.T) { t.Parallel() + ctx := context.Background() + tmp, err := ioutil.TempDir("", "submodule") require.NoError(t, err, "creating temporary directory") @@ -678,42 +701,47 @@ func TestSubmodule(t *testing.T) { timestamp := time.Unix(1112911993, 0) - submRepo := testutils.TestRepo{ + submTestRepo := testutils.TestRepo{ Path: filepath.Join(tmp, "subm"), } - submRepo.Init(t, false) - submRepo.AddFile(t, "submfile1.txt", "Hello, submodule!\n") - submRepo.AddFile(t, "submfile2.txt", "Hello again, submodule!\n") - submRepo.AddFile(t, "submfile3.txt", "Hello again, submodule!\n") + submTestRepo.Init(t, false) + submTestRepo.AddFile(t, "submfile1.txt", "Hello, submodule!\n") + submTestRepo.AddFile(t, "submfile2.txt", "Hello again, submodule!\n") + submTestRepo.AddFile(t, "submfile3.txt", "Hello again, submodule!\n") - cmd := submRepo.GitCommand(t, "commit", "-m", "subm initial") + cmd := submTestRepo.GitCommand(t, "commit", "-m", "subm initial") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating subm commit") - mainRepo := testutils.TestRepo{ + mainTestRepo := testutils.TestRepo{ Path: filepath.Join(tmp, "main"), } - mainRepo.Init(t, false) + mainTestRepo.Init(t, false) - mainRepo.AddFile(t, "mainfile.txt", "Hello, main!\n") + mainTestRepo.AddFile(t, "mainfile.txt", "Hello, main!\n") - cmd = mainRepo.GitCommand(t, "commit", "-m", "main initial") + cmd = mainTestRepo.GitCommand(t, "commit", "-m", "main initial") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "creating main commit") // Make subm a submodule of main: - cmd = mainRepo.GitCommand(t, "-c", "protocol.file.allow=always", "submodule", "add", submRepo.Path, "sub") - cmd.Dir = mainRepo.Path + cmd = mainTestRepo.GitCommand(t, "-c", "protocol.file.allow=always", "submodule", "add", submTestRepo.Path, "sub") + cmd.Dir = mainTestRepo.Path require.NoError(t, cmd.Run(), "adding submodule") - cmd = mainRepo.GitCommand(t, "commit", "-m", "add submodule") + cmd = mainTestRepo.GitCommand(t, "commit", "-m", "add submodule") testutils.AddAuthorInfo(cmd, ×tamp) require.NoError(t, cmd.Run(), "committing submodule to main") + mainRepo := mainTestRepo.Repository(t) + + mainRefRoots, err := sizes.CollectReferences(ctx, mainRepo, refGrouper{}) + require.NoError(t, err) + // Analyze the main repo: h, err := sizes.ScanRepositoryUsingGraph( - context.Background(), mainRepo.Repository(t), - refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), mainTestRepo.Repository(t), + mainRefRoots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") @@ -721,12 +749,18 @@ func TestSubmodule(t *testing.T) { assert.Equal(t, counts.Count32(1), h.MaxExpandedSubmoduleCount, "max expanded submodule count") // Analyze the submodule: - submRepo2 := testutils.TestRepo{ - Path: filepath.Join(mainRepo.Path, "sub"), + submTestRepo2 := testutils.TestRepo{ + Path: filepath.Join(mainTestRepo.Path, "sub"), } + + submRepo2 := submTestRepo2.Repository(t) + + submRefRoots2, err := sizes.CollectReferences(ctx, submRepo2, refGrouper{}) + require.NoError(t, err) + h, err = sizes.ScanRepositoryUsingGraph( - context.Background(), submRepo2.Repository(t), - refGrouper{}, sizes.NameStyleNone, meter.NoProgressMeter, + context.Background(), submRepo2, + submRefRoots2, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") diff --git a/sizes/graph.go b/sizes/graph.go index 59a6365..e9033ef 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -20,16 +20,11 @@ import ( // It returns the size data for the repository. func ScanRepositoryUsingGraph( ctx context.Context, - repo *git.Repository, rg RefGrouper, nameStyle NameStyle, + repo *git.Repository, refRoots []RefRoot, nameStyle NameStyle, progressMeter meter.Progress, ) (HistorySize, error) { graph := NewGraph(nameStyle) - refRoots, err := CollectReferences(ctx, repo, rg) - if err != nil { - return HistorySize{}, fmt.Errorf("reading references: %w", err) - } - objIter, err := repo.NewObjectIter(ctx) if err != nil { return HistorySize{}, err From 897baa1a96585fbc44238d0a536c92bf8a11f3ec Mon Sep 17 00:00:00 2001 From: Michael Haggerty <mhagger@github.com> Date: Mon, 14 Aug 2023 18:28:54 +0200 Subject: [PATCH 13/28] RefRoot: add some methods We want to add another type of root, so start the virtualization process. --- sizes/graph.go | 6 +++--- sizes/grouper.go | 17 +++++++++++------ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/sizes/graph.go b/sizes/graph.go index e9033ef..660f682 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -38,11 +38,11 @@ func ScanRepositoryUsingGraph( errChan <- func() error { for _, refRoot := range refRoots { - if !refRoot.Walk { + if !refRoot.Walk() { continue } - if err := objIter.AddRoot(refRoot.OID); err != nil { + if err := objIter.AddRoot(refRoot.OID()); err != nil { return err } } @@ -258,7 +258,7 @@ func ScanRepositoryUsingGraph( progressMeter.Start("Processing references: %d") for _, refRoot := range refRoots { progressMeter.Inc() - graph.RegisterReference(refRoot.Reference, refRoot.Walk, refRoot.Groups) + graph.RegisterReference(refRoot.Reference(), refRoot.Walk(), refRoot.Groups()) } progressMeter.Done() diff --git a/sizes/grouper.go b/sizes/grouper.go index 3807b0e..32d63ca 100644 --- a/sizes/grouper.go +++ b/sizes/grouper.go @@ -45,11 +45,16 @@ type RefGrouper interface { } type RefRoot struct { - git.Reference - Walk bool - Groups []RefGroupSymbol + ref git.Reference + walk bool + groups []RefGroupSymbol } +func (rr RefRoot) OID() git.OID { return rr.ref.OID } +func (rr RefRoot) Reference() git.Reference { return rr.ref } +func (rr RefRoot) Walk() bool { return rr.walk } +func (rr RefRoot) Groups() []RefGroupSymbol { return rr.groups } + func CollectReferences( ctx context.Context, repo *git.Repository, rg RefGrouper, ) ([]RefRoot, error) { @@ -73,9 +78,9 @@ func CollectReferences( refsSeen = append( refsSeen, RefRoot{ - Reference: ref, - Walk: walk, - Groups: groups, + ref: ref, + walk: walk, + groups: groups, }, ) } From 9e8b14fe3012f05c163ffdf79a32bcb2b48ea422 Mon Sep 17 00:00:00 2001 From: Michael Haggerty <mhagger@github.com> Date: Mon, 14 Aug 2023 20:14:59 +0200 Subject: [PATCH 14/28] Allow arbitrary reachability roots to be fed in Instead of only traversing objects starting at references, allow the user to specify explicit Git objects via the command line. In that case, the traversal includes objects reachable from those objects. --- git-sizer.go | 50 ++++++-- git/obj_resolver.go | 20 +++ git/ref_filter.go | 16 ++- git_sizer_test.go | 178 ++++++++++++++++++-------- internal/refopts/ref_group_builder.go | 9 +- sizes/explicit_root.go | 19 +++ sizes/graph.go | 41 ++++-- sizes/grouper.go | 1 + sizes/path_resolver.go | 60 +++++---- 9 files changed, 290 insertions(+), 104 deletions(-) create mode 100644 git/obj_resolver.go create mode 100644 sizes/explicit_root.go diff --git a/git-sizer.go b/git-sizer.go index 0336d13..7cfd6ff 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -20,7 +20,9 @@ import ( "github.com/github/git-sizer/sizes" ) -const usage = `usage: git-sizer [OPTS] +const usage = `usage: git-sizer [OPTS] [ROOT...] + + Scan objects in your Git repository and emit statistics about them. --threshold THRESHOLD minimum level of concern (i.e., number of stars) that should be reported. Default: @@ -46,12 +48,29 @@ const usage = `usage: git-sizer [OPTS] be set via gitconfig: 'sizer.progress'. --version only report the git-sizer version number + Object selection: + + git-sizer traverses through your Git history to find objects to + process. By default, it processes all objects that are reachable from + any reference. You can tell it to process only some of your + references; see "Reference selection" below. + + If explicit ROOTs are specified on the command line, each one should + be a string that 'git rev-parse' can convert into a single Git object + ID, like 'main', 'main~:src', or an abbreviated SHA-1. See + git-rev-parse(1) for details. In that case, git-sizer also treats + those objects as starting points for its traversal, and also includes + the Git objects that are reachable from those roots in the analysis. + + As a special case, if one or more ROOTs are specified on the command + line but _no_ reference selection options, then _only_ the specified + ROOTs are traversed, and no references. + Reference selection: - By default, git-sizer processes all Git objects that are reachable - from any reference. The following options can be used to limit which - references to process. The last rule matching a reference determines - whether that reference is processed. + The following options can be used to limit which references to + process. The last rule matching a reference determines whether that + reference is processed. --[no-]branches process [don't process] branches --[no-]tags process [don't process] tags @@ -220,10 +239,6 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st return nil } - if len(flags.Args()) != 0 { - return errors.New("excess arguments") - } - if repoErr != nil { return fmt.Errorf("couldn't open Git repository: %w", repoErr) } @@ -277,7 +292,7 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st progress = v } - rg, err := rgb.Finish() + rg, err := rgb.Finish(len(flags.Args()) == 0) if err != nil { return err } @@ -297,8 +312,21 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st return fmt.Errorf("determining which reference to scan: %w", err) } + roots := make([]sizes.Root, 0, len(refRoots)+len(flags.Args())) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + + for _, arg := range flags.Args() { + oid, err := repo.ResolveObject(arg) + if err != nil { + return fmt.Errorf("resolving command-line argument %q: %w", arg, err) + } + roots = append(roots, sizes.NewExplicitRoot(arg, oid)) + } + historySize, err := sizes.ScanRepositoryUsingGraph( - ctx, repo, refRoots, nameStyle, progressMeter, + ctx, repo, roots, nameStyle, progressMeter, ) if err != nil { return fmt.Errorf("error scanning repository: %w", err) diff --git a/git/obj_resolver.go b/git/obj_resolver.go new file mode 100644 index 0000000..418e293 --- /dev/null +++ b/git/obj_resolver.go @@ -0,0 +1,20 @@ +package git + +import ( + "bytes" + "fmt" +) + +func (repo *Repository) ResolveObject(name string) (OID, error) { + cmd := repo.GitCommand("rev-parse", "--verify", "--end-of-options", name) + output, err := cmd.Output() + if err != nil { + return NullOID, fmt.Errorf("resolving object %q: %w", name, err) + } + oidString := string(bytes.TrimSpace(output)) + oid, err := NewOID(oidString) + if err != nil { + return NullOID, fmt.Errorf("parsing output %q from 'rev-parse': %w", oidString, err) + } + return oid, nil +} diff --git a/git/ref_filter.go b/git/ref_filter.go index 8eb8a9b..46aff66 100644 --- a/git/ref_filter.go +++ b/git/ref_filter.go @@ -83,15 +83,23 @@ func (_ allReferencesFilter) Filter(_ string) bool { var AllReferencesFilter allReferencesFilter +type noReferencesFilter struct{} + +func (_ noReferencesFilter) Filter(_ string) bool { + return false +} + +var NoReferencesFilter noReferencesFilter + // PrefixFilter returns a `ReferenceFilter` that matches references // whose names start with the specified `prefix`, which must match at // a component boundary. For example, // -// * Prefix "refs/foo" matches "refs/foo" and "refs/foo/bar" but not -// "refs/foobar". +// - Prefix "refs/foo" matches "refs/foo" and "refs/foo/bar" but not +// "refs/foobar". // -// * Prefix "refs/foo/" matches "refs/foo/bar" but not "refs/foo" or -// "refs/foobar". +// - Prefix "refs/foo/" matches "refs/foo/bar" but not "refs/foo" or +// "refs/foobar". func PrefixFilter(prefix string) ReferenceFilter { if prefix == "" { return AllReferencesFilter diff --git a/git_sizer_test.go b/git_sizer_test.go index 54d90d5..16d58c9 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -567,54 +567,112 @@ func TestBomb(t *testing.T) { repo := testRepo.Repository(t) - refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) - require.NoError(t, err) + t.Run("full", func(t *testing.T) { + refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) - h, err := sizes.ScanRepositoryUsingGraph( - ctx, repo, - refRoots, sizes.NameStyleFull, meter.NoProgressMeter, - ) - require.NoError(t, err) + roots := make([]sizes.Root, 0, len(refRoots)) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + + h, err := sizes.ScanRepositoryUsingGraph( + ctx, repo, roots, sizes.NameStyleFull, meter.NoProgressMeter, + ) + require.NoError(t, err) + + assert.Equal(t, counts.Count32(1), h.UniqueCommitCount, "unique commit count") + assert.Equal(t, counts.Count64(172), h.UniqueCommitSize, "unique commit size") + assert.Equal(t, counts.Count32(172), h.MaxCommitSize, "max commit size") + assert.Equal(t, "refs/heads/master", h.MaxCommitSizeCommit.BestPath(), "max commit size commit") + assert.Equal(t, counts.Count32(1), h.MaxHistoryDepth, "max history depth") + assert.Equal(t, counts.Count32(0), h.MaxParentCount, "max parent count") + assert.Equal(t, "refs/heads/master", h.MaxParentCountCommit.BestPath(), "max parent count commit") + + assert.Equal(t, counts.Count32(10), h.UniqueTreeCount, "unique tree count") + assert.Equal(t, counts.Count64(2910), h.UniqueTreeSize, "unique tree size") + assert.Equal(t, counts.Count64(100), h.UniqueTreeEntries, "unique tree entries") + assert.Equal(t, counts.Count32(10), h.MaxTreeEntries, "max tree entries") + assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.BestPath(), "max tree entries tree") + + assert.Equal(t, counts.Count32(1), h.UniqueBlobCount, "unique blob count") + assert.Equal(t, counts.Count64(6), h.UniqueBlobSize, "unique blob size") + assert.Equal(t, counts.Count32(6), h.MaxBlobSize, "max blob size") + assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.BestPath(), "max blob size blob") + + assert.Equal(t, counts.Count32(0), h.UniqueTagCount, "unique tag count") + assert.Equal(t, counts.Count32(0), h.MaxTagDepth, "max tag depth") + + assert.Equal(t, counts.Count32(1), h.ReferenceCount, "reference count") + + assert.Equal(t, counts.Count32(10), h.MaxPathDepth, "max path depth") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathDepthTree.BestPath(), "max path depth tree") + assert.Equal(t, counts.Count32(29), h.MaxPathLength, "max path length") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathLengthTree.BestPath(), "max path length tree") + + assert.Equal(t, counts.Count32((pow(10, 10)-1)/(10-1)), h.MaxExpandedTreeCount, "max expanded tree count") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedTreeCountTree.BestPath(), "max expanded tree count tree") + assert.Equal(t, counts.Count32(0xffffffff), h.MaxExpandedBlobCount, "max expanded blob count") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobCountTree.BestPath(), "max expanded blob count tree") + assert.Equal(t, counts.Count64(6*pow(10, 10)), h.MaxExpandedBlobSize, "max expanded blob size") + assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobSizeTree.BestPath(), "max expanded blob size tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedLinkCount, "max expanded link count") + assert.Nil(t, h.MaxExpandedLinkCountTree, "max expanded link count tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") + assert.Nil(t, h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") + }) + + t.Run("partial", func(t *testing.T) { + name := "master:d0/d0" + oid, err := repo.ResolveObject(name) + require.NoError(t, err) + roots := []sizes.Root{sizes.NewExplicitRoot(name, oid)} - assert.Equal(t, counts.Count32(1), h.UniqueCommitCount, "unique commit count") - assert.Equal(t, counts.Count64(172), h.UniqueCommitSize, "unique commit size") - assert.Equal(t, counts.Count32(172), h.MaxCommitSize, "max commit size") - assert.Equal(t, "refs/heads/master", h.MaxCommitSizeCommit.Path(), "max commit size commit") - assert.Equal(t, counts.Count32(1), h.MaxHistoryDepth, "max history depth") - assert.Equal(t, counts.Count32(0), h.MaxParentCount, "max parent count") - assert.Equal(t, "refs/heads/master", h.MaxParentCountCommit.Path(), "max parent count commit") - - assert.Equal(t, counts.Count32(10), h.UniqueTreeCount, "unique tree count") - assert.Equal(t, counts.Count64(2910), h.UniqueTreeSize, "unique tree size") - assert.Equal(t, counts.Count64(100), h.UniqueTreeEntries, "unique tree entries") - assert.Equal(t, counts.Count32(10), h.MaxTreeEntries, "max tree entries") - assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.Path(), "max tree entries tree") - - assert.Equal(t, counts.Count32(1), h.UniqueBlobCount, "unique blob count") - assert.Equal(t, counts.Count64(6), h.UniqueBlobSize, "unique blob size") - assert.Equal(t, counts.Count32(6), h.MaxBlobSize, "max blob size") - assert.Equal(t, "refs/heads/master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.Path(), "max blob size blob") - - assert.Equal(t, counts.Count32(0), h.UniqueTagCount, "unique tag count") - assert.Equal(t, counts.Count32(0), h.MaxTagDepth, "max tag depth") - - assert.Equal(t, counts.Count32(1), h.ReferenceCount, "reference count") - - assert.Equal(t, counts.Count32(10), h.MaxPathDepth, "max path depth") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathDepthTree.Path(), "max path depth tree") - assert.Equal(t, counts.Count32(29), h.MaxPathLength, "max path length") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxPathLengthTree.Path(), "max path length tree") - - assert.Equal(t, counts.Count32((pow(10, 10)-1)/(10-1)), h.MaxExpandedTreeCount, "max expanded tree count") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedTreeCountTree.Path(), "max expanded tree count tree") - assert.Equal(t, counts.Count32(0xffffffff), h.MaxExpandedBlobCount, "max expanded blob count") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobCountTree.Path(), "max expanded blob count tree") - assert.Equal(t, counts.Count64(6*pow(10, 10)), h.MaxExpandedBlobSize, "max expanded blob size") - assert.Equal(t, "refs/heads/master^{tree}", h.MaxExpandedBlobSizeTree.Path(), "max expanded blob size tree") - assert.Equal(t, counts.Count32(0), h.MaxExpandedLinkCount, "max expanded link count") - assert.Nil(t, h.MaxExpandedLinkCountTree, "max expanded link count tree") - assert.Equal(t, counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") - assert.Nil(t, h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") + h, err := sizes.ScanRepositoryUsingGraph( + ctx, repo, roots, sizes.NameStyleFull, meter.NoProgressMeter, + ) + require.NoError(t, err) + + assert.Equal(t, counts.Count32(0), h.UniqueCommitCount, "unique commit count") + assert.Equal(t, counts.Count64(0), h.UniqueCommitSize, "unique commit size") + assert.Equal(t, counts.Count32(0), h.MaxCommitSize, "max commit size") + assert.Nil(t, h.MaxCommitSizeCommit) + assert.Equal(t, counts.Count32(0), h.MaxHistoryDepth, "max history depth") + assert.Equal(t, counts.Count32(0), h.MaxParentCount, "max parent count") + assert.Nil(t, h.MaxParentCountCommit, "max parent count commit") + + assert.Equal(t, counts.Count32(8), h.UniqueTreeCount, "unique tree count") + assert.Equal(t, counts.Count64(2330), h.UniqueTreeSize, "unique tree size") + assert.Equal(t, counts.Count64(80), h.UniqueTreeEntries, "unique tree entries") + assert.Equal(t, counts.Count32(10), h.MaxTreeEntries, "max tree entries") + assert.Equal(t, "master:d0/d0/d0/d0/d0/d0/d0/d0/d0", h.MaxTreeEntriesTree.BestPath(), "max tree entries tree") + + assert.Equal(t, counts.Count32(1), h.UniqueBlobCount, "unique blob count") + assert.Equal(t, counts.Count64(6), h.UniqueBlobSize, "unique blob size") + assert.Equal(t, counts.Count32(6), h.MaxBlobSize, "max blob size") + assert.Equal(t, "master:d0/d0/d0/d0/d0/d0/d0/d0/d0/f0", h.MaxBlobSizeBlob.BestPath(), "max blob size blob") + + assert.Equal(t, counts.Count32(0), h.UniqueTagCount, "unique tag count") + assert.Equal(t, counts.Count32(0), h.MaxTagDepth, "max tag depth") + + assert.Equal(t, counts.Count32(0), h.ReferenceCount, "reference count") + + assert.Equal(t, counts.Count32(8), h.MaxPathDepth, "max path depth") + assert.Equal(t, "master:d0/d0", h.MaxPathDepthTree.BestPath(), "max path depth tree") + assert.Equal(t, counts.Count32(23), h.MaxPathLength, "max path length") + assert.Equal(t, "master:d0/d0", h.MaxPathLengthTree.BestPath(), "max path length tree") + + assert.Equal(t, counts.Count32((pow(10, 8)-1)/(10-1)), h.MaxExpandedTreeCount, "max expanded tree count") + assert.Equal(t, "master:d0/d0", h.MaxExpandedTreeCountTree.BestPath(), "max expanded tree count tree") + assert.Equal(t, counts.Count32(pow(10, 8)), h.MaxExpandedBlobCount, "max expanded blob count") + assert.Equal(t, "master:d0/d0", h.MaxExpandedBlobCountTree.BestPath(), "max expanded blob count tree") + assert.Equal(t, counts.Count64(6*pow(10, 8)), h.MaxExpandedBlobSize, "max expanded blob size") + assert.Equal(t, "master:d0/d0", h.MaxExpandedBlobSizeTree.BestPath(), "max expanded blob size tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedLinkCount, "max expanded link count") + assert.Nil(t, h.MaxExpandedLinkCountTree, "max expanded link count tree") + assert.Equal(t, counts.Count32(0), h.MaxExpandedSubmoduleCount, "max expanded submodule count") + assert.Nil(t, h.MaxExpandedSubmoduleCountTree, "max expanded submodule count tree") + }) } func TestTaggedTags(t *testing.T) { @@ -650,9 +708,14 @@ func TestTaggedTags(t *testing.T) { refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) require.NoError(t, err) + roots := make([]sizes.Root, 0, len(refRoots)) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + h, err := sizes.ScanRepositoryUsingGraph( context.Background(), repo, - refRoots, sizes.NameStyleNone, meter.NoProgressMeter, + roots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(3), h.MaxTagDepth, "tag depth") @@ -679,9 +742,14 @@ func TestFromSubdir(t *testing.T) { refRoots, err := sizes.CollectReferences(ctx, repo, refGrouper{}) require.NoError(t, err) + roots := make([]sizes.Root, 0, len(refRoots)) + for _, refRoot := range refRoots { + roots = append(roots, refRoot) + } + h, err := sizes.ScanRepositoryUsingGraph( context.Background(), testRepo.Repository(t), - refRoots, sizes.NameStyleNone, meter.NoProgressMeter, + roots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.MaxPathDepth, "max path depth") @@ -738,10 +806,15 @@ func TestSubmodule(t *testing.T) { mainRefRoots, err := sizes.CollectReferences(ctx, mainRepo, refGrouper{}) require.NoError(t, err) + mainRoots := make([]sizes.Root, 0, len(mainRefRoots)) + for _, refRoot := range mainRefRoots { + mainRoots = append(mainRoots, refRoot) + } + // Analyze the main repo: h, err := sizes.ScanRepositoryUsingGraph( context.Background(), mainTestRepo.Repository(t), - mainRefRoots, sizes.NameStyleNone, meter.NoProgressMeter, + mainRoots, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") @@ -758,9 +831,14 @@ func TestSubmodule(t *testing.T) { submRefRoots2, err := sizes.CollectReferences(ctx, submRepo2, refGrouper{}) require.NoError(t, err) + submRoots2 := make([]sizes.Root, 0, len(submRefRoots2)) + for _, refRoot := range submRefRoots2 { + submRoots2 = append(submRoots2, refRoot) + } + h, err = sizes.ScanRepositoryUsingGraph( context.Background(), submRepo2, - submRefRoots2, sizes.NameStyleNone, meter.NoProgressMeter, + submRoots2, sizes.NameStyleNone, meter.NoProgressMeter, ) require.NoError(t, err, "scanning repository") assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") diff --git a/internal/refopts/ref_group_builder.go b/internal/refopts/ref_group_builder.go index 3c3179e..48f1190 100644 --- a/internal/refopts/ref_group_builder.go +++ b/internal/refopts/ref_group_builder.go @@ -254,9 +254,14 @@ func (rgb *RefGroupBuilder) AddRefopts(flags *pflag.FlagSet) { // Finish collects the information gained from processing the options // and returns a `sizes.RefGrouper`. -func (rgb *RefGroupBuilder) Finish() (sizes.RefGrouper, error) { +func (rgb *RefGroupBuilder) Finish(defaultAll bool) (sizes.RefGrouper, error) { if rgb.topLevelGroup.filter == nil { - rgb.topLevelGroup.filter = git.AllReferencesFilter + // User didn't specify any reference options. + if defaultAll { + rgb.topLevelGroup.filter = git.AllReferencesFilter + } else { + rgb.topLevelGroup.filter = git.NoReferencesFilter + } } refGrouper := refGrouper{ diff --git a/sizes/explicit_root.go b/sizes/explicit_root.go new file mode 100644 index 0000000..09348db --- /dev/null +++ b/sizes/explicit_root.go @@ -0,0 +1,19 @@ +package sizes + +import "github.com/github/git-sizer/git" + +type ExplicitRoot struct { + name string + oid git.OID +} + +func NewExplicitRoot(name string, oid git.OID) ExplicitRoot { + return ExplicitRoot{ + name: name, + oid: oid, + } +} + +func (er ExplicitRoot) Name() string { return er.name } +func (er ExplicitRoot) OID() git.OID { return er.oid } +func (er ExplicitRoot) Walk() bool { return true } diff --git a/sizes/graph.go b/sizes/graph.go index 660f682..0fb1c8a 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -11,6 +11,18 @@ import ( "github.com/github/git-sizer/meter" ) +type Root interface { + Name() string + OID() git.OID + Walk() bool +} + +type ReferenceRoot interface { + Root + Reference() git.Reference + Groups() []RefGroupSymbol +} + // ScanRepositoryUsingGraph scans `repo`, using `rg` to decide which // references to scan and how to group them. `nameStyle` specifies // whether the output should include full names, hashes only, or @@ -20,7 +32,9 @@ import ( // It returns the size data for the repository. func ScanRepositoryUsingGraph( ctx context.Context, - repo *git.Repository, refRoots []RefRoot, nameStyle NameStyle, + repo *git.Repository, + roots []Root, + nameStyle NameStyle, progressMeter meter.Progress, ) (HistorySize, error) { graph := NewGraph(nameStyle) @@ -37,12 +51,12 @@ func ScanRepositoryUsingGraph( defer objIter.Close() errChan <- func() error { - for _, refRoot := range refRoots { - if !refRoot.Walk() { + for _, root := range roots { + if !root.Walk() { continue } - if err := objIter.AddRoot(refRoot.OID()); err != nil { + if err := objIter.AddRoot(root.OID()); err != nil { return err } } @@ -256,9 +270,15 @@ func ScanRepositoryUsingGraph( } progressMeter.Start("Processing references: %d") - for _, refRoot := range refRoots { + for _, root := range roots { progressMeter.Inc() - graph.RegisterReference(refRoot.Reference(), refRoot.Walk(), refRoot.Groups()) + if refRoot, ok := root.(ReferenceRoot); ok { + graph.RegisterReference(refRoot.Reference(), refRoot.Groups()) + } + + if root.Walk() { + graph.pathResolver.RecordName(root.Name(), root.OID()) + } } progressMeter.Done() @@ -310,17 +330,18 @@ func NewGraph(nameStyle NameStyle) *Graph { } // RegisterReference records the specified reference in `g`. -func (g *Graph) RegisterReference(ref git.Reference, walked bool, groups []RefGroupSymbol) { +func (g *Graph) RegisterReference(ref git.Reference, groups []RefGroupSymbol) { g.historyLock.Lock() g.historySize.recordReference(g, ref) for _, group := range groups { g.historySize.recordReferenceGroup(g, group) } g.historyLock.Unlock() +} - if walked { - g.pathResolver.RecordReference(ref) - } +// Register a name that can be used for the specified OID. +func (g *Graph) RegisterName(name string, oid git.OID) { + g.pathResolver.RecordName(name, oid) } // HistorySize returns the size data that have been collected. diff --git a/sizes/grouper.go b/sizes/grouper.go index 32d63ca..fdaa927 100644 --- a/sizes/grouper.go +++ b/sizes/grouper.go @@ -50,6 +50,7 @@ type RefRoot struct { groups []RefGroupSymbol } +func (rr RefRoot) Name() string { return rr.ref.Refname } func (rr RefRoot) OID() git.OID { return rr.ref.OID } func (rr RefRoot) Reference() git.Reference { return rr.ref } func (rr RefRoot) Walk() bool { return rr.walk } diff --git a/sizes/path_resolver.go b/sizes/path_resolver.go index 2a3bb1c..275d19a 100644 --- a/sizes/path_resolver.go +++ b/sizes/path_resolver.go @@ -12,15 +12,15 @@ import ( // `rev-parse` input, including commit and/or file path) by which // specified objects are reachable. It is used as follows: // -// * Request an object's path using `RequestPath()`. The returned -// `Path` object is a placeholder for the object's path. +// - Request an object's path using `RequestPath()`. The returned +// `Path` object is a placeholder for the object's path. // -// * Tell the `PathResolver` about objects that might be along the -// object's reachability path, *in depth-first* order (i.e., -// referents before referers) by calling `RecordTree()`, -// `RecordCommit()`, `RecordTag()`, and `RecordReference()`,. +// - Tell the `PathResolver` about objects that might be along the +// object's reachability path, *in depth-first* order (i.e., +// referents before referers) by calling `RecordTree()`, +// `RecordCommit()`, `RecordTag()`, and `RecordReference()`,. // -// * Read the path out of the `Path` object using `Path.Path()`. +// - Read the path out of the `Path` object using `Path.Path()`. // // Multiple objects can be processed at once. // @@ -34,7 +34,7 @@ import ( type PathResolver interface { RequestPath(oid git.OID, objectType string) *Path ForgetPath(p *Path) - RecordReference(ref git.Reference) + RecordName(name string, oid git.OID) RecordTreeEntry(oid git.OID, name string, childOID git.OID) RecordCommit(oid, tree git.OID) RecordTag(oid git.OID, tag *git.Tag) @@ -60,7 +60,7 @@ func (n NullPathResolver) RequestPath(oid git.OID, objectType string) *Path { func (_ NullPathResolver) ForgetPath(p *Path) {} -func (_ NullPathResolver) RecordReference(ref git.Reference) {} +func (_ NullPathResolver) RecordName(name string, oid git.OID) {} func (_ NullPathResolver) RecordTreeEntry(oid git.OID, name string, childOID git.OID) {} @@ -77,19 +77,19 @@ type InOrderPathResolver struct { // (e.g., the biggest blob, or a tree containing the biggest blob, or // a commit whose tree contains the biggest blob). Valid states: // -// * `parent == nil && relativePath == ""`—we have not yet found -// anything that refers to this object. +// - `parent == nil && relativePath == ""`—we have not yet found +// anything that refers to this object. // -// * `parent != nil && relativePath == ""`—this object is a tree, and -// we have found a commit that refers to it. +// - `parent != nil && relativePath == ""`—this object is a tree, and +// we have found a commit that refers to it. // -// * `parent == nil && relativePath != ""`—we have found a reference -// that points directly at this object; `relativePath` is the full -// name of the reference. +// - `parent == nil && relativePath != ""`—we have found a reference +// that points directly at this object; `relativePath` is the full +// name of the reference. // -// * `parent != nil && relativePath != ""`—this object is a blob or -// tree, and we have found another tree that refers to it; -// `relativePath` is the corresponding tree entry name. +// - `parent != nil && relativePath != ""`—this object is a blob or +// tree, and we have found another tree that refers to it; +// `relativePath` is the corresponding tree entry name. type Path struct { // The OID of the object whose path we seek. This member is always // set. @@ -122,7 +122,8 @@ type Path struct { func (p *Path) TreePrefix() string { switch p.objectType { case "blob", "tree": - if p.parent != nil { + switch { + case p.parent != nil: if p.relativePath == "" { // This is a top-level tree or blob. return p.parent.TreePrefix() @@ -130,7 +131,9 @@ func (p *Path) TreePrefix() string { // The parent is also a tree. return p.parent.TreePrefix() + p.relativePath + "/" } - } else { + case p.relativePath != "": + return p.relativePath + "/" + default: return "???" } case "commit", "tag": @@ -153,7 +156,8 @@ func (p *Path) TreePrefix() string { func (p *Path) Path() string { switch p.objectType { case "blob", "tree": - if p.parent != nil { + switch { + case p.parent != nil: if p.relativePath == "" { // This is a top-level tree or blob. return fmt.Sprintf("%s^{%s}", p.parent.BestPath(), p.objectType) @@ -161,7 +165,9 @@ func (p *Path) Path() string { // The parent is also a tree. return p.parent.TreePrefix() + p.relativePath } - } else { + case p.relativePath != "": + return p.relativePath + default: return "" } case "commit", "tag": @@ -274,18 +280,18 @@ func (pr *InOrderPathResolver) forgetPathLocked(p *Path) { } } -func (pr *InOrderPathResolver) RecordReference(ref git.Reference) { +func (pr *InOrderPathResolver) RecordName(name string, oid git.OID) { pr.lock.Lock() defer pr.lock.Unlock() - p, ok := pr.soughtPaths[ref.OID] + p, ok := pr.soughtPaths[oid] if !ok { // Nobody is looking for the path to the referent. return } - p.relativePath = ref.Refname - delete(pr.soughtPaths, ref.OID) + p.relativePath = name + delete(pr.soughtPaths, oid) } // Record that the tree with OID `oid` has an entry with the specified From 5d339ec292a3cc126f802efa98de90ea6a804626 Mon Sep 17 00:00:00 2001 From: Michael Haggerty <mhagger@github.com> Date: Sat, 19 Aug 2023 15:25:51 +0200 Subject: [PATCH 15/28] There's no reason to make this context cancelable --- git-sizer.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 7cfd6ff..0888d78 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -113,8 +113,7 @@ var ReleaseVersion string var BuildVersion string func main() { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() + ctx := context.Background() err := mainImplementation(ctx, os.Stdout, os.Stderr, os.Args[1:]) if err != nil { From e8d9c2eebde3389f80ec8a67a9d45f907d57298a Mon Sep 17 00:00:00 2001 From: rajhawaldar <rajhawaldar.in@gmail.com> Date: Sat, 23 Sep 2023 10:34:59 +0530 Subject: [PATCH 16/28] Update the installation steps to use 'go install' Signed-off-by: rajhawaldar <rajhawaldar.in@gmail.com> --- docs/BUILDING.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/BUILDING.md b/docs/BUILDING.md index a977a2c..7f9fdef 100644 --- a/docs/BUILDING.md +++ b/docs/BUILDING.md @@ -7,11 +7,11 @@ Most people can just install a released version of `git-sizer`, [as described in 1. Make sure that you have a recent version of the [Go language toolchain](https://golang.org/doc/install) installed and that you have set `GOPATH`. -2. Get `git-sizer` using `go get`: +2. Get `git-sizer` using `go install`: - go get github.com/github/git-sizer + go install github.com/github/git-sizer@latest - This should fetch and compile the source code and write the executable file to `$GOPATH/bin/`. + This should install the executable file to `$GOPATH/bin/`. 3. Either add `$GOPATH/bin` to your `PATH`, or copy the executable file (`git-sizer` or `git-sizer.exe`) to a directory that is already in your `PATH`. From 1b0ecde670f17563805ee2f297155f9faf2c1f24 Mon Sep 17 00:00:00 2001 From: elhmn <elhmn@github.com> Date: Fri, 3 Nov 2023 11:26:36 +0100 Subject: [PATCH 17/28] Upgrade build scripts to go1.21 --- script/ensure-go-installed.sh | 6 +++--- script/install-vendored-go | 9 +++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/script/ensure-go-installed.sh b/script/ensure-go-installed.sh index 3111b9e..1e301fd 100644 --- a/script/ensure-go-installed.sh +++ b/script/ensure-go-installed.sh @@ -4,17 +4,17 @@ if [ -z "$ROOTDIR" ]; then echo 1>&2 'ensure-go-installed.sh invoked without ROOTDIR set!' fi -# Is go installed, and at least 1.16? +# Is go installed, and at least 1.21? go_ok() { set -- $(go version 2>/dev/null | sed -n 's/.*go\([0-9][0-9]*\)\.\([0-9][0-9]*\).*/\1 \2/p' | head -n 1) - [ $# -eq 2 ] && [ "$1" -eq 1 ] && [ "$2" -ge 16 ] + [ $# -eq 2 ] && [ "$1" -eq 1 ] && [ "$2" -ge 21 ] } # If a local go is installed, use it. set_up_vendored_go() { - GO_VERSION=go1.16.3 + GO_VERSION=go1.21.3 VENDORED_GOROOT="$ROOTDIR/vendor/$GO_VERSION/go" if [ -x "$VENDORED_GOROOT/bin/go" ]; then export GOROOT="$VENDORED_GOROOT" diff --git a/script/install-vendored-go b/script/install-vendored-go index 2407618..45ace01 100755 --- a/script/install-vendored-go +++ b/script/install-vendored-go @@ -1,20 +1,21 @@ #!/bin/sh # The checksums below must correspond to the downloads for this version. -GO_VERSION=go1.16.3 +# The checksums can be found on https://go.dev/dl +GO_VERSION=go1.21.3 case "$(uname -s):$(uname -m)" in Linux:x86_64) GO_PKG=${GO_VERSION}.linux-amd64.tar.gz - GO_PKG_SHA=951a3c7c6ce4e56ad883f97d9db74d3d6d80d5fec77455c6ada6c1f7ac4776d2 + GO_PKG_SHA=1241381b2843fae5a9707eec1f8fb2ef94d827990582c7c7c32f5bdfbfd420c8 ;; Darwin:x86_64) GO_PKG=${GO_VERSION}.darwin-amd64.tar.gz - GO_PKG_SHA=6bb1cf421f8abc2a9a4e39140b7397cdae6aca3e8d36dcff39a1a77f4f1170ac + GO_PKG_SHA=27014fc69e301d7588a169ca239b3cc609f0aa1abf38528bf0d20d3b259211eb ;; Darwin:arm64) GO_PKG=${GO_VERSION}.darwin-arm64.tar.gz - GO_PKG_SHA=f4e96bbcd5d2d1942f5b55d9e4ab19564da4fad192012f6d7b0b9b055ba4208f + GO_PKG_SHA=65302a7a9f7a4834932b3a7a14cb8be51beddda757b567a2f9e0cbd0d7b5a6ab ;; *) echo 1>&2 "I don't know how to install Go on your platform." From b1712756e47dd4f761b26fa326afab2e9b47f252 Mon Sep 17 00:00:00 2001 From: elhmn <elhmn@github.com> Date: Thu, 9 Nov 2023 16:15:51 +0100 Subject: [PATCH 18/28] Generate automatic draft release We needed a way to generate draft releases for git-sizer binaries. This commit adds a new `.github/workflows/release.yml` github action that will generate a draft release when a new tag version is pushed. the action will be triggered After the tag is created and pushed using: ``` git tag -as v$VERSION git push origin v$VERSION ``` --- .github/workflows/release.yml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..58af3d6 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,34 @@ +name: Release + +on: + push: + tags: + - "v*" + +permissions: + contents: write + +jobs: + lint: + name: Release + runs-on: ubuntu-latest + steps: + - name: Setup + uses: + actions/setup-go@v4 + with: + go-version: 1.21 + + - name: Checkout + uses: actions/checkout@v4 + + - name: Build releases + run: | + make releases VERSION=$GITHUB_REF_NAME + + - name: Release + uses: softprops/action-gh-release@v1 + with: + draft: true + files: | + releases/git-sizer-* From 2ed1053ff9d440ec0e405e2f25157e25926633dd Mon Sep 17 00:00:00 2001 From: Michael Haggerty <mhagger@github.com> Date: Wed, 13 Dec 2023 13:45:06 +0100 Subject: [PATCH 19/28] Stop using deprecated function `ioutil.TempDir()` --- git_sizer_test.go | 3 +-- internal/testutils/repoutils.go | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 16d58c9..fbf470d 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -6,7 +6,6 @@ import ( "encoding/json" "fmt" "io" - "io/ioutil" "os" "os/exec" "path/filepath" @@ -760,7 +759,7 @@ func TestSubmodule(t *testing.T) { ctx := context.Background() - tmp, err := ioutil.TempDir("", "submodule") + tmp, err := os.MkdirTemp("", "submodule") require.NoError(t, err, "creating temporary directory") defer func() { diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index 60a2f9b..954cff4 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -4,7 +4,6 @@ import ( "bytes" "fmt" "io" - "io/ioutil" "os" "os/exec" "path/filepath" @@ -29,7 +28,7 @@ type TestRepo struct { func NewTestRepo(t *testing.T, bare bool, pattern string) *TestRepo { t.Helper() - path, err := ioutil.TempDir("", pattern) + path, err := os.MkdirTemp("", pattern) require.NoError(t, err) repo := TestRepo{Path: path} @@ -73,7 +72,7 @@ func (repo *TestRepo) Remove(t *testing.T) { func (repo *TestRepo) Clone(t *testing.T, pattern string) *TestRepo { t.Helper() - path, err := ioutil.TempDir("", pattern) + path, err := os.MkdirTemp("", pattern) require.NoError(t, err) err = repo.GitCommand( From c20cbb8693f82594b73d4a279390a1c7aa2b7644 Mon Sep 17 00:00:00 2001 From: Michael Haggerty <mhagger@github.com> Date: Wed, 13 Dec 2023 12:45:42 +0100 Subject: [PATCH 20/28] Repository.gitDir: rename member from `path` The name `gitDir` is less ambiguous. Also rename method `Path()` to `GitDir()`. --- git/git.go | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/git/git.go b/git/git.go index f451c54..5531a6f 100644 --- a/git/git.go +++ b/git/git.go @@ -15,7 +15,10 @@ type ObjectType string // Repository represents a Git repository on disk. type Repository struct { - path string + // gitDir is the path to the `GIT_DIR` for this repository. It + // might be absolute or it might be relative to the current + // directory. + gitDir string // gitBin is the path of the `git` executable that should be used // when running commands in this repository. @@ -79,7 +82,7 @@ func NewRepository(path string) (*Repository, error) { } return &Repository{ - path: gitDir, + gitDir: gitDir, gitBin: gitBin, }, nil } @@ -103,7 +106,7 @@ func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd { cmd.Env = append( os.Environ(), - "GIT_DIR="+repo.path, + "GIT_DIR="+repo.gitDir, // Disable grafts when running our commands: "GIT_GRAFT_FILE="+os.DevNull, ) @@ -111,7 +114,8 @@ func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd { return cmd } -// Path returns the path to `repo`. -func (repo *Repository) Path() string { - return repo.path +// GitDir returns the path to `repo`'s `GIT_DIR`. It might be absolute +// or it might be relative to the current directory. +func (repo *Repository) GitDir() string { + return repo.gitDir } From 29fc88208a3a38f54fda3e7e555469bd6c8fff29 Mon Sep 17 00:00:00 2001 From: Michael Haggerty <mhagger@github.com> Date: Wed, 13 Dec 2023 12:58:23 +0100 Subject: [PATCH 21/28] Repository.GitPath(): new method, extracted from `NewRepository()` Add a method `Repository.GitPath(relPath)`, which invokes `git rev-parse --git-path $relPath` to find the path to a file within the Git repository. In `NewRepository()`, instantiate the `Repository` object earlier so that the new method can be used to find the path to `shallow`. --- git/git.go | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/git/git.go b/git/git.go index 5531a6f..cba262d 100644 --- a/git/git.go +++ b/git/git.go @@ -66,25 +66,22 @@ func NewRepository(path string) (*Repository, error) { } gitDir := smartJoin(path, string(bytes.TrimSpace(out))) - //nolint:gosec // `gitBin` is chosen carefully. - cmd = exec.Command(gitBin, "rev-parse", "--git-path", "shallow") - cmd.Dir = gitDir - out, err = cmd.Output() + repo := Repository{ + gitDir: gitDir, + gitBin: gitBin, + } + + shallow, err := repo.GitPath("shallow") if err != nil { - return nil, fmt.Errorf( - "could not run 'git rev-parse --git-path shallow': %w", err, - ) + return nil, err } - shallow := smartJoin(gitDir, string(bytes.TrimSpace(out))) + _, err = os.Lstat(shallow) if err == nil { return nil, errors.New("this appears to be a shallow clone; full clone required") } - return &Repository{ - gitDir: gitDir, - gitBin: gitBin, - }, nil + return &repo, nil } func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd { @@ -119,3 +116,20 @@ func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd { func (repo *Repository) GitDir() string { return repo.gitDir } + +// GitPath returns that path of a file within the git repository, by +// calling `git rev-parse --git-path $relPath`. The returned path is +// relative to the current directory. +func (repo *Repository) GitPath(relPath string) (string, error) { + cmd := repo.GitCommand("rev-parse", "--git-path", relPath) + out, err := cmd.Output() + if err != nil { + return "", fmt.Errorf( + "running 'git rev-parse --git-path %s': %w", relPath, err, + ) + } + // `git rev-parse --git-path` is documented to return the path + // relative to the current directory. Since we haven't changed the + // current directory, we can use it as-is: + return string(bytes.TrimSpace(out)), nil +} From 1d75c744e2ed1ad45f469a356897b0e07ba9b7a2 Mon Sep 17 00:00:00 2001 From: Michael Haggerty <mhagger@github.com> Date: Wed, 13 Dec 2023 13:15:08 +0100 Subject: [PATCH 22/28] Repository.IsFull(): new method Extract a method to determine whether the repository seems to be a full clone. Call it from `NewRepository()`. --- git/git.go | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/git/git.go b/git/git.go index cba262d..a82d14c 100644 --- a/git/git.go +++ b/git/git.go @@ -4,6 +4,7 @@ import ( "bytes" "errors" "fmt" + "io/fs" "os" "os/exec" "path/filepath" @@ -71,17 +72,36 @@ func NewRepository(path string) (*Repository, error) { gitBin: gitBin, } + full, err := repo.IsFull() + if err != nil { + return nil, fmt.Errorf("determining whether the repository is a full clone: %w", err) + } + if !full { + return nil, errors.New("this appears to be a shallow clone; full clone required") + } + + return &repo, nil +} + +// IsFull returns `true` iff `repo` appears to be a full clone. +func (repo *Repository) IsFull() (bool, error) { shallow, err := repo.GitPath("shallow") if err != nil { - return nil, err + return false, err } _, err = os.Lstat(shallow) if err == nil { - return nil, errors.New("this appears to be a shallow clone; full clone required") + return false, nil } - return &repo, nil + if !errors.Is(err, fs.ErrNotExist) { + return false, err + } + + // The `shallow` file is absent, which is what we expect + // for a full clone. + return true, nil } func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd { From 39102dfaa3c2fc57e53c9a909042bee382af1d11 Mon Sep 17 00:00:00 2001 From: Michael Haggerty <mhagger@github.com> Date: Wed, 13 Dec 2023 13:27:01 +0100 Subject: [PATCH 23/28] findGitBin(): memoize the result --- git/git_bin.go | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/git/git_bin.go b/git/git_bin.go index fc03435..526e9bb 100644 --- a/git/git_bin.go +++ b/git/git_bin.go @@ -2,10 +2,20 @@ package git import ( "path/filepath" + "sync" "github.com/cli/safeexec" ) +// This variable will be used to memoize the result of `findGitBin()`, +// since its return value only depends on the environment. +var gitBinMemo struct { + once sync.Once + + gitBin string + err error +} + // findGitBin finds the `git` binary in PATH that should be used by // the rest of `git-sizer`. It uses `safeexec` to find the executable, // because on Windows, `exec.Cmd` looks not only in PATH, but also in @@ -13,15 +23,20 @@ import ( // being scanned is hostile and non-bare because it might possibly // contain an executable file named `git`. func findGitBin() (string, error) { - gitBin, err := safeexec.LookPath("git") - if err != nil { - return "", err - } + gitBinMemo.once.Do(func() { + p, err := safeexec.LookPath("git") + if err != nil { + gitBinMemo.err = err + return + } - gitBin, err = filepath.Abs(gitBin) - if err != nil { - return "", err - } + p, err = filepath.Abs(p) + if err != nil { + gitBinMemo.err = err + return + } - return gitBin, nil + gitBinMemo.gitBin = p + }) + return gitBinMemo.gitBin, gitBinMemo.err } From 51cf26bdfd5f80d278cc274427d91d593b585235 Mon Sep 17 00:00:00 2001 From: Michael Haggerty <mhagger@github.com> Date: Wed, 13 Dec 2023 17:53:15 +0100 Subject: [PATCH 24/28] smartJoin(): improve docstring --- git/git.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/git/git.go b/git/git.go index a82d14c..3f51c53 100644 --- a/git/git.go +++ b/git/git.go @@ -26,9 +26,10 @@ type Repository struct { gitBin string } -// smartJoin returns the path that can be described as `relPath` -// relative to `path`, given that `path` is either absolute or is -// relative to the current directory. +// smartJoin returns `relPath` if it is an absolute path. If not, it +// assumes that `relPath` is relative to `path`, so it joins them +// together and returns the result. In that case, if `path` itself is +// relative, then the return value is also relative. func smartJoin(path, relPath string) string { if filepath.IsAbs(relPath) { return relPath From 02928f10bf9a42654333abc5d288c3e36b405477 Mon Sep 17 00:00:00 2001 From: Michael Haggerty <mhagger@github.com> Date: Wed, 13 Dec 2023 13:35:19 +0100 Subject: [PATCH 25/28] NewRepositoryFromGitDir(): new function If you already have the desired `GIT_DIR`, there's no need to determine it from the current path. --- git/git.go | 47 +++++++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/git/git.go b/git/git.go index 3f51c53..60a468a 100644 --- a/git/git.go +++ b/git/git.go @@ -37,8 +37,36 @@ func smartJoin(path, relPath string) string { return filepath.Join(path, relPath) } -// NewRepository creates a new repository object that can be used for -// running `git` commands within that repository. +// NewRepositoryFromGitDir creates a new `Repository` object that can +// be used for running `git` commands, given the value of `GIT_DIR` +// for the repository. +func NewRepositoryFromGitDir(gitDir string) (*Repository, error) { + // Find the `git` executable to be used: + gitBin, err := findGitBin() + if err != nil { + return nil, fmt.Errorf( + "could not find 'git' executable (is it in your PATH?): %w", err, + ) + } + + repo := Repository{ + gitDir: gitDir, + gitBin: gitBin, + } + + full, err := repo.IsFull() + if err != nil { + return nil, fmt.Errorf("determining whether the repository is a full clone: %w", err) + } + if !full { + return nil, errors.New("this appears to be a shallow clone; full clone required") + } + + return &repo, nil +} + +// NewRepository creates a new `Repository` object that can be used +// for running `git` commands within `path`. func NewRepository(path string) (*Repository, error) { // Find the `git` executable to be used: gitBin, err := findGitBin() @@ -68,20 +96,7 @@ func NewRepository(path string) (*Repository, error) { } gitDir := smartJoin(path, string(bytes.TrimSpace(out))) - repo := Repository{ - gitDir: gitDir, - gitBin: gitBin, - } - - full, err := repo.IsFull() - if err != nil { - return nil, fmt.Errorf("determining whether the repository is a full clone: %w", err) - } - if !full { - return nil, errors.New("this appears to be a shallow clone; full clone required") - } - - return &repo, nil + return NewRepositoryFromGitDir(gitDir) } // IsFull returns `true` iff `repo` appears to be a full clone. From f9aec5023a77e9336b6ec2f29bad7804caca57a6 Mon Sep 17 00:00:00 2001 From: Michael Haggerty <mhagger@github.com> Date: Wed, 13 Dec 2023 17:56:00 +0100 Subject: [PATCH 26/28] NewRepositoryFromPath(): function renamed from `NewRepository()` --- git-sizer.go | 2 +- git/git.go | 9 +++++---- internal/testutils/repoutils.go | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/git-sizer.go b/git-sizer.go index 0888d78..1ef9812 100644 --- a/git-sizer.go +++ b/git-sizer.go @@ -134,7 +134,7 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st // Try to open the repository, but it's not an error yet if this // fails, because the user might only be asking for `--help`. - repo, repoErr := git.NewRepository(".") + repo, repoErr := git.NewRepositoryFromPath(".") flags := pflag.NewFlagSet("git-sizer", pflag.ContinueOnError) flags.Usage = func() { diff --git a/git/git.go b/git/git.go index 60a468a..096ce81 100644 --- a/git/git.go +++ b/git/git.go @@ -65,10 +65,11 @@ func NewRepositoryFromGitDir(gitDir string) (*Repository, error) { return &repo, nil } -// NewRepository creates a new `Repository` object that can be used -// for running `git` commands within `path`. -func NewRepository(path string) (*Repository, error) { - // Find the `git` executable to be used: +// NewRepositoryFromPath creates a new `Repository` object that can be +// used for running `git` commands within `path`. It does so by asking +// `git` what `GIT_DIR` to use. Git, in turn, bases its decision on +// the path and the environment. +func NewRepositoryFromPath(path string) (*Repository, error) { gitBin, err := findGitBin() if err != nil { return nil, fmt.Errorf( diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index 954cff4..e530925 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -89,7 +89,7 @@ func (repo *TestRepo) Clone(t *testing.T, pattern string) *TestRepo { func (repo *TestRepo) Repository(t *testing.T) *git.Repository { t.Helper() - r, err := git.NewRepository(repo.Path) + r, err := git.NewRepositoryFromPath(repo.Path) require.NoError(t, err) return r } From d605cdb7c5e61f2d24cc29445f30255488a046c0 Mon Sep 17 00:00:00 2001 From: Michael Haggerty <mhagger@github.com> Date: Wed, 13 Dec 2023 17:56:31 +0100 Subject: [PATCH 27/28] TestRepo: for bare repositories, use `NewRepositoryFromGitDir()` There's no need to deduce the `GIT_DIR` for a bare repository. --- internal/testutils/repoutils.go | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index e530925..48a8759 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -20,6 +20,7 @@ import ( // TestRepo represents a git repository used for tests. type TestRepo struct { Path string + bare bool } // NewTestRepo creates and initializes a test repository in a @@ -37,6 +38,7 @@ func NewTestRepo(t *testing.T, bare bool, pattern string) *TestRepo { return &TestRepo{ Path: path, + bare: bare, } } @@ -89,9 +91,15 @@ func (repo *TestRepo) Clone(t *testing.T, pattern string) *TestRepo { func (repo *TestRepo) Repository(t *testing.T) *git.Repository { t.Helper() - r, err := git.NewRepositoryFromPath(repo.Path) - require.NoError(t, err) - return r + if repo.bare { + r, err := git.NewRepositoryFromGitDir(repo.Path) + require.NoError(t, err) + return r + } else { + r, err := git.NewRepositoryFromPath(repo.Path) + require.NoError(t, err) + return r + } } // localEnvVars is a list of the variable names that should be cleared From fb78b414e22c5c95dfb4c4847b6e7cee58b1b1af Mon Sep 17 00:00:00 2001 From: Johannes Schindelin <johannes.schindelin@gmx.de> Date: Tue, 12 Dec 2023 11:27:10 +0100 Subject: [PATCH 28/28] Be mindful of `safe.bareRepository` in the tests As of Git v2.38.0, there is an option to prevent Git from accessing bare repositories unless asked for explicitly (via `--git-dir` or `GIT_DIR`): `safe.bareRepository`. The tests of `git sizer`, however, assume that Git will access a bare repository when the current directory points inside that repository. This only works if `safe.bareRepository` indicates that this is safe. If that is not the case, i.e. if `safe.bareRepository` is set to `explicit`, Git demands that the environment variable `GIT_DIR` is set (either explicitly, or via `--git-dir`) when accessing bare repositories. So let's set `GIT_DIR` for the test cases that work on bare repositories. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> --- git_sizer_test.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index fbf470d..8a7a2d2 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -272,7 +272,10 @@ func TestRefSelections(t *testing.T) { args := []string{"--show-refs", "--no-progress", "--json", "--json-version=2"} args = append(args, p.args...) cmd := exec.Command(executable, args...) - cmd.Dir = repo.Path + cmd.Env = append( + os.Environ(), + "GIT_DIR="+repo.Path, + ) var stdout bytes.Buffer cmd.Stdout = &stdout var stderr bytes.Buffer @@ -519,7 +522,10 @@ References (included references marked with '+'): args := append([]string{"--show-refs", "-v", "--no-progress"}, p.args...) cmd := exec.Command(executable, args...) - cmd.Dir = repo.Path + cmd.Env = append( + os.Environ(), + "GIT_DIR="+repo.Path, + ) var stdout bytes.Buffer cmd.Stdout = &stdout var stderr bytes.Buffer