From 84642449db9968f3af63201d6b1c144b7af65355 Mon Sep 17 00:00:00 2001 From: Miccah Castorina Date: Thu, 18 Jan 2024 12:45:33 -0800 Subject: [PATCH 1/5] Walk directories in filesystem source enumeration --- pkg/sources/filesystem/filesystem.go | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pkg/sources/filesystem/filesystem.go b/pkg/sources/filesystem/filesystem.go index 5f47ffa47c7f..01310f89bf6b 100644 --- a/pkg/sources/filesystem/filesystem.go +++ b/pkg/sources/filesystem/filesystem.go @@ -223,6 +223,26 @@ func (s *Source) scanFile(ctx context.Context, path string, chunksChan chan *sou // filepath or a directory. func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) error { for _, path := range s.paths { + fileInfo, err := os.Stat(filepath.Clean(path)) + if err != nil { + if err := reporter.UnitErr(ctx, err); err != nil { + return err + } + continue + } + if fileInfo.IsDir() { + return fs.WalkDir(os.DirFS(path), ".", func(relativePath string, d fs.DirEntry, err error) error { + if err != nil || relativePath == "." { + return nil + } + fullPath := filepath.Join(path, relativePath) + item := sources.CommonSourceUnit{ID: fullPath} + if err := reporter.UnitOk(ctx, item); err != nil { + return err + } + return nil + }) + } item := sources.CommonSourceUnit{ID: path} if err := reporter.UnitOk(ctx, item); err != nil { return err From 4daaf97b4ca1142a21e6f02399fddc330b5930e9 Mon Sep 17 00:00:00 2001 From: Miccah Castorina Date: Thu, 18 Jan 2024 19:44:45 -0800 Subject: [PATCH 2/5] Ignore all directories instead of just the root --- pkg/sources/filesystem/filesystem.go | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/pkg/sources/filesystem/filesystem.go b/pkg/sources/filesystem/filesystem.go index 01310f89bf6b..8a6cfd6f11df 100644 --- a/pkg/sources/filesystem/filesystem.go +++ b/pkg/sources/filesystem/filesystem.go @@ -119,12 +119,7 @@ func (s *Source) scanDir(ctx context.Context, path string, chunksChan chan *sour // Skip over non-regular files. We do this check here to suppress noisy // logs for trying to scan directories and other non-regular files in // our traversal. - fileStat, err := os.Stat(fullPath) - if err != nil { - ctx.Logger().Info("unable to stat file", "path", fullPath, "error", err) - return nil - } - if !fileStat.Mode().IsRegular() { + if !d.Type().IsRegular() { return nil } if s.filter != nil && !s.filter.Pass(fullPath) { @@ -232,15 +227,12 @@ func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) e } if fileInfo.IsDir() { return fs.WalkDir(os.DirFS(path), ".", func(relativePath string, d fs.DirEntry, err error) error { - if err != nil || relativePath == "." { + if err != nil || d.IsDir() { return nil } fullPath := filepath.Join(path, relativePath) item := sources.CommonSourceUnit{ID: fullPath} - if err := reporter.UnitOk(ctx, item); err != nil { - return err - } - return nil + return reporter.UnitOk(ctx, item) }) } item := sources.CommonSourceUnit{ID: path} From ce9000ccc9a0e520eaa557c99b6752f6a63f130d Mon Sep 17 00:00:00 2001 From: Miccah Castorina Date: Thu, 18 Jan 2024 20:00:59 -0800 Subject: [PATCH 3/5] Fix bug with multiple directories --- pkg/sources/filesystem/filesystem.go | 30 +++++++++++++++++----------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/pkg/sources/filesystem/filesystem.go b/pkg/sources/filesystem/filesystem.go index 8a6cfd6f11df..f251f549f210 100644 --- a/pkg/sources/filesystem/filesystem.go +++ b/pkg/sources/filesystem/filesystem.go @@ -225,19 +225,25 @@ func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) e } continue } - if fileInfo.IsDir() { - return fs.WalkDir(os.DirFS(path), ".", func(relativePath string, d fs.DirEntry, err error) error { - if err != nil || d.IsDir() { - return nil - } - fullPath := filepath.Join(path, relativePath) - item := sources.CommonSourceUnit{ID: fullPath} - return reporter.UnitOk(ctx, item) - }) + if !fileInfo.IsDir() { + item := sources.CommonSourceUnit{ID: path} + if err := reporter.UnitOk(ctx, item); err != nil { + return err + } + continue } - item := sources.CommonSourceUnit{ID: path} - if err := reporter.UnitOk(ctx, item); err != nil { - return err + err = fs.WalkDir(os.DirFS(path), ".", func(relativePath string, d fs.DirEntry, err error) error { + if err != nil || d.IsDir() { + return nil + } + fullPath := filepath.Join(path, relativePath) + item := sources.CommonSourceUnit{ID: fullPath} + return reporter.UnitOk(ctx, item) + }) + if err != nil { + if err := reporter.UnitErr(ctx, err); err != nil { + return err + } } } return nil From 95fc54f0f3a1a69fe640b5ec067957f4f39db078 Mon Sep 17 00:00:00 2001 From: Miccah Castorina Date: Fri, 19 Jan 2024 18:19:43 -0800 Subject: [PATCH 4/5] Skip filesystem TestEnumerate --- pkg/sources/filesystem/filesystem.go | 5 ++++- pkg/sources/filesystem/filesystem_test.go | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/pkg/sources/filesystem/filesystem.go b/pkg/sources/filesystem/filesystem.go index f251f549f210..9a1c4ceb9975 100644 --- a/pkg/sources/filesystem/filesystem.go +++ b/pkg/sources/filesystem/filesystem.go @@ -233,7 +233,10 @@ func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) e continue } err = fs.WalkDir(os.DirFS(path), ".", func(relativePath string, d fs.DirEntry, err error) error { - if err != nil || d.IsDir() { + if err != nil { + return reporter.UnitErr(ctx, err) + } + if d.IsDir() { return nil } fullPath := filepath.Join(path, relativePath) diff --git a/pkg/sources/filesystem/filesystem_test.go b/pkg/sources/filesystem/filesystem_test.go index e6925fafe45a..577fdcd44227 100644 --- a/pkg/sources/filesystem/filesystem_test.go +++ b/pkg/sources/filesystem/filesystem_test.go @@ -115,6 +115,7 @@ func TestScanFile(t *testing.T) { } func TestEnumerate(t *testing.T) { + t.Skip("TODO: refactor to allow a virtual filesystem") t.Parallel() ctx := context.Background() From afc0d177a1fdfab2c3e7b1119df3850c2f5af5eb Mon Sep 17 00:00:00 2001 From: Miccah Castorina Date: Tue, 23 Jan 2024 12:16:07 -0800 Subject: [PATCH 5/5] Update filesystem enumeration test to create files and folders --- pkg/sources/filesystem/filesystem_test.go | 24 ++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/pkg/sources/filesystem/filesystem_test.go b/pkg/sources/filesystem/filesystem_test.go index 577fdcd44227..0dccb3ee5393 100644 --- a/pkg/sources/filesystem/filesystem_test.go +++ b/pkg/sources/filesystem/filesystem_test.go @@ -2,6 +2,7 @@ package filesystem import ( "os" + "path/filepath" "strings" "testing" "time" @@ -115,15 +116,36 @@ func TestScanFile(t *testing.T) { } func TestEnumerate(t *testing.T) { - t.Skip("TODO: refactor to allow a virtual filesystem") + // TODO: refactor to allow a virtual filesystem. t.Parallel() ctx := context.Background() // Setup the connection to test enumeration. + dir, err := os.MkdirTemp("", "trufflehog-test-enumerate") + assert.NoError(t, err) + defer os.RemoveAll(dir) + units := []string{ "/one", "/two", "/three", "/path/to/dir/", "/path/to/another/dir/", } + // Prefix the units with the tempdir and create files on disk. + for i, unit := range units { + fullPath := filepath.Join(dir, unit) + units[i] = fullPath + if i < 3 { + f, err := os.Create(fullPath) + assert.NoError(t, err) + f.Close() + } else { + assert.NoError(t, os.MkdirAll(fullPath, 0755)) + // Create a file in the directory for enumeration to find. + f, err := os.CreateTemp(fullPath, "file") + assert.NoError(t, err) + units[i] = f.Name() + f.Close() + } + } conn, err := anypb.New(&sourcespb.Filesystem{ Paths: units[0:3], Directories: units[3:],