Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding archive entry paths #3638

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
Prev Previous commit
Next Next commit
updated to combine archive paths + depth
  • Loading branch information
joeleonjr committed Nov 20, 2024
commit fc528d23a300acd060736c2281a9f391ecec5e6c
2 changes: 1 addition & 1 deletion pkg/engine/engine.go
Original file line number Diff line number Diff line change
@@ -1155,7 +1155,7 @@ func (e *Engine) processResult(
return
}

// Add in handler metadata
// Add in handler metadata. Existing extra data is not overwritten.
if res.ExtraData == nil && data.chunk.HandleMetadata != nil {
res.ExtraData = data.chunk.HandleMetadata
} else {
32 changes: 12 additions & 20 deletions pkg/handlers/archive.go
Original file line number Diff line number Diff line change
@@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
"path/filepath"
"time"

"github.com/mholt/archiver/v4"
@@ -86,7 +87,7 @@ func (h *archiveHandler) HandleFile(ctx logContext.Context, input fileReader) ch
}()

start := time.Now()
err := h.openArchive(ctx, 0, emptyFilePath, input, dataOrErrChan)
err := h.openArchive(ctx, []string{}, input, dataOrErrChan)
if err == nil {
h.metrics.incFilesProcessed()
}
@@ -106,26 +107,25 @@ var ErrMaxDepthReached = errors.New("max archive depth reached")
// Returns an error if the archive cannot be processed due to issues like exceeding maximum depth or unsupported formats.
func (h *archiveHandler) openArchive(
ctx logContext.Context,
depth int,
archiveEntryPath string,
archiveEntryPaths []string,
reader fileReader,
dataOrErrChan chan DataOrErr,
) error {
ctx.Logger().V(4).Info("Starting archive processing", "depth", depth)
defer ctx.Logger().V(4).Info("Finished archive processing", "depth", depth)
ctx.Logger().V(4).Info("Starting archive processing", "depth", len(archiveEntryPaths))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

imo just attach the paths here now that you have them

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean change depth to paths in the logs?

defer ctx.Logger().V(4).Info("Finished archive processing", "depth", len(archiveEntryPaths))

if common.IsDone(ctx) {
return ctx.Err()
}

if depth >= maxDepth {
if len(archiveEntryPaths) >= maxDepth {
h.metrics.incMaxArchiveDepthCount()
return ErrMaxDepthReached
}

if reader.format == nil {
if depth > 0 {
return h.handleNonArchiveContent(ctx, archiveEntryPath, newMimeTypeReaderFromFileReader(reader), dataOrErrChan)
if len(archiveEntryPaths) > 0 {
return h.handleNonArchiveContent(ctx, filepath.Join(archiveEntryPaths...), newMimeTypeReaderFromFileReader(reader), dataOrErrChan)
}
return fmt.Errorf("unknown archive format")
}
@@ -154,12 +154,9 @@ func (h *archiveHandler) openArchive(
defer rdr.Close()

// Note: We're limited in our ability to add file names to the archiveEntryPaths here, as the decompressor doesn't have access to a fileName value. Instead, we append a generic string indicating that the file was decompressed. This could be improved.
if depth > 0 {
archiveEntryPath = archiveEntryPath + "(decompressed " + reader.format.Name() + " file)"
}
return h.openArchive(ctx, depth+1, archiveEntryPath, rdr, dataOrErrChan)
return h.openArchive(ctx, append(archiveEntryPaths, "(decompressed "+reader.format.Name()+" file)"), rdr, dataOrErrChan)
case archiver.Extractor:
err := archive.Extract(logContext.WithValue(ctx, depthKey, depth+1), reader, nil, h.extractorHandler(archiveEntryPath, dataOrErrChan))
err := archive.Extract(ctx, reader, nil, h.extractorHandler(archiveEntryPaths, dataOrErrChan))
if err != nil {
return fmt.Errorf("error extracting archive with format: %s: %w", reader.format.Name(), err)
}
@@ -173,7 +170,7 @@ func (h *archiveHandler) openArchive(
// It logs the extraction, checks for cancellation, and decides whether to skip the file based on its name or type,
// particularly for binary files if configured to skip. If the file is not skipped, it recursively calls openArchive
// to handle nested archives or to continue processing based on the file's content and depth in the archive structure.
func (h *archiveHandler) extractorHandler(archiveEntryPath string, dataOrErrChan chan DataOrErr) func(context.Context, archiver.File) error {
func (h *archiveHandler) extractorHandler(archiveEntryPaths []string, dataOrErrChan chan DataOrErr) func(context.Context, archiver.File) error {
return func(ctx context.Context, file archiver.File) error {
lCtx := logContext.WithValues(
logContext.AddLogger(ctx),
@@ -191,11 +188,6 @@ func (h *archiveHandler) extractorHandler(archiveEntryPath string, dataOrErrChan
return ctx.Err()
}

depth := 0
if ctxDepth, ok := ctx.Value(depthKey).(int); ok {
depth = ctxDepth
}

fileSize := file.Size()
if int(fileSize) > maxSize {
lCtx.Logger().V(2).Info("skipping file: size exceeds max allowed", "size", fileSize, "limit", maxSize)
@@ -245,6 +237,6 @@ func (h *archiveHandler) extractorHandler(archiveEntryPath string, dataOrErrChan
h.metrics.observeFileSize(fileSize)

lCtx.Logger().V(4).Info("Processed file successfully", "filename", file.Name(), "size", file.Size())
return h.openArchive(lCtx, depth, (archiveEntryPath + "/" + file.Name()), rdr, dataOrErrChan)
return h.openArchive(lCtx, append(archiveEntryPaths, file.Name()), rdr, dataOrErrChan)
}
}
16 changes: 8 additions & 8 deletions pkg/handlers/archive_test.go
Original file line number Diff line number Diff line change
@@ -26,15 +26,15 @@ func TestArchiveHandler(t *testing.T) {
1,
"AKIAYVP4CIPPH5TNP3SW",
false,
"",
"(decompressed .gz file)",
},
"gzip-nested": {
"https://raw.githubusercontent.com/bill-rich/bad-secrets/master/double-zip.gz",
1,
"AKIAYVP4CIPPH5TNP3SW",
false,
// This is because we can't get the file path from a nested archiver.OpenReader()
"(decompressed .gz file)",
"(decompressed .gz file)/(decompressed .gz file)",
},
"gzip-too-deep": {
"https://raw.githubusercontent.com/bill-rich/bad-secrets/master/six-zip.gz",
@@ -48,14 +48,14 @@ func TestArchiveHandler(t *testing.T) {
1,
"AKIAYVP4CIPPH5TNP3SW",
false,
"/aws-canary-creds",
"aws-canary-creds",
},
"tar-nested": {
"https://raw.githubusercontent.com/bill-rich/bad-secrets/master/two.tar",
1,
"AKIAYVP4CIPPH5TNP3SW",
false,
"/one.tar/aws-canary-creds",
"one.tar/aws-canary-creds",
},
"tar-too-deep": {
"https://raw.githubusercontent.com/bill-rich/bad-secrets/master/six.tar",
@@ -69,21 +69,21 @@ func TestArchiveHandler(t *testing.T) {
1,
"AKIAYVP4CIPPH5TNP3SW",
false,
"/aws-canary-creds",
"(decompressed .tar.gz file)/aws-canary-creds",
},
"gzip-large": {
"https://raw.githubusercontent.com/bill-rich/bad-secrets/master/FifteenMB.gz",
1543,
"AKIAYVP4CIPPH5TNP3SW",
false,
"",
"(decompressed .gz file)",
},
"zip-single": {
"https://raw.githubusercontent.com/bill-rich/bad-secrets/master/aws-canary-creds.zip",
1,
"AKIAYVP4CIPPH5TNP3SW",
false,
"/aws-canary-creds",
"aws-canary-creds",
},
}

@@ -137,6 +137,6 @@ func TestOpenInvalidArchive(t *testing.T) {

dataOrErrChan := make(chan DataOrErr)

err = handler.openArchive(ctx, 0, emptyFilePath, rdr, dataOrErrChan)
err = handler.openArchive(ctx, []string{}, rdr, dataOrErrChan)
assert.Error(t, err)
}
2 changes: 1 addition & 1 deletion pkg/handlers/rpm.go
Original file line number Diff line number Diff line change
@@ -115,7 +115,7 @@ func (h *rpmHandler) processRPMFiles(
return fmt.Errorf("error creating mime-type reader: %w", err)
}

// ToDo: Update processRPMFiles to accomdate nested archives. Once completed,
// ToDo: Update processRPMFiles to accommodate nested archives. Once completed,
// adjust the emptyFilePath value to reflect the actual file path.
if err := h.handleNonArchiveContent(fileCtx, emptyFilePath, rdr, dataOrErrChan); err != nil {
dataOrErrChan <- DataOrErr{
Loading
Oops, something went wrong.