adding archive entry paths #3638
base: main
Changes from 9 commits
8c4c83b
fc528d2
6cfae75
05fcfe6
6c1feea
6ec0688
704937a
48d9323
8ab76fa
4283b1d
ceefd1e
36bdef0
a37a6dd
@@ -4,6 +4,7 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"path/filepath"
 	"time"

 	"github.com/mholt/archiver/v4"
@@ -86,7 +87,7 @@ func (h *archiveHandler) HandleFile(ctx logContext.Context, input fileReader) ch
 	}()

 	start := time.Now()
-	err := h.openArchive(ctx, 0, input, dataOrErrChan)
+	err := h.openArchive(ctx, []string{}, input, dataOrErrChan)
 	if err == nil {
 		h.metrics.incFilesProcessed()
 	}
@@ -101,37 +102,37 @@ func (h *archiveHandler) HandleFile(ctx logContext.Context, input fileReader) ch
 var ErrMaxDepthReached = errors.New("max archive depth reached")

 // openArchive recursively extracts content from an archive up to a maximum depth, handling nested archives if necessary.
-// It takes a reader from which it attempts to identify and process the archive format. Depending on the archive type,
-// it either decompresses or extracts the contents directly, sending data to the provided channel.
+// It takes a string representing the path to the archive and a reader from which it attempts to identify and process the archive format.
Review comment: This comment reads "a string" but the actual function takes a string slice. Which is correct?
Reply: I'll fix that. That comment was from a previous version.
+// Depending on the archive type, it either decompresses or extracts the contents directly, sending data to the provided channel.
 // Returns an error if the archive cannot be processed due to issues like exceeding maximum depth or unsupported formats.
 func (h *archiveHandler) openArchive(
 	ctx logContext.Context,
-	depth int,
+	archiveEntryPaths []string,
 	reader fileReader,
 	dataOrErrChan chan DataOrErr,
 ) error {
-	ctx.Logger().V(4).Info("Starting archive processing", "depth", depth)
-	defer ctx.Logger().V(4).Info("Finished archive processing", "depth", depth)
+	ctx.Logger().V(4).Info("Starting archive processing", "depth", len(archiveEntryPaths))
Review comment: imo just attach the paths here now that you have them
Reply: Do you mean change
+	defer ctx.Logger().V(4).Info("Finished archive processing", "depth", len(archiveEntryPaths))

 	if common.IsDone(ctx) {
 		return ctx.Err()
 	}

-	if depth >= maxDepth {
+	if len(archiveEntryPaths) >= maxDepth {
 		h.metrics.incMaxArchiveDepthCount()
 		return ErrMaxDepthReached
 	}

 	if reader.format == nil {
-		if depth > 0 {
-			return h.handleNonArchiveContent(ctx, newMimeTypeReaderFromFileReader(reader), dataOrErrChan)
+		if len(archiveEntryPaths) > 0 {
+			return h.handleNonArchiveContent(ctx, filepath.Join(archiveEntryPaths...), newMimeTypeReaderFromFileReader(reader), dataOrErrChan)
 		}
 		return fmt.Errorf("unknown archive format")
 	}

 	switch archive := reader.format.(type) {
 	case archiver.Decompressor:
-		// Decompress tha archive and feed the decompressed data back into the archive handler to extract any nested archives.
+		// Decompress the archive and feed the decompressed data back into the archive handler to extract any nested archives.
 		compReader, err := archive.OpenReader(reader)
 		if err != nil {
 			return fmt.Errorf("error opening decompressor with format: %s %w", reader.format.Name(), err)
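For readers skimming the diff, here is a minimal, self-contained sketch of the two jobs the new slice does in `openArchive` (the `maxDepth` value and the file names below are invented for illustration; only the `len(...)` depth check and the `filepath.Join` call mirror the change):

```go
package main

import (
	"fmt"
	"path/filepath"
)

// Assumed limit for this sketch; the real value lives elsewhere in the handler package.
const maxDepth = 5

func main() {
	// Hypothetical entry-path slice for a file nested two archives deep.
	archiveEntryPaths := []string{"outer.zip", "inner.tar", "creds.txt"}

	// The slice length stands in for the old integer depth counter.
	if len(archiveEntryPaths) >= maxDepth {
		fmt.Println("max archive depth reached")
		return
	}

	// Joining the elements yields the entry path handed to non-archive content handling.
	fmt.Println(filepath.Join(archiveEntryPaths...)) // outer.zip/inner.tar/creds.txt (separator is OS-specific)
}
```

In other words, the explicit `depth int` parameter can go away because the slice length already encodes how many levels deep the handler has recursed.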
@@ -152,9 +153,11 @@ func (h *archiveHandler) openArchive(
 		}
 		defer rdr.Close()

-		return h.openArchive(ctx, depth+1, rdr, dataOrErrChan)
+		// Note: We're limited in our ability to add file names to the archiveEntryPath here, as the decompressor doesn't have access to a fileName value.
+		// We add an empty string so we can keep track of the archive depth.
+		return h.openArchive(ctx, append(archiveEntryPaths, ""), rdr, dataOrErrChan)
Review comment: If we ever end up stringifying the slice of path parts, an empty string isn't going to be maximally visually obvious as another level of depth, compared to like a
Reply: Commit 36bdef0 fixes an oversight from an earlier version and is relevant to this discussion. The main change is using
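To make the trade-off in this thread concrete, a small sketch (the file names and the `(decompressed)` marker are made up; only `filepath.Join` and the empty-string placeholder come from the diff). `filepath.Join` ignores empty elements, so the placeholder still counts toward `len(archiveEntryPaths)` for the depth check but disappears from any joined path, whereas a visible marker would survive the join:

```go
package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	// Placeholder appended by the decompressor branch, which has no file name to record.
	withEmpty := []string{"logs.tar.gz", "", "app.log"}
	// filepath.Join ignores empty elements, so the extra depth level is invisible in the joined path.
	fmt.Println(filepath.Join(withEmpty...)) // logs.tar.gz/app.log

	// A non-empty marker (name invented here) would survive the join and show the extra level.
	withMarker := []string{"logs.tar.gz", "(decompressed)", "app.log"}
	fmt.Println(filepath.Join(withMarker...)) // logs.tar.gz/(decompressed)/app.log
}
```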

 	case archiver.Extractor:
-		err := archive.Extract(logContext.WithValue(ctx, depthKey, depth+1), reader, nil, h.extractorHandler(dataOrErrChan))
+		err := archive.Extract(ctx, reader, nil, h.extractorHandler(archiveEntryPaths, dataOrErrChan))
 		if err != nil {
 			return fmt.Errorf("error extracting archive with format: %s: %w", reader.format.Name(), err)
 		}
@@ -168,7 +171,7 @@ func (h *archiveHandler) openArchive(
 // It logs the extraction, checks for cancellation, and decides whether to skip the file based on its name or type,
 // particularly for binary files if configured to skip. If the file is not skipped, it recursively calls openArchive
 // to handle nested archives or to continue processing based on the file's content and depth in the archive structure.
-func (h *archiveHandler) extractorHandler(dataOrErrChan chan DataOrErr) func(context.Context, archiver.File) error {
+func (h *archiveHandler) extractorHandler(archiveEntryPaths []string, dataOrErrChan chan DataOrErr) func(context.Context, archiver.File) error {
 	return func(ctx context.Context, file archiver.File) error {
 		lCtx := logContext.WithValues(
 			logContext.AddLogger(ctx),
@@ -186,11 +189,6 @@ func (h *archiveHandler) extractorHandler(dataOrErrChan chan DataOrErr) func(con
 			return ctx.Err()
 		}

-		depth := 0
-		if ctxDepth, ok := ctx.Value(depthKey).(int); ok {
-			depth = ctxDepth
-		}
-
 		fileSize := file.Size()
 		if int(fileSize) > maxSize {
 			lCtx.Logger().V(2).Info("skipping file: size exceeds max allowed", "size", fileSize, "limit", maxSize)
@@ -240,6 +238,6 @@ func (h *archiveHandler) extractorHandler(dataOrErrChan chan DataOrErr) func(con
 		h.metrics.observeFileSize(fileSize)

 		lCtx.Logger().V(4).Info("Processed file successfully", "filename", file.Name(), "size", file.Size())
-		return h.openArchive(lCtx, depth, rdr, dataOrErrChan)
+		return h.openArchive(lCtx, append(archiveEntryPaths, file.Name()), rdr, dataOrErrChan)
 	}
 }
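As a rough mental model of how the entry-path slice accumulates across nesting levels, here is a self-contained toy (the `entry` type and `walk` function are invented; the real code threads the slice through `openArchive` and the `extractorHandler` callback instead):

```go
package main

import (
	"fmt"
	"path/filepath"
)

// entry is a toy stand-in for an archive member: a non-empty children list means
// "this member is itself an archive".
type entry struct {
	name     string
	children []entry
}

// walk mimics how the handler threads archiveEntryPaths through recursion:
// each nested member appends its own name before recursing further.
func walk(archiveEntryPaths []string, e entry) {
	// Copy before appending so sibling recursions never share a backing array.
	paths := append(append([]string(nil), archiveEntryPaths...), e.name)
	if len(e.children) == 0 {
		fmt.Println(filepath.Join(paths...))
		return
	}
	for _, child := range e.children {
		walk(paths, child)
	}
}

func main() {
	root := entry{name: "bundle.zip", children: []entry{
		{name: "nested.tar", children: []entry{{name: "id_rsa"}}},
		{name: "readme.txt"},
	}}
	walk(nil, root)
	// Output:
	// bundle.zip/nested.tar/id_rsa
	// bundle.zip/readme.txt
}
```

Each level passes its accumulated slice down, so every leaf file ends up with the full chain of enclosing archive names as its reported entry path.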