diff --git a/.github/workflows/secrets.yml b/.github/workflows/secrets.yml index 6f9c90780035..bd564fccd5b5 100644 --- a/.github/workflows/secrets.yml +++ b/.github/workflows/secrets.yml @@ -24,4 +24,4 @@ jobs: uses: ./ id: dogfood with: - extra_args: --only-verified \ No newline at end of file + extra_args: --results=verified,unknown diff --git a/README.md b/README.md index 89ada6fce731..0322999dc7ae 100644 --- a/README.md +++ b/README.md @@ -267,9 +267,11 @@ Flags: -j, --json Output in JSON format. --json-legacy Use the pre-v3.0 JSON format. Only works with git, gitlab, and github sources. --github-actions Output in GitHub Actions format. - --concurrency=8 Number of concurrent workers. + --concurrency=20 Number of concurrent workers. --no-verification Don't verify the results. --only-verified Only output verified results. + --allow-verification-overlap + Allow verification of similar credentials across detectors --filter-unverified Only output first unverified result per chunk per detector if there are more than one results. --filter-entropy=FILTER-ENTROPY Filter unverified results with Shannon entropy. Start with 3.0. @@ -279,6 +281,7 @@ Flags: --no-update Don't check for updates. --fail Exit with code 183 if results are found. --verifier=VERIFIER ... Set custom verification endpoints. + --custom-verifiers-only Only use custom verification endpoints. --archive-max-size=ARCHIVE-MAX-SIZE Maximum size of archive to scan. (Byte units eg. 
512B, 2KB, 4MB) --archive-max-depth=ARCHIVE-MAX-DEPTH diff --git a/main.go b/main.go index 47a233b89ecc..650f57551710 100644 --- a/main.go +++ b/main.go @@ -38,18 +38,20 @@ import ( ) var ( - cli = kingpin.New("TruffleHog", "TruffleHog is a tool for finding credentials.") - cmd string - debug = cli.Flag("debug", "Run in debug mode.").Bool() - trace = cli.Flag("trace", "Run in trace mode.").Bool() - profile = cli.Flag("profile", "Enables profiling and sets a pprof and fgprof server on :18066.").Bool() - localDev = cli.Flag("local-dev", "Hidden feature to disable overseer for local dev.").Hidden().Bool() - jsonOut = cli.Flag("json", "Output in JSON format.").Short('j').Bool() - jsonLegacy = cli.Flag("json-legacy", "Use the pre-v3.0 JSON format. Only works with git, gitlab, and github sources.").Bool() - gitHubActionsFormat = cli.Flag("github-actions", "Output in GitHub Actions format.").Bool() - concurrency = cli.Flag("concurrency", "Number of concurrent workers.").Default(strconv.Itoa(runtime.NumCPU())).Int() - noVerification = cli.Flag("no-verification", "Don't verify the results.").Bool() - onlyVerified = cli.Flag("only-verified", "Only output verified results.").Bool() + cli = kingpin.New("TruffleHog", "TruffleHog is a tool for finding credentials.") + cmd string + debug = cli.Flag("debug", "Run in debug mode.").Bool() + trace = cli.Flag("trace", "Run in trace mode.").Bool() + profile = cli.Flag("profile", "Enables profiling and sets a pprof and fgprof server on :18066.").Bool() + localDev = cli.Flag("local-dev", "Hidden feature to disable overseer for local dev.").Hidden().Bool() + jsonOut = cli.Flag("json", "Output in JSON format.").Short('j').Bool() + jsonLegacy = cli.Flag("json-legacy", "Use the pre-v3.0 JSON format. 
Only works with git, gitlab, and github sources.").Bool() + gitHubActionsFormat = cli.Flag("github-actions", "Output in GitHub Actions format.").Bool() + concurrency = cli.Flag("concurrency", "Number of concurrent workers.").Default(strconv.Itoa(runtime.NumCPU())).Int() + noVerification = cli.Flag("no-verification", "Don't verify the results.").Bool() + onlyVerified = cli.Flag("only-verified", "Only output verified results.").Bool() + results = cli.Flag("results", "Specifies which type(s) of results to output: verified, unknown, unverified. Defaults to all types.").Hidden().String() + allowVerificationOverlap = cli.Flag("allow-verification-overlap", "Allow verification of similar credentials across detectors").Bool() filterUnverified = cli.Flag("filter-unverified", "Only output first unverified result per chunk per detector if there are more than one results.").Bool() filterEntropy = cli.Flag("filter-entropy", "Filter unverified results with Shannon entropy. Start with 3.0.").Float64() @@ -405,6 +407,17 @@ func run(state overseer.State) { if *jobReportFile != nil { jobReportWriter = *jobReportFile } + + // Parse --results flag. + if *onlyVerified { + r := "verified" + results = &r + } + parsedResults, err := parseResults(results) + if err != nil { + logFatal(err, "failed to configure results flag") + } + e, err := engine.Start(ctx, engine.WithConcurrency(*concurrency), engine.WithDecoders(decoders.DefaultDecoders()...), @@ -415,7 +428,7 @@ func run(state overseer.State) { engine.WithFilterDetectors(excludeFilter), engine.WithFilterDetectors(endpointCustomizer), engine.WithFilterUnverified(*filterUnverified), - engine.WithOnlyVerified(*onlyVerified), + engine.WithResults(parsedResults), engine.WithPrintAvgDetectorTime(*printAvgDetectorTime), engine.WithPrinter(printer), engine.WithFilterEntropy(*filterEntropy), @@ -594,6 +607,33 @@ func run(state overseer.State) { } } +// parseResults ensures that users provide valid CSV input to `--results`. 
// parseResults validates the comma-separated value given to `--results` and
// converts it into a set keyed by result type.
//
// This is a work-around to kingpin not supporting CSVs.
// See: https://github.com/trufflesecurity/trufflehog/pull/2372#issuecomment-1983868917
//
// Matching is case-insensitive: the input is lower-cased before it is split on
// commas. The accepted values are "verified", "unknown" and "unverified".
//
// A nil or empty input returns a nil set and a nil error. Any other element
// (including an empty one, e.g. from a trailing comma) returns a nil set and
// an error naming the offending value.
func parseResults(input *string) (map[string]struct{}, error) {
	if input == nil || *input == "" {
		return nil, nil
	}

	var (
		values  = strings.Split(strings.ToLower(*input), ",")
		results = make(map[string]struct{}, 3)
	)
	for _, value := range values {
		switch value {
		// Go switch cases never fall through implicitly, so every accepted
		// value has to share this single case to reach the insertion below.
		case "verified", "unknown", "unverified":
			results[value] = struct{}{}
		default:
			return nil, fmt.Errorf("invalid value '%s', valid values are 'verified,unknown,unverified'", value)
		}
	}
	return results, nil
}
+func WithResults(results map[string]struct{}) Option { return func(e *Engine) { - e.onlyVerified = onlyVerified + if len(results) == 0 { + return + } + + _, ok := results["verified"] + e.notifyVerifiedResults = ok + + _, ok = results["unknown"] + e.notifyUnknownResults = ok + + _, ok = results["unverified"] + e.notifyUnverifiedResults = ok } } @@ -364,6 +376,9 @@ func (e *Engine) initialize(ctx context.Context, options ...Option) error { // The buffer sizes for these channels are set to multiples of defaultChannelBuffer, // considering the expected concurrency and workload in the system. e.detectableChunksChan = make(chan detectableChunk, defaultChannelBuffer*detectableChunksChanMultiplier) + e.notifyVerifiedResults = true + e.notifyUnknownResults = true + e.notifyUnverifiedResults = true e.verificationOverlapChunksChan = make(chan verificationOverlapChunk, defaultChannelBuffer*verificationOverlapChunksChanMultiplier) e.results = make(chan detectors.ResultWithMetadata, defaultChannelBuffer) e.dedupeCache = cache @@ -849,7 +864,20 @@ func (e *Engine) processResult(ctx context.Context, data detectableChunk, res de func (e *Engine) notifyResults(ctx context.Context) { for r := range e.ResultsChan() { - if e.onlyVerified && !r.Verified { + // Filter unwanted results, based on `--results`. + if !r.Verified { + if r.VerificationError() != nil { + if !e.notifyUnknownResults { + // Skip results with verification errors. + continue + } + } else if !e.notifyUnverifiedResults { + // Skip unverified results. + continue + } + } else if !e.notifyVerifiedResults { + // Skip verified results. + // TODO: Is this a legitimate use case? 
continue } atomic.AddUint32(&e.numFoundResults, 1) diff --git a/pkg/output/plain.go b/pkg/output/plain.go index fe2ce3778fe4..81afc832f5da 100644 --- a/pkg/output/plain.go +++ b/pkg/output/plain.go @@ -51,7 +51,7 @@ func (p *PlainPrinter) Print(_ context.Context, r *detectors.ResultWithMetadata) boldGreenPrinter.Print("✅ Found verified result 🐷🔑\n") } else if out.VerificationError != nil { printer = yellowPrinter - boldYellowPrinter.Print("⚠️ Found result - unable to verify due to error 🐷🔑❗️\n") + boldYellowPrinter.Print("⚠️ Found result - unable to verify due to error 🐷🔑❗️\n") printer.Printf("Verification Error: %s\n", out.VerificationError) } else { printer = whitePrinter diff --git a/pkg/tui/pages/source_configure/trufflehog_configure.go b/pkg/tui/pages/source_configure/trufflehog_configure.go index a603e1c24e66..c3de73d535b4 100644 --- a/pkg/tui/pages/source_configure/trufflehog_configure.go +++ b/pkg/tui/pages/source_configure/trufflehog_configure.go @@ -69,7 +69,7 @@ func (m truffleCmdModel) Cmd() string { } if isTrue(inputs["only-verified"].Value) { - command = append(command, "--only-verified") + command = append(command, "--results=verified") } if inputs["exclude_detectors"].Value != "" {