Skip to content

Commit

Permalink
optimize keywords (#841)
Browse files Browse the repository at this point in the history
* optimize keywords

* use defaults for concurrency again
  • Loading branch information
zricethezav committed Apr 16, 2022
1 parent e23f7f5 commit 48b79fa
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 17 deletions.
10 changes: 8 additions & 2 deletions config/config.go
Expand Up @@ -43,10 +43,14 @@ type Config struct {
Description string
Rules []*Rule
Allowlist Allowlist
Keywords []string
}

func (vc *ViperConfig) Translate() (Config, error) {
var rules []*Rule
var (
rules []*Rule
keywords []string
)
for _, r := range vc.Rules {
var allowlistRegexes []*regexp.Regexp
for _, a := range r.Allowlist.Regexes {
Expand All @@ -59,6 +63,8 @@ func (vc *ViperConfig) Translate() (Config, error) {

if r.Keywords == nil {
r.Keywords = []string{}
} else {
keywords = append(keywords, r.Keywords...)
}

if r.Tags == nil {
Expand Down Expand Up @@ -96,7 +102,6 @@ func (vc *ViperConfig) Translate() (Config, error) {
return Config{}, fmt.Errorf("%s invalid regex secret group %d, max regex secret group %d", r.Description, r.SecretGroup, r.Regex.NumSubexp())
}
rules = append(rules, r)

}
var allowlistRegexes []*regexp.Regexp
for _, a := range vc.Allowlist.Regexes {
Expand All @@ -114,5 +119,6 @@ func (vc *ViperConfig) Translate() (Config, error) {
Paths: allowlistPaths,
Commits: vc.Allowlist.Commits,
},
Keywords: keywords,
}, nil
}
60 changes: 45 additions & 15 deletions detect/detect.go
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/fatih/semgroup"
"github.com/gitleaks/go-gitdiff/gitdiff"
"github.com/h2non/filetype"
ahocorasick "github.com/petar-dambovaliev/aho-corasick"
"github.com/rs/zerolog/log"
"github.com/spf13/viper"
)
Expand Down Expand Up @@ -59,6 +60,10 @@ type Detector struct {
// of the detector's scan which can then be used to generate a
// report.
findings []report.Finding

// prefilter is an ahocorasick struct used for doing efficient string
// matching given a set of words (keywords from the rules in the config)
prefilter ahocorasick.AhoCorasick
}

// Fragment contains the data to be scanned
Expand All @@ -75,15 +80,27 @@ type Fragment struct {
// newlineIndices is a list of indices of newlines in the raw content.
// This is used to calculate the line location of a finding
newlineIndices [][]int

// keywords is a map of all the keywords contained within the contents
// of this fragment
keywords map[string]bool
}

// NewDetector returns a Detector configured with cfg.
//
// The commit map, finding mutex and findings slice are initialized empty,
// and an Aho-Corasick matcher is built once from cfg.Keywords and stored
// as the detector's prefilter.
func NewDetector(cfg config.Config) *Detector {
	// Matcher settings: ASCII case-insensitive, substring (not whole-word)
	// matching, leftmost-longest match semantics, DFA-backed automaton.
	opts := ahocorasick.Opts{
		AsciiCaseInsensitive: true,
		MatchOnlyWholeWords:  false,
		MatchKind:            ahocorasick.LeftMostLongestMatch,
		DFA:                  true,
	}
	builder := ahocorasick.NewAhoCorasickBuilder(opts)

	d := &Detector{Config: cfg}
	d.commitMap = make(map[string]bool)
	d.findingMutex = &sync.Mutex{}
	d.findings = []report.Finding{}
	d.prefilter = builder.Build(cfg.Keywords)
	return d
}

Expand Down Expand Up @@ -154,18 +171,6 @@ func (d *Detector) detectRule(fragment Fragment, rule *config.Rule) []report.Fin
return findings
}

containsKeyword := false
for _, k := range rule.Keywords {
if strings.Contains(strings.ToLower(fragment.Raw),
strings.ToLower(k)) {
containsKeyword = true
break
}
}
if !containsKeyword && len(rule.Keywords) != 0 {
return findings
}

matchIndices := rule.Regex.FindAllStringIndex(fragment.Raw, -1)
for _, matchIndex := range matchIndices {
// extract secret from match
Expand Down Expand Up @@ -194,13 +199,13 @@ func (d *Detector) detectRule(fragment Fragment, rule *config.Rule) []report.Fin
gitleaksAllowSignature) {
continue
}

// check if the secret is in the allowlist
if rule.Allowlist.RegexAllowed(finding.Secret) ||
d.Config.Allowlist.RegexAllowed(finding.Secret) {
continue
}

// extract secret from secret group if set
if rule.SecretGroup != 0 {
groups := rule.Regex.FindStringSubmatch(secret)
Expand Down Expand Up @@ -374,6 +379,10 @@ func (d *Detector) DetectFiles(source string) ([]report.Finding, error) {
// Detect scans the given fragment and returns a list of findings
func (d *Detector) Detect(fragment Fragment) []report.Finding {
var findings []report.Finding

// initialize the fragment's keyword map
fragment.keywords = make(map[string]bool)

// check if filepath is allowed
if fragment.FilePath != "" && (d.Config.Allowlist.PathAllowed(fragment.FilePath) ||
fragment.FilePath == d.Config.Path) {
Expand All @@ -383,8 +392,29 @@ func (d *Detector) Detect(fragment Fragment) []report.Finding {
// add newline indices for location calculation in detectRule
fragment.newlineIndices = regexp.MustCompile("\n").FindAllStringIndex(fragment.Raw, -1)

// build keyword map for prefiltering rules
matches := d.prefilter.FindAll(strings.ToLower(fragment.Raw))
for _, m := range matches {
fragment.keywords[strings.ToLower(fragment.Raw[m.Start():m.End()])] = true
}

for _, rule := range d.Config.Rules {
findings = append(findings, d.detectRule(fragment, rule)...)
if len(rule.Keywords) == 0 {
// if no keywords are associated with the rule, always scan the
// fragment using the rule
findings = append(findings, d.detectRule(fragment, rule)...)
continue
}
fragmentContainsKeyword := false
// check if keywords are in the fragment
for _, k := range rule.Keywords {
if _, ok := fragment.keywords[strings.ToLower(k)]; ok {
fragmentContainsKeyword = true
}
}
if fragmentContainsKeyword {
findings = append(findings, d.detectRule(fragment, rule)...)
}
}
return filter(findings, d.Redact)
}
Expand Down
1 change: 1 addition & 0 deletions go.mod
Expand Up @@ -20,6 +20,7 @@ require (
github.com/magiconair/properties v1.8.5 // indirect
github.com/mitchellh/mapstructure v1.4.1 // indirect
github.com/pelletier/go-toml v1.9.3 // indirect
github.com/petar-dambovaliev/aho-corasick v0.0.0-20211021192214-5ab2d9280aa9 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/spf13/afero v1.6.0 // indirect
github.com/spf13/cast v1.3.1 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Expand Up @@ -203,6 +203,8 @@ github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3Rllmb
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
github.com/pelletier/go-toml v1.9.3 h1:zeC5b1GviRUyKYd6OJPvBU/mcVDVoL1OhT17FCt5dSQ=
github.com/pelletier/go-toml v1.9.3/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/petar-dambovaliev/aho-corasick v0.0.0-20211021192214-5ab2d9280aa9 h1:lL+y4Xv20pVlCGyLzNHRC0I0rIHhIL1lTvHizoS/dU8=
github.com/petar-dambovaliev/aho-corasick v0.0.0-20211021192214-5ab2d9280aa9/go.mod h1:EHPiTAKtiFmrMldLUNswFwfZ2eJIYBHktdaUTZxYWRw=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/sftp v1.10.1/go.mod h1:lYOWFsE0bwd1+KfKJaKeuokY15vzFx25BLbzYYoAxZI=
Expand Down

0 comments on commit 48b79fa

Please sign in to comment.