-
Notifications
You must be signed in to change notification settings - Fork 1.7k
/
detectors.go
130 lines (113 loc) · 3.58 KB
/
detectors.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
package detectors
import (
"context"
"os"
"path/filepath"
"runtime"
"strings"
"unicode"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
)
// Detector defines an interface for scanning for and verifying secrets.
type Detector interface {
// FromData will scan bytes for results, and optionally verify them.
FromData(ctx context.Context, verify bool, data []byte) ([]Result, error)
// Keywords are used for efficiently pre-filtering chunks using substring operations.
// Use unique identifiers that are part of the secret if you can, or the provider name.
Keywords() []string
}
type Result struct {
// DetectorType is the type of Detector.
DetectorType detectorspb.DetectorType
Verified bool
// Raw contains the raw secret identifier data. Prefer IDs over secrets since it is used for deduping after hashing.
Raw []byte
// Redacted contains the redacted version of the raw secret identification data for display purposes.
// A secret ID should be used if available.
Redacted string
ExtraData map[string]string
StructuredData *detectorspb.StructuredData
}
type ResultWithMetadata struct {
// SourceMetadata contains source-specific contextual information.
SourceMetadata *source_metadatapb.MetaData
// SourceID is the ID of the source that the API uses to map secrets to specific sources.
SourceID int64
// SourceType is the type of Source.
SourceType sourcespb.SourceType
// SourceName is the name of the Source.
SourceName string
Result
}
// CopyMetadata returns a detector result with included metadata from the source chunk.
func CopyMetadata(chunk *sources.Chunk, result Result) ResultWithMetadata {
return ResultWithMetadata{
SourceMetadata: chunk.SourceMetadata,
SourceID: chunk.SourceID,
SourceType: chunk.SourceType,
SourceName: chunk.SourceName,
Result: result,
}
}
// CleanResults returns all verified secrets, and if there are no verified secrets,
// just one unverified secret if there are any.
func CleanResults(results []Result) []Result {
if len(results) == 0 {
return results
}
var cleaned = make(map[string]Result, 0)
for _, s := range results {
if s.Verified {
cleaned[s.Redacted] = s
}
}
if len(cleaned) == 0 {
return results[:1]
}
results = results[:0]
for _, r := range cleaned {
results = append(results, r)
}
return results
}
// PrefixRegex ensures that at least one of the given keywords is within
// 20 characters of the capturing group that follows.
// This can help prevent false positives.
func PrefixRegex(keywords []string) string {
pre := `(?i)(?:`
middle := strings.Join(keywords, "|")
post := `)(?:.|[\n\r]){0,40}`
return pre + middle + post
}
//KeyIsRandom is a Low cost check to make sure that 'keys' include a number to reduce FPs.
//Golang doesnt support regex lookaheads, so must be done in separate calls.
//TODO improve checks. Shannon entropy did not work well.
func KeyIsRandom(key string) bool {
for _, ch := range key {
if unicode.IsDigit(ch) {
return true
}
}
return false
}
func MustGetBenchmarkData() map[string][]byte {
_, filename, _, _ := runtime.Caller(0)
dir := filepath.Dir(filename)
small := make([]byte, 0)
medium, err := os.ReadFile(filepath.Join(dir, "detectors.go"))
if err != nil {
panic(err)
}
big := make([]byte, 0)
for i := 0; i < 25; i++ {
big = append(big, medium...)
}
return map[string][]byte{
"small": small,
"medium": medium,
"big": big,
}
}