Skip to content

Commit 1cdadf1

Browse files
Fixed Kontent Detector (#4122)
* fixed kontent detector * avoid false positives for env uuid
1 parent e42153d commit 1cdadf1

File tree

3 files changed

+94
-48
lines changed

3 files changed

+94
-48
lines changed

pkg/detectors/kontent/kontent.go

Lines changed: 75 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@ package kontent
33
import (
44
"context"
55
"fmt"
6-
regexp "github.com/wasilibs/go-re2"
6+
"io"
77
"net/http"
88
"strings"
99

10+
regexp "github.com/wasilibs/go-re2"
11+
1012
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
1113
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
1214
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
@@ -19,9 +21,12 @@ var _ detectors.Detector = (*Scanner)(nil)
1921

2022
var (
2123
client = common.SaneHttpClient()
22-
2324
// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
24-
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"kontent"}) + `\b([a-z0-9-]{36})\b`)
25+
apiKeyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"kontent"}) + common.BuildRegexJWT("30,34", "200,400", "40,43"))
26+
envIDPat = regexp.MustCompile(detectors.PrefixRegex([]string{"kontent", "env"}) + common.UUIDPattern)
27+
28+
// The API returns this error when the environment does not exist or the API key does not have permission to access that environment.
29+
envErr = "The specified API key does not provide the permissions required to access the environment"
2530
)
2631

2732
// Keywords are used for efficiently pre-filtering chunks.
@@ -34,31 +39,40 @@ func (s Scanner) Keywords() []string {
3439
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
3540
dataStr := string(data)
3641

37-
matches := keyPat.FindAllStringSubmatch(dataStr, -1)
42+
var uniqueAPIKeys, uniqueEnvIDs = make(map[string]struct{}), make(map[string]struct{})
3843

39-
for _, match := range matches {
40-
resMatch := strings.TrimSpace(match[1])
44+
for _, apiKey := range apiKeyPat.FindAllStringSubmatch(dataStr, -1) {
45+
uniqueAPIKeys[apiKey[1]] = struct{}{}
46+
}
4147

42-
s1 := detectors.Result{
43-
DetectorType: detectorspb.DetectorType_Kontent,
44-
Raw: []byte(resMatch),
48+
for _, envID := range envIDPat.FindAllStringSubmatch(dataStr, -1) {
49+
uniqueEnvIDs[envID[1]] = struct{}{}
50+
}
51+
52+
for envID := range uniqueEnvIDs {
53+
if _, ok := detectors.UuidFalsePositives[detectors.FalsePositive(envID)]; ok {
54+
continue
55+
}
56+
57+
if detectors.StringShannonEntropy(envID) < 3 {
58+
continue
4559
}
4660

47-
if verify {
48-
req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("https://deliver.kontent.ai/%s/items", resMatch), nil)
49-
if err != nil {
50-
continue
61+
for apiKey := range uniqueAPIKeys {
62+
s1 := detectors.Result{
63+
DetectorType: detectorspb.DetectorType_Kontent,
64+
Raw: []byte(envID),
65+
RawV2: []byte(envID + apiKey),
5166
}
52-
res, err := client.Do(req)
53-
if err == nil {
54-
defer res.Body.Close()
55-
if res.StatusCode >= 200 && res.StatusCode < 300 {
56-
s1.Verified = true
57-
}
67+
68+
if verify {
69+
isVerified, verificationErr := verifyKontentAPIKey(client, envID, apiKey)
70+
s1.Verified = isVerified
71+
s1.SetVerificationError(verificationErr)
5872
}
59-
}
6073

61-
results = append(results, s1)
74+
results = append(results, s1)
75+
}
6276
}
6377

6478
return results, nil
@@ -71,3 +85,43 @@ func (s Scanner) Type() detectorspb.DetectorType {
7185
// Description returns a fixed, human-readable summary of what Kontent is and
// what its API keys can be used for.
func (s Scanner) Description() string {
7286
return "Kontent is a headless CMS (Content Management System) that allows users to manage and deliver content to any device or application. Kontent API keys can be used to access and manage this content."
7387
}
88+
89+
// api docs: https://kontent.ai/learn/docs/apis/openapi/management-api-v2/#operation/retrieve-environment-information
90+
func verifyKontentAPIKey(client *http.Client, envID, apiKey string) (bool, error) {
91+
req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("https://manage.kontent.ai/v2/projects/%s", envID), nil)
92+
if err != nil {
93+
return false, nil
94+
}
95+
96+
req.Header.Add("Authorization", "Bearer "+apiKey)
97+
98+
resp, err := client.Do(req)
99+
if err != nil {
100+
return false, err
101+
}
102+
103+
defer func() {
104+
_, _ = io.Copy(io.Discard, resp.Body)
105+
_ = resp.Body.Close()
106+
}()
107+
108+
switch resp.StatusCode {
109+
case http.StatusOK:
110+
return true, nil
111+
case http.StatusForbidden:
112+
bodyBytes, err := io.ReadAll(resp.Body)
113+
if err != nil {
114+
return false, err
115+
}
116+
117+
if strings.Contains(string(bodyBytes), envErr) {
118+
return true, nil
119+
}
120+
121+
return false, nil
122+
case http.StatusUnauthorized:
123+
return false, nil
124+
default:
125+
return false, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
126+
}
127+
}

pkg/detectors/kontent/kontent_integration_test.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,13 @@ import (
1919
func TestKontent_FromChunk(t *testing.T) {
2020
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
2121
defer cancel()
22-
testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors1")
22+
testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors5")
2323
if err != nil {
2424
t.Fatalf("could not get test secrets from GCP: %s", err)
2525
}
26-
secret := testSecrets.MustGetField("KONTENT")
26+
27+
envID := testSecrets.MustGetField("KONTENT_ENV_ID")
28+
secret := testSecrets.MustGetField("KONTENT_API_KEY")
2729
inactiveSecret := testSecrets.MustGetField("KONTENT_INACTIVE")
2830

2931
type args struct {
@@ -43,7 +45,7 @@ func TestKontent_FromChunk(t *testing.T) {
4345
s: Scanner{},
4446
args: args{
4547
ctx: context.Background(),
46-
data: []byte(fmt.Sprintf("You can find a kontent secret %s within", secret)),
48+
data: []byte(fmt.Sprintf("You can find a kontent env id: %s and kontent secret %s within", envID, secret)),
4749
verify: true,
4850
},
4951
want: []detectors.Result{
@@ -59,7 +61,7 @@ func TestKontent_FromChunk(t *testing.T) {
5961
s: Scanner{},
6062
args: args{
6163
ctx: context.Background(),
62-
data: []byte(fmt.Sprintf("You can find a kontent secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation
64+
data: []byte(fmt.Sprintf("You can find a kontent env id: %s and kontent secret %s within but not valid", envID, inactiveSecret)), // the secret would satisfy the regex but not pass validation
6365
verify: true,
6466
},
6567
want: []detectors.Result{
@@ -95,6 +97,7 @@ func TestKontent_FromChunk(t *testing.T) {
9597
t.Fatal("no raw secret present")
9698
}
9799
got[i].Raw = nil
100+
got[i].RawV2 = nil
98101
}
99102
if diff := pretty.Compare(got, tt.want); diff != "" {
100103
t.Errorf("Kontent.FromData() %s diff: (-got +want)\n%s", tt.name, diff)

pkg/detectors/kontent/kontent_test.go

Lines changed: 12 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ package kontent
22

33
import (
44
"context"
5-
"fmt"
65
"testing"
76

87
"github.com/google/go-cmp/cmp"
@@ -11,12 +10,6 @@ import (
1110
"github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick"
1211
)
1312

14-
var (
15-
validPattern = "jca9is4icbynssyi1y4spdwcbwe3vwv9jn4d"
16-
invalidPattern = "jca9is4icbynssyi1y4spdwcbwe3vwv9jn4"
17-
keyword = "kontent"
18-
)
19-
2013
func TestKontent_Pattern(t *testing.T) {
2114
d := Scanner{}
2215
ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d})
@@ -26,24 +19,20 @@ func TestKontent_Pattern(t *testing.T) {
2619
want []string
2720
}{
2821
{
29-
name: "valid pattern - with keyword kontent",
30-
input: fmt.Sprintf("%s token = '%s'", keyword, validPattern),
31-
want: []string{validPattern},
32-
},
33-
{
34-
name: "valid pattern - ignore duplicate",
35-
input: fmt.Sprintf("%s token = '%s' | '%s'", keyword, validPattern, validPattern),
36-
want: []string{validPattern},
37-
},
38-
{
39-
name: "valid pattern - key out of prefix range",
40-
input: fmt.Sprintf("%s keyword is not close to the real key in the data\n = '%s'", keyword, validPattern),
41-
want: []string{},
22+
name: "valid pattern - with keyword kontent",
23+
input: `
24+
// the following are credentials for kontent.ai APIs - do not share with anyone
25+
kontent_personal_api_key = eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJqdGkiOiJjOTE4OThlMWZlMGI0NDcwOTczOGM0ZmE0YzVlYzk0MyIsImlhdCI6MTc0NjUyNzQyNSwibmJmIjoxNzQ2NTI3NDI1LCJleHAiOjE3NjI0MjQ5NDAsInZlciI6IjMuMC4wIiwidWlkIjoidmlydHVhbF8zNTI4OGIxNC00YmE3LTQ5MzgtODZiNC1lYjFhYjczMDBiZTciLCJzY29wZV9pZCI6IjAyYmYxZDg5NzYzMjQ3ZWE4MTFkYjkwMjVhYjc0MTRhIiwicHJvamVjdF9jb250YWluZXJfaWQiOiI0MDFkMzg1NmMyYzUwMGZlOTYwMTE5YzFhMThkNWY4OCIsImF1ZCI6Im1hbmFnZS5rZW50aWNvY2xvdWQuY29tIn0.yfZTic9Zba6Dui8N6UO6t-SGbZYf17bKAd-uJ9enYPw
26+
kontent_env_id = 3d5f4d88-0511-00b3-37f1-31bb55c25ab4`,
27+
want: []string{"3d5f4d88-0511-00b3-37f1-31bb55c25ab4eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJqdGkiOiJjOTE4OThlMWZlMGI0NDcwOTczOGM0ZmE0YzVlYzk0MyIsImlhdCI6MTc0NjUyNzQyNSwibmJmIjoxNzQ2NTI3NDI1LCJleHAiOjE3NjI0MjQ5NDAsInZlciI6IjMuMC4wIiwidWlkIjoidmlydHVhbF8zNTI4OGIxNC00YmE3LTQ5MzgtODZiNC1lYjFhYjczMDBiZTciLCJzY29wZV9pZCI6IjAyYmYxZDg5NzYzMjQ3ZWE4MTFkYjkwMjVhYjc0MTRhIiwicHJvamVjdF9jb250YWluZXJfaWQiOiI0MDFkMzg1NmMyYzUwMGZlOTYwMTE5YzFhMThkNWY4OCIsImF1ZCI6Im1hbmFnZS5rZW50aWNvY2xvdWQuY29tIn0.yfZTic9Zba6Dui8N6UO6t-SGbZYf17bKAd-uJ9enYPw"},
4228
},
4329
{
44-
name: "invalid pattern",
45-
input: fmt.Sprintf("%s = '%s'", keyword, invalidPattern),
46-
want: []string{},
30+
name: "invalid pattern",
31+
input: `
32+
// the following are credentials for kontent.ai APIs - do not share with anyone
33+
kontent_personal_api_key = eyJhbGciOiJIUzI1NiIsInR5cCVCJ9.eyJqdGkiOiJjOTE4OThlMWZlMGI0NDcwOTczOGM0ZmE0YzVlYzk0MyIsImlhdCI6MTc0NjUyNzQyNSwibmJmIjoxNzQ2NTI3NDI1LCJleHAiOjE3NjI0MjQ5NDAsInZlciI6IjMuMC4wIiwidWlkIjoidmlydHVhbF8zNTI4OGIxNC00YmE3LTQ5MzgtODZiNC1lYjFhYjczMDBiZTciLCJzY29wZV9pZCI6IjAyYmYxZDg5NzYzMjQ3ZWE4MTFkYjkwMjVhYjc0MTRhIiwicHJvamVjdF9jb250YWluZXJfaWQiOiI0MDFkMzg1NmMyYzUwMGZlOTYwMTE5YzFhMThkNWY4OCIsImF1ZCI6Im1hbmFnZS5rZW50aWNvY2xvdWQuY29tIn0.yfZTic9Zba6Dui8N6UO6t-SGbZYf17bKAd-uJ9enYPw
34+
kontent_env_id = 3d5f4d88-051-00b3-37f1-31bb55c25ab4`,
35+
want: []string{},
4736
},
4837
}
4938

0 commit comments

Comments
 (0)