Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added azure COSMOSDB detector #3951

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 169 additions & 0 deletions pkg/detectors/azure_cosmosdb/azure_cosmosdb.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
package azure_cosmosdb

import (
"context"
"crypto/hmac"
"crypto/sha256"
"encoding/base64"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"regexp"
"strings"
"time"

"github.com/trufflesecurity/trufflehog/v3/pkg/cache/simple"
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

type Scanner struct {
client *http.Client
}

var (
defaultClient = common.SaneHttpClient()

dbKeyPattern = regexp.MustCompile(`([A-Za-z0-9+/]{86}==)`)
// account name can contain only lowercase letters, numbers and the `-` character, must be between 3 and 44 characters long.
accountUrlPattern = regexp.MustCompile(`([a-z0-9-]{3,44}.documents\.azure\.com)`)

invalidHosts = simple.NewCache[struct{}]()

noHostErr = errors.New("no such host")
)

func (s Scanner) getClient() *http.Client {
if s.client != nil {
return s.client
}

return defaultClient
}

// Ensure the Scanner satisfies the interface at compile time.
var _ detectors.Detector = (*Scanner)(nil)

func (s Scanner) Type() detectorspb.DetectorType {
return detectorspb.DetectorType_AzureCosmosDB
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you check how this detector is different than already CosmosDBKey one

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one does not exist. The entry only exist in Proto file somehow.

Copy link
Contributor Author

@kashifkhan0771 kashifkhan0771 Feb 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Deprecated the old one.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it make sense to create a new entry when there's an existing (unused) one?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure why this old entry was added. Was there a detector for it, or was it added by mistake? We can reuse the old entry for the new detector, but I want to deprecate it to keep a record that this key existed.

Anyway I'll update it 😃

}

func (s Scanner) Description() string {
return "Azure Cosmos DB is a globally distributed, multi-model database service offered by Microsoft. CosmosDB keys and connection string are used to connect with Cosmos DB."
}

func (s Scanner) Keywords() []string {
return []string{".documents.azure.com"}
}

func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
dataStr := string(data)

var uniqueKeyMatches, uniqueAccountMatches = make(map[string]struct{}), make(map[string]struct{})

for _, match := range dbKeyPattern.FindAllStringSubmatch(dataStr, -1) {
uniqueKeyMatches[match[1]] = struct{}{}
}

for _, match := range accountUrlPattern.FindAllStringSubmatch(dataStr, -1) {
uniqueAccountMatches[match[1]] = struct{}{}
}

for key := range uniqueKeyMatches {
for accountUrl := range uniqueAccountMatches {
if invalidHosts.Exists(accountUrl) {
delete(uniqueAccountMatches, accountUrl)
continue
}

s1 := detectors.Result{
DetectorType: detectorspb.DetectorType_AzureCosmosDB,
Raw: []byte(key),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The URL should also be added. #3938 (comment)

ExtraData: make(map[string]string),
}

if verify {
verified, verificationErr := verifyCosmosDB(s.getClient(), accountUrl, key)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit**: If the host url is invalid or does not exists, then we should not be spending iteration to verify other keys on that. Richard has already implemented this in AzureContainerRegistry

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is great approach ❤️ Thanks for sharing @abmussani

s1.Verified = verified
if verificationErr != nil {
if errors.Is(verificationErr, noHostErr) {
invalidHosts.Set(accountUrl, struct{}{})
continue
}

s1.SetVerificationError(verificationErr)
}
}

results = append(results, s1)
}
}

return results, nil
}

// documentation: https://learn.microsoft.com/en-us/rest/api/cosmos-db/list-databases
func verifyCosmosDB(client *http.Client, accountUrl, key string) (bool, error) {
// decode the base64 encoded key
decodedKey, err := base64.StdEncoding.DecodeString(key)
if err != nil {
return false, fmt.Errorf("failed to decode key: %v", err)
}

req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("https://%s:443/dbs", accountUrl), nil)
if err != nil {
return false, fmt.Errorf("failed to create request: %v", err)
}

dateRFC1123 := time.Now().UTC().Format("Mon, 02 Jan 2006 15:04:05 GMT")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit**: time.RFC1123 already has the similar format.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kashifkhan0771 is this the expiry time ? it is being used in creating signature.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used the formation for timezone. x-ms-date header require time in GMT
Docs: https://learn.microsoft.com/en-us/rest/api/cosmos-db/common-cosmosdb-rest-request-headers

authHeader := fmt.Sprintf("type=master&ver=1.0&sig=%s", url.QueryEscape(createSignature(decodedKey, dateRFC1123)))

// required headers
// docs: https://learn.microsoft.com/en-us/rest/api/cosmos-db/common-cosmosdb-rest-request-headers
req.Header.Set("Authorization", authHeader)
req.Header.Set("x-ms-date", dateRFC1123)
req.Header.Set("x-ms-version", "2018-12-31")

resp, err := client.Do(req)
if err != nil {
// lookup foo.documents.azure.com: no such host
if strings.Contains(err.Error(), "no such host") {
return false, noHostErr
}

return false, err
}
defer func() {
_, _ = io.Copy(io.Discard, resp.Body)
_ = resp.Body.Close()
}()

// Check response status code
switch resp.StatusCode {
case http.StatusOK:
return true, nil
case http.StatusUnauthorized:
return false, nil
default:
return false, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
}
}

func createSignature(decodedKey []byte, dateRFC1123 string) string {
stringToSign := fmt.Sprintf(
"%s\n%s\n%s\n%s\n\n",
strings.ToLower(http.MethodGet),
strings.ToLower("dbs"),
"",
strings.ToLower(dateRFC1123),
)

// compute HMAC-SHA256 signature
mac := hmac.New(sha256.New, decodedKey)
mac.Write([]byte(stringToSign))

return base64.StdEncoding.EncodeToString(mac.Sum(nil))
}
129 changes: 129 additions & 0 deletions pkg/detectors/azure_cosmosdb/azure_cosmosdb_integration_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
//go:build detectors
// +build detectors

package azure_cosmosdb

import (
"context"
"fmt"
"testing"
"time"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

func TestCosmosDB_FromChunk(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
defer cancel()
testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors5")
if err != nil {
t.Fatalf("could not get test secrets from GCP: %s", err)
}

key := testSecrets.MustGetField("COSMOSDB_KEY")
accountUrl := testSecrets.MustGetField("COSMOSDB_ACCOUNT")
inactiveKey := testSecrets.MustGetField("COSMOSDB_INACTIVE")

type args struct {
ctx context.Context
data []byte
verify bool
}
tests := []struct {
name string
s Scanner
args args
want []detectors.Result
wantErr bool
wantVerificationErr bool
}{
{
name: "found, verified",
s: Scanner{},
args: args{
ctx: ctx,
data: []byte(fmt.Sprintf("You can find a cosmosdb key: %s and account url: %s within", key, accountUrl)),
verify: true,
},
want: []detectors.Result{
{
DetectorType: detectorspb.DetectorType_Meraki,
Verified: true,
},
},
wantErr: false,
wantVerificationErr: false,
},
{
name: "found, unverified",
s: Scanner{},
args: args{
ctx: ctx,
data: []byte(fmt.Sprintf("You can find a cosmosdb key: %s and accounturl: %s within but not valid", inactiveKey, accountUrl)), // the secret would satisfy the regex but not pass validation
verify: true,
},
want: []detectors.Result{
{
DetectorType: detectorspb.DetectorType_Meraki,
Verified: false,
},
},
wantErr: false,
wantVerificationErr: false,
},
{
name: "not found",
s: Scanner{},
args: args{
ctx: context.Background(),
data: []byte("You cannot find the secret within"),
verify: true,
},
want: nil,
wantErr: false,
wantVerificationErr: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data)
if (err != nil) != tt.wantErr {
t.Errorf("CosmosDB.FromData() error = %v, wantErr %v", err, tt.wantErr)
return
}
for i := range got {
if len(got[i].Raw) == 0 {
t.Fatalf("no raw secret present: \n %+v", got[i])
}
if (got[i].VerificationError() != nil) != tt.wantVerificationErr {
t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError())
}
}
ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "verificationError")
if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" {
t.Errorf("CosmosDB.FromData() %s diff: (-got +want)\n%s", tt.name, diff)
}
})
}
}

func BenchmarkFromData(benchmark *testing.B) {
ctx := context.Background()
s := Scanner{}
for name, data := range detectors.MustGetBenchmarkData() {
benchmark.Run(name, func(b *testing.B) {
b.ResetTimer()
for n := 0; n < b.N; n++ {
_, err := s.FromData(ctx, false, data)
if err != nil {
b.Fatal(err)
}
}
})
}
}
91 changes: 91 additions & 0 deletions pkg/detectors/azure_cosmosdb/azure_cosmosdb_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package azure_cosmosdb

import (
"context"
"testing"

"github.com/google/go-cmp/cmp"

"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick"
)

var (
validPattern = `
Cluster name: Cluster name must be at least 3 characters and at most 40 characters.
Cluster name must only contain lowercase letters, numbers, and hyphens.
The cluster name must not start or end in a hyphen.
// config
cosmosKey: FakeeP35zYGPXaEUfakeU7S8kcOY7NI7id8ddbHfakeAifake8Bbql1mXhMF2t0wQ0FAKEPQrwZZACDb3msoAg==
https://trufflesecurity-fake.documents.azure.com:443
`

invalidPattern = `
FakeeP35zYGPXaEUfakeU7S8kcOY7I7id8ddbHfakeAifake8Bbql1mXhMF2t0wQ0FAKEPQrwZZACDb3msoAg==
https://not-a-host.documents.azure.com:443
`
)

func TestCosmosDB_Pattern(t *testing.T) {
d := Scanner{}
ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d})

tests := []struct {
name string
input string
want []string
}{
{
name: "valid pattern",
input: validPattern,
want: []string{"FakeeP35zYGPXaEUfakeU7S8kcOY7NI7id8ddbHfakeAifake8Bbql1mXhMF2t0wQ0FAKEPQrwZZACDb3msoAg=="},
},
{
name: "invalid pattern",
input: invalidPattern,
want: nil,
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
matchedDetectors := ahoCorasickCore.FindDetectorMatches([]byte(test.input))
if len(matchedDetectors) == 0 {
t.Errorf("keywords '%v' not matched by: %s", d.Keywords(), test.input)
return
}

results, err := d.FromData(context.Background(), false, []byte(test.input))
if err != nil {
t.Errorf("error = %v", err)
return
}

if len(results) != len(test.want) {
if len(results) == 0 {
t.Errorf("did not receive result")
} else {
t.Errorf("expected %d results, only received %d", len(test.want), len(results))
}
return
}

actual := make(map[string]struct{}, len(results))
for _, r := range results {
if len(r.RawV2) > 0 {
actual[string(r.RawV2)] = struct{}{}
} else {
actual[string(r.Raw)] = struct{}{}
}
}
expected := make(map[string]struct{}, len(test.want))
for _, v := range test.want {
expected[v] = struct{}{}
}

if diff := cmp.Diff(expected, actual); diff != "" {
t.Errorf("%s diff: (-want +got)\n%s", test.name, diff)
}
})
}
}
Loading
Oops, something went wrong.
Loading
Oops, something went wrong.