Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added azure COSMOSDB detector #3951

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 184 additions & 0 deletions pkg/detectors/azure_cosmosdb/azure_cosmosdb.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
package azure_cosmosdb

import (
"context"
"crypto/hmac"
"crypto/sha256"
"encoding/base64"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"regexp"
"strings"
"time"

"github.com/trufflesecurity/trufflehog/v3/pkg/cache/simple"
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

type Scanner struct {
client *http.Client
}

var (
defaultClient = common.SaneHttpClient()

dbKeyPattern = regexp.MustCompile(`([A-Za-z0-9]{86}==)`)
// account name can contain only lowercase letters, numbers and the `-` character, must be between 3 and 44 characters long.
accountUrlPattern = regexp.MustCompile(`([a-z0-9-]{3,44}\.(?:documents|table\.cosmos)\.azure\.com)`)

invalidHosts = simple.NewCache[struct{}]()

noHostErr = errors.New("no such host")
)

func (s Scanner) getClient() *http.Client {
if s.client != nil {
return s.client
}

return defaultClient
}

// Ensure the Scanner satisfies the interface at compile time.
var _ detectors.Detector = (*Scanner)(nil)

func (s Scanner) Type() detectorspb.DetectorType {
return detectorspb.DetectorType_AzureCosmosDBKeyIdentifiable
}

func (s Scanner) Description() string {
return "Azure Cosmos DB is a globally distributed, multi-model database service offered by Microsoft. CosmosDB keys and connection string are used to connect with Cosmos DB."
}

func (s Scanner) Keywords() []string {
return []string{".documents.azure.com", ".table.cosmos.azure.com"}
}

func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
dataStr := string(data)

var uniqueKeyMatches, uniqueAccountMatches = make(map[string]struct{}), make(map[string]struct{})

for _, match := range dbKeyPattern.FindAllStringSubmatch(dataStr, -1) {
uniqueKeyMatches[match[1]] = struct{}{}
}

for _, match := range accountUrlPattern.FindAllStringSubmatch(dataStr, -1) {
uniqueAccountMatches[match[1]] = struct{}{}
}

for key := range uniqueKeyMatches {
for accountUrl := range uniqueAccountMatches {
if invalidHosts.Exists(accountUrl) {
delete(uniqueAccountMatches, accountUrl)
continue
}

s1 := detectors.Result{
DetectorType: detectorspb.DetectorType_AzureCosmosDBKeyIdentifiable,
Raw: []byte(key),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The URL should also be added. #3938 (comment)

RawV2: []byte("key: " + key + " account_url: " + accountUrl), // key: <key> account_url: <account_url>
ExtraData: map[string]string{},
}

if verify {
var verified bool
var verificationErr error

client := s.getClient()

// perform verification based on db type
if strings.Contains(accountUrl, ".documents.azure.com") {
verified, verificationErr = verifyCosmosDocumentDB(client, accountUrl, key)
s1.ExtraData["DB Type"] = "Document"

} else if strings.Contains(accountUrl, ".table.cosmos.azure.com") {
verified, verificationErr = verifyCosmosTableDB(client, accountUrl, key)
s1.ExtraData["DB Type"] = "Table"
}

s1.Verified = verified
if verificationErr != nil {
if errors.Is(verificationErr, noHostErr) {
invalidHosts.Set(accountUrl, struct{}{})
continue
}

s1.SetVerificationError(verificationErr)
}
}

results = append(results, s1)
}
}

return results, nil
}

// documentation: https://learn.microsoft.com/en-us/rest/api/cosmos-db/list-databases
func verifyCosmosDocumentDB(client *http.Client, accountUrl, key string) (bool, error) {
// decode the base64 encoded key
decodedKey, err := base64.StdEncoding.DecodeString(key)
if err != nil {
return false, fmt.Errorf("failed to decode key: %v", err)
}

req, err := http.NewRequest(http.MethodGet, fmt.Sprintf("https://%s:443/dbs", accountUrl), nil)
if err != nil {
return false, fmt.Errorf("failed to create request: %v", err)
}

dateRFC1123 := time.Now().UTC().Format("Mon, 02 Jan 2006 15:04:05 GMT")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit**: time.RFC1123 already has the similar format.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kashifkhan0771 is this the expiry time ? it is being used in creating signature.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used the formation for timezone. x-ms-date header require time in GMT
Docs: https://learn.microsoft.com/en-us/rest/api/cosmos-db/common-cosmosdb-rest-request-headers

authHeader := fmt.Sprintf("type=master&ver=1.0&sig=%s", url.QueryEscape(createDocumentsSignature(decodedKey, dateRFC1123)))

// required headers
// docs: https://learn.microsoft.com/en-us/rest/api/cosmos-db/common-cosmosdb-rest-request-headers
req.Header.Set("Authorization", authHeader)
req.Header.Set("x-ms-date", dateRFC1123)
req.Header.Set("x-ms-version", "2018-12-31")

resp, err := client.Do(req)
if err != nil {
// lookup foo.documents.azure.com: no such host
if strings.Contains(err.Error(), "no such host") {
return false, noHostErr
}

return false, err
}
defer func() {
_, _ = io.Copy(io.Discard, resp.Body)
_ = resp.Body.Close()
}()

// Check response status code
switch resp.StatusCode {
case http.StatusOK:
return true, nil
case http.StatusUnauthorized:
return false, nil
default:
return false, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
}
}

func createDocumentsSignature(decodedKey []byte, dateRFC1123 string) string {
stringToSign := fmt.Sprintf(
"%s\n%s\n%s\n%s\n\n",
strings.ToLower(http.MethodGet),
strings.ToLower("dbs"),
"",
strings.ToLower(dateRFC1123),
)

// compute HMAC-SHA256 signature
mac := hmac.New(sha256.New, decodedKey)
mac.Write([]byte(stringToSign))

return base64.StdEncoding.EncodeToString(mac.Sum(nil))
}
129 changes: 129 additions & 0 deletions pkg/detectors/azure_cosmosdb/azure_cosmosdb_integration_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
//go:build detectors
// +build detectors

package azure_cosmosdb

import (
"context"
"fmt"
"testing"
"time"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

func TestCosmosDB_FromChunk(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
defer cancel()
testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors5")
if err != nil {
t.Fatalf("could not get test secrets from GCP: %s", err)
}

key := testSecrets.MustGetField("COSMOSDB_KEY")
accountUrl := testSecrets.MustGetField("COSMOSDB_ACCOUNT")
inactiveKey := testSecrets.MustGetField("COSMOSDB_INACTIVE")

type args struct {
ctx context.Context
data []byte
verify bool
}
tests := []struct {
name string
s Scanner
args args
want []detectors.Result
wantErr bool
wantVerificationErr bool
}{
{
name: "found, verified",
s: Scanner{},
args: args{
ctx: ctx,
data: []byte(fmt.Sprintf("You can find a cosmosdb key: %s and account url: %s within", key, accountUrl)),
verify: true,
},
want: []detectors.Result{
{
DetectorType: detectorspb.DetectorType_Meraki,
Verified: true,
},
},
wantErr: false,
wantVerificationErr: false,
},
{
name: "found, unverified",
s: Scanner{},
args: args{
ctx: ctx,
data: []byte(fmt.Sprintf("You can find a cosmosdb key: %s and accounturl: %s within but not valid", inactiveKey, accountUrl)), // the secret would satisfy the regex but not pass validation
verify: true,
},
want: []detectors.Result{
{
DetectorType: detectorspb.DetectorType_Meraki,
Verified: false,
},
},
wantErr: false,
wantVerificationErr: false,
},
{
name: "not found",
s: Scanner{},
args: args{
ctx: context.Background(),
data: []byte("You cannot find the secret within"),
verify: true,
},
want: nil,
wantErr: false,
wantVerificationErr: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data)
if (err != nil) != tt.wantErr {
t.Errorf("CosmosDB.FromData() error = %v, wantErr %v", err, tt.wantErr)
return
}
for i := range got {
if len(got[i].Raw) == 0 {
t.Fatalf("no raw secret present: \n %+v", got[i])
}
if (got[i].VerificationError() != nil) != tt.wantVerificationErr {
t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError())
}
}
ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "verificationError")
if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" {
t.Errorf("CosmosDB.FromData() %s diff: (-got +want)\n%s", tt.name, diff)
}
})
}
}

func BenchmarkFromData(benchmark *testing.B) {
ctx := context.Background()
s := Scanner{}
for name, data := range detectors.MustGetBenchmarkData() {
benchmark.Run(name, func(b *testing.B) {
b.ResetTimer()
for n := 0; n < b.N; n++ {
_, err := s.FromData(ctx, false, data)
if err != nil {
b.Fatal(err)
}
}
})
}
}
Loading
Oops, something went wrong.
Loading
Oops, something went wrong.