Skip to content

Commit

Permalink
feat(npm): overhaul detector
Browse files Browse the repository at this point in the history
  • Loading branch information
rgmz authored and Richard Gomez committed Dec 28, 2023
1 parent eeb4dbd commit 0080bf4
Show file tree
Hide file tree
Showing 18 changed files with 2,143 additions and 419 deletions.
161 changes: 161 additions & 0 deletions pkg/detectors/npm/common.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
package npm

import (
"context"
"crypto/tls"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"strings"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
)

// defaultClient is the fallback HTTP client: verifyToken uses it whenever an
// npmScanner has no client of its own.
var defaultClient = common.SaneHttpClient()

// npmScanner carries the HTTP client used for token verification.
// It is embedded by detector types such as ScannerGeneric.
type npmScanner struct {
// client performs the verification requests.
// When nil, verifyToken falls back to defaultClient.
client *http.Client
}

// verifyToken tries to confirm that |token| is live, using |data| to work out
// which registry (or registries) it should be checked against.
//
// It returns, in order:
//  1. true when a registry accepted the token
//  2. extra data describing the token, as produced by doVerification
//  3. any errors hit while verifying
func (s npmScanner) verifyToken(ctx context.Context, data string, token string) (bool, map[string]string, error) {
	// The receiver is a copy, so picking the fallback client only affects this call.
	httpClient := s.client
	if httpClient == nil {
		httpClient = defaultClient
	}

	// A high confidence match ties the token to one specific registry,
	// e.g., |token|="s3cret" and |data| contains "//npm.company.com/:_authToken=s3cret".
	// TODO: Handle multiple high confidence matches
	if match := findTokenRegistry(data, token); match != nil {
		return doVerification(ctx, httpClient, match, token)
	}

	// No high confidence match: try every registry URL found in |data| and
	// stop at the first one that accepts the token.
	candidates := findAllRegistryURLs(data)
	failures := make([]error, 0, len(candidates))
	for _, candidate := range candidates {
		ok, extra, err := doVerification(ctx, httpClient, candidate, token)
		if ok {
			return true, extra, err
		}
		if err != nil {
			failures = append(failures, err)
		}
	}
	return false, nil, errors.Join(failures...)
}

// whoamiResponse is the JSON body decoded from a registry's "/-/whoami" endpoint.
// Most registries implement this endpoint, which returns the username of the
// authenticated user.
type whoamiResponse struct {
// Username is read from the "username" key of the response.
Username string `json:"username"`
}

// doVerification checks whether |token| is valid for the given |registry| by
// sending an authenticated GET request to the registry's "/-/whoami" endpoint.
//
// It returns:
//  1. whether the registry accepted the token
//  2. when accepted, a map with the keys "registry_type", "registry_url",
//     "username" and "rotation_guide"; nil otherwise
//  3. an error when the outcome could not be determined (request failure,
//     unreadable or undecodable response, unexpected status code)
func doVerification(ctx context.Context, client *http.Client, registry *registryInfo, token string) (bool, map[string]string, error) {
	// Construct and send request. A registry with an unknown scheme is tried over HTTPS.
	scheme := registry.Scheme.Prefix()
	if registry.Scheme == unknown {
		scheme = isHttps.Prefix()
	}
	reqUrl := fmt.Sprintf("%s%s/-/whoami", scheme, registry.Uri)
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqUrl, nil)
	if err != nil {
		return false, nil, fmt.Errorf("failed to construct request: %w", err)
	}

	req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", token))
	res, err := client.Do(req)
	if err != nil {
		// A |tls.RecordHeaderError| likely means that the server is using HTTP, not HTTPS.
		// Retry once over HTTP; the retry cannot recurse again because its scheme is no longer HTTPS.
		// TODO: Is it possible to handle the reverse case?
		var tlsErr tls.RecordHeaderError
		if errors.As(err, &tlsErr) && registry.Scheme == isHttps {
			r := *registry
			r.Scheme = isHttp
			return doVerification(ctx, client, &r, token)
		}
		return false, nil, fmt.Errorf("request to %s failed: %w", reqUrl, err)
	}
	defer func() {
		// Drain before closing so the transport can reuse the connection.
		_, _ = io.Copy(io.Discard, res.Body)
		_ = res.Body.Close()
	}()

	// Handle the response.
	if res.StatusCode == http.StatusUnauthorized ||
		(registry.RegistryType == github && res.StatusCode == http.StatusForbidden) {
		// Token is not valid.
		return false, nil, nil
	}
	if res.StatusCode != http.StatusOK {
		// Here be dragons.
		return false, nil, fmt.Errorf("unexpected response status %d for %s", res.StatusCode, reqUrl)
	}

	body, err := io.ReadAll(res.Body)
	if err != nil {
		// A partially read body must not be mistaken for a "not JSON, therefore invalid" response.
		return false, nil, fmt.Errorf("failed to read response from %s: %w", reqUrl, err)
	}
	var whoamiRes whoamiResponse
	if err := json.Unmarshal(body, &whoamiRes); err != nil {
		if json.Valid(body) {
			return false, nil, fmt.Errorf("failed to decode response %s: %w", reqUrl, err)
		}
		// If the response isn't JSON it's highly unlikely to be valid.
		return false, nil, nil
	}

	// It is possible for the response to be `{"username": null}`, `{"username":""}`, etc.
	// While a valid token _can_ return an empty username, the registry is likely returning 200 for invalid auth.
	// Repeat the request without credentials: if that also succeeds, the 200 proves nothing about the token.
	// TODO: Write a test for this.
	if whoamiRes.Username == "" ||
		(registry.RegistryType == nexusRepo3 && strings.HasPrefix(whoamiRes.Username, "anonymous")) ||
		(registry.RegistryType == jetbrains && whoamiRes.Username == "internal") {
		// Use a clone rather than mutating |req|, whose response body is still open.
		anonReq := req.Clone(ctx)
		anonReq.Header.Del("Authorization")
		res2, err := client.Do(anonReq)
		if err != nil {
			return false, nil, fmt.Errorf("request failed for %s: %w", reqUrl, err)
		}
		_, _ = io.Copy(io.Discard, res2.Body)
		_ = res2.Body.Close()

		if res2.StatusCode == http.StatusOK {
			return false, nil, nil
		}
	}

	data := map[string]string{
		"registry_type":  registry.RegistryType.String(),
		"registry_url":   registry.Uri,
		"username":       whoamiRes.Username,
		"rotation_guide": "https://howtorotate.com/docs/tutorials/npm/",
	}
	return true, data, nil
}

// firstNonEmptyMatch scans |matches| from position |skip| onwards and returns
// the index (relative to the full slice) and value of the first non-empty entry.
// When there is no such entry, or |skip| is past the end, it returns: 0, "".
func firstNonEmptyMatch(matches []string, skip int) (int, string) {
	// Index 0 of a regexp submatch slice is the entire matched string,
	// which is why callers pass a |skip| to start at the capture groups.
	for idx := skip; idx < len(matches); idx++ {
		if group := matches[idx]; group != "" {
			return idx, group
		}
	}
	return 0, ""
}
47 changes: 47 additions & 0 deletions pkg/detectors/npm/common_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package npm

import (
"context"
"testing"

"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick"
)

// npmPatternTestCase is a single input/expectation pair consumed by testPattern.
type npmPatternTestCase struct {
// input is the raw chunk handed to keyword matching and to the detector's FromData.
input string
// expected is compared against the Raw value of the first result.
// Leave it empty when the input should produce no result.
expected string
}

// testPattern runs every named case in |tests| as a subtest against detector |d|.
// For each case it checks that:
//  1. the detector's keywords select it for the input, and
//  2. FromData (with verification disabled) yields the expected first result,
//     or no result when the expectation is empty.
func testPattern(t *testing.T, d detectors.Detector, tests map[string]npmPatternTestCase) {
	core := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d})

	for name, tc := range tests {
		t.Run(name, func(t *testing.T) {
			// The keyword pre-filter has to pick up the detector for this input.
			matched := make(map[ahocorasick.DetectorKey]detectors.Detector, 2)
			core.PopulateMatchingDetectors(tc.input, matched)
			if len(matched) == 0 {
				t.Errorf("keywords '%v' not matched by %s", d.Keywords(), tc.input)
				return
			}

			// Run detection only; verification is switched off.
			results, err := d.FromData(context.Background(), false, []byte(tc.input))
			if err != nil {
				t.Errorf("error = %v", err)
				return
			}

			switch {
			case len(results) > 0:
				// Only the first result is compared.
				if got := string(results[0].Raw); got != tc.expected {
					t.Errorf("expected '%s' != actual '%s'", tc.expected, got)
				}
			case tc.expected != "":
				t.Error("did not receive result")
			}
		})
	}
}
78 changes: 78 additions & 0 deletions pkg/detectors/npm/npm_token_generic.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package npm

import (
"context"
"regexp"
"strings"

"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

// ScannerGeneric detects npm auth tokens matched by genericKeyPat.
// It embeds npmScanner, which supplies verifyToken and the HTTP client.
type ScannerGeneric struct {
npmScanner
}

// Ensure ScannerGeneric satisfies the Detector and Versioner interfaces at compile time.
var _ interface {
detectors.Detector
detectors.Versioner
} = (*ScannerGeneric)(nil)

// Version reports 0 as the version of the generic detector.
// NOTE(review): FromData skips tokens "handled by the v1 or v2 detectors", so
// presumably those report 1 and 2 — confirm against the sibling detectors.
func (s ScannerGeneric) Version() int { return 0 }

// genericKeyPat should match all possible values for .npmrc auth tokens.
//
// The pattern reads, left to right:
//   - the key: a case-sensitive `_authToken`, or a case-insensitive `npm` + `token`
//     optionally separated by one of `_`, `-` or `.`
//   - an optional closing quote, optional spaces/tabs, an optional `=` or `:`,
//     then optional spaces/tabs and an optional opening quote
//   - capture group 1, the token: five or more characters from `[a-zA-Z0-9\-_.+=/]`
//
// TODO: Ensure this works with Yarn and UPM configurations.
var genericKeyPat = regexp.MustCompile(`(?:_authToken|(?i:npm[_\-.]?token))['"]?[ \t]*[=:]?(?:[ \t]*['"]?)?([a-zA-Z0-9\-_.+=/]{5,})`)

// Keywords returns the substrings used for efficiently pre-filtering chunks:
// the `_authToken` key plus the `npm_token`, `npm-token` and `npm.token` spellings.
// Use identifiers in the secret preferably, or the provider name.
// NOTE(review): genericKeyPat also accepts `npmtoken` with no separator, which
// has no keyword here — confirm whether that spelling should be pre-filtered too.
func (s ScannerGeneric) Keywords() []string {
return []string{"_authToken", "npm_token", "npm-token", "npm.token"}
}

// FromData will find and optionally verify secrets in a given set of bytes.
//
// Every distinct token captured by genericKeyPat yields at most one result, and
// results are emitted in the order the tokens first appear in |data|. Tokens
// that start with "NpmToken." or "npm_" are skipped because other detectors
// handle them. When |verify| is true each token is checked with verifyToken;
// unverified tokens that look like known false positives are dropped.
// The returned error is always nil; verification failures are recorded on the
// individual results instead.
func (s ScannerGeneric) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
	dataStr := string(data)

	// Deduplicate results for more efficient handling, while remembering the
	// order of first appearance so the output does not depend on map iteration order.
	var (
		seen   = make(map[string]struct{})
		tokens []string
	)
	for _, match := range genericKeyPat.FindAllStringSubmatch(dataStr, -1) {
		t := match[1]
		// Ignore results that can be handled by the v1 or v2 detectors.
		if strings.HasPrefix(t, "NpmToken.") || strings.HasPrefix(t, "npm_") {
			continue
		}
		if _, dup := seen[t]; dup {
			continue
		}
		seen[t] = struct{}{}
		tokens = append(tokens, t)
	}
	if len(tokens) == 0 {
		return nil, nil
	}

	// Iterate through results.
	for _, token := range tokens {
		s1 := detectors.Result{
			DetectorType: s.Type(),
			Raw:          []byte(token),
		}

		if verify {
			verified, extraData, vErr := s.verifyToken(ctx, dataStr, token)
			s1.Verified = verified
			s1.ExtraData = extraData
			s1.SetVerificationError(vErr)
		}

		// This function will check false positives for common test words, but also it will make sure the key appears 'random' enough to be a real key.
		// A verified token is always kept, whatever it looks like.
		if !s1.Verified && detectors.IsKnownFalsePositive(token, detectors.DefaultFalsePositives, true) {
			continue
		}

		results = append(results, s1)
	}
	return results, nil
}

// Type reports detectorspb.DetectorType_NpmToken as this detector's type;
// FromData stamps it on every result it produces.
func (s ScannerGeneric) Type() detectorspb.DetectorType {
return detectorspb.DetectorType_NpmToken
}
Loading

0 comments on commit 0080bf4

Please sign in to comment.