trufflesecurity · rgmz · Dec 18, 2023 · Jan 4, 2024 · rosecodym · Jan 2, 2024
diff --git a/pkg/detectors/npm/common.go b/pkg/detectors/npm/common.go
@@ -0,0 +1,161 @@
+package npm
+
+import (
+	"context"
+	"crypto/tls"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+
+	"github.com/trufflesecurity/trufflehog/v3/pkg/common"
+)
+
+var defaultClient = common.SaneHttpClient() // Fallback used by verifyToken when npmScanner.client is nil.
+
+// npmScanner provides verifyToken to the scanners that embed it (e.g. ScannerGeneric).
+// client is the HTTP client used for verification; when nil, verifyToken uses defaultClient.
+type npmScanner struct{ client *http.Client }
+
+// verifyToken checks |token| against the npm registries referenced in |data|.
+//
+// When findTokenRegistry pairs the token with a specific registry, only that
+// registry is queried. Otherwise each registry returned by findAllRegistryURLs
+// is tried until one of them accepts the token.
+//
+// It returns three values:
+//  1. whether the token is valid
+//  2. data associated with the token
+//  3. any errors encountered during verification
+func (s npmScanner) verifyToken(ctx context.Context, data string, token string) (bool, map[string]string, error) {
+	httpClient := s.client
+	if httpClient == nil {
+		httpClient = defaultClient
+	}
+
+	// A high confidence match was found, attempt to verify the token against it.
+	// e.g., |token|="s3cret" and |data| contains "//npm.company.com/:_authToken=s3cret".
+	// TODO: Handle multiple high confidence matches
+	if match := findTokenRegistry(data, token); match != nil {
+		return doVerification(ctx, httpClient, match, token)
+	}
+
+	// A high confidence match was not found.
+	// Attempt to verify the token against any registries we can find.
+	candidates := findAllRegistryURLs(data)
+	failures := make([]error, 0, len(candidates))
+	for _, candidate := range candidates {
+		ok, extra, err := doVerification(ctx, httpClient, candidate, token)
+		if ok {
+			return true, extra, err
+		}
+		if err != nil {
+			failures = append(failures, err)
+		}
+	}
+	// errors.Join yields nil when no attempt produced an error.
+	return false, nil, errors.Join(failures...)
+}
+
+// whoamiResponse is the JSON body of a registry's "/-/whoami" endpoint, which most registries
+// implement to report the username of the authenticated user.
+type whoamiResponse struct {
+	Username string `json:"username"`
+}
+
+// doVerification checks whether |token| is valid for the given |registry| by sending it as a Bearer
+// credential to the registry's "/-/whoami" endpoint. It returns whether the token is valid, data
+// describing a valid token (nil otherwise), and an error if validity could not be determined.
+func doVerification(ctx context.Context, client *http.Client, registry *registryInfo, token string) (bool, map[string]string, error) {
+	scheme := registry.Scheme.Prefix()
+	if registry.Scheme == unknown {
+		scheme = isHttps.Prefix() // Assume HTTPS when the scheme is unknown.
+	}
+	reqUrl := fmt.Sprintf("%s%s/-/whoami", scheme, registry.Uri)
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqUrl, nil)
+	if err != nil {
+		return false, nil, fmt.Errorf("failed to construct request: %w", err)
+	}
+
+	req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", token))
+	res, err := client.Do(req)
+	if err != nil {
+		// A |tls.RecordHeaderError| likely means that the server is using HTTP, not HTTPS.
+		// TODO: Is it possible to handle the reverse case?
+		var tlsErr tls.RecordHeaderError
+		if errors.As(err, &tlsErr) && registry.Scheme == isHttps {
+			r := *registry // Retry on a copy so the caller's |registry| is left untouched.
+			r.Scheme = isHttp
+			return doVerification(ctx, client, &r, token)
+		}
+		return false, nil, fmt.Errorf("request to %s failed: %w", reqUrl, err)
+	}
+	defer func() { // Drain whatever is left unread, then close.
+		_, _ = io.Copy(io.Discard, res.Body)
+		_ = res.Body.Close()
+	}()
+
+	if res.StatusCode != http.StatusOK {
+		if res.StatusCode == http.StatusUnauthorized ||
+			(registry.RegistryType == github && res.StatusCode == http.StatusForbidden) {
+			return false, nil, nil // Token is not valid.
+		}
+		return false, nil, fmt.Errorf("unexpected response status %d for %s", res.StatusCode, reqUrl) // Here be dragons.
+	}
+
+	body, err := io.ReadAll(res.Body)
+	if err != nil { // A truncated body must not be mistaken for a non-JSON (i.e. invalid) reply.
+		return false, nil, fmt.Errorf("failed to read response %s: %w", reqUrl, err)
+	}
+	whoamiRes := whoamiResponse{}
+	if err := json.Unmarshal(body, &whoamiRes); err != nil {
+		if json.Valid(body) {
+			return false, nil, fmt.Errorf("failed to decode response %s: %w", reqUrl, err)
+		}
+		// If the response isn't JSON it's highly unlikely to be valid.
+		return false, nil, nil
+	}
+
+	// It is possible for the response to be `{"username": null}`, `{"username":""}`, etc.
+	// While a valid token _can_ return an empty username, the registry is likely returning 200 for invalid auth.
+	// TODO: Write a test for this.
+	if whoamiRes.Username == "" ||
+		(registry.RegistryType == nexusRepo3 && strings.HasPrefix(whoamiRes.Username, "anonymous")) ||
+		(registry.RegistryType == jetbrains && whoamiRes.Username == "internal") {
+		anonReq := req.Clone(ctx) // |req| must not be mutated while its response body is still open.
+		anonReq.Header.Del("Authorization")
+		res2, err := client.Do(anonReq)
+		if err != nil {
+			return false, nil, fmt.Errorf("request failed for %s: %w", reqUrl, err)
+		}
+		_, _ = io.Copy(io.Discard, res2.Body)
+		_ = res2.Body.Close()
+		if res2.StatusCode == http.StatusOK { // 200 without credentials says nothing about |token|.
+			return false, nil, nil
+		}
+	}
+
+	return true, map[string]string{
+		"registry_type":  registry.RegistryType.String(),
+		"registry_url":   registry.Uri,
+		"username":       whoamiRes.Username,
+		"rotation_guide": "https://howtorotate.com/docs/tutorials/npm/",
+	}, nil
+}
+
+// firstNonEmptyMatch returns the index and value of the first non-empty match at or after index
+// |skip| (the first index is the entire matched string). A negative |skip| is treated as 0.
+// If no non-empty match is found, it will return: 0, "".
+func firstNonEmptyMatch(matches []string, skip int) (int, string) {
+	if skip < 0 {
+		skip = 0 // Guard: slicing or indexing with a negative value would panic.
+	}
+	for i := skip; i < len(matches); i++ {
+		if matches[i] != "" {
+			return i, matches[i]
+		}
+	}
+	return 0, ""
+}
diff --git a/pkg/detectors/npm/common_test.go b/pkg/detectors/npm/common_test.go
@@ -0,0 +1,47 @@
+package npm
+
+import (
+	"context"
+	"testing"
+
+	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick"
+)
+
+type npmPatternTestCase struct { // One named entry of the table consumed by testPattern.
+	input    string // Chunk fed to the keyword matcher and to Detector.FromData.
+	expected string // Raw value expected from the first result; "" when no result is expected.
+}
+
+// testPattern runs each entry of |tests| as a subtest against detector |d|.
+// A case fails when no keyword of |d| matches its input, when FromData returns an error, or when
+// the Raw value of the first result differs from the expected string ("" expects no result).
+func testPattern(t *testing.T, d detectors.Detector, tests map[string]npmPatternTestCase) {
+	core := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d})
+
+	for name, tc := range tests {
+		t.Run(name, func(t *testing.T) {
+			// The detector's keywords must match the input.
+			matched := make(map[ahocorasick.DetectorKey]detectors.Detector, 2)
+			core.PopulateMatchingDetectors(tc.input, matched)
+			if len(matched) == 0 {
+				t.Errorf("keywords '%v' not matched by %s", d.Keywords(), tc.input)
+				return
+			}
+
+			// Run the detector with verification disabled.
+			results, err := d.FromData(context.Background(), false, []byte(tc.input))
+			switch {
+			case err != nil:
+				t.Errorf("error = %v", err)
+			case len(results) == 0 && tc.expected != "":
+				t.Error("did not receive result")
+			case len(results) > 0:
+				// Only the first result is compared.
+				if actual := string(results[0].Raw); tc.expected != actual {
+					t.Errorf("expected '%s' != actual '%s'", tc.expected, actual)
+				}
+			}
+		})
+	}
+}
diff --git a/pkg/detectors/npm/npm_token_generic.go b/pkg/detectors/npm/npm_token_generic.go
@@ -0,0 +1,78 @@
+package npm
+
+import (
+	"context"
+	"regexp"
+	"strings"
+
+	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
+)
+
+// ScannerGeneric detects npm auth tokens matched by genericKeyPat.
+// It embeds npmScanner, whose verifyToken method FromData uses for verification.
+type ScannerGeneric struct{ npmScanner }
+
+// Compile-time assertions that *ScannerGeneric implements the detector interfaces.
+var (
+	_ detectors.Detector  = (*ScannerGeneric)(nil)
+	_ detectors.Versioner = (*ScannerGeneric)(nil)
+)
+
+func (s ScannerGeneric) Version() int { return 0 } // Version implements detectors.Versioner; the generic detector is version 0.
+
+// genericKeyPat should match all possible values for .npmrc auth tokens: group 1 captures the value (5+ characters) that follows `_authToken` or a case-insensitive `npm_token`-style key.
+// TODO: Ensure this works with Yarn and UPM configurations.
+var genericKeyPat = regexp.MustCompile(`(?:_authToken|(?i:npm[_\-.]?token))['"]?[ \t]*[=:]?(?:[ \t]*['"]?)?([a-zA-Z0-9\-_.+=/]{5,})`)
+
+// Keywords are used for efficiently pre-filtering chunks; use identifiers in the secret preferably, or the provider name.
+// NOTE(review): genericKeyPat also accepts `npmtoken` without a separator, which has no keyword here — confirm that is intended.
+func (s ScannerGeneric) Keywords() []string {
+	return []string{"_authToken", "npm_token", "npm-token", "npm.token"}
+}
+
+// FromData will find and optionally verify secrets in a given set of bytes.
+// Each distinct token captured by genericKeyPat yields at most one result, in order of first
+// appearance; unverified tokens that are known false positives are dropped.
+func (s ScannerGeneric) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
+	dataStr := string(data)
+
+	// Deduplicate results for more efficient handling; a slice keeps first-seen order (map iteration order is random).
+	seen := make(map[string]struct{})
+	var tokens []string
+	for _, match := range genericKeyPat.FindAllStringSubmatch(dataStr, -1) {
+		t := match[1]
+		// Ignore results that can be handled by the v1 or v2 detectors.
+		if strings.HasPrefix(t, "NpmToken.") || strings.HasPrefix(t, "npm_") {
+			continue
+		}
+		if _, dup := seen[t]; !dup {
+			seen[t] = struct{}{}
+			tokens = append(tokens, t)
+		}
+	}
+
+	for _, token := range tokens {
+		s1 := detectors.Result{
+			DetectorType: s.Type(),
+			Raw:          []byte(token),
+		}
+		if verify {
+			verified, extraData, vErr := s.verifyToken(ctx, dataStr, token)
+			s1.Verified = verified
+			s1.ExtraData = extraData
+			s1.SetVerificationError(vErr)
+		}
+
+		// This function will check false positives for common test words, but also it will make sure the key appears 'random' enough to be a real key.
+		if !s1.Verified && detectors.IsKnownFalsePositive(token, detectors.DefaultFalsePositives, true) {
+			continue
+		}
+		results = append(results, s1)
+	}
+	return results, nil
+}
+
+// Type returns detectorspb.DetectorType_NpmToken, the detector type that FromData
+// assigns to the DetectorType field of every result it builds.
+func (s ScannerGeneric) Type() detectorspb.DetectorType { return detectorspb.DetectorType_NpmToken }