diff --git a/pkg/detectors/npm/common.go b/pkg/detectors/npm/common.go new file mode 100644 index 000000000000..13eaaf9d073a --- /dev/null +++ b/pkg/detectors/npm/common.go @@ -0,0 +1,161 @@ +package npm + +import ( + "context" + "crypto/tls" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "strings" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" +) + +var defaultClient = common.SaneHttpClient() + +type npmScanner struct { + client *http.Client +} + +// verifyToken attempts to verify a |token| by finding the associated registry URL in |data|. +// It returns three values: +// 1. whether the token is valid +// 2. data associated with the token +// 3. any errors encountered during verification +func (s npmScanner) verifyToken(ctx context.Context, data string, token string) (bool, map[string]string, error) { + if s.client == nil { + s.client = defaultClient + } + + registry := findTokenRegistry(data, token) + if registry != nil { + // A high confidence match was found, attempt to verify the token against it. + // e.g., |token|="s3cret" and |data| contains "//npm.company.com/:_authToken=s3cret". + // TODO: Handle multiple high confidence matches + return doVerification(ctx, s.client, registry, token) + } else { + // A high confidence match was not found. + // Attempt to verify the token against any registries we can find. + var ( + registries = findAllRegistryURLs(data) + errs = make([]error, 0, len(registries)) + + verified bool + extraData map[string]string + err error + ) + for _, registry := range registries { + verified, extraData, err = doVerification(ctx, s.client, registry, token) + if verified { + return true, extraData, err + } + if err != nil { + errs = append(errs, err) + } + } + return false, nil, errors.Join(errs...) + } +} + +// Most repositories implement a "whoami" endpoint +// that returns the username of the authenticated user. +type whoamiResponse struct { + Username string `json:"username"` +} + +// doVerification checks whether |token| is valid for the given |registry|. +func doVerification(ctx context.Context, client *http.Client, registry *registryInfo, token string) (bool, map[string]string, error) { + // Construct and send request. + scheme := registry.Scheme.Prefix() + if registry.Scheme == unknown { + scheme = isHttps.Prefix() + } + reqUrl := fmt.Sprintf("%s%s/-/whoami", scheme, registry.Uri) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, reqUrl, nil) + if err != nil { + return false, nil, fmt.Errorf("failed to construct request: %s", err) + } + + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", token)) + res, err := client.Do(req) + if err != nil { + // A |tls.RecordHeaderError| likely means that the server is using HTTP, not HTTPS. + // TODO: Is it possible to handle the reverse case? + var tlsErr tls.RecordHeaderError + if errors.As(err, &tlsErr) && registry.Scheme == isHttps { + r := *registry + r.Scheme = isHttp + return doVerification(ctx, client, &r, token) + } + return false, nil, fmt.Errorf("request to %s failed: %w", reqUrl, err) + } + defer func() { + _, _ = io.Copy(io.Discard, res.Body) + _ = res.Body.Close() + }() + + // Handle the response. + if res.StatusCode == http.StatusOK { + body, _ := io.ReadAll(res.Body) + whoamiRes := whoamiResponse{} + if err := json.Unmarshal(body, &whoamiRes); err != nil { + if json.Valid(body) { + return false, nil, fmt.Errorf("failed to decode response %s: %w", reqUrl, err) + } else { + // If the response isn't JSON it's highly unlikely to be valid. + return false, nil, nil + } + } + + // It is possible for the response to be `{"username": null}`, `{"username":""}`, etc. + // While a valid token _can_ return an empty username, the registry is likely returning 200 for invalid auth. + // TODO: Write a test for this. + if whoamiRes.Username == "" || + (registry.RegistryType == nexusRepo3 && strings.HasPrefix(whoamiRes.Username, "anonymous")) || + (registry.RegistryType == jetbrains && whoamiRes.Username == "internal") { + req.Header.Del("Authorization") + res2, err := client.Do(req) + if err != nil { + return false, nil, fmt.Errorf("request failed for %s: %w", reqUrl, err) + } + _, _ = io.Copy(io.Discard, res.Body) + _ = res2.Body.Close() + + if res2.StatusCode == http.StatusOK { + return false, nil, nil + } + } + + data := map[string]string{ + "registry_type": registry.RegistryType.String(), + "registry_url": registry.Uri, + "username": whoamiRes.Username, + "rotation_guide": "https://howtorotate.com/docs/tutorials/npm/", + } + return true, data, nil + } else if res.StatusCode == http.StatusUnauthorized || + (registry.RegistryType == github && res.StatusCode == http.StatusForbidden) { + // Token is not valid. + return false, nil, nil + } else { + // Here be dragons. + return false, nil, fmt.Errorf("unexpected response status %d for %s", res.StatusCode, reqUrl) + } +} + +// firstNonEmptyMatch returns the index and value of the first non-empty match. +// If no non-empty match is found, it will return: 0, "". +func firstNonEmptyMatch(matches []string, skip int) (int, string) { + if len(matches) < skip { + return 0, "" + } + // The first index is the entire matched string. + for i, val := range matches[skip:] { + if val != "" { + return i + skip, val + } + } + return 0, "" +} diff --git a/pkg/detectors/npm/common_test.go b/pkg/detectors/npm/common_test.go new file mode 100644 index 000000000000..508adeb1033c --- /dev/null +++ b/pkg/detectors/npm/common_test.go @@ -0,0 +1,47 @@ +package npm + +import ( + "context" + "testing" + + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" +) + +type npmPatternTestCase struct { + input string + expected string +} + +func testPattern(t *testing.T, d detectors.Detector, tests map[string]npmPatternTestCase) { + ahoCorasickCore := ahocorasick.NewAhoCorasickCore([]detectors.Detector{d}) + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + chunkSpecificDetectors := make(map[ahocorasick.DetectorKey]detectors.Detector, 2) + ahoCorasickCore.PopulateMatchingDetectors(test.input, chunkSpecificDetectors) + if len(chunkSpecificDetectors) == 0 { + t.Errorf("keywords '%v' not matched by %s", d.Keywords(), test.input) + return + } + + results, err := d.FromData(context.Background(), false, []byte(test.input)) + if err != nil { + t.Errorf("error = %v", err) + return + } + + if len(results) == 0 { + if test.expected != "" { + t.Error("did not receive result") + } + return + } + + actual := string(results[0].Raw) + if test.expected != actual { + t.Errorf("expected '%s' != actual '%s'", test.expected, actual) + } + }) + } +} diff --git a/pkg/detectors/npm/npm_token_generic.go b/pkg/detectors/npm/npm_token_generic.go new file mode 100644 index 000000000000..5e83501d37d6 --- /dev/null +++ b/pkg/detectors/npm/npm_token_generic.go @@ -0,0 +1,78 @@ +package npm + +import ( + "context" + "regexp" + "strings" + + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +type ScannerGeneric struct { + npmScanner +} + +// Ensure the Scanner satisfies the interfaces at compile time. +var _ interface { + detectors.Detector + detectors.Versioner +} = (*ScannerGeneric)(nil) + +func (s ScannerGeneric) Version() int { return 0 } + +// genericKeyPat should match all possible values for .npmrc auth tokens. +// TODO: Ensure this works with Yarn and UPM configurations. +var genericKeyPat = regexp.MustCompile(`(?:_authToken|(?i:npm[_\-.]?token))['"]?[ \t]*[=:]?(?:[ \t]*['"]?)?([a-zA-Z0-9\-_.+=/]{5,})`) + +// Keywords are used for efficiently pre-filtering chunks. +// Use identifiers in the secret preferably, or the provider name. +func (s ScannerGeneric) Keywords() []string { + return []string{"_authToken", "npm_token", "npm-token", "npm.token"} +} + +// FromData will find and optionally verify secrets in a given set of bytes. +func (s ScannerGeneric) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { + dataStr := string(data) + + // Deduplicate results for more efficient handling. + tokens := make(map[string]struct{}) + for _, match := range genericKeyPat.FindAllStringSubmatch(dataStr, -1) { + t := match[1] + // Ignore results that can be handled by the v1 or v2 detectors. + if strings.HasPrefix(t, "NpmToken.") || strings.HasPrefix(t, "npm_") { + continue + } + tokens[t] = struct{}{} + } + if len(tokens) == 0 { + return + } + + // Iterate through results. + for token := range tokens { + s1 := detectors.Result{ + DetectorType: s.Type(), + Raw: []byte(token), + } + + if verify { + verified, extraData, vErr := s.verifyToken(ctx, dataStr, token) + s1.Verified = verified + s1.ExtraData = extraData + s1.SetVerificationError(vErr) + } + + // This function will check false positives for common test words, but also it will make sure the key appears 'random' enough to be a real key. + if !s1.Verified && detectors.IsKnownFalsePositive(token, detectors.DefaultFalsePositives, true) { + continue + } + + results = append(results, s1) + } + return +} + +func (s ScannerGeneric) Type() detectorspb.DetectorType { + return detectorspb.DetectorType_NpmToken +} diff --git a/pkg/detectors/npm/npm_token_generic_test.go b/pkg/detectors/npm/npm_token_generic_test.go new file mode 100644 index 000000000000..622f3e5c9949 --- /dev/null +++ b/pkg/detectors/npm/npm_token_generic_test.go @@ -0,0 +1,256 @@ +//go:build detectors +// +build detectors + +package npm + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestNpmTokenGeneric_Pattern(t *testing.T) { + cases := map[string]npmPatternTestCase{ + "_authToken/top_level": { + input: `_authToken = dL4dfTOJSL8pijHrBFPKqp2bUwLGkVotezEn8dfTPe-Qa1cP +registry = https://npm.company.com/ +always-auth = true`, + expected: "dL4dfTOJSL8pijHrBFPKqp2bUwLGkVotezEn8dfTPe-Qa1cP", + }, + "_authToken/scoped/artifactory": { + input: `registry=https://artifactory.example.com/artifactory/api/npm/npm/ +//artifactory.example.com/artifactory/api/npm/npm/:_authToken=eyJ2ZXIiOiIyIiwidHlwIjoiSldUIiwiYWxnIjoiUlMyNTYiLCJraWQiOiJSZ25DcnBEVXlKOV9yNElVRnNSU2hqU0E0aGpibEpjZ0M2bnJhN3ZqcTNNIn0.eyJzdWIiOiJqZnJ0QDAxY2pwdDc0N3ZyNHo0MTU4MHNiN3MxYW14XC91c2Vyc1wvYXJ0dXJvLmNhbXBvcyIsInNjcCI6ImFwcGxpZWQtcGVybWlzc2lvbnNcL2dyb3VwczpyZWFkZXJzLGRlcGxveS1kZXYtbnBtLGRlcGxveS1sb2NhbCIsImF1ZCI6ImpmcnRAMDFjanB0NzQ3dnI0ejQxNTgwc2I3czFhbXgiLCJpc3MiOiJqZnJ0QDAxY2pwdDc0N3ZyNHo0MTU4MHNiN3MxYW14XC91c2Vyc1wvYXJ0dXJvLmNhbXBvcyIsImV4cCI6MTY1NjAwNDUxOSwiaWF0IjoxNjU2MDAwOTE5LCJqdGkiOiJjOWZhM2VhNS01MzE0LTQwMzUtYjNiYy03OGNjMDJmYmM1NWMifQ.EIEPLzz9H2oQ3rKLKI_t1qtxi-G9ym6P6J5z7xWLsq79aHK3XV_E8_yuUK8giaOOdl_CaITOjl8Bt2aUVXTZKFKIOiQMLscBM4B6qGj_n4Qq8jiMwcKjnD_iWx8Wo-aNHHxgjvrRdWf-2UPIm6lSc77oZbUNAjhA5Q-W3uQRG7d50FGZpq_EEZsfbOcD7EMU2ZnvfYNTgTtmhZWfLefzB6xUF8WHgiDAVHJKQ2fKLX45Z9trc2SkKQmPBxaS-pBtKBhK15kQZ3x625KtLRr2ZwgOaJKHcg4SwuGOpyTF48nTk53SDorSj6fqlypTavVQRi-5cuSGTPrqLObk6lwpRg`, + expected: "eyJ2ZXIiOiIyIiwidHlwIjoiSldUIiwiYWxnIjoiUlMyNTYiLCJraWQiOiJSZ25DcnBEVXlKOV9yNElVRnNSU2hqU0E0aGpibEpjZ0M2bnJhN3ZqcTNNIn0.eyJzdWIiOiJqZnJ0QDAxY2pwdDc0N3ZyNHo0MTU4MHNiN3MxYW14XC91c2Vyc1wvYXJ0dXJvLmNhbXBvcyIsInNjcCI6ImFwcGxpZWQtcGVybWlzc2lvbnNcL2dyb3VwczpyZWFkZXJzLGRlcGxveS1kZXYtbnBtLGRlcGxveS1sb2NhbCIsImF1ZCI6ImpmcnRAMDFjanB0NzQ3dnI0ejQxNTgwc2I3czFhbXgiLCJpc3MiOiJqZnJ0QDAxY2pwdDc0N3ZyNHo0MTU4MHNiN3MxYW14XC91c2Vyc1wvYXJ0dXJvLmNhbXBvcyIsImV4cCI6MTY1NjAwNDUxOSwiaWF0IjoxNjU2MDAwOTE5LCJqdGkiOiJjOWZhM2VhNS01MzE0LTQwMzUtYjNiYy03OGNjMDJmYmM1NWMifQ.EIEPLzz9H2oQ3rKLKI_t1qtxi-G9ym6P6J5z7xWLsq79aHK3XV_E8_yuUK8giaOOdl_CaITOjl8Bt2aUVXTZKFKIOiQMLscBM4B6qGj_n4Qq8jiMwcKjnD_iWx8Wo-aNHHxgjvrRdWf-2UPIm6lSc77oZbUNAjhA5Q-W3uQRG7d50FGZpq_EEZsfbOcD7EMU2ZnvfYNTgTtmhZWfLefzB6xUF8WHgiDAVHJKQ2fKLX45Z9trc2SkKQmPBxaS-pBtKBhK15kQZ3x625KtLRr2ZwgOaJKHcg4SwuGOpyTF48nTk53SDorSj6fqlypTavVQRi-5cuSGTPrqLObk6lwpRg", + }, + "_authToken/scoped/gitlab_old": { + input: `RUN npm config set "//gitlab.com/api/v4/packages/npm/:_authToken" "aB3_-9R2sPqLxY0ZwCc8D"`, + expected: "aB3_-9R2sPqLxY0ZwCc8D", + }, + "_authToken/scoped/gitlab_new": { + input: `@company:registry=https://gitlab.com/api/v4/projects/12356452/packages/npm/ +//gitlab.com/api/v4/projects/12356452/packages/npm/:_authToken=glpat-ijTwXA7JxJAEx7uzmVfH`, + expected: "glpat-ijTwXA7JxJAEx7uzmVfH", + }, + "_authToken/scoped/github": { + input: `npm config set @company:registry=https://npm.pkg.github.com/ + npm config set //npm.pkg.github.com/:_authToken=ghp_sS4gaQUHsXSdwojekGTlaIAgJ77Wsn4D7gPO + npm install -g @company/internal-package@1.2.3`, + expected: "ghp_sS4gaQUHsXSdwojekGTlaIAgJ77Wsn4D7gPO", + }, + "_authToken/scoped/google": { + input: `//us-east1-npm.pkg.dev/company-dev-167118/project/:_authToken=ya29.c.b0Aaekm1IkdX0VHcPkEnYB8dmgL5IHi3jXTffM9hwjaNInirTSOv3hsCsJuCyswioQpO1UGkvVqReYpSW7V6sxmTv7fSPpZJeKzRQcKb6LLjApF7gFGyZMg9lUf7YBFixyGDZxaq0T-FnksK8O7KC4MSxalTe4dnl_jWXcs7FKi-FQOwsAuR1-zwRS63F1YG9fpCq2WykPhjwcbYPVlb3jpTOmIJhGpaWq7Sd5_uunWTHadgI3sCCazp_rT8xa8MS8YtyTJl716Taix4nmD-2Rertq9uS8P9AkFHMHaRvXl3W2PbNHxQtJ3fI3RRmBaVSe5WQlA1MofCol-lUN344KqfknpMfIjuXhB4h5fRB5zZ0Z2te_f0SVS1ZZ1Ox-sWVbh-2keFJ9Um0OS7O46rIOg_z7X-817qf_rIWmhQZgmk6ubI4hVR-7_lUl8lzi0ypo6Ve2wVZd6n9q8ws_RWt5k1Q_YkI0ukMe-U3a7s8F16w7r0OSFqVmhy0psYQ4-8jp79IhUbdm10l32tQhZ22UcIuBk8S4FX4c7nUIWk1Xd9xeu0JJ8Xrwd6nW24i1j-vbnMZxk0t1_5ZljUXVncxo5xjucr4WyYOmvuaS_iSaz89jUdYfxiOxWieiUVbuzZSXWvZFFZrkQFggeq3Vq2hmj6Zls_W8Bm9o8-8020S6692rB5pf8h6b-RI4zig928xufkWtVfMXU3fth24eVWqYIIy_qQh6UcJ1_VZF8uvMYoowUu3aQShisof8eoi8qQixO4YIJ2VSSctnFvthQUJ_0p5Vt9YQSyx_XqS8_1VeaMpnwWMkXXSVZrflo9Ieb_s-V9RlYB23attr3oR0_5Q95rzgJXivy1-UmhIO3iSn4QWZceeJynjv1MB01tyJd35icywcmc5j5onQl5F3x0_rdZSZyhVduujy6gFk1yBnWidRFg38QU4Y2_ZwS68e66SjYXddORq9-xaw6fjO4qkb1n6i6zb00eX1mY2rq8UpqQ6v4t1n4n4h9M3 +@fortawesome:registry=https://npm.fontawesome.com/`, + expected: "ya29.c.b0Aaekm1IkdX0VHcPkEnYB8dmgL5IHi3jXTffM9hwjaNInirTSOv3hsCsJuCyswioQpO1UGkvVqReYpSW7V6sxmTv7fSPpZJeKzRQcKb6LLjApF7gFGyZMg9lUf7YBFixyGDZxaq0T-FnksK8O7KC4MSxalTe4dnl_jWXcs7FKi-FQOwsAuR1-zwRS63F1YG9fpCq2WykPhjwcbYPVlb3jpTOmIJhGpaWq7Sd5_uunWTHadgI3sCCazp_rT8xa8MS8YtyTJl716Taix4nmD-2Rertq9uS8P9AkFHMHaRvXl3W2PbNHxQtJ3fI3RRmBaVSe5WQlA1MofCol-lUN344KqfknpMfIjuXhB4h5fRB5zZ0Z2te_f0SVS1ZZ1Ox-sWVbh-2keFJ9Um0OS7O46rIOg_z7X-817qf_rIWmhQZgmk6ubI4hVR-7_lUl8lzi0ypo6Ve2wVZd6n9q8ws_RWt5k1Q_YkI0ukMe-U3a7s8F16w7r0OSFqVmhy0psYQ4-8jp79IhUbdm10l32tQhZ22UcIuBk8S4FX4c7nUIWk1Xd9xeu0JJ8Xrwd6nW24i1j-vbnMZxk0t1_5ZljUXVncxo5xjucr4WyYOmvuaS_iSaz89jUdYfxiOxWieiUVbuzZSXWvZFFZrkQFggeq3Vq2hmj6Zls_W8Bm9o8-8020S6692rB5pf8h6b-RI4zig928xufkWtVfMXU3fth24eVWqYIIy_qQh6UcJ1_VZF8uvMYoowUu3aQShisof8eoi8qQixO4YIJ2VSSctnFvthQUJ_0p5Vt9YQSyx_XqS8_1VeaMpnwWMkXXSVZrflo9Ieb_s-V9RlYB23attr3oR0_5Q95rzgJXivy1-UmhIO3iSn4QWZceeJynjv1MB01tyJd35icywcmc5j5onQl5F3x0_rdZSZyhVduujy6gFk1yBnWidRFg38QU4Y2_ZwS68e66SjYXddORq9-xaw6fjO4qkb1n6i6zb00eX1mY2rq8UpqQ6v4t1n4n4h9M3", + }, + "_authToken/scoped/gemfury": { + input: `# always-auth=true +# registry=https://npm.fury.io/company/ +# //npm.fury.io/company/:_authToken=Du4CPz7SsRom1Mz8hbSR`, + expected: "Du4CPz7SsRom1Mz8hbSR", + }, + "_authToken/scoped/other(1)": { + input: `//npm.company.com/:_authToken="VVEvgoi7lSkBaCd4s0Gb0A==" +always_auth=true +registry=https://npm.company.com/ +user=jdoe`, + expected: "VVEvgoi7lSkBaCd4s0Gb0A==", + }, + "_authToken/scoped/other(2)": { + input: `loglevel=silent +registry=https://npm.company.de:4873/ +@babel:registry=https://registry.npmjs.org +//npm.company.de:4873/:_authToken="zZcZwiAWuyyspOAGrzAlE/LBH55oyfzsIOQvPsQ/5n0="`, + expected: "zZcZwiAWuyyspOAGrzAlE/LBH55oyfzsIOQvPsQ/5n0=", + }, + "_authToken/scoped/other(3)": { + input: `strict-ssl=true +//r.privjs.com/:_authToken=JWT_eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyVVVJRCI6ImI1NjM1NDdmLTQ5NTQtNGVkOS04ZDI4LTkzZjFlYjE2YjgwYyIsInVzZXJuYW1lIjoiZGFya28iLCJpYXQiOjE2NzAzMzQwMTV9.itmCA6WviKLcGwahuV-K2cvDtDkM_j7o_NjZrdzWu0M +@module-federation:registry=https://r.privjs.com`, + expected: "JWT_eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyVVVJRCI6ImI1NjM1NDdmLTQ5NTQtNGVkOS04ZDI4LTkzZjFlYjE2YjgwYyIsInVzZXJuYW1lIjoiZGFya28iLCJpYXQiOjE2NzAzMzQwMTV9.itmCA6WviKLcGwahuV-K2cvDtDkM_j7o_NjZrdzWu0M", + }, + "NPM_TOKEN/buildkite": { + input: `steps: + - label: 'Install' + command: NODE_ENV=development yarn install --frozen-lockfile + plugins: + - ssh://git@github.com/foo/bar-plugin#v0.0.18: + secrets: + NPM_TOKEN: "x3jAqghGq90/oN3mM3rWxQ8KaD4nw9g6bw/dL4dfTOJSL8pijHrBFPK6p7bUwLGkVotezEn8dfTPe-Qa1cP"`, + expected: "x3jAqghGq90/oN3mM3rWxQ8KaD4nw9g6bw/dL4dfTOJSL8pijHrBFPK6p7bUwLGkVotezEn8dfTPe-Qa1cP", + }, + "NPM_TOKEN/cloudbuild": { + input: `secrets: +- kmsKeyName: projects/myproject/locations/global/keyRings/cloud-build/cryptoKeys/cloud-build + secretEnv: + NPM_TOKEN: CiQAwtE8WoPa1sNqAQJZ1WMODuJooVmO6zihz2hAZOfUmDsgogUSTQCq8yp8qgltY+8jWpAR9GuS1JaVhd+fTVRilqLtdi2yXSdiDPTzLhZ+30bMlAOcoc0PxhCBn3JOpn8H1xshX+mG8yK7xog2Uq+CLVx/ + +timeout: 60s`, + expected: "CiQAwtE8WoPa1sNqAQJZ1WMODuJooVmO6zihz2hAZOfUmDsgogUSTQCq8yp8qgltY+8jWpAR9GuS1JaVhd+fTVRilqLtdi2yXSdiDPTzLhZ+30bMlAOcoc0PxhCBn3JOpn8H1xshX+mG8yK7xog2Uq+CLVx/", + }, + + // Invalid + "invalid/_authToken/top_level": { + input: `_authToken = ${NPM_TOKEN} +registry = https://npm.company.com/`, + }, + "invalid/_authToken/v1_pattern": { + input: `echo //nexus.company.com/repository/npm-registry/:_authToken=NpmToken.fe093789-9551-3238-a766-9d2b694f2600 >> .npmrc`, + }, + "invalid/_authToken/v2_pattern": { + input: ` //registry.npmjs.org/:_authToken=npm_ArCHsOJAC3gMXmzaVwUts00QfWWUrW4UuewA`, + }, + } + + testPattern(t, ScannerGeneric{}, cases) +} + +func TestNpmTokenGeneric_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors5") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("NPM_TOKEN_GENERIC") + inactiveSecret := testSecrets.MustGetField("NPM_TOKEN_GENERIC_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s ScannerGeneric + args args + want []detectors.Result + wantErr bool + wantVerificationErr bool + }{ + { + name: "found, verified", + s: ScannerGeneric{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a npm_token_generic secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_NpmToken, + Verified: true, + }, + }, + wantErr: false, + wantVerificationErr: false, + }, + { + name: "found, unverified", + s: ScannerGeneric{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a npm_token_generic secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_NpmToken, + Verified: false, + }, + }, + wantErr: false, + wantVerificationErr: false, + }, + { + name: "not found", + s: ScannerGeneric{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + wantVerificationErr: false, + }, + { + name: "found, would be verified if not for timeout", + s: ScannerGeneric{npmScanner{client: common.SaneHttpClientTimeOut(1 * time.Microsecond)}}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a npm_token_generic secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_NpmToken, + Verified: false, + }, + }, + wantErr: false, + wantVerificationErr: true, + }, + { + name: "found, verified but unexpected api surface", + s: ScannerGeneric{npmScanner{client: common.ConstantResponseHttpClient(404, "")}}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a npm_token_generic secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_NpmToken, + Verified: false, + }, + }, + wantErr: false, + wantVerificationErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("npm_token_generic.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + if (got[i].VerificationError() != nil) != tt.wantVerificationErr { + t.Fatalf("wantVerificationError = %v, verification error = %v", tt.wantVerificationErr, got[i].VerificationError()) + } + } + ignoreOpts := cmpopts.IgnoreFields(detectors.Result{}, "Raw", "verificationError") + if diff := cmp.Diff(got, tt.want, ignoreOpts); diff != "" { + t.Errorf("npm_token_generic.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromData(benchmark *testing.B) { + ctx := context.Background() + s := ScannerGeneric{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/npm/npm_token_v1.go b/pkg/detectors/npm/npm_token_v1.go new file mode 100644 index 000000000000..15f47f9a51a1 --- /dev/null +++ b/pkg/detectors/npm/npm_token_v1.go @@ -0,0 +1,70 @@ +package npm + +import ( + "context" + "regexp" + + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +type ScannerV1 struct { + npmScanner +} + +// Ensure the Scanner satisfies the interfaces at compile time. +var _ interface { + detectors.Detector + detectors.Versioner +} = (*ScannerV1)(nil) + +func (s ScannerV1) Version() int { return 1 } + +// Make sure that your group is surrounded in boundary characters such as below to reduce false positives. +var v1KeyPat = regexp.MustCompile(`(?:NpmToken\.|` + detectors.PrefixRegex([]string{"npm"}) + `)\b(?i)([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})\b`) + +// Keywords are used for efficiently pre-filtering chunks. +// Use identifiers in the secret preferably, or the provider name. +func (s ScannerV1) Keywords() []string { + return []string{"npm"} +} + +// FromData will find and optionally verify NpmToken secrets in a given set of bytes. +func (s ScannerV1) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { + dataStr := string(data) + + // Deduplicate results for more efficient handling. + tokens := make(map[string]struct{}) + for _, match := range v1KeyPat.FindAllStringSubmatch(dataStr, -1) { + tokens[match[1]] = struct{}{} + } + if len(tokens) == 0 { + return + } + + for token := range tokens { + s1 := detectors.Result{ + DetectorType: s.Type(), + Raw: []byte(token), + } + + if verify { + verified, extraData, vErr := s.verifyToken(ctx, dataStr, token) + s1.Verified = verified + s1.ExtraData = extraData + s1.SetVerificationError(vErr) + } + + // This function will check false positives for common test words, but also it will make sure the key appears 'random' enough to be a real key. + if !s1.Verified && detectors.IsKnownFalsePositive(token, detectors.DefaultFalsePositives, true) { + continue + } + + results = append(results, s1) + } + return +} + +func (s ScannerV1) Type() detectorspb.DetectorType { + return detectorspb.DetectorType_NpmToken +} diff --git a/pkg/detectors/npm/npm_token_v1_test.go b/pkg/detectors/npm/npm_token_v1_test.go new file mode 100644 index 000000000000..bd5e1fa00daf --- /dev/null +++ b/pkg/detectors/npm/npm_token_v1_test.go @@ -0,0 +1,202 @@ +//go:build detectors +// +build detectors + +package npm + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestNpmTokenV1_Pattern(t *testing.T) { + cases := map[string]npmPatternTestCase{ + "npmrc/_authToken/top_level": { + input: `registry=https://nexus.company.com/repository/npm-group/ +_authToken=NpmToken.3e9adc26-5c1b-3fdf-901f-6df392a48616`, + expected: "3e9adc26-5c1b-3fdf-901f-6df392a48616", + }, + "npmrc/_authToken/scoped/npm": { + input: `loglevel=silent +registry=https://registry.npmjs.org/ +//registry.npmjs.org/:_authToken=fcb3b15d-4d4a-44dc-b92d-13ee9d25582d`, + expected: "fcb3b15d-4d4a-44dc-b92d-13ee9d25582d", + }, + "npmrc/_authToken/scoped/nexus": { + input: ` echo email=jdoe@company.com > .npmrc + echo always-auth=true >> .npmrc + echo registry=https://nexus.company.com:8443/repository/npm-registry/ >> .npmrc + echo //nexus.company.com/repository:8443/npm-registry/:_authToken=NpmToken.de093289-9551-3238-a766-9d2c694f2600 >> .npmrc`, + expected: "de093289-9551-3238-a766-9d2c694f2600", + }, + "npmrc/_authToken/scopegd/other(1)": { + input: `@fontawesome:registry=https://npm.fontawesome.com/ +//npm.fontawesome.com/:_authToken=E8EC7793-A630-49AA-3351-6887EE647296`, + expected: "E8EC7793-A630-49AA-3351-6887EE647296", + }, + "yarn/npmAuthToken/scoped": { + input: `npmScopes: + fortawesome: + npmAlwaysAuth: true + npmRegistryServer: "https://npm.fontawesome.com/" + npmAuthToken: "${20FCC725-C7FF-4BBF-3DE8-632C89A16C87}"`, + expected: "20FCC725-C7FF-4BBF-3DE8-632C89A16C87", + }, + "misc(1)": { + input: `CI: "true" + NPM_PUBLISH_URL: "http://nexus3.company.net:8081/repository/npm-releases/" + NPM_PUBLISH_TOKEN: "b5505337-ffb2-3fac-8b3a-fcd81b8bb8fb"`, + expected: "b5505337-ffb2-3fac-8b3a-fcd81b8bb8fb", + }, + "misc(2)": { + input: `- name: NPM_PUBLISH_TOKEN + description: "Npm user used when upload artifacts" + required: true + value: "NpmToken.b5505337-ffb2-3fac-8b3a-fcd81b8ab8fb"`, + expected: "b5505337-ffb2-3fac-8b3a-fcd81b8ab8fb", + }, + "misc(3)": { + input: `root@4f5ec7bfe603:/# cd && cat .npmrc +//192.168.1.253:8081/repository/npm-group-local/:_authToken=NpmToken.7385beb7-2f92-3295-8ccf-8020132d6232`, + expected: "7385beb7-2f92-3295-8ccf-8020132d6232", + }, + "misc(4)": { + input: `ENV NPM_TOKEN "16b46f03-f1fb-4dce-9a98-c7e685751e67"`, + expected: "16b46f03-f1fb-4dce-9a98-c7e685751e67", + }, + "misc(5)": { + input: // https://github.com/arnaud-deprez/jenkins-docker-openshift/blob/60bb4dbe4d5484ff3f81697c26892dda4cd33930/charts/jenkins-openshift/values.yaml#L209 + ` CI: "true" + NPM_MIRROR_URL: "http://nexus3:8081/repository/npm-public/" + NPM_PUBLISH_URL: "http://nexus3:8081/repository/npm-releases/" + NPM_PUBLISH_TOKEN: "b5505337-ffb2-3fac-8b3a-fcd81b8bb8fb"`, + expected: "b5505337-ffb2-3fac-8b3a-fcd81b8bb8fb", + }, + + // Invalid + "invalid/_authToken/variable": { + input: `//npm.pkg.github.com/:_authToken=${GITHUB_PACKAGES_AUTH_TOKEN}`, + }, + "invalid/default": { + input: `assert(registry, 'registry not set, example: "https://nexus.foo.com/repository/mynpm/"') +const tokenErrorMsg = 'npm token invalid, example: "NpmToken.00000000-0000-0000-0000-000000000000" before base64 encoded'`, + }, + "invalid/not_uuid": { + input: `# .npmrc +# @ngiq:registry=https://registry.corp.net/repository/npm-group +# //registry.corp.net/repository/:_authToken=NpmToken.xxxx`, + }, + } + testPattern(t, ScannerV1{}, cases) +} + +func TestNpmTokenV1_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("NPM_TOKEN_V1") + inactiveSecret := testSecrets.MustGetField("NPM_TOKEN_V1_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s ScannerV1 + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: ScannerV1{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a npm secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_NpmToken, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: ScannerV1{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a npm secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_NpmToken, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: ScannerV1{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := ScannerV1{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("NpmTokenV1.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("NpmTokenV1.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromDataV1(benchmark *testing.B) { + ctx := context.Background() + s := ScannerV1{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/npm/npm_token_v2.go b/pkg/detectors/npm/npm_token_v2.go new file mode 100644 index 000000000000..525c4e3b4541 --- /dev/null +++ b/pkg/detectors/npm/npm_token_v2.go @@ -0,0 +1,71 @@ +package npm + +import ( + "context" + "regexp" + + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +type ScannerV2 struct { + npmScanner +} + +// Ensure the Scanner satisfies the interfaces at compile time. +var _ interface { + detectors.Detector + detectors.Versioner +} = (*ScannerV2)(nil) + +func (s ScannerV2) Version() int { return 2 } + +// Make sure that your group is surrounded in boundary characters such as below to reduce false positives. +var v2KeyPat = regexp.MustCompile(`\b(npm_[a-zA-Z0-9]{36})\b`) + +// Keywords are used for efficiently pre-filtering chunks. +// Use identifiers in the secret preferably, or the provider name. +func (s ScannerV2) Keywords() []string { + return []string{"npm_"} +} + +// FromData will find and optionally verify NpmTokenV2 secrets in a given set of bytes. +func (s ScannerV2) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { + dataStr := string(data) + + // Deduplicate results for more efficient handling. + tokens := make(map[string]struct{}) + for _, match := range v2KeyPat.FindAllStringSubmatch(dataStr, -1) { + tokens[match[1]] = struct{}{} + } + if len(tokens) == 0 { + return + } + + // Iterate through results. + for token := range tokens { + s1 := detectors.Result{ + DetectorType: s.Type(), + Raw: []byte(token), + } + + if verify { + verified, extraData, vErr := s.verifyToken(ctx, dataStr, token) + s1.Verified = verified + s1.ExtraData = extraData + s1.SetVerificationError(vErr) + } + + // This function will check false positives for common test words, but also it will make sure the key appears 'random' enough to be a real key. + if !s1.Verified && detectors.IsKnownFalsePositive(token, detectors.DefaultFalsePositives, true) { + continue + } + + results = append(results, s1) + } + return +} + +func (s ScannerV2) Type() detectorspb.DetectorType { + return detectorspb.DetectorType_NpmToken +} diff --git a/pkg/detectors/npm/npm_token_v2_test.go b/pkg/detectors/npm/npm_token_v2_test.go new file mode 100644 index 000000000000..4288cc062db6 --- /dev/null +++ b/pkg/detectors/npm/npm_token_v2_test.go @@ -0,0 +1,159 @@ +//go:build detectors +// +build detectors + +package npm + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/kylelemons/godebug/pretty" + + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + + "github.com/trufflesecurity/trufflehog/v3/pkg/common" + "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" +) + +func TestNpmTokenV2_Pattern(t *testing.T) { + tests := map[string]npmPatternTestCase{ + "no_context": { + input: `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`, + expected: "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", + }, + ".npmrc": { + input: `//registry.npmjs.org/:_authToken=npm_ZAQB7VuVmml1pMGorDFwyeEpuQrA8I4ypgPF`, + expected: "npm_ZAQB7VuVmml1pMGorDFwyeEpuQrA8I4ypgPF", + }, + "yaml_spec": { + input: ` - env: + NPM_TOKEN: npm_tCEMceczuiTXKQaBjGIaAezYQ63PqI972ANG`, + expected: "npm_tCEMceczuiTXKQaBjGIaAezYQ63PqI972ANG", + }, + "bashrc": { + input: `export NPM_TOKEN=npm_ySTLJHpS9DCwByClZBMyqRWptr2kB40hEjiS`, + expected: "npm_ySTLJHpS9DCwByClZBMyqRWptr2kB40hEjiS", + }, + + // Invalid + "invalid/placeholder_0": { + input: ` //registry.npmjs.org/:_authToken=npm_000000000000000000000000000000000000`, + }, + "invalid/placeholder_x": { + input: `//registry.npmjs.org/:_authToken=npm_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX`, + }, + "invalid/word_boundary": { + input: ` "image_small_url": "https://c10.patreonusercontent.com/3/eyJoIjo2NDAsInYiOiIzIiwidyI6NjQwfQ%3D%3D/patreon-media/campaign/1493621/91a5dc5347a741af89aaed35d2a82b5c?token-time=2145916800\u0026token-hash=Qznpm_uHiQAba4K3HTRZjrhQei4dU0tmZbaavLrM2FY%3D",`, + }, + "invalid/uppercase": { + input: `"operationId": "Npm_GetScopedPackageVersionFromRecycleBin",`, + }, + } + + testPattern(t, ScannerV2{}, tests) +} + +func TestNpmTokenV2_FromChunk(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + defer cancel() + testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") + if err != nil { + t.Fatalf("could not get test secrets from GCP: %s", err) + } + secret := testSecrets.MustGetField("NPM_TOKEN_V2") + inactiveSecret := testSecrets.MustGetField("NPM_TOKEN_V2_INACTIVE") + + type args struct { + ctx context.Context + data []byte + verify bool + } + tests := []struct { + name string + s ScannerV2 + args args + want []detectors.Result + wantErr bool + }{ + { + name: "found, verified", + s: ScannerV2{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a NpmTokenV2 secret %s within", secret)), + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_NpmToken, + Verified: true, + }, + }, + wantErr: false, + }, + { + name: "found, unverified", + s: ScannerV2{}, + args: args{ + ctx: context.Background(), + data: []byte(fmt.Sprintf("You can find a NpmTokenV2 secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation + verify: true, + }, + want: []detectors.Result{ + { + DetectorType: detectorspb.DetectorType_NpmToken, + Verified: false, + }, + }, + wantErr: false, + }, + { + name: "not found", + s: ScannerV2{}, + args: args{ + ctx: context.Background(), + data: []byte("You cannot find the secret within"), + verify: true, + }, + want: nil, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := ScannerV2{} + got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) + if (err != nil) != tt.wantErr { + t.Errorf("NpmTokenV2.FromData() error = %v, wantErr %v", err, tt.wantErr) + return + } + for i := range got { + if len(got[i].Raw) == 0 { + t.Fatalf("no raw secret present: \n %+v", got[i]) + } + got[i].Raw = nil + } + if diff := pretty.Compare(got, tt.want); diff != "" { + t.Errorf("NpmTokenV2.FromData() %s diff: (-got +want)\n%s", tt.name, diff) + } + }) + } +} + +func BenchmarkFromDataV2(benchmark *testing.B) { + ctx := context.Background() + s := ScannerV2{} + for name, data := range detectors.MustGetBenchmarkData() { + benchmark.Run(name, func(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + _, err := s.FromData(ctx, false, data) + if err != nil { + b.Fatal(err) + } + } + }) + } +} diff --git a/pkg/detectors/npm/registry.go b/pkg/detectors/npm/registry.go new file mode 100644 index 000000000000..e7b99ca392ca --- /dev/null +++ b/pkg/detectors/npm/registry.go @@ -0,0 +1,300 @@ +package npm + +import ( + "fmt" + "regexp" + "strings" +) + +type registryInfo struct { + Scheme scheme + Uri string + RegistryType registryType +} + +// The scheme of the registry URL. +type scheme int + +const ( + unknown scheme = iota + isHttp + isHttps +) + +func (scheme scheme) String() string { + return [...]string{ + "unknown", + "isHttp", + "isHttps", + }[scheme] +} + +// Prefix returns the HTTP prefix that corresponds to the enum: "", "http://", and "https://" respectively. +func (scheme scheme) Prefix() string { + return [...]string{ + "", + "http://", + "https://", + }[scheme] +} + +// A collection of known registry implementations. +type registryType int + +const ( + // Others npm registries include: + // - https://github.com/verdaccio/verdaccio + // - https://coding.net/help/docs/ci/practice/artifacts/npm.html + // - https://www.privjs.com + other registryType = iota + npm + artifactoryCloud + artifactoryHosted + nexusRepo2 + nexusRepo3 + gitlab // TODO: Self-hosted GitLab? + github // TODO: Self-hosted GitHub packages? + azure + jetbrains + googleArtifactRegistry + gemfury +) + +func (t registryType) String() string { + return [...]string{ + "other", + "npm", + "artifactoryCloud", + "artifactoryHosted", + "nexusRepo2", + "nexusRepo3", + "gitlab", + "github", + "azure", + "jetbrains", + "googleArtifactRegistry", + "gemfury", + }[t] +} + +var ( + defaultRegistryInfo = ®istryInfo{ + RegistryType: npm, + Scheme: isHttps, + Uri: "registry.npmjs.org", + } + + domainPat = `(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+[a-z]{2,}` // this doesn't match single segment hosts (e.g., localhost) + ipV4Pat = `(?:[0-9]{1,3}\.){3}[0-9]{1,3}` // overly permissive but should be fine in context + hostPat = fmt.Sprintf(`(?:%s|%s)(?::\d{1,5})?`, domainPat, ipV4Pat) + + knownRegistryPat = func() *regexp.Regexp { + var sb strings.Builder + sb.WriteString(`(?i)((?:https?:)?//)(?:`) + // `registry.yarnpkg.com` is a reverse-proxy (https://github.com/yarnpkg/yarn/issues/889) + sb.WriteString(`(registry\.(?:npmjs\.org|yarnpkg\.com))`) + artifactoryPath := `/(?:artifactory|[a-z0-9._-]+)/api/npm/[a-z][a-z0-9._-]+` + artifactoryOldPath := `/(?:artifactory|[a-z0-9._-]+)/v\d\.\d/artifacts/[a-z][a-z0-9._-]+` // appears to be a path from older versions. + sb.WriteString(`|([a-z0-9-]+\.jfrog\.io` + artifactoryPath + `)`) // cloud + sb.WriteString(fmt.Sprintf(`|(%s(?:%s|%s))`, hostPat, artifactoryPath, artifactoryOldPath)) // hosted + // https://help.sonatype.com/repomanager2/node-packaged-modules-and-npm-registries + sb.WriteString(`|(` + hostPat + `/nexus/content/(?:groups|repositories)/[a-z0-9-][a-z0-9._-]+)`) + // https://help.sonatype.com/repomanager3/nexus-repository-administration/formats/npm-registry/configuring-npm + sb.WriteString(`|(` + hostPat + `/(?:nexus/)?repository/[a-z0-9-][a-z0-9._-]+)`) + // https://docs.gitlab.com/ee/user/packages/npm_registry/ + sb.WriteString(`|(` + hostPat + `/api/v4/(?:groups/\d+/-/|projects/\d+/)?packages/npm)`) + // https://docs.github.com/en/packages/working-with-a-github-packages-registry/working-with-the-npm-registry + sb.WriteString(`|(npm\.pkg\.github\.com)`) + // https://learn.microsoft.com/en-us/azure/devops/artifacts/get-started-npm?view=azure-devops&tabs=Windows + // https://stackoverflow.com/a/73495381 + azurePat := `pkgs\.dev\.azure\.com/[a-z0-9._-]+(?:/[a-z0-9._-]+)?` + vsPat := `[a-z0-9-]+\.pkgs\.visualstudio\.com(?:/[a-z0-9._-]+)?` + sb.WriteString(fmt.Sprintf(`|((?:%s|%s)/_packaging/[a-z0-9._-]+/npm(?:/registry)?)`, azurePat, vsPat)) + // https://www.jetbrains.com/help/space/using-an-npm-registry-with-npmjs-com.html + sb.WriteString(`|(npm\.pkg\.jetbrains\.space/[a-z0-9._-]+/p/[a-z0-9._-]+/[a-z0-9._-]+)`) + sb.WriteString(`|((?:[a-z0-9-]+-)?npm\.pkg\.dev/[a-z0-9._-]+/[a-z0-9._-]+)`) + sb.WriteString(`|(npm(?:-proxy)?\.fury\.io/[a-z0-9._-]+)`) + sb.WriteString(`)`) + + return regexp.MustCompile(sb.String()) + }() + genericRegistryPat = func() *regexp.Regexp { + urlPat := fmt.Sprintf(`%s(?:/[a-z0-9._-]+)*`, hostPat) + registryPat := regexp.MustCompile(fmt.Sprintf( + `(?i)['"]?(//%s)/:|registry.{1,50}?['"]?(https?://%s)/?['"]?|@[a-z0-9\-_]{1,50}['"]?[ \t]*(?:=[ \t]*)?['"]?(https?://%s)/?['"]?|\[npmAuth\.['"](https?://%s)/?['"]\]`, urlPat, urlPat, urlPat, urlPat)) + + // Sanity check to make sure the pattern doesn't contain a mistake. + if registryPat.NumSubexp() != 4 { + panic(fmt.Sprintf("Pattern |genericRegistryPat| should have 4 capture groups but has %d", registryPat.NumSubexp())) + } + return registryPat + }() +) + +// findTokenRegistry returns the specific registry associated with the |token| if a high confidence match is found in |data|. +// +// Common configurations: +// - npm: https://docs.npmjs.com/using-private-packages-in-a-ci-cd-workflow#create-and-check-in-a-project-specific-npmrc-file +// - Yarn (TODO) +// - Unity Package Manager (TODO) +func findTokenRegistry(data string, token string) *registryInfo { + // .npmrc stores auth as `//registry.com/path/:authToken=$TOKEN + // Therefore, we should be able to correlate a token to a registry with a high degree of confidence. + registryAuthPat := regexp.MustCompile(fmt.Sprintf( + // language=regexp + `(?i)(//%s(?:/[a-z0-9._-]+)*)/:(?:_auth(?:Token)?|_password).{1,20}%s`, hostPat, token)) + matches := registryAuthPat.FindStringSubmatch(data) + if len(matches) == 0 { + return nil + } + + // A match was found, attempt to parse it. + uri := matches[1] + info := parseKnownRegistryURI(data, uri) + if info == nil { + info = parseUnknownRegistryURI(data, uri) + } + return info +} + +// findAllRegistryURLs returns all instances of URLs that *look like* registries. +// These are not associated with a specific token. +func findAllRegistryURLs(data string) map[string]*registryInfo { + registries := make(map[string]*registryInfo) + + // Look for known high-confidence matches. + for _, matches := range knownRegistryPat.FindAllStringSubmatch(data, -1) { + var ( + _, uri = firstNonEmptyMatch(matches, 2) // first two matches are the entire string and protocol/prefix + info = parseKnownRegistryURI(data, matches[1]+uri) + ) + // Might be unnecessary, |info| is almost guaranteed to not be nil. + if info == nil { + continue + } + if _, ok := registries[info.Uri]; ok { + continue + } + + registries[info.Uri] = info + } + + // Attempt to parse any other low confidence matches. + for _, matches := range genericRegistryPat.FindAllStringSubmatch(data, -1) { + // Skip known registry patterns, those should have already been handled above. + if knownRegistryPat.MatchString(matches[0]) { + continue + } + + _, uri := firstNonEmptyMatch(matches, 1) // first match is the entire string + info := ®istryInfo{ + RegistryType: other, + } + info.Scheme, info.Uri = parseRegistryURLScheme(data, uri) + if _, ok := registries[info.Uri]; ok { + continue + } + + registries[info.Uri] = info + } + + if len(registries) == 0 { + registries[defaultRegistryInfo.Uri] = defaultRegistryInfo + } + return registries +} + +// parseKnownRegistryURI +func parseKnownRegistryURI(data string, registryUri string) *registryInfo { + matches := knownRegistryPat.FindStringSubmatch(registryUri) + if len(matches) == 0 { + return nil + } + + // Skip the first two indices: 1 is the entire string, 2 is the protocol. + index, uri := firstNonEmptyMatch(matches, 2) + info := ®istryInfo{ + RegistryType: registryType(index - 1), + } + info.Scheme, info.Uri = parseRegistryURLScheme(data, uri) + + // Normalize the URI. + if info.RegistryType == npm && info.Scheme != isHttps { + info.Scheme = isHttps + } else if info.RegistryType == artifactoryCloud && info.Scheme != isHttps { + info.Scheme = isHttps + } else if info.RegistryType == github && info.Scheme != isHttps { + info.Scheme = isHttps + } else if info.RegistryType == azure { + if info.Scheme != isHttps { + info.Scheme = isHttps + } + if !strings.HasSuffix(strings.ToLower(info.Uri), "/registry") { + info.Uri = info.Uri + "/registry" + } + } else if info.RegistryType == jetbrains && info.Scheme != isHttps { + info.Scheme = isHttps + } else if info.RegistryType == googleArtifactRegistry && info.Scheme != isHttps { + info.Scheme = isHttps + } else if info.RegistryType == gemfury && info.Scheme != isHttps { + info.Scheme = isHttps + } + + return info +} + +// parseUnknownRegistryURI +func parseUnknownRegistryURI(data string, registryUri string) *registryInfo { + scheme, uri := parseRegistryURLScheme(data, registryUri) + info := ®istryInfo{ + RegistryType: other, + Scheme: scheme, + Uri: uri, + } + return info +} + +// parseRegistryURLScheme attempts to find the scheme of the provided |uri|. +func parseRegistryURLScheme(data string, uri string) (scheme, string) { + var ( + scheme = unknown + uriWithoutScheme string + ) + // If the match starts with "http" or "https", we can be confident about the Scheme. + // Otherwise, it is unknown. + u := strings.ToLower(uri) // for case-insensitive comparison. Might not be the best way. + if strings.HasPrefix(u, "https://") { + scheme = isHttps + uriWithoutScheme = uri[8:] + } else if strings.HasPrefix(u, "http://") { + scheme = isHttp + uriWithoutScheme = uri[7:] + } else if strings.HasPrefix(u, "//") { + uriWithoutScheme = uri[2:] + } else { + uriWithoutScheme = uri + } + + // If the Scheme is unknown, look for other instances of the Uri that might have the Scheme. + // + // Scheme -> registry=https://example.com/repository/npm-proxy/ + // no Scheme -> //example.com/repository/npm-proxy/:_authToken=123456 + if scheme == unknown { + var ( + uriPat = regexp.MustCompile(`(?i)(https?)://` + uriWithoutScheme) + schemes = make(map[string]struct{}) + ) + for _, m := range uriPat.FindAllStringSubmatch(data, -1) { + schemes[strings.ToLower(m[1])] = struct{}{} + } + // Decisively HTTP or HTTPS; nothing or both is equally useless. + if len(schemes) == 1 { + if _, ok := schemes["https"]; ok { + scheme = isHttps + } else { + scheme = isHttp + } + } + } + return scheme, uriWithoutScheme +} diff --git a/pkg/detectors/npm/registry_test.go b/pkg/detectors/npm/registry_test.go new file mode 100644 index 000000000000..f8f4f60fb4e9 --- /dev/null +++ b/pkg/detectors/npm/registry_test.go @@ -0,0 +1,782 @@ +package npm + +import ( + "fmt" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" +) + +func TestNpm_KnownRegistryPat(t *testing.T) { + cases := map[registryType]map[string][]string{ + npm: { + "//registry.npmjs.org/": {"//", "registry.npmjs.org"}, + "https://registry.npmjs.org/": {"https://", "registry.npmjs.org"}, + ` resolved "https://registry.yarnpkg.com/abstract-logging/-/abstract-logging-2.0.1.tgz#6b0c371df212db7129b57d2e7fcf282b8bf1c839"`: {"https://", "registry.yarnpkg.com"}, + }, + artifactoryHosted: { + "https://artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm/": {"https://", "artifactory.prd.cds.internal.unity3d.com/artifactory/api/npm/upm-npm"}, + "registry=http://10.85.59.116/artifactory/v1.0/artifacts/npm/": {"http://", "10.85.59.116/artifactory/v1.0/artifacts/npm"}, + }, + artifactoryCloud: { + "//voomp.jfrog.io/artifactory/api/npm/vk-common-bk/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d": {"//", "voomp.jfrog.io/artifactory/api/npm/vk-common-bk"}, + "//geckorobotics.jfrog.io/geckorobotics/api/npm/npm/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d": {"//", "geckorobotics.jfrog.io/geckorobotics/api/npm/npm"}, + }, + nexusRepo2: { + "http://nexus.zenoss.eng:8081/nexus/content/repositories/npm/": {"http://", "nexus.zenoss.eng:8081/nexus/content/repositories/npm"}, + "http://nexus.pas-mini.io/nexus/content/repositories/npm-private/": {"http://", "nexus.pas-mini.io/nexus/content/repositories/npm-private"}, + }, + nexusRepo3: { + "registry=http://34.125.69.241/repository/npm-group/": {"http://", "34.125.69.241/repository/npm-group"}, + "https://repo.huaweicloud.com/repository/npm/": {"https://", "repo.huaweicloud.com/repository/npm"}, + "http://artifacts.lan.tribe29.com:8081/repository/npm-proxy/@babel/": {"http://", "artifacts.lan.tribe29.com:8081/repository/npm-proxy"}, + "http://10.10.69.203:8081/repository/npm-group/": {"http://", "10.10.69.203:8081/repository/npm-group"}, + "//nexus.public.prd.golf-prod.js-devops.co.uk/repository/luna/": {"//", "nexus.public.prd.golf-prod.js-devops.co.uk/repository/luna"}, + "//ec2-18-222-132-112.us-east-2.compute.amazonaws.com:8081/repository/postboard-server/": {"//", "ec2-18-222-132-112.us-east-2.compute.amazonaws.com:8081/repository/postboard-server"}, + `- name: NPM_PUBLISH_URL + description: "Maven repository url to where jenkins will upload releases artifacts" + required: true + value: "http://nexus3.my-company.tk:8081/repository/npm-releases/" +- name: NPM_PUBLISH_TOKEN + description: "Npm user used when upload artifacts" + required: true + value: "NpmToken.b5505337-ffb2-3fac-8b3a-fcd81b8bb8fb"`: {"http://", "nexus3.my-company.tk:8081/repository/npm-releases"}, + `
1
2
3
[root@nexus3 ~]# cat ~/.npmrc
registry=http://registry.blog.co/repository/npm-group/
//registry.blog.co/repository/npm-group/:_authToken=NpmToken.72b83be3-4b24-3dd1-850f-056cd78bb513
`: {"http://", "registry.blog.co/repository/npm-group"}, + }, + gitlab: { + `"https://gitlab.matrix.org/api/v4/projects/27/packages/npm/@matrix-org/olm/-/@matrix-org/olm-3.2.3.tgz",`: {"https://", "gitlab.matrix.org/api/v4/projects/27/packages/npm"}, + "https://gitlab.com/api/v4/groups/123456/-/packages/npm/": {"https://", "gitlab.com/api/v4/groups/123456/-/packages/npm"}, // couldn't find a real example of this + }, + github: { + "https://npm.pkg.github.com/": {"https://", "npm.pkg.github.com"}, + "https://npm.pkg.github.com/company": {"https://", "npm.pkg.github.com"}, + }, + azure: { + "//pkgs.dev.azure.com/company/_packaging/feed/npm/": {"//", "pkgs.dev.azure.com/company/_packaging/feed/npm"}, + "https://pkgs.dev.azure.com/company/project/_packaging/feed/npm/registry/": {"https://", "pkgs.dev.azure.com/company/project/_packaging/feed/npm/registry"}, + "https://pkgs.dev.azure.com/company/project/_packaging/feed/npm/registry": {"https://", "pkgs.dev.azure.com/company/project/_packaging/feed/npm/registry"}, + "//pkgs.dev.azure.com/company/b675ba30-3f64-43c8-b35d-79c162dc3fd7/_packaging/feed/npm/": {"//", "pkgs.dev.azure.com/company/b675ba30-3f64-43c8-b35d-79c162dc3fd7/_packaging/feed/npm"}, + "//fso-to.pkgs.visualstudio.com/7bc545d8-bf8c-477e-bb91-17a982c30c2e/_packaging/feed/npm/registry/": {"//", "fso-to.pkgs.visualstudio.com/7bc545d8-bf8c-477e-bb91-17a982c30c2e/_packaging/feed/npm/registry"}, + "//company.pkgs.visualstudio.com/project/_packaging/feed/npm/registry/": {"//", "company.pkgs.visualstudio.com/project/_packaging/feed/npm/registry"}, + "//company.pkgs.visualstudio.com/_packaging/feed/npm/registry/:username=bart": {"//", "company.pkgs.visualstudio.com/_packaging/feed/npm/registry"}, + }, + jetbrains: { + "//npm.pkg.jetbrains.space/multiplier/p/multiplier/npm/": {"//", "npm.pkg.jetbrains.space/multiplier/p/multiplier/npm"}, + "https://npm.pkg.jetbrains.space/nridwan/p/main/npmempty/": {"https://", "npm.pkg.jetbrains.space/nridwan/p/main/npmempty"}, + "https://npm.pkg.jetbrains.space/public/p/jetbrains-gamedev/jetbrains-gamedev/": {"https://", "npm.pkg.jetbrains.space/public/p/jetbrains-gamedev/jetbrains-gamedev"}, + }, + googleArtifactRegistry: { + "https://us-west1-npm.pkg.dev/company/project": {"https://", "us-west1-npm.pkg.dev/company/project"}, + "https://npm.pkg.dev/company/project": {"https://", "npm.pkg.dev/company/project"}, + "//europe-west4-npm.pkg.dev/foleon-staging/foleon-libs/:username=oauth2accesstoken": {"//", "europe-west4-npm.pkg.dev/foleon-staging/foleon-libs"}, + }, + gemfury: { + "//npm.fury.io/dependabot/": {"//", "npm.fury.io/dependabot"}, + }, + } + for group, inputs := range cases { + t.Run(group.String(), func(t *testing.T) { + for input, expected := range inputs { + matches := knownRegistryPat.FindStringSubmatch(input) + if len(matches) == 0 { + t.Errorf("no result for %s", input) + return + } + + index, uri := firstNonEmptyMatch(matches, 2) + rType := registryType(index - 1) + if rType != group { + t.Errorf("expected type %s, got %s (%s)", group.String(), rType.String(), input) + } + if matches[1] != expected[0] { + t.Errorf("expected prefix %s, got %s (%s)", expected[0], matches[1], input) + } + if uri != expected[1] { + t.Errorf("expected uri %s, got %s (%s)", expected[1], uri, input) + } + } + }) + } +} + +func TestNpm_GenericRegistryPat(t *testing.T) { + // TODO: Support localhost and other names? + cases := map[string]string{ + // .npmrc + "registry = https://npm.company.de:4873/": "https://npm.company.de:4873", + "registry=https://registry.npm.taobao.org/": "https://registry.npm.taobao.org", + `"registry" "https://registry.npmmirror.com/"`: "https://registry.npmmirror.com", + `@company:registry="https://npm.company.io"`: "https://npm.company.io", + "@marketwall:registry=http://10.0.0.13:4873": "http://10.0.0.13:4873", + `"@fortawesome:registry" "https://npm.fontawesome.com/"`: "https://npm.fontawesome.com", + "@example=https://api.bintray.example/npm/mycompany/myregistry": "https://api.bintray.example/npm/mycompany/myregistry", + `"@example" "https://api.bintray.example/npm/mycompany/myregistry"`: "https://api.bintray.example/npm/mycompany/myregistry", + "//npm.company.com/:_authToken='fake123'": "//npm.company.com", + "//registry-node.company.com/org/1123600651823311/registry/supermap/:_password=123fake": "//registry-node.company.com/org/1123600651823311/registry/supermap", + `"//npm.fontawesome.com/:_authToken" "XXXXXXX-my-token"`: "//npm.fontawesome.com", + `registry=http://55825a54e4454.registry.net:8443/`: "http://55825a54e4454.registry.net:8443", + // yarnrc.yml + `npmScopes: + "my-company": + npmAlwaysAuth: true + npmAuthToken: xxx-xxx + npmRegistryServer: "https://repo.company.org/npm"`: "https://repo.company.org/npm", + ` await fixture.exec("yarn config set npmRegistryServer http://npm.corp.xyz:8080");`: "http://npm.corp.xyz:8080", + `yarn config set npmScopes --json '{ "storybook": { "npmRegistryServer": "http://repo.company.org:6001/" } }'`: "http://repo.company.org:6001", + `yarn config set npmScopes.my-org.npmRegistryServer "https://repo.company.org/npm/nested"`: "https://repo.company.org/npm/nested", + ` npmScopes: + company: + npmRegistryServer: '${METAMASK_NPM_REGISTRY:-https://your.company.com/private/registry}'`: "https://your.company.com/private/registry", + // upmconfig.toml + `[npmAuth."https://api.bintray.com/npm/joe-company/my-registry"]`: "https://api.bintray.com/npm/joe-company/my-registry", + `echo "[npmAuth.'https://your.company.com/private/registry/']" >> ~/.upmconfig.toml`: "https://your.company.com/private/registry", + } + for input, expected := range cases { + if knownRegistryPat.MatchString(input) { + t.Errorf("matches |knownRegistryPat|: %s", input) + continue + } + + matches := genericRegistryPat.FindStringSubmatch(input) + if len(matches) == 0 { + t.Errorf("received no matches for '%s'\n", input) + continue + } + + _, match := firstNonEmptyMatch(matches, 1) + if match != expected { + t.Errorf("expected '%s', got '%s'\n\t(%s)", expected, match, input) + } + } +} + +func TestNpm_FindTokenRegistry(t *testing.T) { + cases := map[string]struct { + data string + token string + expected *registryInfo + }{ + ".npmrc / _auth / top-level / no registry": { + data: "_auth = \"cGFzc3dvcmQ=\"\nemail = john.doe@example.com", + token: "cGFzc3dvcmQ=", + expected: nil, + }, + // TODO: Associate top-level auth with top-level registry. + //".npmrc / _auth / top-level / registry": { + // input: "_auth = \"cGFzc3dvcmQ=\"\nalways-auth = true\nregistry=https://nexus.company.com/repository/npm-group/", + // token: "cGFzc3dvcmQ=", + // expected: ®istryInfo{ + // RegistryType: nexusRepo3, + // Scheme: httpsScheme, + // Uri: "nexus.company.com/repository/npm-group", + // }, + //}, + ".npmrc / _auth / scoped / registry": { + data: "\"//artifactory.company.com/artifactory/api/npm/npm/:_auth\"=cGFzc3dvcmQ=\n", + token: "cGFzc3dvcmQ=", + expected: ®istryInfo{ + RegistryType: artifactoryHosted, + Scheme: unknown, + Uri: "artifactory.company.com/artifactory/api/npm/npm", + }, + }, + + ".npmrc / _authToken / registry": { + data: `"//artifactory.company.com/artifactory/api/npm/npm/:_authToken" "=cGFzc3dvcmQ="`, + token: "cGFzc3dvcmQ=", + expected: ®istryInfo{ + RegistryType: artifactoryHosted, + Scheme: unknown, + Uri: "artifactory.company.com/artifactory/api/npm/npm", + }, + }, + "cli / _authToken / registry": { + data: "npm config set @company:registry=https://npm.pkg.github.com/\nnpm config set //npm.pkg.github.com/:_authToken=ghp_sS3gaQUHaXSdwojeksTlaIAgJ7jWsn4D7gPO\n", + token: "ghp_sS3gaQUHaXSdwojeksTlaIAgJ7jWsn4D7gPO", + expected: ®istryInfo{ + RegistryType: github, + Scheme: isHttps, + Uri: "npm.pkg.github.com", + }, + }, + "cli / _authToken / multiple registries": { + data: "npm config set @other:registry=https://npm.pkg.github.com/\nnpm config set //npm.pkg.github.com/:_authToken=ghp_sS3gaQUHaXSdwojeksTlaIAgJ7jWsn4D7gPO\nnpm config set \"@fortawesome:registry\" https://npm.fontawesome.com/\nnpm config set \"//npm.fontawesome.com/:_authToken\" cGFzc3dvcmQ=", + token: "cGFzc3dvcmQ=", + expected: ®istryInfo{ + RegistryType: other, + Scheme: isHttps, + Uri: "npm.fontawesome.com", + }, + }, + } + + for name, test := range cases { + t.Run(name, func(t *testing.T) { + actual := findTokenRegistry(test.data, test.token) + + ignoreOpts := cmpopts.IgnoreFields(registryInfo{}) + if diff := cmp.Diff(test.expected, actual, ignoreOpts); diff != "" { + t.Errorf("diff: (-expected +actual)\n%s", diff) + } + }) + } +} + +type registryTestCase struct { + input string + expected *registryInfo +} + +func TestNpm_FindAllRegistryURLs_Known(t *testing.T) { + cases := map[string]registryTestCase{ + "npm - default": { + input: `NpmToken.35ea93c4-8c57-4a7c-8526-115b9eeeab8a`, + expected: ®istryInfo{ + RegistryType: npm, + Scheme: isHttps, + Uri: "registry.npmjs.org", + }, + }, + "npm": { + input: "//registry.npmjs.org/:_authToken=cGFzc3dvcmQ=", + expected: ®istryInfo{ + RegistryType: npm, + Scheme: isHttps, + Uri: "registry.npmjs.org", + }, + }, + "artifactoryHosted": { + input: `//repo.company.com/artifactory/api/npm/npm-repo/:_password=cGFzc3dvcmQ=`, + expected: ®istryInfo{ + RegistryType: artifactoryHosted, + Uri: "repo.company.com/artifactory/api/npm/npm-repo", + }, + }, + "artifactoryCloud": { + input: `//company.jfrog.io/company/api/npm/npm/:_authToken=cGFzc3dvcmQ=`, + expected: ®istryInfo{ + RegistryType: artifactoryCloud, + Scheme: isHttps, + Uri: "company.jfrog.io/company/api/npm/npm", + }, + }, + "nexusRepo2 - repository": { + input: `//nexus.company.org:8081/nexus/content/repositories/npm`, + expected: ®istryInfo{ + RegistryType: nexusRepo2, + Uri: "nexus.company.org:8081/nexus/content/repositories/npm", + }, + }, + "nexusRepo2 - group": { + input: `//nexus.company.org:8081/nexus/content/groups/npm`, + expected: ®istryInfo{ + RegistryType: nexusRepo2, + Uri: "nexus.company.org:8081/nexus/content/groups/npm", + }, + }, + "nexusRepo3": { + input: `//nexus.company.com/repository/npm-proxy`, + expected: ®istryInfo{ + RegistryType: nexusRepo3, + Uri: "nexus.company.com/repository/npm-proxy", + }, + }, + "gitlab - project": { + input: `//gitlab.matrix.org/api/v4/projects/27/packages/npm/`, + expected: ®istryInfo{ + RegistryType: gitlab, + Uri: "gitlab.matrix.org/api/v4/projects/27/packages/npm", + }, + }, + "gitlab - group": { + input: `//gitlab.com/api/v4/groups/1234/-/packages/npm/`, + expected: ®istryInfo{ + RegistryType: gitlab, + Uri: "gitlab.com/api/v4/groups/1234/-/packages/npm", + }, + }, + // This is apparently a thing? No idea, found it in the wild though. + "gitlab - top-level": { + input: `"//code.company.com/api/v4/packages/npm/:_authToken" "ZENNP-123456789"`, + expected: ®istryInfo{ + RegistryType: gitlab, + Uri: "code.company.com/api/v4/packages/npm", + }, + }, + "github": { + input: `//npm.pkg.github.com/`, + expected: ®istryInfo{ + RegistryType: github, + Scheme: isHttps, + Uri: "npm.pkg.github.com", + }, + }, + "azure - org": { + input: `//pkgs.dev.azure.com/company/_packaging/feed/npm/registry/`, + expected: ®istryInfo{ + RegistryType: azure, + Scheme: isHttps, + Uri: "pkgs.dev.azure.com/company/_packaging/feed/npm/registry", + }, + }, + "azure - repo": { + input: `//pkgs.dev.azure.com/company/project/_packaging/feed/npm/`, + expected: ®istryInfo{ + RegistryType: azure, + Scheme: isHttps, + Uri: "pkgs.dev.azure.com/company/project/_packaging/feed/npm/registry", + }, + }, + "azure - visualstudio": { + input: `//company.pkgs.visualstudio.com/05337347-30ac-46d4-b46f-5f5cb80c6818/_packaging/feed/npm/registry/`, + expected: ®istryInfo{ + RegistryType: azure, + Scheme: isHttps, + Uri: "company.pkgs.visualstudio.com/05337347-30ac-46d4-b46f-5f5cb80c6818/_packaging/feed/npm/registry", + }, + }, + "google artifact registry": { + input: `@rbl:registry=https://us-central1-npm.pkg.dev/company/project/ +//us-central1-npm.pkg.dev/company/project/:_authToken="ya29.A0ARrdaM9VpQcc5egcSN7zzEGQLzvz5jZiXEkIDmnsV2RW3KBbhbq8qkRHMUcC6gxknE9LuDW3mt4Dz3teWYXfI-4WGr6_mTQqj60BhAg4sPA7wov7PM-E3QonNwTN9De41ARPJUyvfc8Mi2GVoYzle3MJ_8KNYo4" +//us-central1-npm.pkg.dev/company/project/:always-auth=true`, + expected: ®istryInfo{ + RegistryType: googleArtifactRegistry, + Scheme: isHttps, + Uri: "us-central1-npm.pkg.dev/company/project", + }, + }, + "jetbrains": { + input: `//npm.pkg.jetbrains.space/company/p/project/repo/`, + expected: ®istryInfo{ + RegistryType: jetbrains, + Scheme: isHttps, + Uri: "npm.pkg.jetbrains.space/company/p/project/repo", + }, + }, + "gemfury": { + input: `//npm.fury.io/user/`, + expected: ®istryInfo{ + RegistryType: gemfury, + Scheme: isHttps, + Uri: "npm.fury.io/user", + }, + }, + } + + for name, tCase := range cases { + expected := *tCase.expected + + schemes := [...]scheme{unknown, isHttp, isHttps} + for _, scheme := range schemes { + var ( + expected = expected + uri = expected.Uri + input string + ) + + if expected.Scheme == unknown { + expected.Scheme = scheme + } + + if scheme == unknown { + input = tCase.input + } else if scheme == isHttp { + input = fmt.Sprintf("registry=http://%s/\n%s", uri, tCase.input) + } else { + input = fmt.Sprintf("registry=https://%s/\n%s", uri, tCase.input) + } + + t.Run(fmt.Sprintf("%s - %s", name, scheme.String()), func(t *testing.T) { + urls := findAllRegistryURLs(input) + if len(urls) != 1 { + t.Errorf("expected 1 result, got %d", len(urls)) + return + } + + var actual registryInfo + for _, i := range urls { + actual = *i + } + + if diff := cmp.Diff(expected, actual); diff != "" { + t.Errorf("diff: (-expected +actual)\n%s", diff) + } + }) + } + } +} + +func TestNpm_FindAllRegistryURLs_Unknown(t *testing.T) { + cases := map[string]registryTestCase{ + "nothing - default": { + input: `NpmToken.35ea93c4-8c57-4a7c-8526-115b9eeeab8a`, + expected: defaultRegistryInfo, + }, + "package.json - publishConfig": { + input: `"\"publishConfig\": {\n \"registry\": \"http://repository.dsv.myhost/npmjs\"\n },`, + expected: ®istryInfo{ + RegistryType: other, + Scheme: isHttp, + Uri: "repository.dsv.myhost/npmjs", + }, + }, + "cli - publish registry flag": { + input: `//npm publish --registry http://ec2-18-223-132-112.us-east-2.compute.amazonaws.com:8081/npm/`, + expected: ®istryInfo{ + RegistryType: other, + Scheme: isHttp, + Uri: "ec2-18-223-132-112.us-east-2.compute.amazonaws.com:8081/npm", + }, + }, + "cli - publish scoped registry flag": { + input: `//npm publish --@myscope:registry=http://internal.company.com/packages/npmjs-registry/`, + expected: ®istryInfo{ + RegistryType: other, + Scheme: isHttp, + Uri: "internal.company.com/packages/npmjs-registry", + }, + }, + "cli - config registry": { + input: `npm config set registry "https://npm.company.com/"`, + expected: ®istryInfo{ + RegistryType: other, + Scheme: isHttps, + Uri: "npm.company.com", + }, + }, + "cli - config scope registry": { + input: `npm config set "@company:registry" "https://npm.company.com/"`, + expected: ®istryInfo{ + RegistryType: other, + Scheme: isHttps, + Uri: "npm.company.com", + }, + }, + "cli - config authToken": { + input: `npm config set "//npm.company.com/:_authToken" token123`, + expected: ®istryInfo{ + RegistryType: other, + Scheme: unknown, + Uri: "npm.company.com", + }, + }, + ".npmrc - registry": { + input: `"registry=https://npm.company.com/`, + expected: ®istryInfo{ + RegistryType: other, + Scheme: isHttps, + Uri: "npm.company.com", + }, + }, + ".npmrc - scope registry": { + input: `@company:registry = https://repo.company.com:8443/`, + expected: ®istryInfo{ + RegistryType: other, + Scheme: isHttps, + Uri: "repo.company.com:8443", + }, + }, + ".npmrc - scope registry, no equals": { + input: `"@company:registry" "https://artifacts.company.com/npm/"`, + expected: ®istryInfo{ + RegistryType: other, + Scheme: isHttps, + Uri: "artifacts.company.com/npm", + }, + }, + ".npmrc - scope": { + input: `@company = "https://repo.company.com/"`, + expected: ®istryInfo{ + RegistryType: other, + Scheme: isHttps, + Uri: "repo.company.com", + }, + }, + ".npmrc - _auth": { + input: `"//npm.company.com/:_auth" = "cGFzc3dvcmQ="`, + expected: ®istryInfo{ + RegistryType: other, + Scheme: unknown, + Uri: "npm.company.com", + }, + }, + ".npmrc - _auth with https context": { + input: `"//npm.company.com/:_auth" = "cGFzc3dvcmQ=" +registry=https://npm.company.com/`, + expected: ®istryInfo{ + RegistryType: other, + Scheme: isHttps, + Uri: "npm.company.com", + }, + }, + ".npmrc - _auth with http context": { + input: `"//npm.company.com/:_auth" = "cGFzc3dvcmQ=" +registry=http://npm.company.com/`, + expected: ®istryInfo{ + RegistryType: other, + Scheme: isHttp, + Uri: "npm.company.com", + }, + }, + ".npmrc - _password": { + input: `//npm.company.com/:_password=cGFzc3dvcmQ=`, + expected: ®istryInfo{ + RegistryType: other, + Scheme: unknown, + Uri: "npm.company.com", + }, + }, + // https://docs.unity3d.com/Manual/upm-config-scoped.html + ".upmconfig.toml": { + input: `[npmAuth."https://api.bintray.example/npm/mycompany/myregistry"]`, + expected: ®istryInfo{ + RegistryType: other, + Scheme: isHttps, + Uri: "api.bintray.example/npm/mycompany/myregistry", + }, + }, + // TODO: https://github.com/renovatebot/renovate/blob/075a96c00aa53ede32576e924fe81b040789fc14/docs/usage/getting-started/private-packages.md + //"renovatebot": { + // input: ` matchHost: 'https://packages.my-company.com/myregistry/',`, + // expected: ®istryInfo{ + // RegistryType: other, + // Scheme: isHttps, + // Uri: "packages.my-company.com/myregistry", + // }, + //}, + } + + for name, tCase := range cases { + t.Run(name, func(t *testing.T) { + urls := findAllRegistryURLs(tCase.input) + if len(urls) != 1 { + t.Errorf("expected 1 result for %s, got %d (%v)", tCase.input, len(urls), urls) + } + + var actualInfo *registryInfo + for _, i := range urls { + actualInfo = i + } + + if diff := cmp.Diff(tCase.expected, actualInfo); diff != "" { + t.Errorf("diff: (-expected +actual)\n%s", diff) + } + }) + } +} + +func TestNpm_ParseKnownRegistryUri(t *testing.T) { + cases := map[registryType]struct { + data string + uri string + expected *registryInfo + }{ + other: { + data: `//npm.fontawesome.com/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d`, + uri: "npm.fontawesome.com", + expected: nil, + }, + npm: { + data: `//registry.npmjs.org/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d`, + uri: "//registry.npmjs.org", + expected: ®istryInfo{ + Scheme: isHttps, + Uri: "registry.npmjs.org", + }, + }, + artifactoryCloud: { + data: `//company.jfrog.io/company/api/npm/npm/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d`, + uri: "//company.jfrog.io/company/api/npm/npm", + expected: ®istryInfo{ + Scheme: isHttps, + Uri: "company.jfrog.io/company/api/npm/npm", + }, + }, + artifactoryHosted: { + data: "registry=http://artifactory.internal-dev.company.net/artifactory/api/npm/npm/\n//artifactory.internal-dev.company.net/artifactory/api/npm/npm/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d", + uri: "//artifactory.internal-dev.company.net/artifactory/api/npm/npm", + expected: ®istryInfo{ + Scheme: isHttp, + Uri: "artifactory.internal-dev.company.net/artifactory/api/npm/npm", + }, + }, + nexusRepo2: { + data: "registry=http://nexus.corp.org/nexus/content/repositories/npm-group/\n//nexus.corp.org/nexus/content/repositories/npm-group/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d", + uri: "//nexus.corp.org/nexus/content/repositories/npm-group", + expected: ®istryInfo{ + Scheme: isHttp, + Uri: "nexus.corp.org/nexus/content/repositories/npm-group", + }, + }, + nexusRepo3: { + data: "registry=https://nexus.corp.org/repository/npm-hosted/\n//nexus.corp.org/repository/npm-hosted/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d", + uri: "//nexus.corp.org/repository/npm-hosted", + expected: ®istryInfo{ + Scheme: isHttps, + Uri: "nexus.corp.org/repository/npm-hosted", + }, + }, + gitlab: { + data: "@company:registry=https://gitlab.com/api/v4/projects/12354452/packages/npm/\n//gitlab.com/api/v4/projects/12354452/packages/npm/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d", + uri: "//gitlab.com/api/v4/projects/12354452/packages/npm", + expected: ®istryInfo{ + Scheme: isHttps, + Uri: "gitlab.com/api/v4/projects/12354452/packages/npm", + }, + }, + github: { + data: `//npm.pkg.github.com/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d`, + uri: "//npm.pkg.github.com", + expected: ®istryInfo{ + Scheme: isHttps, + Uri: "npm.pkg.github.com", + }, + }, + azure: { + data: `//pkgs.dev.azure.com/company/project/_packaging/feed/npm/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d`, + uri: "//pkgs.dev.azure.com/company/project/_packaging/feed/npm", + expected: ®istryInfo{ + Scheme: isHttps, + Uri: "pkgs.dev.azure.com/company/project/_packaging/feed/npm/registry", + }, + }, + jetbrains: { + data: `//npm.pkg.jetbrains.space/public/p/jetbrains-gamedev/jetbrains-gamedev/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d`, + uri: "//npm.pkg.jetbrains.space/public/p/jetbrains-gamedev/jetbrains-gamedev", + expected: ®istryInfo{ + Scheme: isHttps, + Uri: "npm.pkg.jetbrains.space/public/p/jetbrains-gamedev/jetbrains-gamedev", + }, + }, + googleArtifactRegistry: { + data: `//us-east1-npm.pkg.dev/company-dev-167118/project/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d`, + uri: "//us-east1-npm.pkg.dev/company-dev-167118/project", + expected: ®istryInfo{ + Scheme: isHttps, + Uri: "us-east1-npm.pkg.dev/company-dev-167118/project", + }, + }, + gemfury: { + data: `//npm-proxy.fury.io/user/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d`, + uri: "//npm-proxy.fury.io/user", + expected: ®istryInfo{ + Scheme: isHttps, + Uri: "npm-proxy.fury.io/user", + }, + }, + } + + for group, c := range cases { + t.Run(group.String(), func(t *testing.T) { + actual := parseKnownRegistryURI(c.data, c.uri) + if actual == nil { + if c.expected != nil { + t.Errorf("no result for %s", c.data) + } + return + } + + c.expected.RegistryType = group + if diff := cmp.Diff(c.expected, actual); diff != "" { + t.Errorf("diff: (-expected +actual)\n%s", diff) + } + }) + } +} + +func TestNpm_ParseUnknownRegistryUri(t *testing.T) { + // Not exhaustive, parseUnknownRegistryURI doesn't do much. + cases := []struct { + data string + uri string + expected *registryInfo + }{ + { + data: `//npm.fontawesome.com/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d`, + uri: "npm.fontawesome.com", + expected: ®istryInfo{ + RegistryType: other, + Scheme: unknown, + Uri: "npm.fontawesome.com", + }, + }, + { + data: "@fortawesome:registry=https://npm.fontawesome.com\n//npm.fontawesome.com/:_authToken=e7da2cb5-b625-4aa1-8baf-291a8dfd037d", + uri: "npm.fontawesome.com", + expected: ®istryInfo{ + RegistryType: other, + Scheme: isHttps, + Uri: "npm.fontawesome.com", + }, + }, + } + + for _, c := range cases { + actual := parseUnknownRegistryURI(c.data, c.uri) + if actual == nil { + t.Errorf("no result for %s", c.data) + continue + } + + if diff := cmp.Diff(c.expected, actual); diff != "" { + t.Errorf("diff: (-expected +actual)\n%s", diff) + } + } +} + +func TestNpm_ParseRegistryURLScheme(t *testing.T) { + cases := []struct { + data string + uri string + expectedScheme scheme + expectedUri string + }{ + { + data: `registry=HTTPS://NPM.EXAMPLE.COM`, + uri: "HTTPS://NPM.EXAMPLE.COM", + expectedScheme: isHttps, + expectedUri: "NPM.EXAMPLE.COM", + }, + { + data: `registry=http://npm.example.com/`, + uri: "http://npm.example.com", + expectedScheme: isHttp, + expectedUri: "npm.example.com", + }, + { + data: `//repo.example.com/project/npm/:_authToken=abc123`, + uri: "repo.example.com/project/npm", + expectedScheme: unknown, + expectedUri: "repo.example.com/project/npm", + }, + { + data: `repo.example.com/project/npm`, + uri: "repo.example.com/project/npm", + expectedScheme: unknown, + expectedUri: "repo.example.com/project/npm", + }, + { + data: "registry=httpS://repo.example.com/project/npm\n//repo.example.com/project/npm/:_authToken=abc123", + uri: "repo.example.com/project/npm", + expectedScheme: isHttps, + expectedUri: "repo.example.com/project/npm", + }, + { + data: "registry=htTp://repo.example.com/project/npm\n//repo.example.com/project/npm/:_authToken=abc123", + uri: "repo.example.com/project/npm", + expectedScheme: isHttp, + expectedUri: "repo.example.com/project/npm", + }, + } + + for _, c := range cases { + actualScheme, actualUri := parseRegistryURLScheme(c.data, c.uri) + if actualScheme != c.expectedScheme { + t.Errorf("scheme: expected=%s, actual=%s", c.expectedScheme, actualScheme) + } + if actualUri != c.expectedUri { + t.Errorf("uri: expected=%s, actual=%s", c.expectedUri, actualUri) + } + } +} diff --git a/pkg/detectors/npmtoken/npmtoken.go b/pkg/detectors/npmtoken/npmtoken.go deleted file mode 100644 index 2a624045f55d..000000000000 --- a/pkg/detectors/npmtoken/npmtoken.go +++ /dev/null @@ -1,82 +0,0 @@ -package npmtoken - -import ( - "context" - "fmt" - "net/http" - "regexp" - "strings" - - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" -) - -type Scanner struct{} - -// Ensure the Scanner satisfies the interfaces at compile time. -var _ detectors.Detector = (*Scanner)(nil) -var _ detectors.Versioner = (*Scanner)(nil) - -func (s Scanner) Version() int { return 1 } - -var ( - client = common.SaneHttpClient() - - // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. - keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"npm"}) + `\b([0-9Aa-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\b`) -) - -// Keywords are used for efficiently pre-filtering chunks. -// Use identifiers in the secret preferably, or the provider name. -func (s Scanner) Keywords() []string { - return []string{"npm"} -} - -// FromData will find and optionally verify NpmToken secrets in a given set of bytes. -func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { - dataStr := string(data) - matches := keyPat.FindAllStringSubmatch(dataStr, -1) - for _, match := range matches { - if len(match) != 2 { - continue - } - resMatch := strings.TrimSpace(match[1]) - - s1 := detectors.Result{ - DetectorType: detectorspb.DetectorType_NpmToken, - Raw: []byte(resMatch), - } - s1.ExtraData = map[string]string{ - "rotation_guide": "https://howtorotate.com/docs/tutorials/npm/", - } - - if verify { - req, err := http.NewRequestWithContext(ctx, "GET", "https://registry.npmjs.org/-/whoami", nil) - if err != nil { - continue - } - req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", resMatch)) - res, err := client.Do(req) - if err == nil { - defer res.Body.Close() - if res.StatusCode >= 200 && res.StatusCode < 300 { - s1.Verified = true - } else { - // This function will check false positives for common test words, but also it will make sure the key appears 'random' enough to be a real key. - if detectors.IsKnownFalsePositive(resMatch, detectors.DefaultFalsePositives, true) { - continue - } - } - } - } - - results = append(results, s1) - } - - return -} - -func (s Scanner) Type() detectorspb.DetectorType { - return detectorspb.DetectorType_NpmToken -} diff --git a/pkg/detectors/npmtoken/npmtoken_test.go b/pkg/detectors/npmtoken/npmtoken_test.go deleted file mode 100644 index a2d2b2357366..000000000000 --- a/pkg/detectors/npmtoken/npmtoken_test.go +++ /dev/null @@ -1,120 +0,0 @@ -//go:build detectors -// +build detectors - -package npmtoken - -import ( - "context" - "fmt" - "testing" - "time" - - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" -) - -func TestNpmToken_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("NPMTOKEN") - inactiveSecret := testSecrets.MustGetField("NPMTOKEN_INACTIVE") - - type args struct { - ctx context.Context - data []byte - verify bool - } - tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool - }{ - { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a npmtoken secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_NpmToken, - Verified: true, - }, - }, - wantErr: false, - }, - { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a npmtoken secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_NpmToken, - Verified: false, - }, - }, - wantErr: false, - }, - { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("NpmToken.FromData() error = %v, wantErr %v", err, tt.wantErr) - return - } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil - } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("NpmToken.FromData() %s diff: (-got +want)\n%s", tt.name, diff) - } - }) - } -} - -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) - } - } - }) - } -} diff --git a/pkg/detectors/npmtokenv2/npmtokenv2.go b/pkg/detectors/npmtokenv2/npmtokenv2.go deleted file mode 100644 index 56b2ef036e2a..000000000000 --- a/pkg/detectors/npmtokenv2/npmtokenv2.go +++ /dev/null @@ -1,83 +0,0 @@ -package npmtokenv2 - -import ( - "context" - "fmt" - "net/http" - "regexp" - - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" -) - -type Scanner struct{} - -// Ensure the Scanner satisfies the interfaces at compile time. -var _ detectors.Detector = (*Scanner)(nil) -var _ detectors.Versioner = (*Scanner)(nil) - -func (s Scanner) Version() int { return 2 } - -var ( - client = common.SaneHttpClient() - - // Make sure that your group is surrounded in boundary characters such as below to reduce false positives. - keyPat = regexp.MustCompile(`(npm_[0-9a-zA-Z]{36})`) -) - -// Keywords are used for efficiently pre-filtering chunks. -// Use identifiers in the secret preferably, or the provider name. -func (s Scanner) Keywords() []string { - return []string{"npm_"} -} - -// FromData will find and optionally verify NpmTokenV2 secrets in a given set of bytes. -func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) { - dataStr := string(data) - - matches := keyPat.FindAllStringSubmatch(dataStr, -1) - - for _, match := range matches { - if len(match) != 2 { - continue - } - resMatch := match[1] - - s1 := detectors.Result{ - DetectorType: detectorspb.DetectorType_NpmToken, - Raw: []byte(resMatch), - } - s1.ExtraData = map[string]string{ - "rotation_guide": "https://howtorotate.com/docs/tutorials/npm/", - } - - if verify { - req, err := http.NewRequestWithContext(ctx, "GET", "https://registry.npmjs.org/-/whoami", nil) - if err != nil { - continue - } - req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", resMatch)) - res, err := client.Do(req) - if err == nil { - defer res.Body.Close() - if res.StatusCode >= 200 && res.StatusCode < 300 { - s1.Verified = true - } else { - // This function will check false positives for common test words, but also it will make sure the key appears 'random' enough to be a real key. - if detectors.IsKnownFalsePositive(resMatch, detectors.DefaultFalsePositives, true) { - continue - } - } - } - } - - results = append(results, s1) - } - - return -} - -func (s Scanner) Type() detectorspb.DetectorType { - return detectorspb.DetectorType_NpmToken -} diff --git a/pkg/detectors/npmtokenv2/npmtokenv2_test.go b/pkg/detectors/npmtokenv2/npmtokenv2_test.go deleted file mode 100644 index 0a9d285c8d2e..000000000000 --- a/pkg/detectors/npmtokenv2/npmtokenv2_test.go +++ /dev/null @@ -1,120 +0,0 @@ -//go:build detectors -// +build detectors - -package npmtokenv2 - -import ( - "context" - "fmt" - "testing" - "time" - - "github.com/kylelemons/godebug/pretty" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" - - "github.com/trufflesecurity/trufflehog/v3/pkg/common" - "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" -) - -func TestNpmToken_New_FromChunk(t *testing.T) { - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) - defer cancel() - testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2") - if err != nil { - t.Fatalf("could not get test secrets from GCP: %s", err) - } - secret := testSecrets.MustGetField("NPMTOKEN_NEW") - inactiveSecret := testSecrets.MustGetField("NPMTOKEN_NEW_INACTIVE") - - type args struct { - ctx context.Context - data []byte - verify bool - } - tests := []struct { - name string - s Scanner - args args - want []detectors.Result - wantErr bool - }{ - { - name: "found, verified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a npmtoken_new secret %s within", secret)), - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_NpmToken, - Verified: true, - }, - }, - wantErr: false, - }, - { - name: "found, unverified", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte(fmt.Sprintf("You can find a npmtoken_new secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation - verify: true, - }, - want: []detectors.Result{ - { - DetectorType: detectorspb.DetectorType_NpmToken, - Verified: false, - }, - }, - wantErr: false, - }, - { - name: "not found", - s: Scanner{}, - args: args{ - ctx: context.Background(), - data: []byte("You cannot find the secret within"), - verify: true, - }, - want: nil, - wantErr: false, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - s := Scanner{} - got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data) - if (err != nil) != tt.wantErr { - t.Errorf("NpmToken_New.FromData() error = %v, wantErr %v", err, tt.wantErr) - return - } - for i := range got { - if len(got[i].Raw) == 0 { - t.Fatalf("no raw secret present: \n %+v", got[i]) - } - got[i].Raw = nil - } - if diff := pretty.Compare(got, tt.want); diff != "" { - t.Errorf("NpmToken_New.FromData() %s diff: (-got +want)\n%s", tt.name, diff) - } - }) - } -} - -func BenchmarkFromData(benchmark *testing.B) { - ctx := context.Background() - s := Scanner{} - for name, data := range detectors.MustGetBenchmarkData() { - benchmark.Run(name, func(b *testing.B) { - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := s.FromData(ctx, false, data) - if err != nil { - b.Fatal(err) - } - } - }) - } -} diff --git a/pkg/engine/ahocorasickcore.go b/pkg/engine/ahocorasick/ahocorasickcore.go similarity index 99% rename from pkg/engine/ahocorasickcore.go rename to pkg/engine/ahocorasick/ahocorasickcore.go index 19fda9d90f35..1dd35e29c994 100644 --- a/pkg/engine/ahocorasickcore.go +++ b/pkg/engine/ahocorasick/ahocorasickcore.go @@ -1,9 +1,10 @@ -package engine +package ahocorasick import ( "strings" ahocorasick "github.com/BobuSumisu/aho-corasick" + "github.com/trufflesecurity/trufflehog/v3/pkg/custom_detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" diff --git a/pkg/engine/ahocorasickcore_test.go b/pkg/engine/ahocorasick/ahocorasickcore_test.go similarity index 99% rename from pkg/engine/ahocorasickcore_test.go rename to pkg/engine/ahocorasick/ahocorasickcore_test.go index 3bf56a754de5..4e5229740385 100644 --- a/pkg/engine/ahocorasickcore_test.go +++ b/pkg/engine/ahocorasick/ahocorasickcore_test.go @@ -1,10 +1,11 @@ -package engine +package ahocorasick import ( "context" "testing" "github.com/stretchr/testify/assert" + "github.com/trufflesecurity/trufflehog/v3/pkg/custom_detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/custom_detectorspb" diff --git a/pkg/engine/defaults.go b/pkg/engine/defaults.go index aec2ef61dce3..d8f3a362357b 100644 --- a/pkg/engine/defaults.go +++ b/pkg/engine/defaults.go @@ -458,8 +458,7 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/noticeable" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/notion" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/nozbeteams" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/npmtoken" - "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/npmtokenv2" + "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/npm" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/nugetapikey" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/numverify" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/nutritionix" @@ -1528,8 +1527,9 @@ func DefaultDetectors() []detectors.Detector { ngc.Scanner{}, gemini.Scanner{}, digitaloceanv2.Scanner{}, - npmtoken.Scanner{}, - npmtokenv2.Scanner{}, + npm.ScannerGeneric{}, + npm.ScannerV1{}, + npm.ScannerV2{}, sqlserver.Scanner{}, redis.Scanner{}, ftp.Scanner{}, diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go index a8113d637c07..a780d837c8b8 100644 --- a/pkg/engine/engine.go +++ b/pkg/engine/engine.go @@ -16,6 +16,7 @@ import ( "github.com/trufflesecurity/trufflehog/v3/pkg/context" "github.com/trufflesecurity/trufflehog/v3/pkg/decoders" "github.com/trufflesecurity/trufflehog/v3/pkg/detectors" + "github.com/trufflesecurity/trufflehog/v3/pkg/engine/ahocorasick" "github.com/trufflesecurity/trufflehog/v3/pkg/giturl" "github.com/trufflesecurity/trufflehog/v3/pkg/output" "github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" @@ -64,7 +65,7 @@ type Engine struct { printAvgDetectorTime bool // ahoCorasickHandler manages the Aho-Corasick trie and related keyword lookups. - ahoCorasickCore *AhoCorasickCore + ahoCorasickCore *ahocorasick.AhoCorasickCore // Engine synchronization primitives. sourceManager *sources.SourceManager @@ -313,7 +314,7 @@ func (e *Engine) initialize(ctx context.Context, options ...Option) error { ctx.Logger().V(4).Info("engine initialized") ctx.Logger().V(4).Info("setting up aho-corasick core") - e.ahoCorasickCore = NewAhoCorasickCore(e.detectors) + e.ahoCorasickCore = ahocorasick.NewAhoCorasickCore(e.detectors) ctx.Logger().V(4).Info("set up aho-corasick core") return nil @@ -458,7 +459,7 @@ func (e *Engine) detectorWorker(ctx context.Context) { // Reuse the same map to avoid allocations. const avgDetectorsPerChunk = 2 - chunkSpecificDetectors := make(map[DetectorKey]detectors.Detector, avgDetectorsPerChunk) + chunkSpecificDetectors := make(map[ahocorasick.DetectorKey]detectors.Detector, avgDetectorsPerChunk) for originalChunk := range e.ChunksChan() { for chunk := range sources.Chunker(originalChunk) { atomic.AddUint64(&e.metrics.BytesScanned, uint64(len(chunk.Data))) @@ -576,11 +577,11 @@ func (e *Engine) notifyResults(ctx context.Context) { // want to include duplicate results with the same decoder type. // Duplicate results with the same decoder type SHOULD have their own entry in the // results list, this would happen if the same secret is found multiple times. - key := fmt.Sprintf("%s%s%s%+v", r.DetectorType.String(), r.Raw, r.RawV2, r.SourceMetadata) - if val, ok := e.dedupeCache.Get(key); ok { - if res, ok := val.(detectorspb.DecoderType); ok && res != r.DecoderType { - continue - } + key := fmt.Sprintf("%s%s%s", r.DetectorType.String(), r.Raw, r.RawV2) + if _, ok := e.dedupeCache.Get(key); ok { + //if res, ok := val.(detectorspb.DecoderType); ok && res != r.DecoderType { + continue + //} } e.dedupeCache.Add(key, r.DecoderType)