Skip to content

Commit

Permalink
feat(rules): detect Hugging Face access tokens (#1204)
Browse files Browse the repository at this point in the history
  • Loading branch information
rgmz committed Aug 24, 2023
1 parent db4bc0f commit 9fb36b2
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 0 deletions.
2 changes: 2 additions & 0 deletions cmd/generate/config/main.go
Expand Up @@ -89,6 +89,8 @@ func main() {
configRules = append(configRules, rules.Hashicorp())
configRules = append(configRules, rules.Heroku())
configRules = append(configRules, rules.HubSpot())
configRules = append(configRules, rules.HuggingFaceAccessToken())
configRules = append(configRules, rules.HuggingFaceOrganizationApiToken())
configRules = append(configRules, rules.Intercom())
configRules = append(configRules, rules.JFrogAPIKey())
configRules = append(configRules, rules.JFrogIdentityToken())
Expand Down
115 changes: 115 additions & 0 deletions cmd/generate/config/rules/huggingface.go
@@ -0,0 +1,115 @@
package rules

import (
"fmt"
"regexp"

"github.com/zricethezav/gitleaks/v8/cmd/generate/secrets"
"github.com/zricethezav/gitleaks/v8/config"
)

// HuggingFaceAccessToken builds the detection rule for Hugging Face access
// tokens: the literal prefix `hf_` followed by exactly 34 ASCII letters.
//
// Reference: https://huggingface.co/docs/hub/security-tokens
//
// Older tokens carry the prefix `api_` instead. They are deliberately left
// out of this rule, because matching such a generic prefix would probably
// produce a lot of noise.
// https://huggingface.co/docs/api-inference/quicktour
func HuggingFaceAccessToken() *config.Rule {
	// Capture group 1 is the token itself. It has to sit at the start of the
	// text or right after one of \ ' " ` space > = : and it has to be followed
	// by the end of the text or one of \ ' " ` space <.
	tokenPattern := `(?:^|[\\'"` + "`" + ` >=:])(hf_[a-zA-Z]{34})(?:$|[\\'"` + "`" + ` <])`

	rule := config.Rule{
		RuleID:      "huggingface-access-token",
		Description: "Hugging Face Access token",
		Regex:       regexp.MustCompile(tokenPattern),
		SecretGroup: 1,
		Entropy:     1,
		Keywords:    []string{"hf_"},
	}

	// Samples handed to validate (defined elsewhere in this package) as the
	// inputs that are presumably expected to be reported by the rule.
	truePositives := []string{
		`huggingface-cli login --token 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`,
		`huggingface-cli login --token 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`,
		`huggingface-cli login --token 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`,
		`huggingface-cli login --token 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`,
		`huggingface-cli login --token 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`,
		`huggingface-cli login --token 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`,
		`huggingface-cli login --token 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`,
		`huggingface-cli login --token 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`,
		`-H "Authorization: Bearer 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"`,
		`DEV=1 HF_TOKEN=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 poetry run python app.py`,
		`use_auth_token='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')`,
		`CI_HUB_USER_TOKEN = "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"`,
		`- Change line 5 and add your Hugging Face token, that is, instead of 'hf_token = "ADD_YOUR_HUGGING_FACE_TOKEN_HERE"', you will need to change it to something like'hf_token = "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"'`,
		` " hf_token = \"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\"\n",`,
		`# Not critical, only usable on the sandboxed CI instance.
TOKEN = "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"`,
		` parser.add_argument("--hf_token", type=str, default='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', help="Hugging Face Access Token to access PyAnnote gated models")`,
	}

	// Look-alike inputs (identifiers, sequencing data, binary noise and
	// placeholder tokens) that are presumably expected NOT to be reported.
	falsePositives := []string{
		`- (id)hf_requiredCharacteristicTypesForDisplayMetadata;`,
		`amazon.de#@#div[data-cel-widget="desktop-rhf_SponsoredProductsRemoteRHFSearchEXPSubsK2ClickPagination"]`,
		` _kHMSymptomhf_generatedByHomeAppForDebuggingPurposesKey,`,
		` #define OSCHF_DebugGetExpectedAverageCrystalAmplitude NOROM_OSCHF_DebugGetExpectedAverageCrystalAmplitude`,
		` M_UINT (ServingCellPriorityParametersDescription_t, H_PRIO, 2, &hf_servingcellpriorityparametersdescription_h_prio),`,
		`+HWI-ST565_0092:4:1101:5508:5860#ACTTGA/1
bb_eeeeegfgffhiiiiiiiiiiihiiiiicgafhf_eefghihhiiiifhifhhdhifhiiiihifdgdhggf\bbceceedbcd
@HWI-ST565_0092:4:1101:7621:5770#ACTTGA/1`,
		`y{}x|~|}{~}}~|~}||�~|�{��|{}{|~z{}{{|{||{|}|{}{~|y}vjoePbUBJ7&;"; <; :;?!!;<7%$IACa_ecghbfbaebejhahfbhf_ddbficghbgfbhhcghdghfhigiifhhehhdggcgfchf_fgcei^[[.40&54"5666 6`,
		` change_dir(cwd)
subdirs = glob.glob('HF_CAASIMULIAComputeServicesBuildTime.HF*.Linux64')
if len(subdirs) == 1:`,
		` os.environ.get("HF_AUTH_TOKEN",
"hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"),`,
		`# HuggingFace API Token https://huggingface.co/settings/tokens
HUGGINGFACE_API_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,`,
	}

	return validate(rule, truePositives, falsePositives)
}

// HuggingFaceOrganizationApiToken builds the detection rule for Hugging Face
// organization API tokens: the literal prefix `api_org_` followed by exactly
// 34 ASCII letters.
//
// Per the original author's note, these tokens were to be deprecated on
// Aug 1st, 2023.
func HuggingFaceOrganizationApiToken() *config.Rule {
	// Capture group 1 is the token itself. It has to sit at the start of the
	// text or right after one of \ ' " ` space > = : ( , ) and it has to be
	// followed by the end of the text or one of \ ' " ` space < ) ,
	tokenPattern := `(?:^|[\\'"` + "`" + ` >=:\(,)])(api_org_[a-zA-Z]{34})(?:$|[\\'"` + "`" + ` <\),])`

	rule := config.Rule{
		RuleID:      "huggingface-organization-api-token",
		Description: "Hugging Face Organization API token",
		Regex:       regexp.MustCompile(tokenPattern),
		SecretGroup: 1,
		Entropy:     2,
		Keywords:    []string{"api_org_"},
	}

	// Samples handed to validate (defined elsewhere in this package) as the
	// inputs that are presumably expected to be reported by the rule. The
	// final entry is a freshly generated token rather than a fixed sample.
	truePositives := []string{
		`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`,
		"`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`",
		`\'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'\`,
		`\"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\"`,
		`,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,`,
		`(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)`,
		`<foo>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</foo>`,
		`def test_private_space(self):
hf_token = "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" # Intentionally revealing this key for testing purposes
io = gr.load(`,
		`hf_token = "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" # Intentionally revealing this key for testing purposes`,
		`"news_train_dataset = datasets.load_dataset('nlpHakdang/aihub-news30k', data_files = \"train_news_text.csv\", use_auth_token='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')\n",`,
		`os.environ['HUGGINGFACEHUB_API_TOKEN'] = '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'`,
		fmt.Sprintf("api_org_%s", secrets.NewSecret(`[a-zA-Z]{34}`)),
	}

	// Look-alike inputs (constants, identifiers, fixture names and
	// documentation placeholders) that are presumably expected NOT to be
	// reported.
	falsePositives := []string{
		`public static final String API_ORG_EXIST = "APIOrganizationExist";`,
		`const api_org_controller = require('../../controllers/api/index').organizations;`,
		`API_ORG_CREATE("https://qyapi.weixin.qq.com/cgi-bin/department/create?access_token=ACCESS_TOKEN"),`,
		`def test_internal_api_org_inclusion_with_href(api_name, href, expected, monkeypatch, called_with):
monkeypatch.setattr("requests.sessions.Session.request", called_with)`,
		` def _api_org_96726c78_4ae3_402f_b08b_7a78c6903d2a(self, method, url, body, headers):
body = self.fixtures.load("api_org_96726c78_4ae3_402f_b08b_7a78c6903d2a.xml")
return httplib.OK, body, headers, httplib.responses[httplib.OK]`,
		`<p>You should see a token <code>hf_xxxxx</code> (old tokens are <code>api_XXXXXXXX</code> or <code>api_org_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX</code>).</p>`,
		` From Hugging Face docs:
You should see a token hf_xxxxx (old tokens are api_XXXXXXXX or api_org_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx).
If you do not submit your API token when sending requests to the API, you will not be able to run inference on your private models.`,
	}

	return validate(rule, truePositives, falsePositives)
}
22 changes: 22 additions & 0 deletions config/gitleaks.toml
Expand Up @@ -2124,6 +2124,26 @@ keywords = [
"hubspot",
]

[[rules]]
id = "huggingface-access-token"
description = "Hugging Face Access token"
regex = '''(?:^|[\\'"` >=:])(hf_[a-zA-Z]{34})(?:$|[\\'"` <])'''
secretGroup = 1
entropy = 1
keywords = [
"hf_",
]

[[rules]]
id = "huggingface-organization-api-token"
description = "Hugging Face Organization API token"
regex = '''(?:^|[\\'"` >=:\(,)])(api_org_[a-zA-Z]{34})(?:$|[\\'"` <\),])'''
secretGroup = 1
entropy = 2
keywords = [
"api_org_",
]

[[rules]]
id = "intercom-api-key"
description = "Intercom API Token"
Expand Down Expand Up @@ -2414,6 +2434,7 @@ id = "plaid-client-id"
description = "Plaid Client ID"
regex = '''(?i)(?:plaid)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{24})(?:['|\"|\n|\r|\s|\x60|;]|$)'''
secretGroup = 1
entropy = 3.5
keywords = [
"plaid",
]
Expand All @@ -2423,6 +2444,7 @@ id = "plaid-secret-key"
description = "Plaid Secret key"
regex = '''(?i)(?:plaid)(?:[0-9a-z\-_\t .]{0,20})(?:[\s|']|[\s|"]){0,3}(?:=|>|:{1,3}=|\|\|:|<=|=>|:|\?=)(?:'|\"|\s|=|\x60){0,5}([a-z0-9]{30})(?:['|\"|\n|\r|\s|\x60|;]|$)'''
secretGroup = 1
entropy = 3.5
keywords = [
"plaid",
]
Expand Down

0 comments on commit 9fb36b2

Please sign in to comment.