package rules
import (
"fmt"
"regexp"
"github.com/zricethezav/gitleaks/v8/cmd/generate/secrets"
"github.com/zricethezav/gitleaks/v8/config"
)
// HuggingFaceAccessToken builds the rule that detects Hugging Face user access
// tokens: the literal prefix `hf_` followed by exactly 34 ASCII letters.
//
// Reference: https://huggingface.co/docs/hub/security-tokens
//
// Older tokens use the prefix `api_`. They are deliberately not covered here
// because matching them would likely produce too much noise.
// https://huggingface.co/docs/api-inference/quicktour
//
// The rule is passed through validate together with the true-positive and
// false-positive samples defined below, and the result of validate is returned.
func HuggingFaceAccessToken() *config.Rule {
	// The token itself is captured by group 1. It has to be delimited on both
	// sides so that identifiers which merely contain "hf_" are not reported.
	//
	// `^` and `$` only match at the very start and end of the scanned text
	// (the pattern does not enable multi-line mode), so whitespace - including
	// tab, CR and LF - is listed explicitly via `\s`. Without it, a token that
	// ends a line (or sits alone on a line) in the middle of the input would be
	// missed.
	tokenRegex := regexp.MustCompile(`(?:^|[\\'"` + "`" + `\s>=:])(hf_[a-zA-Z]{34})(?:$|[\\'"` + "`" + `\s<])`)

	// define rule
	// NOTE(review): the entropy threshold is presumably what rejects the
	// all-"x" placeholder listed in the false positives - confirm against
	// validate / the detector.
	r := config.Rule{
		RuleID:      "huggingface-access-token",
		Description: "Discovered a Hugging Face Access token, which could lead to unauthorized access to AI models and sensitive data.",
		Regex:       tokenRegex,
		Entropy:     1,
		Keywords: []string{
			"hf_",
		},
	}

	// validate
	// Samples that must be reported by the rule.
	tps := []string{
		`huggingface-cli login --token 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`,
		`huggingface-cli login --token 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`,
		`huggingface-cli login --token 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`,
		`huggingface-cli login --token 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`,
		`huggingface-cli login --token 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`,
		`huggingface-cli login --token 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`,
		`huggingface-cli login --token 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`,
		`huggingface-cli login --token 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`,
		`-H "Authorization: Bearer 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"`,
		`DEV=1 HF_TOKEN=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 poetry run python app.py`,
		`use_auth_token='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')`,
		`CI_HUB_USER_TOKEN = "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"`,
		`- Change line 5 and add your Hugging Face token, that is, instead of 'hf_token = "ADD_YOUR_HUGGING_FACE_TOKEN_HERE"', you will need to change it to something like'hf_token = "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"'`,
		` " hf_token = \"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\"\n",`,
		`# Not critical, only usable on the sandboxed CI instance.
TOKEN = "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"`,
		` parser.add_argument("--hf_token", type=str, default='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', help="Hugging Face Access Token to access PyAnnote gated models")`,
		// Regression samples: the token is delimited by line breaks / tabs
		// instead of the start or end of the scanned text.
		`huggingface-cli login --token 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
huggingface-cli whoami`,
		"HF_TOKEN:\n\t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\r\n",
	}
	// Samples that must NOT be reported by the rule.
	fps := []string{
		`- (id)hf_requiredCharacteristicTypesForDisplayMetadata;`,
		`amazon.de#@#div[data-cel-widget="desktop-rhf_SponsoredProductsRemoteRHFSearchEXPSubsK2ClickPagination"]`,
		` _kHMSymptomhf_generatedByHomeAppForDebuggingPurposesKey,`,
		` #define OSCHF_DebugGetExpectedAverageCrystalAmplitude NOROM_OSCHF_DebugGetExpectedAverageCrystalAmplitude`,
		` M_UINT (ServingCellPriorityParametersDescription_t, H_PRIO, 2, &hf_servingcellpriorityparametersdescription_h_prio),`,
		`+HWI-ST565_0092:4:1101:5508:5860#ACTTGA/1
bb_eeeeegfgffhiiiiiiiiiiihiiiiicgafhf_eefghihhiiiifhifhhdhifhiiiihifdgdhggf\bbceceedbcd
@HWI-ST565_0092:4:1101:7621:5770#ACTTGA/1`,
		`y{}x|~|}{~}}~|~}||�~|�{��|{}{|~z{}{{|{||{|}|{}{~|y}vjoePbUBJ7&;"; <; :;?!!;<7%$IACa_ecghbfbaebejhahfbhf_ddbficghbgfbhhcghdghfhigiifhhehhdggcgfchf_fgcei^[[.40&54"5666 6`,
		` change_dir(cwd)
subdirs = glob.glob('HF_CAASIMULIAComputeServicesBuildTime.HF*.Linux64')
if len(subdirs) == 1:`,
		` os.environ.get("HF_AUTH_TOKEN",
"hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"),`,
		`# HuggingFace API Token https://huggingface.co/settings/tokens
HUGGINGFACE_API_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,`,
	}
	return validate(r, tps, fps)
}
// Will be deprecated Aug 1st, 2023.
func HuggingFaceOrganizationApiToken() *config.Rule {
// define rule
r := config.Rule{
RuleID: "huggingface-organization-api-token",
Description: "Uncovered a Hugging Face Organization API token, potentially compromising AI organization accounts and associated data.",
Regex: regexp.MustCompile(`(?:^|[\\'"` + "`" + ` >=:\(,)])(api_org_[a-zA-Z]{34})(?:$|[\\'"` + "`" + ` <\),])`),
Entropy: 2,
Keywords: []string{
"api_org_",
},
}
// validate
tps := []string{
`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`,
"`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`",
`\'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'\`,
`\"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\"`,
`,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,`,
`(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)`,
`
You should see a token hf_xxxxx (old tokens are api_XXXXXXXX or api_org_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX).