huggingface.go 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. package rules
  2. import (
  3. "fmt"
  4. "github.com/zricethezav/gitleaks/v8/cmd/generate/config/utils"
  5. "regexp"
  6. "github.com/zricethezav/gitleaks/v8/cmd/generate/secrets"
  7. "github.com/zricethezav/gitleaks/v8/config"
  8. )
  9. // Reference: https://huggingface.co/docs/hub/security-tokens
  10. //
  11. // Old tokens have the prefix `api_`, however, I am not sure it's worth detecting them as that would be high noise.
  12. // https://huggingface.co/docs/api-inference/quicktour
  13. func HuggingFaceAccessToken() *config.Rule {
  14. // define rule
  15. r := config.Rule{
  16. RuleID: "huggingface-access-token",
  17. Description: "Discovered a Hugging Face Access token, which could lead to unauthorized access to AI models and sensitive data.",
  18. Regex: regexp.MustCompile(`(?:^|[\\'"` + "`" + ` >=:])(hf_[a-zA-Z]{34})(?:$|[\\'"` + "`" + ` <])`),
  19. Entropy: 2,
  20. Keywords: []string{
  21. "hf_",
  22. },
  23. }
  24. // validate
  25. tps := []string{
  26. `huggingface-cli login --token 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`,
  27. `huggingface-cli login --token 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`,
  28. `huggingface-cli login --token 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`,
  29. `huggingface-cli login --token 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`,
  30. `huggingface-cli login --token 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`,
  31. `huggingface-cli login --token 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`,
  32. `huggingface-cli login --token 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`,
  33. `huggingface-cli login --token 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`,
  34. `-H "Authorization: Bearer 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"`,
  35. `DEV=1 HF_TOKEN=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 poetry run python app.py`,
  36. `use_auth_token='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')`,
  37. `CI_HUB_USER_TOKEN = "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"`,
  38. `- Change line 5 and add your Hugging Face token, that is, instead of 'hf_token = "ADD_YOUR_HUGGING_FACE_TOKEN_HERE"', you will need to change it to something like'hf_token = "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"'`,
  39. ` " hf_token = \"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\"\n",`,
  40. `# Not critical, only usable on the sandboxed CI instance.
  41. TOKEN = "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"`,
  42. ` parser.add_argument("--hf_token", type=str, default='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', help="Hugging Face Access Token to access PyAnnote gated models")`,
  43. }
  44. fps := []string{
  45. `- (id)hf_requiredCharacteristicTypesForDisplayMetadata;`,
  46. `amazon.de#@#div[data-cel-widget="desktop-rhf_SponsoredProductsRemoteRHFSearchEXPSubsK2ClickPagination"]`,
  47. ` _kHMSymptomhf_generatedByHomeAppForDebuggingPurposesKey,`,
  48. ` #define OSCHF_DebugGetExpectedAverageCrystalAmplitude NOROM_OSCHF_DebugGetExpectedAverageCrystalAmplitude`,
  49. ` M_UINT (ServingCellPriorityParametersDescription_t, H_PRIO, 2, &hf_servingcellpriorityparametersdescription_h_prio),`,
  50. `+HWI-ST565_0092:4:1101:5508:5860#ACTTGA/1
  51. bb_eeeeegfgffhiiiiiiiiiiihiiiiicgafhf_eefghihhiiiifhifhhdhifhiiiihifdgdhggf\bbceceedbcd
  52. @HWI-ST565_0092:4:1101:7621:5770#ACTTGA/1`,
  53. `y{}x|~|}{~}}~|~}||�~|�{��|{}{|~z{}{{|{||{|}|{}{~|y}vjoePbUBJ7&;"; <; :;?!!;<7%$IACa_ecghbfbaebejhahfbhf_ddbficghbgfbhhcghdghfhigiifhhehhdggcgfchf_fgcei^[[.40&54"5666 6`,
  54. ` change_dir(cwd)
  55. subdirs = glob.glob('HF_CAASIMULIAComputeServicesBuildTime.HF*.Linux64')
  56. if len(subdirs) == 1:`,
  57. ` os.environ.get("HF_AUTH_TOKEN",
  58. "hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"),`,
  59. `# HuggingFace API Token https://huggingface.co/settings/tokens
  60. HUGGINGFACE_API_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,`,
  61. }
  62. return utils.Validate(r, tps, fps)
  63. }
  64. // Will be deprecated Aug 1st, 2023.
  65. func HuggingFaceOrganizationApiToken() *config.Rule {
  66. // define rule
  67. r := config.Rule{
  68. RuleID: "huggingface-organization-api-token",
  69. Description: "Uncovered a Hugging Face Organization API token, potentially compromising AI organization accounts and associated data.",
  70. Regex: regexp.MustCompile(`(?:^|[\\'"` + "`" + ` >=:\(,)])(api_org_[a-zA-Z]{34})(?:$|[\\'"` + "`" + ` <\),])`),
  71. Entropy: 2,
  72. Keywords: []string{
  73. "api_org_",
  74. },
  75. }
  76. // validate
  77. tps := []string{
  78. `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`,
  79. "`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`",
  80. `\'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'\`,
  81. `\"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\"`,
  82. `,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,`,
  83. `(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)`,
  84. `<foo>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</foo>`,
  85. `def test_private_space(self):
  86. hf_token = "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" # Intentionally revealing this key for testing purposes
  87. io = gr.load(`,
  88. `hf_token = "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" # Intentionally revealing this key for testing purposes`,
  89. `"news_train_dataset = datasets.load_dataset('nlpHakdang/aihub-news30k', data_files = \"train_news_text.csv\", use_auth_token='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')\n",`,
  90. `os.environ['HUGGINGFACEHUB_API_TOKEN'] = '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'`,
  91. fmt.Sprintf("api_org_%s", secrets.NewSecret(`[a-zA-Z]{34}`)),
  92. }
  93. fps := []string{
  94. `public static final String API_ORG_EXIST = "APIOrganizationExist";`,
  95. `const api_org_controller = require('../../controllers/api/index').organizations;`,
  96. `API_ORG_CREATE("https://qyapi.weixin.qq.com/cgi-bin/department/create?access_token=ACCESS_TOKEN"),`,
  97. `def test_internal_api_org_inclusion_with_href(api_name, href, expected, monkeypatch, called_with):
  98. monkeypatch.setattr("requests.sessions.Session.request", called_with)`,
  99. ` def _api_org_96726c78_4ae3_402f_b08b_7a78c6903d2a(self, method, url, body, headers):
  100. body = self.fixtures.load("api_org_96726c78_4ae3_402f_b08b_7a78c6903d2a.xml")
  101. return httplib.OK, body, headers, httplib.responses[httplib.OK]`,
  102. `<p>You should see a token <code>hf_xxxxx</code> (old tokens are <code>api_XXXXXXXX</code> or <code>api_org_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX</code>).</p>`,
  103. ` From Hugging Face docs:
  104. You should see a token hf_xxxxx (old tokens are api_XXXXXXXX or api_org_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx).
  105. If you do not submit your API token when sending requests to the API, you will not be able to run inference on your private models.`,
  106. }
  107. return utils.Validate(r, tps, fps)
  108. }