huggingface.go 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. package rules
  2. import (
  3. "fmt"
  4. "github.com/zricethezav/gitleaks/v8/cmd/generate/config/utils"
  5. "github.com/zricethezav/gitleaks/v8/cmd/generate/secrets"
  6. "github.com/zricethezav/gitleaks/v8/config"
  7. )
  8. // Reference: https://huggingface.co/docs/hub/security-tokens
  9. //
  10. // Old tokens have the prefix `api_`, however, I am not sure it's worth detecting them as that would be high noise.
  11. // https://huggingface.co/docs/api-inference/quicktour
  12. func HuggingFaceAccessToken() *config.Rule {
  13. // define rule
  14. r := config.Rule{
  15. RuleID: "huggingface-access-token",
  16. Description: "Discovered a Hugging Face Access token, which could lead to unauthorized access to AI models and sensitive data.",
  17. Regex: utils.GenerateUniqueTokenRegex("hf_(?i:[a-z]{34})", false),
  18. Entropy: 2,
  19. Keywords: []string{
  20. "hf_",
  21. },
  22. }
  23. // validate
  24. tps := utils.GenerateSampleSecrets("huggingface", "hf_"+secrets.NewSecret("[a-zA-Z]{34}"))
  25. tps = append(tps,
  26. `huggingface-cli login --token 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`,
  27. `huggingface-cli login --token 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`,
  28. `huggingface-cli login --token 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`,
  29. `huggingface-cli login --token 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`,
  30. `huggingface-cli login --token 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`,
  31. `huggingface-cli login --token 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`,
  32. `huggingface-cli login --token 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`,
  33. `huggingface-cli login --token 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`,
  34. `-H "Authorization: Bearer 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"`,
  35. `DEV=1 HF_TOKEN=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 poetry run python app.py`,
  36. `use_auth_token='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')`,
  37. `CI_HUB_USER_TOKEN = "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"`,
  38. `- Change line 5 and add your Hugging Face token, that is, instead of 'hf_token = "ADD_YOUR_HUGGING_FACE_TOKEN_HERE"', you will need to change it to something like'hf_token = "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"'`,
  39. //TODO: ` " hf_token = \"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\"\n",`,
  40. `# Not critical, only usable on the sandboxed CI instance.
  41. TOKEN = "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"`,
  42. ` parser.add_argument("--hf_token", type=str, default='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', help="Hugging Face Access Token to access PyAnnote gated models")`,
  43. )
  44. fps := []string{
  45. `- (id)hf_requiredCharacteristicTypesForDisplayMetadata;`,
  46. `amazon.de#@#div[data-cel-widget="desktop-rhf_SponsoredProductsRemoteRHFSearchEXPSubsK2ClickPagination"]`,
  47. ` _kHMSymptomhf_generatedByHomeAppForDebuggingPurposesKey,`,
  48. ` #define OSCHF_DebugGetExpectedAverageCrystalAmplitude NOROM_OSCHF_DebugGetExpectedAverageCrystalAmplitude`,
  49. ` M_UINT (ServingCellPriorityParametersDescription_t, H_PRIO, 2, &hf_servingcellpriorityparametersdescription_h_prio),`,
  50. `+HWI-ST565_0092:4:1101:5508:5860#ACTTGA/1
  51. bb_eeeeegfgffhiiiiiiiiiiihiiiiicgafhf_eefghihhiiiifhifhhdhifhiiiihifdgdhggf\bbceceedbcd
  52. @HWI-ST565_0092:4:1101:7621:5770#ACTTGA/1`,
  53. `y{}x|~|}{~}}~|~}||�~|�{��|{}{|~z{}{{|{||{|}|{}{~|y}vjoePbUBJ7&;"; <; :;?!!;<7%$IACa_ecghbfbaebejhahfbhf_ddbficghbgfbhhcghdghfhigiifhhehhdggcgfchf_fgcei^[[.40&54"5666 6`,
  54. ` change_dir(cwd)
  55. subdirs = glob.glob('HF_CAASIMULIAComputeServicesBuildTime.HF*.Linux64')
  56. if len(subdirs) == 1:`,
  57. ` os.environ.get("HF_AUTH_TOKEN",
  58. "hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"),`,
  59. `# HuggingFace API Token https://huggingface.co/settings/tokens
  60. HUGGINGFACE_API_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,`,
  61. }
  62. return utils.Validate(r, tps, fps)
  63. }
  64. // Will be deprecated Aug 1st, 2023.
  65. func HuggingFaceOrganizationApiToken() *config.Rule {
  66. // define rule
  67. r := config.Rule{
  68. RuleID: "huggingface-organization-api-token",
  69. Description: "Uncovered a Hugging Face Organization API token, potentially compromising AI organization accounts and associated data.",
  70. Regex: utils.GenerateUniqueTokenRegex("api_org_(?i:[a-z]{34})", false),
  71. Entropy: 2,
  72. Keywords: []string{
  73. "api_org_",
  74. },
  75. }
  76. // validate
  77. tps := utils.GenerateSampleSecrets("huggingface", "api_org_"+secrets.NewSecret("[a-zA-Z]{34}"))
  78. tps = append(tps,
  79. `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`,
  80. "`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`",
  81. `\'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'\`,
  82. //TODO: `\"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\"`,
  83. //TODO: `,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,`,
  84. //TODO: `(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)`,
  85. //TODO: `<foo>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</foo>`,
  86. `def test_private_space(self):
  87. hf_token = "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" # Intentionally revealing this key for testing purposes
  88. io = gr.load(`,
  89. `hf_token = "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" # Intentionally revealing this key for testing purposes`,
  90. `"news_train_dataset = datasets.load_dataset('nlpHakdang/aihub-news30k', data_files = \"train_news_text.csv\", use_auth_token='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')\n",`,
  91. `os.environ['HUGGINGFACEHUB_API_TOKEN'] = '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'`,
  92. fmt.Sprintf("api_org_%s", secrets.NewSecret(`[a-zA-Z]{34}`)),
  93. )
  94. fps := []string{
  95. `public static final String API_ORG_EXIST = "APIOrganizationExist";`,
  96. `const api_org_controller = require('../../controllers/api/index').organizations;`,
  97. `API_ORG_CREATE("https://qyapi.weixin.qq.com/cgi-bin/department/create?access_token=ACCESS_TOKEN"),`,
  98. `def test_internal_api_org_inclusion_with_href(api_name, href, expected, monkeypatch, called_with):
  99. monkeypatch.setattr("requests.sessions.Session.request", called_with)`,
  100. ` def _api_org_96726c78_4ae3_402f_b08b_7a78c6903d2a(self, method, url, body, headers):
  101. body = self.fixtures.load("api_org_96726c78_4ae3_402f_b08b_7a78c6903d2a.xml")
  102. return httplib.OK, body, headers, httplib.responses[httplib.OK]`,
  103. `<p>You should see a token <code>hf_xxxxx</code> (old tokens are <code>api_XXXXXXXX</code> or <code>api_org_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX</code>).</p>`,
  104. ` From Hugging Face docs:
  105. You should see a token hf_xxxxx (old tokens are api_XXXXXXXX or api_org_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx).
  106. If you do not submit your API token when sending requests to the API, you will not be able to run inference on your private models.`,
  107. }
  108. return utils.Validate(r, tps, fps)
  109. }