Procházet zdrojové kódy

12851 replace bleach with nh3 (#14767)

* 12851 replace bleach with nh3

* Move tags & attributes lists to constants.py

---------

Co-authored-by: Jeremy Stretch <jstretch@netboxlabs.com>
Arthur Hanson před 2 lety
rodič
revize
8254e707b6
4 změněné soubory, 35 přidání a 29 odebrání
  1. 4 4
      base_requirements.txt
  2. 24 0
      netbox/utilities/constants.py
  3. 6 24
      netbox/utilities/utils.py
  4. 1 1
      requirements.txt

+ 4 - 4
base_requirements.txt

@@ -1,7 +1,3 @@
-# HTML sanitizer
-# https://github.com/mozilla/bleach/blob/main/CHANGES
-bleach
-
 # The Python web framework on which NetBox is built
 # The Python web framework on which NetBox is built
 # https://docs.djangoproject.com/en/stable/releases/
 # https://docs.djangoproject.com/en/stable/releases/
 Django<5.1
 Django<5.1
@@ -108,6 +104,10 @@ mkdocstrings[python-legacy]
 # https://github.com/netaddr/netaddr/blob/master/CHANGELOG
 # https://github.com/netaddr/netaddr/blob/master/CHANGELOG
 netaddr
 netaddr
 
 
+# Python bindings to the ammonia HTML sanitization library.
+# https://github.com/messense/nh3
+nh3
+
 # Fork of PIL (Python Imaging Library) for image processing
 # Fork of PIL (Python Imaging Library) for image processing
 # https://github.com/python-pillow/Pillow/blob/main/CHANGES.rst
 # https://github.com/python-pillow/Pillow/blob/main/CHANGES.rst
 Pillow
 Pillow

+ 24 - 0
netbox/utilities/constants.py

@@ -69,3 +69,27 @@ CSV_DELIMITERS = {
     'semicolon': ';',
     'semicolon': ';',
     'tab': '\t',
     'tab': '\t',
 }
 }
+
+
+#
+# HTML allowed tags & attributes
+#
+
+HTML_ALLOWED_TAGS = {
+    "a", "b", "blockquote", "br", "code", "dd", "del", "div", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5", "h6",
+    "hr", "i", "img", "li", "ol", "p", "pre", "strong", "table", "tbody", "td", "th", "thead", "tr", "ul"
+}
+
+HTML_ALLOWED_ATTRIBUTES = {
+    "a": {"href", "title"},
+    "div": {"class"},
+    "h1": {"id"},
+    "h2": {"id"},
+    "h3": {"id"},
+    "h4": {"id"},
+    "h5": {"id"},
+    "h6": {"id"},
+    "img": {"alt", "src", "title"},
+    "td": {"align"},
+    "th": {"align"},
+}

+ 6 - 24
netbox/utilities/utils.py

@@ -1,11 +1,11 @@
 import datetime
 import datetime
 import decimal
 import decimal
 import json
 import json
+import nh3
 import re
 import re
 from decimal import Decimal
 from decimal import Decimal
 from itertools import count, groupby
 from itertools import count, groupby
 
 
-import bleach
 from django.contrib.contenttypes.models import ContentType
 from django.contrib.contenttypes.models import ContentType
 from django.core import serializers
 from django.core import serializers
 from django.db.models import Count, ManyToOneRel, OuterRef, Subquery
 from django.db.models import Count, ManyToOneRel, OuterRef, Subquery
@@ -24,6 +24,7 @@ from netbox.config import get_config
 from netbox.plugins import PluginConfig
 from netbox.plugins import PluginConfig
 from urllib.parse import urlencode
 from urllib.parse import urlencode
 from utilities.constants import HTTP_REQUEST_META_SAFE_COPY
 from utilities.constants import HTTP_REQUEST_META_SAFE_COPY
+from .constants import HTML_ALLOWED_ATTRIBUTES, HTML_ALLOWED_TAGS
 
 
 
 
 def title(value):
 def title(value):
@@ -511,30 +512,11 @@ def clean_html(html, schemes):
     Sanitizes HTML based on a whitelist of allowed tags and attributes.
     Sanitizes HTML based on a whitelist of allowed tags and attributes.
     Also takes a list of allowed URI schemes.
     Also takes a list of allowed URI schemes.
     """
     """
-
-    ALLOWED_TAGS = {
-        "div", "pre", "code", "blockquote", "del",
-        "hr", "h1", "h2", "h3", "h4", "h5", "h6",
-        "ul", "ol", "li", "p", "br",
-        "strong", "em", "a", "b", "i", "img",
-        "table", "thead", "tbody", "tr", "th", "td",
-        "dl", "dt", "dd",
-    }
-
-    ALLOWED_ATTRIBUTES = {
-        "div": ['class'],
-        "h1": ["id"], "h2": ["id"], "h3": ["id"], "h4": ["id"], "h5": ["id"], "h6": ["id"],
-        "a": ["href", "title"],
-        "img": ["src", "title", "alt"],
-        "th": ["align"],
-        "td": ["align"],
-    }
-
-    return bleach.clean(
+    return nh3.clean(
         html,
         html,
-        tags=ALLOWED_TAGS,
-        attributes=ALLOWED_ATTRIBUTES,
-        protocols=schemes
+        tags=HTML_ALLOWED_TAGS,
+        attributes=HTML_ALLOWED_ATTRIBUTES,
+        url_schemes=set(schemes)
     )
     )
 
 
 
 

+ 1 - 1
requirements.txt

@@ -1,4 +1,3 @@
-bleach==6.1.0
 Django==5.0.1
 Django==5.0.1
 django-cors-headers==4.3.1
 django-cors-headers==4.3.1
 django-debug-toolbar==4.2.0
 django-debug-toolbar==4.2.0
@@ -24,6 +23,7 @@ Markdown==3.5.1
 mkdocs-material==9.5.3
 mkdocs-material==9.5.3
 mkdocstrings[python-legacy]==0.24.0
 mkdocstrings[python-legacy]==0.24.0
 netaddr==0.9.0
 netaddr==0.9.0
+nh3==0.2.15
 Pillow==10.1.0
 Pillow==10.1.0
 psycopg[binary,pool]==3.1.16
 psycopg[binary,pool]==3.1.16
 PyYAML==6.0.1
 PyYAML==6.0.1