فهرست منبع

Sanitize HTML after rendering markdown

kkthxbye-code 3 سال پیش
والد
کامیت
7c79c90cd2
4 فایلهای تغییر یافته به همراه 42 افزوده شده و 13 حذف شده
  1. 4 0
      base_requirements.txt
  2. 6 13
      netbox/utilities/templatetags/builtins/filters.py
  3. 31 0
      netbox/utilities/utils.py
  4. 1 0
      requirements.txt

+ 4 - 0
base_requirements.txt

@@ -125,3 +125,7 @@ tablib
 # Timezone data (required by django-timezone-field on Python 3.9+)
 # Timezone data (required by django-timezone-field on Python 3.9+)
 # https://github.com/python/tzdata
 # https://github.com/python/tzdata
 tzdata
 tzdata
+
+# HTML sanitizer
+# https://github.com/mozilla/bleach
+bleach

+ 6 - 13
netbox/utilities/templatetags/builtins/filters.py

@@ -11,7 +11,7 @@ from markdown import markdown
 
 
 from netbox.config import get_config
 from netbox.config import get_config
 from utilities.markdown import StrikethroughExtension
 from utilities.markdown import StrikethroughExtension
-from utilities.utils import foreground_color
+from utilities.utils import clean_html, foreground_color
 
 
 register = template.Library()
 register = template.Library()
 
 
@@ -144,18 +144,6 @@ def render_markdown(value):
 
 
         {{ md_source_text|markdown }}
         {{ md_source_text|markdown }}
     """
     """
-    schemes = '|'.join(get_config().ALLOWED_URL_SCHEMES)
-
-    # Strip HTML tags
-    value = strip_tags(value)
-
-    # Sanitize Markdown links
-    pattern = fr'\[([^\]]+)\]\(\s*(?!({schemes})).*:(.+)\)'
-    value = re.sub(pattern, '[\\1](\\3)', value, flags=re.IGNORECASE)
-
-    # Sanitize Markdown reference links
-    pattern = fr'\[([^\]]+)\]:\s*(?!({schemes}))\w*:(.+)'
-    value = re.sub(pattern, '[\\1]: \\3', value, flags=re.IGNORECASE)
 
 
     # Render Markdown
     # Render Markdown
     html = markdown(value, extensions=['def_list', 'fenced_code', 'tables', StrikethroughExtension()])
     html = markdown(value, extensions=['def_list', 'fenced_code', 'tables', StrikethroughExtension()])
@@ -164,6 +152,11 @@ def render_markdown(value):
     if html:
     if html:
         html = f'<div class="rendered-markdown">{html}</div>'
         html = f'<div class="rendered-markdown">{html}</div>'
 
 
+    schemes = get_config().ALLOWED_URL_SCHEMES
+
+    # Sanitize HTML
+    html = clean_html(html, schemes)
+
     return mark_safe(html)
     return mark_safe(html)
 
 
 
 

+ 31 - 0
netbox/utilities/utils.py

@@ -4,6 +4,7 @@ from collections import OrderedDict
 from decimal import Decimal
 from decimal import Decimal
 from itertools import count, groupby
 from itertools import count, groupby
 
 
+import bleach
 from django.core.serializers import serialize
 from django.core.serializers import serialize
 from django.db.models import Count, OuterRef, Subquery
 from django.db.models import Count, OuterRef, Subquery
 from django.db.models.functions import Coalesce
 from django.db.models.functions import Coalesce
@@ -385,3 +386,33 @@ def copy_safe_request(request):
         'path': request.path,
         'path': request.path,
         'id': getattr(request, 'id', None),  # UUID assigned by middleware
         'id': getattr(request, 'id', None),  # UUID assigned by middleware
     })
     })
+
+
+def clean_html(html, schemes):
+    """
+    Sanitizes HTML based on a whitelist of allowed tags and attributes.
+    Also takes a list of allowed URI schemes.
+    """
+
+    ALLOWED_TAGS = [
+        "div", "pre", "code", "blockquote", "del",
+        "hr", "h1", "h2", "h3", "h4", "h5", "h6",
+        "ul", "ol", "li", "p", "br",
+        "strong", "em", "a", "b", "i", "img",
+        "table", "thead", "tbody", "tr", "th", "td",
+        "dl", "dt", "dd",
+    ]
+
+    ALLOWED_ATTRIBUTES = {
+        "div": ['class'],
+        "h1": ["id"], "h2": ["id"], "h3": ["id"], "h4": ["id"], "h5": ["id"], "h6": ["id"],
+        "a": ["href", "title"],
+        "img": ["src", "title", "alt"],
+    }
+
+    return bleach.clean(
+        html,
+        tags=ALLOWED_TAGS,
+        attributes=ALLOWED_ATTRIBUTES,
+        protocols=schemes
+    )

+ 1 - 0
requirements.txt

@@ -1,3 +1,4 @@
+bleach==5.0.0
 Django==4.0.4
 Django==4.0.4
 django-cors-headers==3.12.0
 django-cors-headers==3.12.0
 django-debug-toolbar==3.2.4
 django-debug-toolbar==3.2.4