Browse Source

feat(sanitizer): validate MathML XML namespace

Frédéric Guillot 10 months ago
parent
commit
d53fd17e10

+ 4 - 0
internal/reader/sanitizer/sanitizer.go

@@ -234,6 +234,10 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute, sa
 			continue
 		}
 
+		if tagName == "math" && attribute.Key == "xmlns" && value != "http://www.w3.org/1998/Math/MathML" {
+			value = "http://www.w3.org/1998/Math/MathML"
+		}
+
 		if tagName == "img" && attribute.Key == "fetchpriority" {
 			if !isValidFetchPriorityValue(value) {
 				continue

+ 10 - 0
internal/reader/sanitizer/sanitizer_test.go

@@ -829,3 +829,13 @@ func TestMathML(t *testing.T) {
 		t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
 	}
 }
+
+func TestInvalidMathMLXMLNamespace(t *testing.T) { // Verifies that a wrong xmlns on <math> is rewritten to the MathML namespace.
+	input := `<math xmlns="http://example.org"><msup><mi>x</mi><mn>2</mn></msup></math>` // xmlns is not the MathML namespace URI
+	expected := `<math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>x</mi><mn>2</mn></msup></math>` // same markup, only the xmlns value differs
+	output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
+
+	if expected != output {
+		t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
+	}
+}