Browse Source

fix(sanitizer): MathML tags are not fully supported by `golang.org/x/net/html`

See https://github.com/golang/net/blob/master/html/atom/gen.go
and https://github.com/golang/net/blob/master/html/atom/table.go
Frédéric Guillot 11 months ago
parent
commit
828a4334db
2 changed files with 20 additions and 2 deletions
  1. 10 2
      internal/reader/sanitizer/sanitizer.go
  2. 10 0
      internal/reader/sanitizer/sanitizer_test.go

+ 10 - 2
internal/reader/sanitizer/sanitizer.go

@@ -82,7 +82,7 @@ var (
 		"annotation":     {},
 		"annotation-xml": {},
 		"maction":        {},
-		"math":           {},
+		"math":           {"xmlns"},
 		"merror":         {},
 		"mfrac":          {},
 		"mi":             {},
@@ -131,7 +131,15 @@ func Sanitize(baseURL, input string) string {
 		}
 
 		token := tokenizer.Token()
-		tagName := token.DataAtom.String()
+
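+		// Note: golang.org/x/net/html only has atoms for some MathML elements.
+		// For the others, token.DataAtom is zero and its String() is empty, so
+		// the raw tag name in token.Data is used instead.
+		// See https://github.com/golang/net/blob/master/html/atom/gen.go
+		// and https://github.com/golang/net/blob/master/html/atom/table.go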
+		tagName := token.Data
+		if tagName == "" {
+			continue
+		}
+
 		switch token.Type {
 		case html.TextToken:
 			if len(blockedStack) > 0 {

+ 10 - 0
internal/reader/sanitizer/sanitizer_test.go

@@ -705,3 +705,13 @@ func TestAttributesAreStripped(t *testing.T) {
 		t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
 	}
 }
+
+func TestMathML(t *testing.T) { // Sanitize must return this MathML fragment, xmlns attribute included, unchanged.
+	input := `<math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>x</mi><mn>2</mn></msup></math>`
+	expected := `<math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>x</mi><mn>2</mn></msup></math>`
+	output := Sanitize("http://example.org/", input)
+
+	if expected != output { // expected is byte-identical to input, so any dropped tag or attribute fails here
+		t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
+	}
+}