Переглянути джерело

Remove iframe inner HTML contents

An iframe element never has fallback content, as it will always create a nested
browsing context, regardless of whether the specified initial contents are
successfully used.

https://www.w3.org/TR/2010/WD-html5-20101019/the-iframe-element.html#the-iframe-element
Frédéric Guillot 5 років тому
батько
коміт
0413daf76b
2 змінені файли: 19 додано та 1 видалено
  1. 9 1
      reader/sanitizer/sanitizer.go
  2. 10 0
      reader/sanitizer/sanitizer_test.go

+ 9 - 1
reader/sanitizer/sanitizer.go

@@ -24,11 +24,12 @@ var (
 
 // Sanitize returns safe HTML.
 func Sanitize(baseURL, input string) string {
-	tokenizer := html.NewTokenizer(bytes.NewBufferString(input))
 	var buffer bytes.Buffer
 	var tagStack []string
+	var parentTag string
 	blacklistedTagDepth := 0
 
+	tokenizer := html.NewTokenizer(bytes.NewBufferString(input))
 	for {
 		if tokenizer.Next() == html.ErrorToken {
 			err := tokenizer.Err()
@@ -46,9 +47,16 @@ func Sanitize(baseURL, input string) string {
 				continue
 			}
 
+			// An iframe element never has fallback content.
+			// See https://www.w3.org/TR/2010/WD-html5-20101019/the-iframe-element.html#the-iframe-element
+			if parentTag == "iframe" {
+				continue
+			}
+
 			buffer.WriteString(html.EscapeString(token.Data))
 		case html.StartTagToken:
 			tagName := token.DataAtom.String()
+			parentTag = tagName
 
 			if !isPixelTracker(tagName, token.Attr) && isValidTag(tagName) {
 				attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr)

+ 10 - 0
reader/sanitizer/sanitizer_test.go

@@ -173,6 +173,16 @@ func TestInvalidIFrame(t *testing.T) {
 	}
 }
 
+func TestIFrameWithChildElements(t *testing.T) {
+	input := `<iframe src="https://www.youtube.com/"><p>test</p></iframe>`
+	expected := `<iframe src="https://www.youtube.com/" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
+	output := Sanitize("http://example.com/", input)
+
+	if expected != output {
+		t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
+	}
+}
+
 func TestInvalidURLScheme(t *testing.T) {
 	input := `<p>This link is <a src="file:///etc/passwd">not valid</a></p>`
 	expected := `<p>This link is not valid</p>`