Selaa lähdekoodia

sanitizer: add support for HTML `hidden` attribute

This commit adjusts the `Sanitize` function to skip any element that has
the `hidden` attribute, together with its contents, in the same way it
already skips blocked tags and their contents.
JohnnyJayJay 1 vuosi sitten
vanhempi
commit
ee5e18ea9f

+ 10 - 9
internal/reader/sanitizer/sanitizer.go

@@ -82,7 +82,7 @@ func Sanitize(baseURL, input string) string {
 	var buffer strings.Builder
 	var tagStack []string
 	var parentTag string
-	blacklistedTagDepth := 0
+	var blockedStack []string
 
 	tokenizer := html.NewTokenizer(strings.NewReader(input))
 	for {
@@ -98,7 +98,7 @@ func Sanitize(baseURL, input string) string {
 		token := tokenizer.Token()
 		switch token.Type {
 		case html.TextToken:
-			if blacklistedTagDepth > 0 {
+			if len(blockedStack) > 0 {
 				continue
 			}
 
@@ -116,7 +116,10 @@ func Sanitize(baseURL, input string) string {
 			if isPixelTracker(tagName, token.Attr) {
 				continue
 			}
-			if isValidTag(tagName) {
+
+			if isBlockedTag(tagName) || slices.ContainsFunc(token.Attr, func(attr html.Attribute) bool { return attr.Key == "hidden" }) {
+				blockedStack = append(blockedStack, tagName)
+			} else if len(blockedStack) == 0 && isValidTag(tagName) {
 				attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr)
 
 				if hasRequiredAttributes(tagName, attrNames) {
@@ -128,22 +131,20 @@ func Sanitize(baseURL, input string) string {
 
 					tagStack = append(tagStack, tagName)
 				}
-			} else if isBlockedTag(tagName) {
-				blacklistedTagDepth++
 			}
 		case html.EndTagToken:
 			tagName := token.DataAtom.String()
-			if isValidTag(tagName) && slices.Contains(tagStack, tagName) {
+			if len(blockedStack) > 0 && blockedStack[len(blockedStack)-1] == tagName {
+				blockedStack = blockedStack[:len(blockedStack)-1]
+			} else if len(blockedStack) == 0 && isValidTag(tagName) && slices.Contains(tagStack, tagName) {
 				buffer.WriteString("</" + tagName + ">")
-			} else if isBlockedTag(tagName) {
-				blacklistedTagDepth--
 			}
 		case html.SelfClosingTagToken:
 			tagName := token.DataAtom.String()
 			if isPixelTracker(tagName, token.Attr) {
 				continue
 			}
-			if isValidTag(tagName) {
+			if isValidTag(tagName) && len(blockedStack) == 0 {
 				attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr)
 				if hasRequiredAttributes(tagName, attrNames) {
 					if len(attrNames) > 0 {

+ 10 - 0
internal/reader/sanitizer/sanitizer_test.go

@@ -630,3 +630,13 @@ func TestReplaceStyle(t *testing.T) {
 		t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
 	}
 }
+
+func TestHiddenParagraph(t *testing.T) {
+	input := `<p>Before paragraph.</p><p hidden>This should <em>not</em> appear in the <strong>output</strong></p><p>After paragraph.</p>`
+	expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
+	output := Sanitize("http://example.org/", input)
+
+	if expected != output {
+		t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
+	}
+}