Browse Source

perf(sanitizer): improve the sanitizer's performances twofold

Previously, every allowed attribute had to allocate memory for a slice,
concatenated in `key=…`, and a final strings.Join call. Instead of doing all of
this, a single string.Builder is used, with two simple local functions. It
doesn't significantly complexify the code, while improving the performances.
Now, I know that this isn't really a bottleneck in miniflux, but the
improvement is around 50% for the wikipedia/github benchmark (BenchmarkSanitize),
so I think it's worth it, given that the sanitizeAttributes function is call
for every attribute a feed items.

Before:

```
goos: linux
goarch: arm64
pkg: miniflux.app/v2/internal/reader/sanitizer
BenchmarkSanitizeImageHeavyNoQuery-8     	    6326	    567676 ns/op	   78305 B/op	     918 allocs/op
BenchmarkSanitizeImageHeavyWithQuery-8   	    4186	   1028312 ns/op	  124353 B/op	    1366 allocs/op
BenchmarkSanitize-8                      	      50	  21440566 ns/op
PASS
ok  	miniflux.app/v2/internal/reader/sanitizer	9.039s
```

After:

```
goos: linux
goarch: arm64
pkg: miniflux.app/v2/internal/reader/sanitizer
BenchmarkSanitizeImageHeavyNoQuery-8     	    7512	    530973 ns/op	   73186 B/op	     790 allocs/op
BenchmarkSanitizeImageHeavyWithQuery-8   	    1173	   1023078 ns/op	  118209 B/op	    1238 allocs/op
BenchmarkSanitize-8                      	      92	  13265599 ns/op
PASS
ok  	miniflux.app/v2/internal/reader/sanitizer	6.553s
```
jvoisin 2 weeks ago
parent
commit
622d757a32
1 changed files with 24 additions and 9 deletions
  1. 24 9
      internal/reader/sanitizer/sanitizer.go

+ 24 - 9
internal/reader/sanitizer/sanitizer.go

@@ -448,7 +448,20 @@ func trackAttributes(s *mandatoryAttributesStruct, attributeName string) {
 }
 
 func sanitizeAttributes(parsedBaseUrl *url.URL, tagName string, attributes []html.Attribute, sanitizerOptions *SanitizerOptions) (string, bool) {
-	htmlAttrs := make([]string, 0, len(attributes))
+	var htmlAttrs strings.Builder
+	// Rough estimate: most attributes are short; ~24 bytes (key + ="value") is
+	// a reasonable starting point. Avoids early grows for typical elements.
+	htmlAttrs.Grow(len(attributes) * 24)
+
+	// writeAttr appends key="value" to htmlAttrs, prefixing with a single
+	// space when not the first written attribute. value is HTML-escaped.
+	writeAttr := func(key, value string) {
+		htmlAttrs.WriteByte(' ')
+		htmlAttrs.WriteString(key)
+		htmlAttrs.WriteString(`="`)
+		htmlAttrs.WriteString(html.EscapeString(value))
+		htmlAttrs.WriteByte('"')
+	}
 
 	// Keep track of mandatory attributes for some tags
 	mandatoryAttributes := mandatoryAttributesStruct{false, false, false}
@@ -548,7 +561,7 @@ func sanitizeAttributes(parsedBaseUrl *url.URL, tagName string, attributes []htm
 		}
 
 		trackAttributes(&mandatoryAttributes, attribute.Key)
-		htmlAttrs = append(htmlAttrs, attribute.Key+`="`+html.EscapeString(value)+`"`)
+		writeAttr(attribute.Key, value)
 	}
 
 	if !hasRequiredAttributes(&mandatoryAttributes, tagName) {
@@ -558,27 +571,29 @@ func sanitizeAttributes(parsedBaseUrl *url.URL, tagName string, attributes []htm
 	if !isAnchorLink {
 		switch tagName {
 		case "a":
-			htmlAttrs = append(htmlAttrs, `rel="noopener noreferrer"`, `referrerpolicy="no-referrer"`)
+			writeAttr("rel", "noopener noreferrer")
+			writeAttr("referrerpolicy", "no-referrer")
 			if sanitizerOptions.OpenLinksInNewTab {
-				htmlAttrs = append(htmlAttrs, `target="_blank"`)
+				writeAttr("target", "_blank")
 			}
 		case "video", "audio":
-			htmlAttrs = append(htmlAttrs, "controls")
+			htmlAttrs.WriteString(" controls")
 		case "iframe":
-			htmlAttrs = append(htmlAttrs, `sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox"`, `loading="lazy"`)
+			writeAttr("sandbox", "allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox")
+			writeAttr("loading", "lazy")
 
 			// Note: the referrerpolicy seems to be required to avoid YouTube error 153 video player configuration error
 			// See https://developers.google.com/youtube/terms/required-minimum-functionality#embedded-player-api-client-identity
 			if isYouTubeEmbed {
-				htmlAttrs = append(htmlAttrs, `referrerpolicy="strict-origin-when-cross-origin"`)
+				writeAttr("referrerpolicy", "strict-origin-when-cross-origin")
 			}
 
 		case "img":
-			htmlAttrs = append(htmlAttrs, `loading="lazy"`)
+			writeAttr("loading", "lazy")
 		}
 	}
 
-	return strings.Join(htmlAttrs, " "), true
+	return strings.TrimLeft(htmlAttrs.String(), " "), true
 }
 
 func sanitizeSrcsetAttr(parsedBaseURL *url.URL, value string) string {