Răsfoiți Sursa

refactor(sanitizer): simplify `hasValidURIScheme` and `isBlockedResource` functions

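- Use a slice instead of a map for the schemes. The overwhelming majority of
  URLs use either http or https, so those two are placed at the front of the
  slice and are matched first. This is expected to be faster than a map
  lookup in the common case, although the benchmark below shows no
  statistically significant difference.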
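- Simplify hasValidURIScheme by using strings.HasPrefix instead of
  strings.IndexByte plus a map lookup. Note that this makes the check
  case-sensitive: the scheme is no longer passed through strings.ToLower, so
  only URLs that start with a lowercase scheme (e.g. "https:") match.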
- Simplify isBlockedResource by using a simple for loop, instead of a weird
  slices.ContainsFunc+strings.Contains construct.

On my noisy system:

```
goos: linux
goarch: arm64
pkg: miniflux.app/v2/internal/reader/sanitizer
           │   old.txt   │            new.txt            │
           │   sec/op    │   sec/op     vs base          │
Sanitize-8   22.19m ± 4%   21.97m ± 4%  ~ (p=0.948 n=50)
```
Julien Voisin 6 luni în urmă
părinte
comite
5a97bf8b5e
1 a modificat fișierele cu 55 adăugiri și 51 ștergeri
  1. 55 51
      internal/reader/sanitizer/sanitizer.go

+ 55 - 51
internal/reader/sanitizer/sanitizer.go

@@ -138,46 +138,51 @@ var (
 		"linkedin.com/shareArticle",
 	}
 
-	validURISchemes = map[string]struct{}{
-		"apt":       {},
-		"bitcoin":   {},
-		"callto":    {},
-		"dav":       {},
-		"davs":      {},
-		"ed2k":      {},
-		"facetime":  {},
-		"feed":      {},
-		"ftp":       {},
-		"geo":       {},
-		"git":       {},
-		"gopher":    {},
-		"http":      {},
-		"https":     {},
-		"irc":       {},
-		"irc6":      {},
-		"ircs":      {},
-		"itms-apps": {},
-		"itms":      {},
-		"magnet":    {},
-		"mailto":    {},
-		"news":      {},
-		"nntp":      {},
-		"rtmp":      {},
-		"sftp":      {},
-		"sip":       {},
-		"sips":      {},
-		"skype":     {},
-		"spotify":   {},
-		"ssh":       {},
-		"steam":     {},
-		"svn":       {},
-		"svn+ssh":   {},
-		"tel":       {},
-		"webcal":    {},
-		"xmpp":      {},
+	// See https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
+	validURISchemes = []string{
+		// Most common schemes on top.
+		"https:",
+		"http:",
+
+		// Then the rest.
+		"apt:",
+		"bitcoin:",
+		"callto:",
+		"dav:",
+		"davs:",
+		"ed2k:",
+		"facetime:",
+		"feed:",
+		"ftp:",
+		"geo:",
+		"git:",
+		"gopher:",
+		"irc:",
+		"irc6:",
+		"ircs:",
+		"itms-apps:",
+		"itms:",
+		"magnet:",
+		"mailto:",
+		"news:",
+		"nntp:",
+		"rtmp:",
+		"sftp:",
+		"sip:",
+		"sips:",
+		"skype:",
+		"spotify:",
+		"ssh:",
+		"steam:",
+		"svn:",
+		"svn+ssh:",
+		"tel:",
+		"webcal:",
+		"xmpp:",
+
 		// iOS Apps
-		"opener": {}, // https://www.opener.link
-		"hack":   {}, // https://apps.apple.com/it/app/hack-for-hacker-news-reader/id1464477788?l=en-GB
+		"opener:", // https://www.opener.link
+		"hack:",   // https://apps.apple.com/it/app/hack-for-hacker-news-reader/id1464477788?l=en-GB
 	}
 
 	dataAttributeAllowedPrefixes = []string{
@@ -467,23 +472,22 @@ func hasRequiredAttributes(tagName string, attributes []string) bool {
 	}
 }
 
-// See https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
 func hasValidURIScheme(absoluteURL string) bool {
-	colonIndex := strings.IndexByte(absoluteURL, ':')
-	// Scheme must exist (colonIndex > 0). An empty scheme (e.g. ":foo") is not allowed.
-	if colonIndex <= 0 {
-		return false
+	for _, scheme := range validURISchemes {
+		if strings.HasPrefix(absoluteURL, scheme) {
+			return true
+		}
 	}
-
-	scheme := absoluteURL[:colonIndex]
-	_, ok := validURISchemes[strings.ToLower(scheme)]
-	return ok
+	return false
 }
 
 func isBlockedResource(absoluteURL string) bool {
-	return slices.ContainsFunc(blockedResourceURLSubstrings, func(element string) bool {
-		return strings.Contains(absoluteURL, element)
-	})
+	for _, blockedURL := range blockedResourceURLSubstrings {
+		if strings.Contains(absoluteURL, blockedURL) {
+			return true
+		}
+	}
+	return false
 }
 
 func isValidIframeSource(iframeSourceURL string) bool {