Browse Source

fix(sanitizer): match URI schemes case-insensitively

Per RFC 3986 §3.1, URI schemes are case-insensitive. HasValidURIScheme
previously did a literal HasPrefix check, so inputs like "HTTPS://..."
were rejected. Use strings.Cut to extract the scheme and compare each
allowlisted entry with strings.EqualFold.
Frédéric Guillot 1 month ago
parent
commit
6543d652a6
2 changed files with 53 additions and 44 deletions
  1. 46 41
      internal/reader/sanitizer/url.go
  2. 7 3
      internal/reader/sanitizer/url_test.go

+ 46 - 41
internal/reader/sanitizer/url.go

@@ -12,55 +12,60 @@ import "strings"
 // See https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
 var validURISchemes = []string{
 	// Most common schemes on top.
-	"https:",
-	"http:",
+	"https",
+	"http",
 
 	// Then the rest.
-	"apt:",
-	"bitcoin:",
-	"callto:",
-	"dav:",
-	"davs:",
-	"ed2k:",
-	"facetime:",
-	"feed:",
-	"ftp:",
-	"geo:",
-	"git:",
-	"gopher:",
-	"irc:",
-	"irc6:",
-	"ircs:",
-	"itms-apps:",
-	"itms:",
-	"magnet:",
-	"mailto:",
-	"news:",
-	"nntp:",
-	"rtmp:",
-	"sftp:",
-	"sip:",
-	"sips:",
-	"shortcuts:",
-	"skype:",
-	"spotify:",
-	"ssh:",
-	"steam:",
-	"svn:",
-	"svn+ssh:",
-	"tel:",
-	"webcal:",
-	"xmpp:",
+	"apt",
+	"bitcoin",
+	"callto",
+	"dav",
+	"davs",
+	"ed2k",
+	"facetime",
+	"feed",
+	"ftp",
+	"geo",
+	"git",
+	"gopher",
+	"irc",
+	"irc6",
+	"ircs",
+	"itms-apps",
+	"itms",
+	"magnet",
+	"mailto",
+	"news",
+	"nntp",
+	"rtmp",
+	"sftp",
+	"sip",
+	"sips",
+	"shortcuts",
+	"skype",
+	"spotify",
+	"ssh",
+	"steam",
+	"svn",
+	"svn+ssh",
+	"tel",
+	"webcal",
+	"xmpp",
 
 	// iOS Apps
-	"opener:", // https://www.opener.link
-	"hack:",   // https://apps.apple.com/it/app/hack-for-hacker-news-reader/id1464477788?l=en-GB
+	"opener", // https://www.opener.link
+	"hack",   // https://apps.apple.com/it/app/hack-for-hacker-news-reader/id1464477788?l=en-GB
 }
 
 // HasValidURIScheme reports whether the URL begins with an allowed scheme.
+// The scheme comparison is case-insensitive per RFC 3986 §3.1.
 func HasValidURIScheme(absoluteURL string) bool {
-	for _, scheme := range validURISchemes {
-		if strings.HasPrefix(absoluteURL, scheme) {
+	scheme, _, ok := strings.Cut(absoluteURL, ":")
+	if !ok || scheme == "" {
+		return false
+	}
+	for _, validScheme := range validURISchemes {
+		if strings.EqualFold(scheme, validScheme) {
 			return true
 		}
 	}

+ 7 - 3
internal/reader/sanitizer/url_test.go

@@ -31,10 +31,14 @@ func TestHasValidURIScheme(t *testing.T) {
 		"/relative/path":          false,
 		"//evil.example.org/path": false,
 
-		// Rejected: case-sensitive match (callers are expected to pass
-		// already-normalized URLs, e.g. via net/url which lowercases the scheme).
-		"HTTPS://example.org": false,
+		// Allowed: scheme matching is case-insensitive (RFC 3986 §3.1).
+		"HTTPS://example.org":   true,
+		"MailTo:author@host":    true,
+		"SVN+SSH://example.org": true,
+
+		// Rejected: case-insensitive match still rejects disallowed schemes.
 		"JavaScript:alert(1)": false,
+		"VBScript:msgbox(1)":  false,
 	}
 
 	for input, expected := range scenarios {