1 день назад · 65cd6cd25d
--- a/internal/reader/language/language.go
+++ b/internal/reader/language/language.go
@@ -5,13 +5,42 @@ package language // import "miniflux.app/v2/internal/reader/language"
 
				 
			
 
				 import "strings"
			
 
				 
			
 
				+// maxLength bounds accepted language tags. RFC 5646 recommends supporting
			
 
				+// tags of at least 35 characters; anything much longer is garbage.
			
 
				+const maxLength = 50
			
 
				+
			
 
				 // Normalize cleans up a language tag declared by a feed so it is
			
 
				 // suitable for use as an HTML lang attribute. It trims surrounding
			
 
				 // whitespace, lower-cases the value, and replaces underscores with hyphens
			
 
				 // (e.g. "en_US" -> "en-us"). No strict BCP-47 validation is performed:
			
 
				 // many real feeds use loose values and silently dropping them yields worse
			
 
				 // downstream behaviour than passing them through.
			
 
				+//
			
 
				+// The value is feed-controlled and is persisted and rendered as-is, so
			
 
				+// anything outside the BCP-47 tag alphabet ([a-z0-9-]) or longer than
			
 
				+// maxLength is rejected: such a value carries no usable language
			
 
				+// information, and stripping bad characters could assemble a wrong tag.
			
 
				 func Normalize(s string) string {
			
 
				-	s = strings.ToLower(strings.TrimSpace(s))
			
 
				-	return strings.ReplaceAll(s, "_", "-")
			
 
				+	s = strings.TrimSpace(s)
			
 
				+	if len(s) > maxLength {
			
 
				+		return ""
			
 
				+	}
			
 
				+
			
 
				+	// Lower-case ASCII-only, in the same pass as the charset check.
			
 
				+	// Unicode case folding (strings.ToLower) would map some non-ASCII
			
 
				+	// characters to ASCII (e.g. the Kelvin sign U+212A to "k"), turning
			
 
				+	// input the filter should reject into an apparently valid tag.
			
 
				+	b := []byte(s)
			
 
				+	for i, c := range b {
			
 
				+		switch {
			
 
				+		case c >= 'A' && c <= 'Z':
			
 
				+			b[i] = c + 'a' - 'A'
			
 
				+		case c == '_':
			
 
				+			b[i] = '-'
			
 
				+		case (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '-':
			
 
				+		default:
			
 
				+			return ""
			
 
				+		}
			
 
				+	}
			
 
				+	return string(b)
			
 
				 }
			
--- a/internal/reader/language/language_test.go
+++ b/internal/reader/language/language_test.go
@@ -3,7 +3,10 @@
 
				 
			
 
				 package language // import "miniflux.app/v2/internal/reader/language"
			
 
				 
			
 
				-import "testing"
			
 
				+import (
			
 
				+	"strings"
			
 
				+	"testing"
			
 
				+)
			
 
				 
			
 
				 func TestNormalize(t *testing.T) {
			
 
				 	cases := []struct {
			
@@ -17,6 +20,25 @@ func TestNormalize(t *testing.T) {
 
				 		{"EN-us", "en-us"},
			
 
				 		{"pt-BR", "pt-br"},
			
 
				 		{"  fr-FR  ", "fr-fr"},
			
 
				+		{"zh-hant-cn-x-private1-private2", "zh-hant-cn-x-private1-private2"},
			
 
				+
			
 
				+		// Values outside the tag alphabet are rejected, not stripped.
			
 
				+		{"en US", ""},
			
 
				+		{"en-US, de-DE", ""},
			
 
				+		{"en\x00us", ""},
			
 
				+		{"en\u202eus", ""},
			
 
				+		{"français", ""},
			
 
				+		{`"><script>`, ""},
			
 
				+
			
 
				+		// Non-ASCII input must be rejected even when Unicode case
			
 
				+		// folding would map it to ASCII (U+212A Kelvin sign -> "k",
			
 
				+		// U+0130 dotted capital I -> "i").
			
 
				+		{"\u212aO", ""},
			
 
				+		{"İ-en", ""},
			
 
				+
			
 
				+		// Values longer than 50 characters are rejected.
			
 
				+		{strings.Repeat("a", 51), ""},
			
 
				+		{"en-" + strings.Repeat("a", 100), ""},
			
 
				 	}
			
 
				 	for _, c := range cases {
			
 
				 		if got := Normalize(c.in); got != c.want {