Przeglądaj źródła

feat(reader): inherit feed language on RSS, RDF, and JSON Feed entries

Entries without their own language now take the feed-level value,
matching the behaviour introduced for Atom. For JSON Feed this is
spec-mandated: an item declares a language only when it differs from
the primary language of the feed. For RSS and RDF, items are part of
the channel's content, and API consumers previously saw an empty
entries.language even when the channel declared one.

The RSS channel now also reads <dc:language>: hybrid feeds commonly
declare the channel language via Dublin Core instead of <language>.
Fred 17 godzin temu
rodzic
commit
5f710f916d

+ 7 - 0
internal/reader/json/adapter.go

@@ -71,7 +71,14 @@ func (j *JSONAdapter) BuildFeed(baseURL string) *model.Feed {
 
 	for _, item := range j.jsonFeed.Items {
 		entry := model.NewEntry()
+
+		// Populate the entry language. Per the JSON Feed spec, an item
+		// declares a language only when it differs from the primary
+		// language of the feed.
 		entry.Language = language.Normalize(item.Language)
+		if entry.Language == "" {
+			entry.Language = feed.Language
+		}
 
 		for _, itemURL := range []string{item.URL, item.ExternalURL} {
 			if itemURL = strings.TrimSpace(itemURL); itemURL == "" {

+ 3 - 3
internal/reader/json/parser_test.go

@@ -1174,7 +1174,7 @@ func TestParseItemWithLanguage(t *testing.T) {
 	}
 }
 
-func TestParseItemWithoutLanguage(t *testing.T) {
+func TestParseItemWithoutLanguageInheritsFeedLanguage(t *testing.T) {
 	data := `{
 		"version": "https://jsonfeed.org/version/1.1",
 		"title": "Example",
@@ -1199,7 +1199,7 @@ func TestParseItemWithoutLanguage(t *testing.T) {
 		t.Fatalf("Expected 1 entry, got: %d", len(feed.Entries))
 	}
 
-	if feed.Entries[0].Language != "" {
-		t.Errorf("Expected empty entry language, got: %q", feed.Entries[0].Language)
+	if feed.Entries[0].Language != "en-us" {
+		t.Errorf("Expected entry to inherit feed language, got: %q", feed.Entries[0].Language)
 	}
 }

+ 5 - 0
internal/reader/rdf/adapter.go

@@ -102,7 +102,12 @@ func (r *rdfAdapter) buildFeed(baseURL string) *model.Feed {
 			entry.Author = sanitizer.StripTags(r.rdf.Channel.DublinCoreCreator)
 		}
 
+		// Populate the entry language, falling back to the channel
+		// language: items are part of the channel's content.
 		entry.Language = language.Normalize(item.DublinCoreLanguage)
+		if entry.Language == "" {
+			entry.Language = feed.Language
+		}
 
 		feed.Entries = append(feed.Entries, entry)
 	}

+ 2 - 2
internal/reader/rdf/parser_test.go

@@ -876,8 +876,8 @@ func TestParseFeedWithChannelLanguage(t *testing.T) {
 		t.Fatalf(`Unexpected entry count, got: %d`, len(feed.Entries))
 	}
 
-	if feed.Entries[0].Language != "" {
-		t.Errorf(`Expected empty entry language, got: %q`, feed.Entries[0].Language)
+	if feed.Entries[0].Language != "en-us" {
+		t.Errorf(`Expected entry to inherit channel language, got: %q`, feed.Entries[0].Language)
 	}
 }
 

+ 11 - 0
internal/reader/rss/adapter.go

@@ -35,6 +35,12 @@ func (r *rssAdapter) buildFeed(baseURL string) *model.Feed {
 		Language:    language.Normalize(r.rss.Channel.Language),
 	}
 
+	// Some hybrid feeds declare the channel language with <dc:language>
+	// instead of <language>; use it only when <language> is empty.
+	if feed.Language == "" {
+		feed.Language = language.Normalize(r.rss.Channel.DublinCoreLanguage)
+	}
+
 	// Ensure the Site URL is absolute.
 	if absoluteSiteURL, err := urllib.ResolveToAbsoluteURL(baseURL, feed.SiteURL); err == nil {
 		feed.SiteURL = absoluteSiteURL
@@ -115,7 +121,12 @@ func (r *rssAdapter) buildFeed(baseURL string) *model.Feed {
 			entry.Author = findFeedAuthor(&r.rss.Channel)
 		}
 
+		// Populate the entry language, falling back to the channel
+		// language: items are part of the channel's content.
 		entry.Language = language.Normalize(item.DublinCoreLanguage)
+		if entry.Language == "" {
+			entry.Language = feed.Language
+		}
 
 		// Generate the entry hash.
 		//

+ 27 - 3
internal/reader/rss/parser_test.go

@@ -2383,7 +2383,31 @@ func TestParseItemWithDublinCoreLanguage(t *testing.T) {
 	}
 }
 
-func TestParseItemWithoutDublinCoreLanguage(t *testing.T) {
+func TestParseFeedWithDublinCoreChannelLanguage(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+		<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
+		<channel>
+			<title>Example</title>
+			<link>https://example.org/</link>
+			<dc:language>fr-FR</dc:language>
+			<item>
+				<title>Item</title>
+				<link>https://example.org/item</link>
+			</item>
+		</channel>
+		</rss>`
+
+	feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if feed.Language != "fr-fr" {
+		t.Errorf("Incorrect feed language, got: %q", feed.Language)
+	}
+}
+
+func TestParseItemWithoutDublinCoreLanguageInheritsChannelLanguage(t *testing.T) {
 	data := `<?xml version="1.0" encoding="utf-8"?>
 		<rss version="2.0">
 		<channel>
@@ -2406,7 +2430,7 @@ func TestParseItemWithoutDublinCoreLanguage(t *testing.T) {
 		t.Fatalf("Expected 1 entry, got: %d", len(feed.Entries))
 	}
 
-	if feed.Entries[0].Language != "" {
-		t.Errorf("Expected empty entry language, got: %q", feed.Entries[0].Language)
+	if feed.Entries[0].Language != "en-us" {
+		t.Errorf("Expected entry to inherit channel language, got: %q", feed.Entries[0].Language)
 	}
 }

+ 4 - 0
internal/reader/rss/rss.go

@@ -38,6 +38,10 @@ type rssChannel struct {
 	// You may also use values defined by the W3C: https://www.w3.org/TR/REC-html40/struct/dirlang.html#langcodes.
 	Language string `xml:"rss language"`
 
+	// Hybrid feeds commonly declare the channel language with
+	// <dc:language> instead of <language>.
+	dublincore.DublinCoreChannelElement
+
 	// Copyright is a string indicating the copyright.
 	Copyright string `xml:"rss copyRight"`