Просмотр исходного кода

feat(reader): inherit feed language on RSS, RDF, and JSON Feed entries

Entries without their own language now take the feed-level value,
matching the behaviour introduced for Atom. For JSON Feed this is
spec-mandated: an item declares a language only when it differs from
the primary language of the feed. For RSS and RDF, items are part of
the channel's content, and API consumers previously saw an empty
entries.language even when the channel declared one.

The RSS channel now also reads <dc:language>, used as a fallback when
<language> yields no value: hybrid feeds commonly declare the channel
language via Dublin Core instead of <language>.
Fred 17 часов назад
Родитель
Commit
5f710f916d

+ 7 - 0
internal/reader/json/adapter.go

@@ -71,7 +71,14 @@ func (j *JSONAdapter) BuildFeed(baseURL string) *model.Feed {
 
 
 	for _, item := range j.jsonFeed.Items {
 	for _, item := range j.jsonFeed.Items {
 		entry := model.NewEntry()
 		entry := model.NewEntry()
+
+		// Populate the entry language. Per the JSON Feed spec, an item
+		// declares a language only when it differs from the primary
+		// language of the feed.
 		entry.Language = language.Normalize(item.Language)
 		entry.Language = language.Normalize(item.Language)
+		if entry.Language == "" {
+			entry.Language = feed.Language
+		}
 
 
 		for _, itemURL := range []string{item.URL, item.ExternalURL} {
 		for _, itemURL := range []string{item.URL, item.ExternalURL} {
 			if itemURL = strings.TrimSpace(itemURL); itemURL == "" {
 			if itemURL = strings.TrimSpace(itemURL); itemURL == "" {

+ 3 - 3
internal/reader/json/parser_test.go

@@ -1174,7 +1174,7 @@ func TestParseItemWithLanguage(t *testing.T) {
 	}
 	}
 }
 }
 
 
-func TestParseItemWithoutLanguage(t *testing.T) {
+func TestParseItemWithoutLanguageInheritsFeedLanguage(t *testing.T) {
 	data := `{
 	data := `{
 		"version": "https://jsonfeed.org/version/1.1",
 		"version": "https://jsonfeed.org/version/1.1",
 		"title": "Example",
 		"title": "Example",
@@ -1199,7 +1199,7 @@ func TestParseItemWithoutLanguage(t *testing.T) {
 		t.Fatalf("Expected 1 entry, got: %d", len(feed.Entries))
 		t.Fatalf("Expected 1 entry, got: %d", len(feed.Entries))
 	}
 	}
 
 
-	if feed.Entries[0].Language != "" {
-		t.Errorf("Expected empty entry language, got: %q", feed.Entries[0].Language)
+	if feed.Entries[0].Language != "en-us" {
+		t.Errorf("Expected entry to inherit feed language, got: %q", feed.Entries[0].Language)
 	}
 	}
 }
 }

+ 5 - 0
internal/reader/rdf/adapter.go

@@ -102,7 +102,12 @@ func (r *rdfAdapter) buildFeed(baseURL string) *model.Feed {
 			entry.Author = sanitizer.StripTags(r.rdf.Channel.DublinCoreCreator)
 			entry.Author = sanitizer.StripTags(r.rdf.Channel.DublinCoreCreator)
 		}
 		}
 
 
+		// Populate the entry language, falling back to the channel
+		// language: items are part of the channel's content.
 		entry.Language = language.Normalize(item.DublinCoreLanguage)
 		entry.Language = language.Normalize(item.DublinCoreLanguage)
+		if entry.Language == "" {
+			entry.Language = feed.Language
+		}
 
 
 		feed.Entries = append(feed.Entries, entry)
 		feed.Entries = append(feed.Entries, entry)
 	}
 	}

+ 2 - 2
internal/reader/rdf/parser_test.go

@@ -876,8 +876,8 @@ func TestParseFeedWithChannelLanguage(t *testing.T) {
 		t.Fatalf(`Unexpected entry count, got: %d`, len(feed.Entries))
 		t.Fatalf(`Unexpected entry count, got: %d`, len(feed.Entries))
 	}
 	}
 
 
-	if feed.Entries[0].Language != "" {
-		t.Errorf(`Expected empty entry language, got: %q`, feed.Entries[0].Language)
+	if feed.Entries[0].Language != "en-us" {
+		t.Errorf(`Expected entry to inherit channel language, got: %q`, feed.Entries[0].Language)
 	}
 	}
 }
 }
 
 

+ 11 - 0
internal/reader/rss/adapter.go

@@ -35,6 +35,12 @@ func (r *rssAdapter) buildFeed(baseURL string) *model.Feed {
 		Language:    language.Normalize(r.rss.Channel.Language),
 		Language:    language.Normalize(r.rss.Channel.Language),
 	}
 	}
 
 
+	// Hybrid feeds declare the channel language with <dc:language>
+	// instead of <language>.
+	if feed.Language == "" {
+		feed.Language = language.Normalize(r.rss.Channel.DublinCoreLanguage)
+	}
+
 	// Ensure the Site URL is absolute.
 	// Ensure the Site URL is absolute.
 	if absoluteSiteURL, err := urllib.ResolveToAbsoluteURL(baseURL, feed.SiteURL); err == nil {
 	if absoluteSiteURL, err := urllib.ResolveToAbsoluteURL(baseURL, feed.SiteURL); err == nil {
 		feed.SiteURL = absoluteSiteURL
 		feed.SiteURL = absoluteSiteURL
@@ -115,7 +121,12 @@ func (r *rssAdapter) buildFeed(baseURL string) *model.Feed {
 			entry.Author = findFeedAuthor(&r.rss.Channel)
 			entry.Author = findFeedAuthor(&r.rss.Channel)
 		}
 		}
 
 
+		// Populate the entry language, falling back to the channel
+		// language: items are part of the channel's content.
 		entry.Language = language.Normalize(item.DublinCoreLanguage)
 		entry.Language = language.Normalize(item.DublinCoreLanguage)
+		if entry.Language == "" {
+			entry.Language = feed.Language
+		}
 
 
 		// Generate the entry hash.
 		// Generate the entry hash.
 		//
 		//

+ 27 - 3
internal/reader/rss/parser_test.go

@@ -2383,7 +2383,31 @@ func TestParseItemWithDublinCoreLanguage(t *testing.T) {
 	}
 	}
 }
 }
 
 
-func TestParseItemWithoutDublinCoreLanguage(t *testing.T) {
+func TestParseFeedWithDublinCoreChannelLanguage(t *testing.T) {
+	data := `<?xml version="1.0" encoding="utf-8"?>
+		<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
+		<channel>
+			<title>Example</title>
+			<link>https://example.org/</link>
+			<dc:language>fr-FR</dc:language>
+			<item>
+				<title>Item</title>
+				<link>https://example.org/item</link>
+			</item>
+		</channel>
+		</rss>`
+
+	feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if feed.Language != "fr-fr" {
+		t.Errorf("Incorrect feed language, got: %q", feed.Language)
+	}
+}
+
+func TestParseItemWithoutDublinCoreLanguageInheritsChannelLanguage(t *testing.T) {
 	data := `<?xml version="1.0" encoding="utf-8"?>
 	data := `<?xml version="1.0" encoding="utf-8"?>
 		<rss version="2.0">
 		<rss version="2.0">
 		<channel>
 		<channel>
@@ -2406,7 +2430,7 @@ func TestParseItemWithoutDublinCoreLanguage(t *testing.T) {
 		t.Fatalf("Expected 1 entry, got: %d", len(feed.Entries))
 		t.Fatalf("Expected 1 entry, got: %d", len(feed.Entries))
 	}
 	}
 
 
-	if feed.Entries[0].Language != "" {
-		t.Errorf("Expected empty entry language, got: %q", feed.Entries[0].Language)
+	if feed.Entries[0].Language != "en-us" {
+		t.Errorf("Expected entry to inherit channel language, got: %q", feed.Entries[0].Language)
 	}
 	}
 }
 }

+ 4 - 0
internal/reader/rss/rss.go

@@ -38,6 +38,10 @@ type rssChannel struct {
 	// You may also use values defined by the W3C: https://www.w3.org/TR/REC-html40/struct/dirlang.html#langcodes.
 	// You may also use values defined by the W3C: https://www.w3.org/TR/REC-html40/struct/dirlang.html#langcodes.
 	Language string `xml:"rss language"`
 	Language string `xml:"rss language"`
 
 
+	// Hybrid feeds declare the channel language with <dc:language>
+	// instead of <language>.
+	dublincore.DublinCoreChannelElement
+
 	// Copyright is a string indicating the copyright.
 	// Copyright is a string indicating the copyright.
 	Copyright string `xml:"rss copyRight"`
 	Copyright string `xml:"rss copyRight"`